...
 
Commits (13)
......@@ -152,11 +152,12 @@ are not thread safe. To work with data in multiple goroutines you must start
a transaction for each one or use locking to ensure only one goroutine accesses
a transaction at a time. Creating transaction from the `DB` is thread safe.
Read-only transactions and read-write transactions should not depend on one
another and generally shouldn't be opened simultaneously in the same goroutine.
This can cause a deadlock as the read-write transaction needs to periodically
re-map the data file but it cannot do so while a read-only transaction is open.
Transactions should not depend on one another and generally shouldn't be opened
simultaneously in the same goroutine. This can cause a deadlock as the read-write
transaction needs to periodically re-map the data file but it cannot do so while
any read-only transaction is open. Even a nested read-only transaction can cause
a deadlock, as the child transaction can block the parent transaction from releasing
its resources.
#### Read-write transactions
......
......@@ -2,7 +2,6 @@ package bolt
import (
"fmt"
"reflect"
"sort"
"unsafe"
)
......@@ -94,24 +93,8 @@ func (f *freelist) pending_count() int {
return count
}
// copyallunsafe copies a list of all free ids and all pending ids in one sorted list.
// copyall copies a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer
m := make(pgids, 0, f.pending_count())
for _, txp := range f.pending {
m = append(m, txp.ids...)
}
sort.Sort(m)
fpgids := f.getFreePageIDs()
sz := len(fpgids) + len(m)
dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(dstptr),
Len: sz,
Cap: sz,
}))
mergepgids(dst, fpgids, m)
}
func (f *freelist) copyall(dst []pgid) {
m := make(pgids, 0, f.pending_count())
for _, txp := range f.pending {
......@@ -284,21 +267,23 @@ func (f *freelist) read(p *page) {
}
// If the page.count is at the max uint16 value (64k) then it's considered
// an overflow and the size of the freelist is stored as the first element.
var idx, count uintptr = 0, uintptr(p.count)
var idx, count = 0, int(p.count)
if count == 0xFFFF {
idx = 1
count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))))
c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
count = int(c)
if count < 0 {
panic(fmt.Sprintf("leading element count %d overflows int", c))
}
}
// Copy the list of page ids from the freelist.
if count == 0 {
f.ids = nil
} else {
ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)),
Len: int(count),
Cap: int(count),
}))
var ids []pgid
data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx)
unsafeSlice(unsafe.Pointer(&ids), data, count)
// copy the ids, so we don't modify on the freelist page directly
idsCopy := make([]pgid, count)
......@@ -331,16 +316,22 @@ func (f *freelist) write(p *page) error {
// The page.count can only hold up to 64k elements so if we overflow that
// number then we handle it by putting the size in the first element.
lenids := f.count()
if lenids == 0 {
p.count = uint16(lenids)
} else if lenids < 0xFFFF {
p.count = uint16(lenids)
f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
l := f.count()
if l == 0 {
p.count = uint16(l)
} else if l < 0xFFFF {
p.count = uint16(l)
var ids []pgid
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&ids), data, l)
f.copyall(ids)
} else {
p.count = 0xFFFF
*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids)
f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0))))
var ids []pgid
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&ids), data, l+1)
ids[0] = pgid(l)
f.copyall(ids[1:])
}
return nil
......
package bolt
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"testing"
)
func createDb(t *testing.T) (*DB, func()) {
// First, create a temporary directory to be used for the duration of
// this test.
tempDirName, err := ioutil.TempDir("", "bboltmemtest")
if err != nil {
t.Fatalf("error creating temp dir: %v", err)
}
path := filepath.Join(tempDirName, "testdb.db")
bdb, err := Open(path, 0600, nil)
if err != nil {
t.Fatalf("error creating bbolt db: %v", err)
}
cleanup := func() {
bdb.Close()
os.RemoveAll(tempDirName)
}
return bdb, cleanup
}
func createAndPutKeys(t *testing.T) {
t.Parallel()
db, cleanup := createDb(t)
defer cleanup()
bucketName := []byte("bucket")
for i := 0; i < 100; i++ {
err := db.Update(func(tx *Tx) error {
nodes, err := tx.CreateBucketIfNotExists(bucketName)
if err != nil {
return err
}
var key [16]byte
rand.Read(key[:])
if err := nodes.Put(key[:], nil); err != nil {
return err
}
return nil
})
if err != nil {
t.Fatal(err)
}
}
}
func TestManyDBs(t *testing.T) {
for i := 0; i < 100; i++ {
t.Run(fmt.Sprintf("%d", i), createAndPutKeys)
}
}
......@@ -3,7 +3,6 @@ package bolt
import (
"bytes"
"fmt"
"reflect"
"sort"
"unsafe"
)
......@@ -208,36 +207,32 @@ func (n *node) write(p *page) {
}
// Loop over each item and write it to the page.
bp := uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
// off tracks the offset into p of the start of the next data.
off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
for i, item := range n.inodes {
_assert(len(item.key) > 0, "write: zero-length inode key")
// Create a slice to write into of needed size and advance
// byte pointer for next iteration.
sz := len(item.key) + len(item.value)
b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
off += uintptr(sz)
// Write the page element.
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem)))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.flags = item.flags
elem.ksize = uint32(len(item.key))
elem.vsize = uint32(len(item.value))
} else {
elem := p.branchPageElement(uint16(i))
elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem)))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.ksize = uint32(len(item.key))
elem.pgid = item.pgid
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// Create a slice to write into of needed size and advance
// byte pointer for next iteration.
klen, vlen := len(item.key), len(item.value)
sz := klen + vlen
b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: bp,
Len: sz,
Cap: sz,
}))
bp += uintptr(sz)
// Write data for the element to the end of the page.
l := copy(b, item.key)
copy(b[l:], item.value)
......
......@@ -44,9 +44,9 @@ func TestNode_read_LeafPage(t *testing.T) {
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
// Write data for the nodes at the end.
data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
copy(data[:], "barfooz")
copy(data[7:], "helloworldbye")
const s = "barfoozhelloworldbye"
data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s))
copy(data, s)
// Deserialize page into a leaf.
n := &node{}
......
......@@ -3,7 +3,6 @@ package bolt
import (
"fmt"
"os"
"reflect"
"sort"
"unsafe"
)
......@@ -51,13 +50,13 @@ func (p *page) typ() string {
// meta returns a pointer to the metadata section of the page.
func (p *page) meta() *meta {
return (*meta)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
}
// leafPageElement retrieves the leaf node by index
func (p *page) leafPageElement(index uint16) *leafPageElement {
off := uintptr(index) * unsafe.Sizeof(leafPageElement{})
return (*leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off))
return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
leafPageElementSize, int(index)))
}
// leafPageElements retrieves a list of leaf nodes.
......@@ -65,17 +64,16 @@ func (p *page) leafPageElements() []leafPageElement {
if p.count == 0 {
return nil
}
return *(*[]leafPageElement)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p),
Len: int(p.count),
Cap: int(p.count),
}))
var elems []leafPageElement
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&elems), data, int(p.count))
return elems
}
// branchPageElement retrieves the branch node by index
func (p *page) branchPageElement(index uint16) *branchPageElement {
off := uintptr(index) * unsafe.Sizeof(branchPageElement{})
return (*branchPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off))
return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
unsafe.Sizeof(branchPageElement{}), int(index)))
}
// branchPageElements retrieves a list of branch nodes.
......@@ -83,20 +81,15 @@ func (p *page) branchPageElements() []branchPageElement {
if p.count == 0 {
return nil
}
return *(*[]branchPageElement)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p),
Len: int(p.count),
Cap: int(p.count),
}))
var elems []branchPageElement
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&elems), data, int(p.count))
return elems
}
// dump writes n bytes of the page to STDERR as hex output.
func (p *page) hexdump(n int) {
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(p)),
Len: n,
Cap: n,
}))
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
fmt.Fprintf(os.Stderr, "%x\n", buf)
}
......@@ -115,11 +108,7 @@ type branchPageElement struct {
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos),
Len: int(n.ksize),
Cap: int(n.ksize),
}))
return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
}
// leafPageElement represents a node on a leaf page.
......@@ -132,20 +121,16 @@ type leafPageElement struct {
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos),
Len: int(n.ksize),
Cap: int(n.ksize),
}))
i := int(n.pos)
j := i + int(n.ksize)
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos) + uintptr(n.ksize),
Len: int(n.vsize),
Cap: int(n.vsize),
}))
i := int(n.pos) + int(n.ksize)
j := i + int(n.vsize)
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
}
// PageInfo represents human readable information about a page.
......
......@@ -4,7 +4,6 @@ import (
"fmt"
"io"
"os"
"reflect"
"sort"
"strings"
"time"
......@@ -524,24 +523,18 @@ func (tx *Tx) write() error {
// Write pages to disk in order.
for _, p := range pages {
size := (int(p.overflow) + 1) * tx.db.pageSize
rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
offset := int64(p.id) * int64(tx.db.pageSize)
var written uintptr
// Write out page in "max allocation" sized chunks.
ptr := uintptr(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
sz := rem
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
// Write chunk to disk.
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: ptr,
Len: sz,
Cap: sz,
}))
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
......@@ -550,14 +543,14 @@ func (tx *Tx) write() error {
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
rem -= sz
if rem == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr += uintptr(sz)
written += uintptr(sz)
}
}
......@@ -576,11 +569,7 @@ func (tx *Tx) write() error {
continue
}
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(unsafe.Pointer(p)),
Len: tx.db.pageSize,
Cap: tx.db.pageSize,
}))
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
for i := range buf {
......
package bolt
import (
"reflect"
"unsafe"
)
func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset)
}
func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz)
}
func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
//
// This memory is not allocated from C, but it is unmanaged by Go's
// garbage collector and should behave similarly, and the compiler
// should produce similar code. Note that this conversion allows a
// subslice to begin after the base address, with an optional offset,
// while the URL above does not cover this case and only slices from
// index 0. However, the wiki never says that the address must be to
// the beginning of a C allocation (or even that malloc was used at
// all), so this is believed to be correct.
return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j]
}
// unsafeSlice modifies the data, len, and cap of a slice variable pointed to by
// the slice parameter. This helper should be used over other direct
// manipulation of reflect.SliceHeader to prevent misuse, namely, converting
// from reflect.SliceHeader to a Go slice type.
func unsafeSlice(slice, data unsafe.Pointer, len int) {
s := (*reflect.SliceHeader)(slice)
s.Data = uintptr(data)
s.Cap = len
s.Len = len
}