Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve memory alignment #780

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,6 @@ func cloneBytes(v []byte) []byte {

type BucketStructure struct {
Name string `json:"name"` // name of the bucket
KeyN int `json:"keyN"` // number of key/value pairs
Children []BucketStructure `json:"buckets,omitempty"` // child buckets
KeyN int `json:"keyN"` // number of key/value pairs
}
4 changes: 2 additions & 2 deletions cmd/bbolt/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,10 @@ func newPageItemCommand(m *Main) *pageItemCommand {
}

type pageItemOptions struct {
format string
help bool
keyOnly bool
valueOnly bool
format string
}

// Run executes the command.
Expand Down Expand Up @@ -1617,8 +1617,8 @@ func (r *BenchResults) OpsPerSecond() int {
}

type PageError struct {
ID int
Err error
ID int
}

func (e *PageError) Error() string {
Expand Down
181 changes: 93 additions & 88 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,23 @@ type DB struct {
// refer to discussion in https://github.com/etcd-io/bbolt/issues/577.
stats Stats

// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
// debugging purposes.
StrictMode bool
pagePool sync.Pool

// Setting the NoSync flag will cause the database to skip fsync()
// calls after each commit. This can be useful when bulk loading data
// into a database and you can restart the bulk load in the event of
// a system failure or database corruption. Do not set this flag for
// normal use.
//
// If the package global IgnoreNoSync constant is true, this value is
// ignored. See the comment on that constant for more details.
//
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
logger Logger

// When true, skips syncing freelist to disk. This improves the database
// write performance under normal operation, but requires a full database
// re-sync during recovery.
NoFreelistSync bool
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
data *[maxMapSize]byte
meta0 *common.Meta
meta1 *common.Meta
rwtx *Tx

freelist *freelist
batch *batch

ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}

// FreelistType sets the backend freelist type. There are two options: Array, which is simple but suffers
// dramatic performance degradation if the database is large and freelist fragmentation is common.
Expand All @@ -71,18 +66,12 @@ type DB struct {
// The default type is array
FreelistType FreelistType

// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool

// When `true`, bbolt will always load the free pages when opening the DB.
// When opening the DB in write mode, this flag is always automatically
// set to `true`.
PreLoadFreelist bool
path string
// `dataref` isn't used at all on Windows, and the golangci-lint
// always fails on Windows platform.
//nolint
dataref []byte // mmap'ed readonly, write throws SEGV
txs []*Tx

// If you want to read the entire database fast, you can set MmapFlag to
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
Expand All @@ -109,46 +98,61 @@ type DB struct {
// of truncate() and fsync() when growing the data file.
AllocSize int

// Mlock locks database file in memory when set to true.
// It prevents major page faults, however used memory can't be reclaimed.
//
// Supported only on Unix via mlock/munlock syscalls.
Mlock bool

logger Logger

path string
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
// `dataref` isn't used at all on Windows, and the golangci-lint
// always fails on Windows platform.
//nolint
dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte
datasz int
meta0 *common.Meta
meta1 *common.Meta
pageSize int
opened bool
rwtx *Tx
txs []*Tx
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.

freelist *freelist
freelistLoad sync.Once

pagePool sync.Pool

batchMu sync.Mutex
batch *batch

rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.
rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.

ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
// debugging purposes.
StrictMode bool

// Setting the NoSync flag will cause the database to skip fsync()
// calls after each commit. This can be useful when bulk loading data
// into a database and you can restart the bulk load in the event of
// a system failure or database corruption. Do not set this flag for
// normal use.
//
// If the package global IgnoreNoSync constant is true, this value is
// ignored. See the comment on that constant for more details.
//
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool

// When true, skips syncing freelist to disk. This improves the database
// write performance under normal operation, but requires a full database
// re-sync during recovery.
NoFreelistSync bool

// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool

// When `true`, bbolt will always load the free pages when opening the DB.
// When opening the DB in write mode, this flag is always automatically
// set to `true`.
PreLoadFreelist bool

// Mlock locks database file in memory when set to true.
// It prevents major page faults, however used memory can't be reclaimed.
//
// Supported only on Unix via mlock/munlock syscalls.
Mlock bool

opened bool

// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
Expand Down Expand Up @@ -995,8 +999,8 @@ type call struct {
type batch struct {
db *DB
timer *time.Timer
start sync.Once
calls []call
start sync.Once
}

// trigger runs the batch if it hasn't already been run.
Expand Down Expand Up @@ -1263,21 +1267,13 @@ func (db *DB) freepages() []common.Pgid {

// Options represents the options that can be set when opening a database.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely.
Timeout time.Duration

// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool

// Do not sync freelist to disk. This improves the database write performance
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool
// Logger is the logger used for bbolt.
Logger Logger

// PreLoadFreelist sets whether to load the free pages when opening
// the db file. Note when opening db in write mode, bbolt will always
// load the free pages.
PreLoadFreelist bool
// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)

// FreelistType sets the backend freelist type. There are two options: Array, which is simple but suffers
// dramatic performance degradation if the database is large and freelist fragmentation is common.
Expand All @@ -1286,9 +1282,9 @@ type Options struct {
// The default type is array
FreelistType FreelistType

// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely.
Timeout time.Duration

// Sets the DB.MmapFlags flag before memory mapping the file.
MmapFlags int
Expand All @@ -1306,22 +1302,31 @@ type Options struct {
// PageSize overrides the default OS page size.
PageSize int

// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool

// Do not sync freelist to disk. This improves the database write performance
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool

// PreLoadFreelist sets whether to load the free pages when opening
// the db file. Note when opening db in write mode, bbolt will always
// load the free pages.
PreLoadFreelist bool

// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool

// NoSync sets the initial value of DB.NoSync. Normally this can just be
// set directly on the DB itself when returned from Open(), but this option
// is useful in APIs which expose Options but not the underlying DB.
NoSync bool

// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)

// Mlock locks database file in memory when set to true.
// It prevents potential page faults, however
// used memory can't be reclaimed. (UNIX only)
Mlock bool

// Logger is the logger used for bbolt.
Logger Logger
}

func (o *Options) String() string {
Expand Down
6 changes: 3 additions & 3 deletions freelist.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ type pidSet map[common.Pgid]struct{}
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
freelistType FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
readonlyTXIDs []common.Txid // all readonly transaction IDs.
allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid.
pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids.
Expand All @@ -38,6 +35,9 @@ type freelist struct {
mergeSpans func(ids common.Pgids) // the mergeSpan func
getFreePageIDs func() []common.Pgid // get free pgids func
readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist
freelistType FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
readonlyTXIDs []common.Txid // all readonly transaction IDs.
}

// newFreelist returns an empty, initialized freelist.
Expand Down
4 changes: 2 additions & 2 deletions internal/btesting/btesting.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ const (

// DB is a test wrapper for bolt.DB.
type DB struct {
t testing.TB
*bolt.DB
f string
o *bolt.Options
t testing.TB
f string
}

// MustCreateDB returns a new, open DB at a temporary location.
Expand Down
4 changes: 2 additions & 2 deletions internal/common/inode.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ import "unsafe"
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
type Inode struct {
flags uint32
pgid Pgid
key []byte
value []byte
pgid Pgid
flags uint32
}

type Inodes []Inode
Expand Down
2 changes: 1 addition & 1 deletion internal/common/page.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,8 @@ func (n *leafPageElement) Bucket() *InBucket {

// PageInfo represents human readable information about a page.
type PageInfo struct {
ID int
Type string
ID int
Count int
OverflowCount int
}
Expand Down
10 changes: 5 additions & 5 deletions node.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ import (
// node represents an in-memory, deserialized page.
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid common.Pgid
parent *node
key []byte
children nodes
inodes common.Inodes
pgid common.Pgid
isLeaf bool
unbalanced bool
spilled bool
}

// root returns the top-level node this node is attached to.
Expand Down