Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add native lock-free dynamic heap allocator #4749

Draft
wants to merge 21 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:netbsd_amd64 -no-entry-point
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:netbsd_arm64 -no-entry-point
./odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
Expand Down Expand Up @@ -65,6 +66,7 @@ jobs:
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:freebsd_amd64 -no-entry-point
./odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
Expand Down Expand Up @@ -123,6 +125,8 @@ jobs:
run: ./odin check examples/all -strict-style -vet -disallow-do
- name: Odin check vendor/sdl3
run: ./odin check vendor/sdl3 -strict-style -vet -disallow-do -no-entry-point
- name: Odin heap allocator tests
run: ./odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -sanitize:thread -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Optimized Core library tests
Expand Down Expand Up @@ -211,6 +215,11 @@ jobs:
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point
- name: Odin heap allocator tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin run tests/heap_allocator -vet -strict-style -disallow-do -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
- name: Core library tests
shell: cmd
run: |
Expand Down Expand Up @@ -305,6 +314,9 @@ jobs:
- name: Odin run -debug
run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

- name: Odin heap allocator tests
run: ./odin run tests/heap_allocator -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath -define:ODIN_DEBUG_HEAP=true -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests

- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

Expand Down
3 changes: 0 additions & 3 deletions base/runtime/default_allocators_general.odin
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
} else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
default_allocator_proc :: panic_allocator_proc
default_allocator :: panic_allocator
} else when ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
default_allocator :: default_wasm_allocator
default_allocator_proc :: wasm_allocator_proc
} else {
default_allocator :: heap_allocator
default_allocator_proc :: heap_allocator_proc
Expand Down
137 changes: 47 additions & 90 deletions base/runtime/heap_allocator.odin
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#+build !js
#+build !orca
#+build !wasi
package runtime

import "base:intrinsics"
Expand All @@ -9,111 +12,65 @@ heap_allocator :: proc() -> Allocator {
}
}

heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
//
// NOTE(tetra, 2020-01-14): The heap doesn't respect alignment.
// Instead, we overallocate by `alignment + size_of(rawptr) - 1`, and insert
// padding. We also store the original pointer returned by heap_alloc right before
// the pointer we return to the user.
//

aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr, old_size: int, zero_memory := true) -> ([]byte, Allocator_Error) {
// NOTE(flysand): We need to reserve enough space for alignment, which
// includes the user data itself, the space to store the pointer to
// allocation start, as well as the padding required to align both
// the user data and the pointer.
a := max(alignment, align_of(rawptr))
space := a-1 + size_of(rawptr) + size
allocated_mem: rawptr

force_copy := old_ptr != nil && alignment > align_of(rawptr)

if old_ptr != nil && !force_copy {
original_old_ptr := ([^]rawptr)(old_ptr)[-1]
allocated_mem = heap_resize(original_old_ptr, space)
} else {
allocated_mem = heap_alloc(space, zero_memory)
}
aligned_mem := rawptr(([^]u8)(allocated_mem)[size_of(rawptr):])

ptr := uintptr(aligned_mem)
aligned_ptr := (ptr + uintptr(a)-1) & ~(uintptr(a)-1)
if allocated_mem == nil {
aligned_free(old_ptr)
aligned_free(allocated_mem)
heap_allocator_proc :: proc(
allocator_data: rawptr,
mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr,
old_size: int,
loc := #caller_location,
) -> ([]byte, Allocator_Error) {
assert(alignment <= HEAP_MAX_ALIGNMENT, "Heap allocation alignment beyond HEAP_MAX_ALIGNMENT bytes is not supported.", loc = loc)
assert(alignment >= 0, "Alignment must be greater than or equal to zero.", loc = loc)
switch mode {
case .Alloc:
// All allocations are aligned to at least their size up to
// `HEAP_MAX_ALIGNMENT`, and by virtue of binary arithmetic, any
// address aligned to N will also be aligned to N>>1.
//
// Therefore, we have no book-keeping costs for alignment.
ptr := heap_alloc(max(size, alignment))
if ptr == nil {
return nil, .Out_Of_Memory
}

aligned_mem = rawptr(aligned_ptr)
([^]rawptr)(aligned_mem)[-1] = allocated_mem

if force_copy {
mem_copy_non_overlapping(aligned_mem, old_ptr, min(old_size, size))
aligned_free(old_ptr)
}

return byte_slice(aligned_mem, size), nil
}

aligned_free :: proc(p: rawptr) {
if p != nil {
heap_free(([^]rawptr)(p)[-1])
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Alloc_Non_Zeroed:
ptr := heap_alloc(max(size, alignment), zero_memory = false)
if ptr == nil {
return nil, .Out_Of_Memory
}
}

aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
if p == nil {
return aligned_alloc(new_size, new_alignment, nil, old_size, zero_memory)
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Resize:
ptr := heap_resize(old_memory, old_size, max(size, alignment))
if ptr == nil {
return nil, .Out_Of_Memory
}

new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return

// NOTE: heap_resize does not zero the new memory, so we do it
if zero_memory && new_size > old_size {
new_region := raw_data(new_memory[old_size:])
intrinsics.mem_zero(new_region, new_size - old_size)
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Resize_Non_Zeroed:
ptr := heap_resize(old_memory, old_size, max(size, alignment), zero_memory = false)
if ptr == nil {
return nil, .Out_Of_Memory
}
return
}

switch mode {
case .Alloc, .Alloc_Non_Zeroed:
return aligned_alloc(size, alignment, nil, 0, mode == .Alloc)

return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Free:
aligned_free(old_memory)

heap_free(old_memory)
case .Free_All:
return nil, .Mode_Not_Implemented

case .Resize, .Resize_Non_Zeroed:
return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)

case .Query_Features:
set := (^Allocator_Mode_Set)(old_memory)
if set != nil {
set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features}
set^ = {
.Alloc,
.Alloc_Non_Zeroed,
.Resize,
.Resize_Non_Zeroed,
.Free,
.Query_Features,
}
}
return nil, nil

case .Query_Info:
return nil, .Mode_Not_Implemented
}

return nil, nil
}


// Thin contextless wrapper: forwards `size` and `zero_memory` unchanged to
// `_heap_alloc` (defined outside this view) and returns its result.
// `zero_memory` defaults to true.
heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
return _heap_alloc(size, zero_memory)
}

// Thin contextless wrapper: forwards `ptr` and `new_size` unchanged to
// `_heap_resize` (defined outside this view) and returns its result.
heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
return _heap_resize(ptr, new_size)
}

// Thin contextless wrapper: forwards `ptr` unchanged to `_heap_free`
// (defined outside this view).
heap_free :: proc "contextless" (ptr: rawptr) {
_heap_free(ptr)
}
92 changes: 92 additions & 0 deletions base/runtime/heap_allocator_control.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#+build !js
#+build !orca
#+build !wasi
package runtime

import "base:intrinsics"

/*
Fold every pending remote free into its slab, then release each slab that has
become completely empty.

No retention heuristic is consulted here: any slab whose bins are all free is
released, and the superpage itself is released if every slab is free and its
cache block is not in use.

Returns true if the superpage was emptied and freed.
*/
@(private)
compact_superpage :: proc "contextless" (superpage: ^Heap_Superpage) -> (freed: bool) {
	slab_index := 0
	for slab_index < HEAP_SLAB_COUNT {
		slab := heap_superpage_index_slab(superpage, slab_index)

		// Advance the index up front, while `bin_size` still describes this
		// slab; the slab may be freed further down.
		if slab.bin_size > HEAP_MAX_BIN_SIZE {
			// A wide slab spans several contiguous slabs; step over all of them.
			slab_index += heap_slabs_needed_for_size(slab.bin_size)
		} else {
			slab_index += 1
			if slab.bin_size == 0 {
				// Nothing to do for a slab with no bin size assigned.
				continue
			}
		}

		// Sample this before merging: the merge can change `free_bins`.
		was_cached := slab.free_bins > 0
		heap_merge_remote_frees(slab)

		if slab.free_bins != slab.max_bins {
			// Still has live bins; leave it alone.
			continue
		}

		if slab.bin_size > HEAP_MAX_BIN_SIZE {
			heap_free_wide_slab(superpage, slab)
			continue
		}

		if was_cached {
			heap_cache_remove_slab(slab, heap_bin_size_to_rank(slab.bin_size))
		}
		heap_free_slab(superpage, slab)
	}

	freed = superpage.free_slabs == HEAP_SLAB_COUNT && !superpage.cache_block.in_use
	if freed {
		heap_free_superpage(superpage)
	}
	return
}

/*
Walk the list of superpages starting at `local_heap` and compact each one:
remote frees are merged, and every slab and superpage that ends up empty is
released.

This bypasses any heuristics that keep slabs set up.
*/
compact_heap :: proc "contextless" () {
	current := local_heap
	for current != nil {
		// Read the link before compacting: `compact_superpage` may free
		// `current`, after which its `next` field must not be touched.
		following := current.next
		compact_superpage(current)
		current = following
	}
}

/*
Free any empty superpages in the orphanage.

Up to 128 superpages are popped from the orphanage into a fixed-size stack
buffer, each one is compacted, and every superpage that was not freed by
compaction is pushed back.

This procedure assumes there won't ever be more than 128 superpages in the
orphanage; any beyond that are left in place and not examined by this call.
This limitation is due to the avoidance of heap allocation.
*/
compact_heap_orphanage :: proc "contextless" () {
	// First, try to empty the orphanage so that we can evaluate each superpage.
	// The buffer is zero-initialized, so every entry after the last popped
	// orphan stays nil.
	buffer: [128]^Heap_Superpage
	for &slot in buffer {
		slot = heap_pop_orphan()
		if slot == nil {
			break
		}
	}

	// Next, compact each superpage and push it back to the orphanage if it was
	// not freed.
	for superpage in buffer {
		if superpage == nil {
			// Reached the end of the popped orphans. Without this guard a nil
			// pointer would be handed to `compact_superpage` (which
			// dereferences it) and then pushed back onto the orphanage.
			break
		}
		if !compact_superpage(superpage) {
			heap_push_orphan(superpage)
		}
	}
}
Loading
Loading