
Commit

Merge pull request #1337 from nanovms/heap-locking
As enumerated in #1302, there are many paths in the kernel where the kernel lock is not held and yet allocations or deallocations are being made from one of the kernel heaps. To address this, these changes introduce a "locked" kernel heap. Like the general heap, it is an mcache heap, but it is accessed through a locking wrapper heap that guards accesses with a spinlock. The "backed" heap is also accessed via a locking wrapper.

Since id_heap-specific methods wouldn't be covered by a generic wrapper, a "locking" flag is specified on id_heap creation. The virtual huge and page heaps are now locking and thus safe to use from any context.

Allocations made (and released) under protection of the kernel lock should continue to use the general heap, as this will avoid unnecessary spinlock operations.
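
To illustrate the intended split, here is a minimal caller-side sketch (not part of the commit; example_async_path and example_locked_path are hypothetical, while kernel_heaps, heap_locked(), heap_general(), allocate(), deallocate() and INVALID_ADDRESS come from the kernel code changed below):

#include <kernel.h>

/* sketch: a path that may run without the kernel lock (e.g. asynchronous or
   interrupt-driven work) allocates from the spinlock-guarded locked heap */
static void example_async_path(kernel_heaps kh)
{
    heap h = heap_locked(kh);       /* safe to use from any context */
    void *buf = allocate(h, 512);
    if (buf == INVALID_ADDRESS)
        return;
    /* ... use buf ... */
    deallocate(h, buf, 512);
}

/* sketch: code already serialized by the kernel lock keeps using the general
   heap, avoiding the extra spinlock operations */
static void example_locked_path(kernel_heaps kh)
{
    heap h = heap_general(kh);      /* kernel lock held by caller */
    void *p = allocate(h, 64);
    if (p != INVALID_ADDRESS)
        deallocate(h, p, 64);
}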
wjhun authored Nov 25, 2020
2 parents 01caba4 + 6f5931c commit c719c3f
Showing 31 changed files with 300 additions and 211 deletions.
2 changes: 2 additions & 0 deletions platform/pc/Makefile
@@ -15,6 +15,7 @@ SRCS-kernel.img= \
$(SRCDIR)/gdb/gdbutil.c \
$(SRCDIR)/http/http.c \
$(SRCDIR)/kernel/backed_heap.c \
$(SRCDIR)/kernel/locking_heap.c \
$(SRCDIR)/kernel/elf.c \
$(SRCDIR)/kernel/kernel.c \
$(SRCDIR)/kernel/klib.c \
@@ -159,6 +160,7 @@ SRCS-kernel.img+= \
endif

CFLAGS+= -DSPIN_LOCK_DEBUG_NOSMP
#CFLAGS+= -DSMP_ENABLE
#CFLAGS+= -DLWIPDIR_DEBUG -DEPOLL_DEBUG -DNETSYSCALL_DEBUG -DKERNEL_DEBUG
AFLAGS+= -felf64 -I$(OBJDIR)/
LDFLAGS+= $(KERNLDFLAGS) --undefined=_start -T linker_script
2 changes: 1 addition & 1 deletion platform/pc/boot/stage2.c
@@ -350,7 +350,7 @@ void centry()
working_p = u64_from_pointer(early_working);
working_end = working_p + EARLY_WORKING_SIZE;
kh.general = &working_heap;
init_runtime(&working_heap);
init_runtime(&working_heap, &working_heap);
init_tuples(allocate_tagged_region(&working_heap, tag_tuple));
init_symbols(allocate_tagged_region(&working_heap, tag_symbol), &working_heap);
init_sg(&working_heap);
31 changes: 18 additions & 13 deletions platform/pc/service.c
@@ -50,7 +50,7 @@ static heap allocate_tagged_region(kernel_heaps kh, u64 tag)
assert(tag < U64_FROM_BIT(VA_TAG_WIDTH));
u64 tag_base = KMEM_BASE | (tag << VA_TAG_OFFSET);
u64 tag_length = U64_FROM_BIT(VA_TAG_OFFSET);
heap v = (heap)create_id_heap(h, heap_backed(kh), tag_base, tag_length, p->pagesize);
heap v = (heap)create_id_heap(h, heap_backed(kh), tag_base, tag_length, p->pagesize, false);
assert(v != INVALID_ADDRESS);
heap backed = physically_backed(h, v, p, p->pagesize);
if (backed == INVALID_ADDRESS)
@@ -91,8 +91,7 @@ closure_function(2, 3, void, offset_block_io,
blocks.end += ds;

// split I/O to storage driver to PAGESIZE requests
heap h = heap_general(&heaps);
merge m = allocate_merge(h, sh);
merge m = allocate_merge(heap_locked(&heaps), sh);
status_handler k = apply_merge(m);
while (blocks.start < blocks.end) {
u64 span = MIN(range_span(blocks), MAX_BLOCK_IO_SIZE >> SECTOR_OFFSET);
@@ -233,7 +232,7 @@ closure_function(5, 1, void, mbr_read,
closure_function(0, 3, void, attach_storage,
block_io, r, block_io, w, u64, length)
{
heap h = heap_general(&heaps);
heap h = heap_locked(&heaps); /* to create fs under locked heap */

/* Look for partition table */
u8 *mbr = allocate(h, SECTOR_SIZE);
@@ -407,16 +406,17 @@ static void __attribute__((noinline)) init_service_new_stack()
{
kernel_heaps kh = &heaps;
heap misc = heap_general(kh);
heap locked = heap_locked(kh);
heap backed = heap_backed(kh);

/* runtime and console init */
init_debug("in init_service_new_stack");
init_debug("runtime");
init_runtime(misc);
init_runtime(misc, locked);
init_tuples(allocate_tagged_region(kh, tag_tuple));
init_symbols(allocate_tagged_region(kh, tag_symbol), misc);
init_sg(misc);
init_pagecache(misc, misc, (heap)heap_physical(kh), PAGESIZE);
init_sg(locked);
init_pagecache(locked, locked, (heap)heap_physical(kh), PAGESIZE);
unmap(0, PAGESIZE); /* unmap zero page */
reclaim_regions(); /* unmap and reclaim stage2 stack */
init_extra_prints();
@@ -474,7 +474,7 @@ static void __attribute__((noinline)) init_service_new_stack()
init_net(kh);

init_debug("probe fs, register storage drivers");
init_volumes(misc);
init_volumes(locked);
storage_attach sa = closure(misc, attach_storage);

boolean hyperv_storvsc_attached = false;
@@ -534,7 +534,7 @@ static range find_initial_pages(void)

static id_heap init_physical_id_heap(heap h)
{
id_heap physical = allocate_id_heap(h, h, PAGESIZE);
id_heap physical = allocate_id_heap(h, h, PAGESIZE, true);
boolean found = false;
init_debug("physical memory:");
for_regions(e) {
@@ -573,20 +573,25 @@ static void init_kernel_heaps()
bootstrap.dealloc = leak;

heaps.virtual_huge = create_id_heap(&bootstrap, &bootstrap, KMEM_BASE,
KMEM_LIMIT - KMEM_BASE, HUGE_PAGESIZE);
KMEM_LIMIT - KMEM_BASE, HUGE_PAGESIZE, true);
assert(heaps.virtual_huge != INVALID_ADDRESS);

heaps.virtual_page = create_id_heap_backed(&bootstrap, &bootstrap, (heap)heaps.virtual_huge, PAGESIZE);
heaps.virtual_page = create_id_heap_backed(&bootstrap, &bootstrap, (heap)heaps.virtual_huge, PAGESIZE, true);
assert(heaps.virtual_page != INVALID_ADDRESS);

heaps.physical = init_page_tables(&bootstrap, init_physical_id_heap(&bootstrap), find_initial_pages());
heaps.physical = init_physical_id_heap(&bootstrap);
assert(heaps.physical != INVALID_ADDRESS);

heaps.backed = physically_backed(&bootstrap, (heap)heaps.virtual_page, (heap)heaps.physical, PAGESIZE);
init_page_tables(&bootstrap, heaps.physical, find_initial_pages());

heaps.backed = locking_heap_wrapper(&bootstrap, physically_backed(&bootstrap, (heap)heaps.virtual_page, (heap)heaps.physical, PAGESIZE));
assert(heaps.backed != INVALID_ADDRESS);

heaps.general = allocate_mcache(&bootstrap, heaps.backed, 5, 20, PAGESIZE_2M);
assert(heaps.general != INVALID_ADDRESS);

heaps.locked = locking_heap_wrapper(&bootstrap, allocate_mcache(&bootstrap, heaps.backed, 5, 20, PAGESIZE_2M));
assert(heaps.locked != INVALID_ADDRESS);
}

static void jump_to_virtual(u64 kernel_size, u64 *pdpt, u64 *pdt) {
2 changes: 1 addition & 1 deletion src/drivers/ata-pci.c
@@ -336,6 +336,6 @@ closure_function(3, 1, boolean, ata_pci_probe,

void ata_pci_register(kernel_heaps kh, storage_attach a)
{
heap h = heap_general(kh);
heap h = heap_locked(kh);
register_pci_driver(closure(h, ata_pci_probe, h, heap_backed(kh), a));
}
6 changes: 3 additions & 3 deletions src/hyperv/storvsc/storvsc.c
@@ -889,7 +889,7 @@ static void storvsc_report_luns(struct storvsc_softc *sc, storage_attach a, u16
*/
static status storvsc_attach(kernel_heaps kh, hv_device* device, storage_attach a)
{
heap h = heap_general(kh);
heap h = heap_locked(kh);

struct storvsc_softc *sc = allocate_zero(h, sizeof(struct storvsc_softc));
assert(sc != INVALID_ADDRESS);
@@ -1258,6 +1258,6 @@ closure_function(1, 3, boolean, storvsc_probe,

void init_storvsc(kernel_heaps kh)
{
register_vmbus_driver(&gStorVscDeviceType, closure(heap_general(kh), storvsc_probe, kh));
register_vmbus_driver(&gBlkVscDeviceType, closure(heap_general(kh), storvsc_probe, kh));
register_vmbus_driver(&gStorVscDeviceType, closure(heap_locked(kh), storvsc_probe, kh));
register_vmbus_driver(&gBlkVscDeviceType, closure(heap_locked(kh), storvsc_probe, kh));
}
2 changes: 2 additions & 0 deletions src/kernel/kernel.h
@@ -222,6 +222,8 @@ extern timerheap runloop_timers;

heap physically_backed(heap meta, heap virtual, heap physical, u64 pagesize);
void physically_backed_dealloc_virtual(heap h, u64 x, bytes length);
heap locking_heap_wrapper(heap meta, heap parent);

void print_stack(context c);
void print_frame(context f);

2 changes: 1 addition & 1 deletion src/kernel/klib.c
@@ -245,7 +245,7 @@ void init_klib(kernel_heaps kh, void *fs, tuple config_root, tuple klib_md)
u64 klib_heap_size = KERNEL_LIMIT - klib_heap_start;
klib_debug("%s: creating klib heap @ 0x%lx, size 0x%lx\n", __func__,
klib_heap_start, klib_heap_size);
klib_heap = create_id_heap(h, h, klib_heap_start, klib_heap_size, PAGESIZE);
klib_heap = create_id_heap(h, h, klib_heap_start, klib_heap_size, PAGESIZE, false);
assert(klib_heap != INVALID_ADDRESS);
if (table_find(config_root, sym(klib_test))) {
klib_debug(" loading klib test\n");
72 changes: 72 additions & 0 deletions src/kernel/locking_heap.c
@@ -0,0 +1,72 @@
#include <kernel.h>

typedef struct heaplock {
struct heap h;
struct spinlock lock;
heap parent;
heap meta;
} *heaplock;

#define lock_heap(hl) u64 _flags = spin_lock_irq(&hl->lock)
#define unlock_heap(hl) spin_unlock_irq(&hl->lock, _flags)
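/* lock_heap declares _flags in the enclosing scope so unlock_heap can restore
   it; the _irq spinlock variants save and disable interrupts while the lock is
   held, so the wrapper may also be used from interrupt context */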

static u64 heaplock_alloc(heap h, bytes size)
{
heaplock hl = (heaplock)h;
lock_heap(hl);
u64 a = allocate_u64(hl->parent, size);
unlock_heap(hl);
return a;
}

static void heaplock_dealloc(heap h, u64 x, bytes size)
{
heaplock hl = (heaplock)h;
lock_heap(hl);
deallocate_u64(hl->parent, x, size);
unlock_heap(hl);
}

/* assuming no contention on destroy */
static void heaplock_destroy(heap h)
{
heaplock hl = (heaplock)h;
destroy_heap(hl->parent);
deallocate(hl->meta, hl, sizeof(*hl));
}

static bytes heaplock_allocated(heap h)
{
heaplock hl = (heaplock)h;
lock_heap(hl);
bytes count = heap_allocated(hl->parent);
unlock_heap(hl);
return count;
}

static bytes heaplock_total(heap h)
{
heaplock hl = (heaplock)h;
lock_heap(hl);
bytes count = heap_total(hl->parent);
unlock_heap(hl);
return count;
}

/* meta only used on creation */
heap locking_heap_wrapper(heap meta, heap parent)
{
heaplock hl = allocate(meta, sizeof(*hl));
if (hl == INVALID_ADDRESS)
return INVALID_ADDRESS;
hl->h.alloc = heaplock_alloc;
hl->h.dealloc = heaplock_dealloc;
hl->h.destroy = heaplock_destroy;
hl->h.allocated = heaplock_allocated;
hl->h.total = heaplock_total;
hl->h.pagesize = parent->pagesize;
hl->parent = parent;
hl->meta = meta;
spin_lock_init(&hl->lock);
return (heap)hl;
}
48 changes: 23 additions & 25 deletions src/kernel/pagecache.c
@@ -519,20 +519,22 @@ closure_function(6, 1, void, pagecache_write_sg_finish,
pn->pv->write_error = s;
}

do {
assert(pp != INVALID_ADDRESS && page_offset(pp) == pi);
pagecache_lock_state(pc);
assert(pp->write_count > 0);
if (pp->write_count-- == 1) {
if (page_state(pp) != PAGECACHE_PAGESTATE_DIRTY)
change_page_state_locked(pc, pp, PAGECACHE_PAGESTATE_NEW);
pagecache_page_queue_completions_locked(pc, pp, s);
}
pagecache_unlock_state(pc);
refcount_release(&pp->refcount);
pi++;
pp = (pagecache_page)rbnode_get_next((rbnode)pp);
} while (pi < end);
if (bound(complete)) {
do {
assert(pp != INVALID_ADDRESS && page_offset(pp) == pi);
pagecache_lock_state(pc);
assert(pp->write_count > 0);
if (pp->write_count-- == 1) {
if (page_state(pp) != PAGECACHE_PAGESTATE_DIRTY)
change_page_state_locked(pc, pp, PAGECACHE_PAGESTATE_NEW);
pagecache_page_queue_completions_locked(pc, pp, s);
}
pagecache_unlock_state(pc);
refcount_release(&pp->refcount);
pi++;
pp = (pagecache_page)rbnode_get_next((rbnode)pp);
} while (pi < end);
}
pagecache_unlock_node(pn);
closure_finish();
return;
@@ -1153,14 +1155,9 @@ closure_function(3, 3, boolean, pagecache_unmap_page_nodelocked,
assert(pp->refcount.c >= 1);
refcount_release(&pp->refcount);
} else {
/* private copy: free physical page
It's gross to go around the wrapped physical heap
specified to pagecache init, but this is really a
short-term fix; the physical heap parameter needs to be
replaced with the free page / tlb shootdown interface.
*/
deallocate_phys_page_from_traversal(phys, cache_pagesize(bound(pn)->pv->pc));
/* private copy: free physical page */
pagecache pc = bound(pn)->pv->pc;
deallocate_u64(pc->physical, phys, cache_pagesize(pc));
}
}
return true;
@@ -1320,9 +1317,10 @@ void init_pagecache(heap general, heap contiguous, heap physical, u64 pagesize)
assert(pc->zero_page != INVALID_ADDRESS);

#ifdef KERNEL
/* XXX lock, and move to locked general when ready */
pc->completions = allocate_objcache(general, contiguous,
sizeof(struct page_completion), PAGESIZE);
pc->completions =
locking_heap_wrapper(general, allocate_objcache(general, contiguous,
sizeof(struct page_completion),
PAGESIZE));
assert(pc->completions != INVALID_ADDRESS);
spin_lock_init(&pc->state_lock);
#else