diff --git a/Makefile b/Makefile
index 4ac857e9..557af34d 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,7 @@ CRUST_FLAGS=-g --edition 2021 -C opt-level=0 -C panic="abort"
 
 RSS=\
 	$(SRC)/arena.rs \
+	$(SRC)/hashtable.rs \
 	$(SRC)/b.rs \
 	$(SRC)/ir.rs \
 	$(SRC)/crust.rs \
diff --git a/src/b.rs b/src/b.rs
index 0193e3da..a9941aa9 100644
--- a/src/b.rs
+++ b/src/b.rs
@@ -38,16 +38,18 @@ pub mod params;
 pub mod ir;
 pub mod time;
 pub mod shlex;
+pub mod hashtable;
 
 use core::ffi::*;
 use core::mem::zeroed;
 use core::ptr;
 use core::slice;
 use core::cmp;
+use crust::Str;
+use hashtable::{HashTable, HtEntry};
 use nob::*;
 use flag::*;
 use crust::libc::*;
-use crust::assoc_lookup_cstr;
 use arena::Arena;
 use targets::*;
 use lexer::{Lexer, Loc, Token};
@@ -61,8 +63,8 @@ pub unsafe fn add_libb_files(path: *const c_char, target: *const c_char, inputs:
         // why is rust like this.
         return Some(false);
     }
-    include_path_if_exists(inputs, arena::sprintf(&mut (*c).arena, c!("%s/all.b"), path));
-    include_path_if_exists(inputs, arena::sprintf(&mut (*c).arena, c!("%s/%s.b"), path, target));
+    include_path_if_exists(inputs, arena::sprintf(&mut (*c).interner.arena, c!("%s/all.b"), path));
+    include_path_if_exists(inputs, arena::sprintf(&mut (*c).interner.arena, c!("%s/%s.b"), path, target));
     Some(true)
 }
 
@@ -139,38 +141,32 @@ pub enum Storage {
 }
 
 #[derive(Clone, Copy)]
-pub struct Var {
-    pub name: *const c_char,
+pub struct VarData {
     pub loc: Loc,
     pub storage: Storage,
 }
 
-pub unsafe fn scope_push(vars: *mut Array<Array<Var>>) {
+pub unsafe fn scope_push(vars: *mut Array<HashTable<*const c_char, VarData>>) {
     if (*vars).count < (*vars).capacity { // Reusing already allocated scopes
         (*vars).count += 1;
-        (*da_last_mut(vars).expect("There should be always at least the global scope")).count = 0;
+        let last_scope = da_last_mut(vars).expect("There should be always at least the global scope");
+        HashTable::clear(last_scope);
     } else {
         da_append(vars, zeroed());
     }
 }
 
-pub unsafe fn scope_pop(vars: *mut Array<Array<Var>>) {
+pub unsafe fn scope_pop(vars: *mut Array<HashTable<*const c_char, VarData>>) {
     assert!((*vars).count > 0);
     (*vars).count -= 1;
 }
 
-pub unsafe fn find_var_near(vars: *const Array<Var>, name: *const c_char) -> *const Var {
-    for i in 0..(*vars).count {
-        let var = (*vars).items.add(i);
-        if strcmp((*var).name, name) == 0 {
-            return var
-        }
-    }
-    ptr::null()
+pub unsafe fn find_var_near(vars: *const HashTable<*const c_char, VarData>, name: *const c_char) -> *const VarData {
+    HashTable::get(vars, name).unwrap_or(ptr::null())
 }
 
-pub unsafe fn find_var_deep(vars: *const Array<Array<Var>>, name: *const c_char) -> *const Var {
+pub unsafe fn find_var_deep(vars: *const Array<HashTable<*const c_char, VarData>>, name: *const c_char) -> *const VarData {
     let mut i = (*vars).count;
     while i > 0 {
         let var = find_var_near((*vars).items.add(i-1), name);
@@ -195,7 +191,7 @@ pub unsafe fn declare_var(c: *mut Compiler, name: *const c_char, loc: Loc, stora
         da_append(&mut (*c).func_scope_events, ScopeEvent::Declare {name, index});
     }
 
-    da_append(scope, Var {name, loc, storage});
+    HashTable::insert(scope, name, VarData {loc, storage});
     Some(())
 }
 
@@ -329,17 +325,26 @@ pub unsafe fn allocate_auto_var(t: *mut AutoVarsAtor) -> usize {
 }
 
 pub unsafe fn compile_string(string: *const c_char, c: *mut Compiler) -> usize {
-    let offset = (*c).program.data.count;
-    let string_len = strlen(string);
-    da_append_many(&mut (*c).program.data, slice::from_raw_parts(string as *const u8, string_len));
-    // TODO: Strings in B are not NULL-terminated.
-    // They are terminated with symbol '*e' ('*' is escape character akin to '\' in C) which according to the
-    // spec is called just "end-of-file" without any elaboration on what its value is. Maybe it had a specific
-    // value on PDP that was a common knowledge at the time? In any case that breaks compatibility with
-    // libc. While the language is still in development we gonna terminate it with 0. We will make it
-    // "spec complaint" later.
-    da_append(&mut (*c).program.data, 0); // NULL-terminator
-    offset
+    // TODO: Don't use second hashtable, which requires changes to the API, returning string address
+    // instead of data offset
+    let string = intern(&mut (*c).interner, string);
+    match HashTable::find(&(*c).string_offset, string) {
+        HtEntry::Occupied(entry) => (*entry).value,
+        HtEntry::Vacant(entry) => {
+            let offset = (*c).program.data.count;
+            let string_len = strlen(string);
+            da_append_many(&mut (*c).program.data, slice::from_raw_parts(string as *const u8, string_len));
+            // TODO: Strings in B are not NULL-terminated.
+            // They are terminated with symbol '*e' ('*' is escape character akin to '\' in C) which according to the
+            // spec is called just "end-of-file" without any elaboration on what its value is. Maybe it had a specific
+            // value on PDP that was a common knowledge at the time? In any case that breaks compatibility with
+            // libc. While the language is still in development we gonna terminate it with 0. We will make it
+            // "spec complaint" later.
+            da_append(&mut (*c).program.data, 0); // NULL-terminator
+            HashTable::insert_new(&mut (*c).string_offset, entry, string, offset);
+            offset
+        },
+    }
 }
 
 pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Option<(Arg, bool)> {
@@ -415,7 +420,7 @@ pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Opt
         }
         Token::CharLit | Token::IntLit => Some((Arg::Literal((*l).int_number), false)),
         Token::ID => {
-            let name = arena::strdup(&mut (*c).arena, (*l).string);
+            let name = intern(&mut (*c).interner, (*l).string);
 
             let var_def = find_var_deep(&mut (*c).vars, name);
             if var_def.is_null() {
@@ -674,7 +679,7 @@ pub unsafe fn compile_asm_stmts(l: *mut Lexer, c: *mut Compiler, stmts: *mut Arr
         get_and_expect_token(l, Token::String)?;
         match (*l).token {
             Token::String => {
-                let line = arena::strdup(&mut (*c).arena, (*l).string);
+                let line = intern(&mut (*c).interner, (*l).string);
                 let loc = (*l).loc;
                 da_append(stmts, AsmStmt { line, loc });
             }
@@ -712,7 +717,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
         Token::Extrn => {
            while (*l).token != Token::SemiColon {
                 get_and_expect_token(l, Token::ID)?;
-                let name = arena::strdup(&mut (*c).arena, (*l).string);
+                let name = intern(&mut (*c).interner, (*l).string);
                 name_declare_if_not_exists(&mut (*c).program.extrns, name);
                 declare_var(c, name, (*l).loc, Storage::External {name})?;
                 get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?;
@@ -722,7 +727,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
         Token::Auto => {
             while (*l).token != Token::SemiColon {
                 get_and_expect_token(l, Token::ID)?;
-                let name = arena::strdup(&mut (*c).arena, (*l).string);
+                let name = intern(&mut (*c).interner, (*l).string);
                 let index = allocate_auto_var(&mut (*c).auto_vars_ator);
                 declare_var(c, name, (*l).loc, Storage::Auto {index})?;
                 get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma, Token::IntLit, Token::CharLit])?;
@@ -805,7 +810,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
         }
         Token::Goto => {
             get_and_expect_token(l, Token::ID)?;
-            let name = arena::strdup(&mut (*c).arena, (*l).string);
+            let name = intern(&mut (*c).interner, (*l).string);
             let loc = (*l).loc;
             let addr = (*c).func_body.count;
             da_append(&mut (*c).func_gotos, Goto {name, loc, addr});
@@ -879,7 +884,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
         }
         _ => {
             if (*l).token == Token::ID {
-                let name = arena::strdup(&mut (*c).arena, (*l).string);
+                let name = intern(&mut (*c).interner, (*l).string);
                 let name_loc = (*l).loc;
                 lexer::get_token(l)?;
                 if (*l).token == Token::Colon {
@@ -913,10 +918,28 @@ pub struct Switch {
     pub cond: usize,
 }
 
+/// Deduplicates and prolongs strings lifetime
+#[derive(Clone, Copy)]
+pub struct StringInterner {
+    pub deduper: HashTable<Str, ()>,
+    pub arena: Arena,
+}
+
+pub unsafe fn intern(interner: *mut StringInterner, string: *const c_char) -> *mut c_char {
+    match HashTable::find(&(*interner).deduper, Str(string)) {
+        HtEntry::Occupied(entry) => (*entry).key.0 as *mut c_char,
+        HtEntry::Vacant(entry) => {
+            let ptr = arena::strdup(&mut (*interner).arena, string);
+            HashTable::insert_new(&mut (*interner).deduper, entry, Str(ptr), ());
+            ptr
+        }
+    }
+}
+
 #[derive(Clone, Copy)]
 pub struct Compiler {
     pub program: Program,
-    pub vars: Array<Array<Var>>,
+    pub vars: Array<HashTable<*const c_char, VarData>>,
     pub auto_vars_ator: AutoVarsAtor,
     pub func_body: Array<OpWithLocation>,
     pub func_goto_labels: Array,
@@ -926,17 +949,8 @@ pub struct Compiler {
     pub used_funcs: Array,
     pub op_label_count: usize,
     pub switch_stack: Array<Switch>,
-    /// Arena into which the Compiler allocates all the names and
-    /// objects that need to live for the duration of the
-    /// compilation. Even if some object/names don't need to live that
-    /// long (for example, function labels need to live only for the
-    /// duration of that function compilation), just letting them live
-    /// longer makes the memory management easier.
-    ///
-    /// Basically just dump everything into this arena and if you ever
-    /// need to reset the state of the Compiler, just reset all its
-    /// Dynamic Arrays and this Arena.
-    pub arena: Arena,
+    pub interner: StringInterner,
+    pub string_offset: HashTable<*const c_char, usize>,
     pub error_count: usize,
     pub historical: bool,
 }
@@ -968,9 +982,9 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
             Token::Variadic => {
                 get_and_expect_token_but_continue(l, c, Token::OParen)?;
                 get_and_expect_token_but_continue(l, c, Token::ID)?;
-                let func = arena::strdup(&mut (*c).arena, (*l).string);
+                let func = intern(&mut (*c).interner, (*l).string);
                 let func_loc = (*l).loc;
-                if let Some(existing_variadic) = assoc_lookup_cstr(da_slice((*c).program.variadics), func) {
+                if let Some(existing_variadic) = HashTable::get(&(*c).program.variadics, func) {
                     // TODO: report all the duplicate variadics maybe?
                     diagf!(func_loc, c!("ERROR: duplicate variadic declaration `%s`\n"), func);
                     diagf!((*existing_variadic).loc, c!("NOTE: the first declaration is located here\n"));
@@ -982,17 +996,17 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
                     diagf!((*l).loc, c!("ERROR: variadic function `%s` cannot have 0 arguments\n"), func);
                     bump_error_count(c)?;
                 }
-                da_append(&mut (*c).program.variadics, (func, Variadic {
+                HashTable::insert(&mut (*c).program.variadics, func, Variadic {
                     loc: func_loc,
                     fixed_args: (*l).int_number as usize,
-                }));
+                });
                 get_and_expect_token_but_continue(l, c, Token::CParen)?;
                 get_and_expect_token_but_continue(l, c, Token::SemiColon)?;
             }
             Token::Extrn => {
                 while (*l).token != Token::SemiColon {
                     get_and_expect_token(l, Token::ID)?;
-                    let name = arena::strdup(&mut (*c).arena, (*l).string);
+                    let name = intern(&mut (*c).interner, (*l).string);
                     name_declare_if_not_exists(&mut (*c).program.extrns, name);
                     declare_var(c, name, (*l).loc, Storage::External {name})?;
                     get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?;
@@ -1000,7 +1014,7 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
             }
             _ => {
                 expect_token(l, Token::ID)?;
-                let name = arena::strdup(&mut (*c).arena, (*l).string);
+                let name = intern(&mut (*c).interner, (*l).string);
                 let name_loc = (*l).loc;
                 declare_var(c, name, name_loc, Storage::External{name})?;
 
@@ -1017,7 +1031,7 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
                     (*l).parse_point = saved_point;
                     'params: loop {
                         get_and_expect_token(l, Token::ID)?;
-                        let name = arena::strdup(&mut (*c).arena, (*l).string);
+                        let name = intern(&mut (*c).interner, (*l).string);
                         let name_loc = (*l).loc;
                         let index = allocate_auto_var(&mut (*c).auto_vars_ator);
                         declare_var(c, name, name_loc, Storage::Auto{index})?;
@@ -1099,7 +1113,7 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
                         Token::IntLit | Token::CharLit => ImmediateValue::Literal((*l).int_number),
                         Token::String => ImmediateValue::DataOffset(compile_string((*l).string, c)),
                         Token::ID => {
-                            let name = arena::strdup(&mut (*c).arena, (*l).string);
+                            let name = intern(&mut (*c).interner, (*l).string);
                            let scope = da_last_mut(&mut (*c).vars).expect("There should be always at least the global scope");
                             let var = find_var_near(scope, name);
                             if var.is_null() {
@@ -1289,7 +1303,7 @@ pub unsafe fn main(mut argc: i32, mut argv: *mut*mut c_char) -> Option<()> {
 
     let mut c: Compiler = zeroed();
     c.historical = *historical;
-    let executable_directory = arena::strdup(&mut c.arena, dirname(flag_program_name()));
+    let executable_directory = arena::strdup(&mut c.interner.arena, dirname(flag_program_name()));
 
     if (*linker).count > 0 {
         let mut s: Shlex = zeroed();
@@ -1302,7 +1316,7 @@ pub unsafe fn main(mut argc: i32, mut argv: *mut*mut c_char) -> Option<()> {
         log(Log_Level::WARNING, c!("Flag -%s is DEPRECATED! Interpreting it as `-%s %s` instead."), flag_name(linker), PARAM_FLAG_NAME, codegen_arg);
     }
 
-    let gen = target.new(&mut c.arena, da_slice(*codegen_args))?;
+    let gen = target.new(&mut c.interner.arena, da_slice(*codegen_args))?;
 
     if input_paths.count == 0 {
         usage();
@@ -1320,7 +1334,7 @@ pub unsafe fn main(mut argc: i32, mut argv: *mut*mut c_char) -> Option<()> {
         //   - Some sort of instalation prefix? (Requires making build system more complicated)
         //
         //   - rexim (2025-06-12 20:56:08)
-        add_libb_files(arena::sprintf(&mut c.arena, c!("%s/libb/"), executable_directory), *target_name, &mut input_paths, &mut c);
+        add_libb_files(arena::sprintf(&mut c.interner.arena, c!("%s/libb/"), executable_directory), *target_name, &mut input_paths, &mut c);
     }
 
     let mut sb: String_Builder = zeroed();
diff --git a/src/btest.rs b/src/btest.rs
index e05b5c0b..ed2e7d20 100644
--- a/src/btest.rs
+++ b/src/btest.rs
@@ -20,6 +20,7 @@ pub mod lexer;
 pub mod codegen;
 pub mod shlex;
 pub mod params;
+pub mod hashtable;
 
 use core::ffi::*;
 use core::cmp;
diff --git a/src/codegen/gas_aarch64/mod.rs b/src/codegen/gas_aarch64/mod.rs
index 99b8020b..c92d176e 100644
--- a/src/codegen/gas_aarch64/mod.rs
+++ b/src/codegen/gas_aarch64/mod.rs
@@ -1,8 +1,8 @@
 use core::ffi::*;
 use core::mem::zeroed;
+use crate::hashtable::HashTable;
 use crate::nob::*;
 use crate::crust::libc::*;
-use crate::crust::assoc_lookup_cstr;
 use crate::ir::*;
 use crate::lexer::*;
 use crate::missingf;
@@ -127,7 +127,7 @@ pub unsafe fn load_arg_to_reg(arg: Arg, reg: *const c_char, output: *mut String_
     };
 }
 
-pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_count: usize, auto_vars_count: usize, os: Os, variadics: *const [(*const c_char, Variadic)], body: *const [OpWithLocation], output: *mut String_Builder) {
+pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_count: usize, auto_vars_count: usize, os: Os, variadics: *const HashTable<*const c_char, Variadic>, body: *const [OpWithLocation], output: *mut String_Builder) {
     let stack_size = align_bytes(auto_vars_count*8, 16);
     match os {
         Os::Linux => {
@@ -316,7 +316,7 @@ pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_coun
                 let mut fixed_args = 0;
                 match fun {
                     Arg::External(name) | Arg::RefExternal(name) => {
-                        if let Some(variadic) = assoc_lookup_cstr(variadics, name) {
+                        if let Some(variadic) = HashTable::get(variadics, name) {
                             fixed_args = (*variadic).fixed_args;
                         }
                     }
@@ -395,7 +395,7 @@ pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_coun
     sb_appendf(output, c!("    ret\n"));
 }
 
-pub unsafe fn generate_funcs(output: *mut String_Builder, funcs: *const [Func], variadics: *const [(*const c_char, Variadic)], os: Os) {
+pub unsafe fn generate_funcs(output: *mut String_Builder, funcs: *const [Func], variadics: *const HashTable<*const c_char, Variadic>, os: Os) {
     sb_appendf(output, c!(".text\n"));
     for i in 0..funcs.len() {
         generate_function((*funcs)[i].name, (*funcs)[i].name_loc, (*funcs)[i].params_count, (*funcs)[i].auto_vars_count, os, variadics, da_slice((*funcs)[i].body), output);
@@ -572,7 +572,7 @@ pub unsafe fn generate_program(
     if debug {
         todo!("Debug information for aarch64")
     }
-    generate_funcs(output, da_slice((*program).funcs), da_slice((*program).variadics), os);
+    generate_funcs(output, da_slice((*program).funcs), &(*program).variadics, os);
     generate_asm_funcs(output, da_slice((*program).asm_funcs), os);
     generate_globals(output, da_slice((*program).globals), os);
     generate_data_section(output, da_slice((*program).data));
diff --git a/src/crust.rs b/src/crust.rs
index bd231d16..e58df6a4 100644
--- a/src/crust.rs
+++ b/src/crust.rs
@@ -1,5 +1,7 @@
 // This is a module that facilitates Crust-style programming - https://github.com/tsoding/crust
 use crate::crust::libc::*;
+use core::hash::{Hash, Hasher};
+use core::cmp::Ordering;
 use core::panic::PanicInfo;
 use core::ffi::*;
 
@@ -38,44 +40,49 @@ pub unsafe fn slice_contains<Value>(slice: *const [Value], needle: *c
     false
 }
 
-pub unsafe fn assoc_lookup_cstr_mut<Value>(assoc: *mut [(*const c_char, Value)], needle: *const c_char) -> Option<*mut Value> {
-    for i in 0..assoc.len() {
-        if strcmp((*assoc)[i].0, needle) == 0 {
-            return Some(&mut (*assoc)[i].1);
-        }
+/// This is just a zero-cost wrapper around null-terminated C-string.
+/// It would be nice to use `core::ffi::CStr` here, but it has two downsides:
+/// 1. Overhead on construction from pointer
+/// 2. It is a fat pointer (slice), which means it consumes two times more memory
+///
+/// It is useful when you want to pass a `*... c_char` to a function or a struct
+/// constraint by `Eq`, `Ord` or `Hash` traits and act it as a C-string.
+#[repr(transparent)]
+#[derive(Clone, Copy, Eq, Debug)]
+pub struct Str(pub *const c_char);
+
+impl PartialEq for Str {
+    fn eq(&self, other: &Self) -> bool {
+        unsafe { strcmp(self.0, other.0) == 0 }
+    }
-    }
-    None
 }
 
-pub unsafe fn assoc_lookup_cstr<Value>(assoc: *const [(*const c_char, Value)], needle: *const c_char) -> Option<*const Value> {
-    for i in 0..assoc.len() {
-        if strcmp((*assoc)[i].0, needle) == 0 {
-            return Some(&(*assoc)[i].1);
-        }
+impl PartialOrd for Str {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
-    }
-    None
 }
 
-pub unsafe fn assoc_lookup_mut<Key, Value>(assoc: *mut [(Key, Value)], needle: *const Key) -> Option<*mut Value>
-where Key: PartialEq
-{
-    for i in 0..assoc.len() {
-        if (*assoc)[i].0 == *needle {
-            return Some(&mut (*assoc)[i].1);
+impl Ord for Str {
+    fn cmp(&self, other: &Self) -> Ordering {
+        unsafe {
+            match strcmp(self.0, other.0) {
+                0 => Ordering::Equal,
+                1.. => Ordering::Greater,
+                _ => Ordering::Less,
+            }
         }
     }
-    None
 }
 
-pub unsafe fn assoc_lookup<Key, Value>(assoc: *const [(Key, Value)], needle: *const Key) -> Option<*const Value>
-where Key: PartialEq
-{
-    for i in 0..assoc.len() {
-        if (*assoc)[i].0 == *needle {
-            return Some(&(*assoc)[i].1);
+impl Hash for Str {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        unsafe {
+            let len = strlen(self.0);
+            let slice = core::slice::from_raw_parts(self.0 as *const u8, len);
+            state.write(slice);
         }
     }
-    None
 }
 
 #[macro_use]
@@ -117,6 +124,13 @@ pub mod libc {
         pub fn dirname(path: *const c_char) -> *const c_char;
     }
 
+    pub unsafe fn alloc_items<T>(count: usize) -> *mut T {
+        extern "C" {
+            fn malloc(size: usize) -> *mut c_void;
+        }
+        malloc(size_of::<T>() * count) as *mut T
+    }
+
     // count is the amount of items, not bytes
     pub unsafe fn realloc_items<T>(ptr: *mut T, count: usize) -> *mut T {
         extern "C" {
diff --git a/src/hashtable.rs b/src/hashtable.rs
new file mode 100644
index 00000000..cbda9c2b
--- /dev/null
+++ b/src/hashtable.rs
@@ -0,0 +1,228 @@
+use crate::crust::libc;
+use core::hash::{BuildHasher, Hash, Hasher};
+use core::{cmp, mem, ptr};
+
+/// General purpose hashtable, that accepts any kind of key and value types.
+/// Current implementation uses open addressing and quadratic probing to minimize hash collisions.
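+///
+/// A rough usage sketch (the key/value types below are only an example, not part of
+/// the compiler; the table is zero-initialized, crust-style, and grows on first insert):
+///
+/// ```ignore
+/// let mut ht: HashTable<Str, usize> = zeroed();
+/// HashTable::insert(&mut ht, Str(c!("hello")), 1);           // returns the previous value, if any
+/// assert!(HashTable::get(&ht, Str(c!("hello"))).is_some());  // lookup compares C-string contents
+/// ```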
+#[derive(Clone, Copy)]
+pub struct HashTable<K, V, S = DefaultHasher> {
+    pub entries: *mut Entry<K, V>,
+    pub capacity: usize,
+    pub count: usize,
+    pub occupied: *mut u8,
+    pub hasher_builder: S,
+}
+
+#[derive(Clone, Copy)]
+pub struct Entry<K, V> {
+    pub key: K,
+    pub value: V,
+}
+
+#[derive(Clone, Copy)]
+pub enum HtEntry<K, V> {
+    Occupied(*mut Entry<K, V>),
+    Vacant(*mut Entry<K, V>),
+}
+
+impl<K, V, S, H> HashTable<K, V, S>
+where
+    K: Clone + Copy + Hash + Eq,
+    V: Clone + Copy,
+    S: BuildHasher<Hasher = H>,
+    H: Hasher,
+{
+    // Must be power of 2 and greater than or equal to 8
+    pub const MIN_CAPACITY: usize = 32;
+
+    /// Returns previous value stored by this `key` or `None`
+    pub unsafe fn insert(ht: *mut Self, key: K, value: V) -> Option<V> {
+        match Self::find(ht, key) {
+            HtEntry::Occupied(entry) => Some(mem::replace(&mut (*entry).value, value)),
+            HtEntry::Vacant(entry) => {
+                Self::insert_new(ht, entry, key, value);
+                None
+            }
+        }
+    }
+
+    pub unsafe fn get(ht: *const Self, key: K) -> Option<*const V> {
+        match Self::find(ht, key) {
+            HtEntry::Occupied(entry) => Some(&(*entry).value),
+            HtEntry::Vacant(_) => None,
+        }
+    }
+
+    pub unsafe fn get_mut(ht: *mut Self, key: K) -> Option<*mut V> {
+        match Self::find(ht, key) {
+            HtEntry::Occupied(entry) => Some(&mut (*entry).value),
+            HtEntry::Vacant(_) => None,
+        }
+    }
+
+    pub unsafe fn clear(ht: *mut Self) {
+        (*ht).count = 0;
+        ptr::write_bytes((*ht).occupied, 0, (*ht).capacity >> 3);
+    }
+
+    pub unsafe fn find(ht: *const Self, key: K) -> HtEntry<K, V> {
+        if (*ht).capacity == 0 {
+            return HtEntry::Vacant(ptr::null_mut());
+        }
+
+        let hash = Self::hash_key(ht, key);
+        let mut index = Self::index_from_hash(hash, (*ht).capacity);
+
+        let mut step = 1;
+        loop {
+            let entry = (*ht).entries.add(index);
+            if Self::is_occupied(ht, index) {
+                if (*entry).key == key {
+                    return HtEntry::Occupied(entry);
+                }
+            } else {
+                return HtEntry::Vacant(entry);
+            }
+
+            index = (index + step) & ((*ht).capacity - 1);
+            step += 1;
+        }
+    }
+
+    pub unsafe fn insert_new(ht: *mut Self, entry: *mut Entry<K, V>, key: K, value: V) {
+        if entry.is_null() {
+            Self::realloc_rehash(ht);
+
+            // Executes only when capacity was 0
+            let hash = Self::hash_key(ht, key);
+            let index = Self::index_from_hash(hash, (*ht).capacity);
+            Self::fill_entry(ht, (*ht).entries.add(index), index, key, value);
+        } else {
+            let index = entry.offset_from((*ht).entries);
+            debug_assert!(index >= 0);
+            Self::fill_entry(ht, entry, index as usize, key, value);
+
+            // When load factor > 0.75
+            if (3 * (*ht).capacity) / 4 < (*ht).count {
+                Self::realloc_rehash(ht);
+            }
+        }
+    }
+
+    pub unsafe fn realloc_rehash(ht: *mut Self) {
+        let old_entries = (*ht).entries;
+        let old_occupied = (*ht).occupied;
+        let old_capacity = (*ht).capacity;
+
+        (*ht).capacity = cmp::max(old_capacity << 1, Self::MIN_CAPACITY);
+        debug_assert!((*ht).capacity.is_power_of_two());
+
+        // We need new allocations here, to properly copy entries
+        (*ht).entries = libc::alloc_items((*ht).capacity);
+        (*ht).occupied = libc::alloc_items((*ht).capacity >> 3);
+        debug_assert!(!(*ht).entries.is_null());
+        debug_assert!(!(*ht).occupied.is_null());
+
+        // Fill occupied with zeros
+        ptr::write_bytes((*ht).occupied, 0, (*ht).capacity >> 3);
+
+        // Iterate over all occupied entries and rehash them
+        let buckets_count = old_capacity >> 3;
+        for i in 0..buckets_count {
+            let bucket = *old_occupied.add(i);
+            for j in 0..8 {
+                if (bucket >> j) & 1 == 1 {
+                    let index = (i << 3) + j;
+                    let entry = *old_entries.add(index);
+                    let new_entry = Self::find_vacant(ht, entry.key);
+                    Self::insert_new(ht, new_entry, entry.key, entry.value);
+                }
+            }
+        }
+
+        libc::free(old_entries);
+        libc::free(old_occupied);
+    }
+
+    pub unsafe fn fill_entry(ht: *mut Self, entry: *mut Entry<K, V>, index: usize, key: K, value: V) {
+        *entry = Entry { key, value };
+        Self::occupy_index(ht, index);
+        (*ht).count += 1;
+    }
+
+    pub unsafe fn occupy_index(ht: *mut Self, index: usize) {
+        let bucket = (*ht).occupied.add(index >> 3);
+        let sub_index = index & 7;
+        *bucket |= 1 << sub_index;
+    }
+
+    pub unsafe fn is_occupied(ht: *const Self, index: usize) -> bool {
+        let bucket = *(*ht).occupied.add(index >> 3);
+        let sub_index = index & 7;
+        (bucket >> sub_index) & 1 == 1
+    }
+
+    pub unsafe fn index_from_hash(hash: u64, capacity: usize) -> usize {
+        (hash & (capacity as u64 - 1)) as usize
+    }
+
+    pub unsafe fn hash_key(ht: *const Self, key: K) -> u64 {
+        let mut hasher = (*ht).hasher_builder.build_hasher();
+        key.hash(&mut hasher);
+        hasher.finish()
+    }
+
+
+    // This function is only for internal usage to speed up rehashing
+    unsafe fn find_vacant(ht: *mut Self, key: K) -> *mut Entry<K, V> {
+        let hash = Self::hash_key(ht, key);
+        let mut index = Self::index_from_hash(hash, (*ht).capacity);
+
+        let mut step = 1;
+        loop {
+            let entry = (*ht).entries.add(index);
+            if !Self::is_occupied(ht, index) {
+                return entry;
+            }
+
+            index = (index + step) & ((*ht).capacity - 1);
+            step += 1;
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct DefaultHasher;
+
+impl BuildHasher for DefaultHasher {
+    type Hasher = Fnv1aHasher;
+
+    fn build_hasher(&self) -> Self::Hasher {
+        Fnv1aHasher {
+            hash: Fnv1aHasher::OFFSET,
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct Fnv1aHasher {
+    pub hash: u64,
+}
+
+impl Fnv1aHasher {
+    const OFFSET: u64 = 14695981039346656037;
+    const PRIME: u64 = 1099511628211;
+}
+
+impl Hasher for Fnv1aHasher {
+    fn finish(&self) -> u64 {
+        self.hash
+    }
+
+    fn write(&mut self, bytes: &[u8]) {
+        for byte in bytes {
+            self.hash ^= *byte as u64;
+            self.hash = self.hash.wrapping_mul(Self::PRIME);
+        }
+    }
+}
diff --git a/src/ir.rs b/src/ir.rs
index 75a4f13b..df330700 100644
--- a/src/ir.rs
+++ b/src/ir.rs
@@ -1,4 +1,5 @@
 use core::ffi::*;
+use crate::hashtable::HashTable;
 use crate::lexer::*;
 use crate::nob::*;
 
@@ -130,7 +131,7 @@ pub struct Program {
     pub funcs: Array<Func>,
     pub data: Array<u8>,
     pub extrns: Array<*const c_char>,
-    pub variadics: Array<(*const c_char, Variadic)>,
+    pub variadics: HashTable<*const c_char, Variadic>,
     pub globals: Array,
     pub asm_funcs: Array,
 }
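For illustration only, not part of the patch: the constants in Fnv1aHasher are the standard 64-bit FNV-1a offset basis and prime, and the sketch below shows how the new StringInterner and the pointer-keyed tables are meant to interact. `demo` is a hypothetical helper written for this note; everything it calls (`intern`, `HashTable`, `Variadic`, `zeroed`, `c!`) comes from the diff above or from the existing codebase.

// Interned strings give back stable, deduplicated pointers, which is what makes plain
// `*const c_char` keys (hashed and compared by address) safe to use in these tables.
pub unsafe fn demo() {
    let mut interner: StringInterner = zeroed();

    let a = intern(&mut interner, c!("printf"));
    let b = intern(&mut interner, c!("printf"));
    assert!(a == b); // equal contents intern to the same pointer

    let mut variadics: HashTable<*const c_char, Variadic> = zeroed();
    HashTable::insert(&mut variadics, a, Variadic { loc: zeroed(), fixed_args: 1 });
    assert!(HashTable::get(&variadics, b).is_some()); // found via the interned pointer
}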