From 4eba3698aa1dde15abd22de1452229293282efee Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 09:47:49 +0000 Subject: [PATCH 01/19] Begin work on nested declarations --- src/types.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/types.cpp b/src/types.cpp index c88878b9c85..42530ecccfc 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -4919,6 +4919,12 @@ gb_internal u64 type_hash_canonical_type(Type *type) { return hash; } +gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { + gbString w = gb_string_make(allocator, ""); + w = write_type_to_canonical_string(w, type); + return make_string(cast(u8 const *)w, gb_string_length(w)); +} + // NOTE(bill): This exists so that we deterministically hash a type by serializing it to a canonical string gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { if (type == nullptr) { @@ -5101,6 +5107,15 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { case Type_Named: if (type->Named.type_name != nullptr) { Entity *e = type->Named.type_name; + + if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || + e->flags & EntityFlag_NotExported) { + if (e->scope->flags & ScopeFlag_Proc) { + GB_PANIC("NESTED IN PROC\n"); + } else if (e->scope->flags & ScopeFlag_File) { + GB_PANIC("PRIVATE TO FILE\n"); + } + } if (e->pkg != nullptr) { w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); w = gb_string_appendc(w, "."); From 99d91ccd31366e78c7ec0e94b5e3d473806721ed Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 11:32:49 +0000 Subject: [PATCH 02/19] Work on making name mangling deterministic --- src/check_decl.cpp | 6 + src/check_expr.cpp | 2 +- src/checker.cpp | 1 + src/checker.hpp | 2 + src/entity.cpp | 1 + src/gb/gb.h | 2 +- src/llvm_backend.hpp | 2 +- src/llvm_backend_general.cpp | 43 +++- src/llvm_backend_stmt.cpp | 3 +- src/name_canonicalization.cpp | 419 ++++++++++++++++++++++++++++++++++ src/types.cpp | 269 ---------------------- 11 files changed, 475 insertions(+), 275 deletions(-) create mode 100644 src/name_canonicalization.cpp diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 9084f15f073..d6f8e6fa731 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -1784,6 +1784,10 @@ gb_internal bool check_proc_body(CheckerContext *ctx_, Token token, DeclInfo *de ctx->curr_proc_sig = type; ctx->curr_proc_calling_convention = type->Proc.calling_convention; + if (decl->parent && decl->entity && decl->parent->entity) { + decl->entity->parent_proc_decl = decl->parent; + } + if (ctx->pkg->name != "runtime") { switch (type->Proc.calling_convention) { case ProcCC_None: @@ -1873,6 +1877,8 @@ gb_internal bool check_proc_body(CheckerContext *ctx_, Token token, DeclInfo *de check_open_scope(ctx, body); { + ctx->scope->decl_info = decl; + for (auto const &entry : using_entities) { Entity *uvar = entry.uvar; Entity *prev = scope_insert(ctx->scope, uvar); diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 550a7749c7f..f0021e67ff4 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -345,7 +345,7 @@ gb_internal void check_scope_decls(CheckerContext *c, Slice const &nodes, check_collect_entities(c, nodes); for (auto const &entry : s->elements) { - Entity *e = entry.value; + Entity *e = entry.value;\ switch (e->kind) { case Entity_Constant: case Entity_TypeName: diff --git a/src/checker.cpp b/src/checker.cpp index bfcabe4fabc..c74a72a1416 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -3894,6 +3894,7 @@ gb_internal DECL_ATTRIBUTE_PROC(type_decl_attribute) { #include "check_expr.cpp" #include "check_builtin.cpp" #include "check_type.cpp" +#include "name_canonicalization.cpp" #include "check_decl.cpp" #include "check_stmt.cpp" diff --git a/src/checker.hpp b/src/checker.hpp index 4634047c092..472ab8e505c 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -276,6 +276,8 @@ struct Scope { StringMap elements; PtrSet imported; + DeclInfo *decl_info; + i32 flags; // ScopeFlag union { AstPackage *pkg; diff --git a/src/entity.cpp b/src/entity.cpp index d137a8674fe..b2148aa7bd2 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -257,6 +257,7 @@ struct Entity { bool has_instrumentation : 1; bool is_memcpy_like : 1; bool uses_branch_location : 1; + bool is_anonymous : 1; } Procedure; struct { Array entities; diff --git a/src/gb/gb.h b/src/gb/gb.h index 59611ceb6ad..98c362e9346 100644 --- a/src/gb/gb.h +++ b/src/gb/gb.h @@ -5856,7 +5856,7 @@ gb_inline isize gb_fprintf_va(struct gbFile *f, char const *fmt, va_list va) { gb_inline char *gb_bprintf_va(char const *fmt, va_list va) { - gb_local_persist char buffer[4096]; + gb_thread_local gb_local_persist char buffer[4096]; gb_snprintf_va(buffer, gb_size_of(buffer), fmt, va); return buffer; } diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index a0775ac3b81..dd6f1a08321 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -399,7 +399,7 @@ struct lbProcedure { gb_internal bool lb_init_generator(lbGenerator *gen, Checker *c); gb_internal String lb_mangle_name(Entity *e); -gb_internal String lb_get_entity_name(lbModule *m, Entity *e, String name = {}); +gb_internal String lb_get_entity_name(lbModule *m, Entity *e); gb_internal LLVMAttributeRef lb_create_enum_attribute(LLVMContextRef ctx, char const *name, u64 value=0); gb_internal LLVMAttributeRef lb_create_enum_attribute_with_type(LLVMContextRef ctx, char const *name, LLVMTypeRef type); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 7425b9fd735..dc212e51d8d 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1444,6 +1444,7 @@ gb_internal void lb_clone_struct_type(LLVMTypeRef dst, LLVMTypeRef src) { } gb_internal String lb_mangle_name(Entity *e) { +#if 1 String name = e->token.string; AstPackage *pkg = e->pkg; @@ -1483,9 +1484,18 @@ gb_internal String lb_mangle_name(Entity *e) { String mangled_name = make_string((u8 const *)new_name, new_name_len-1); return mangled_name; +#else + gbString w = gb_string_make(gb_heap_allocator(), ""); + w = write_canonical_entity_name(w, e); + gb_printf_err(">> %s\n", w); + + String mangled_name = make_string(cast(u8 const *)w, gb_string_length(w)); + return mangled_name; +#endif } gb_internal String lb_set_nested_type_name_ir_mangled_name(Entity *e, lbProcedure *p, lbModule *module) { +#if 0 // NOTE(bill, 2020-03-08): A polymorphic procedure may take a nested type declaration // and as a result, the declaration does not have time to determine what it should be @@ -1516,6 +1526,7 @@ gb_internal String lb_set_nested_type_name_ir_mangled_name(Entity *e, lbProcedur } } + // NOTE(bill): Generate a new name // parent_proc.name-guid String ts_name = e->token.string; @@ -1528,6 +1539,12 @@ gb_internal String lb_set_nested_type_name_ir_mangled_name(Entity *e, lbProcedur String name = make_string(cast(u8 *)name_text, name_len-1); e->TypeName.ir_mangled_name = name; + + { + String s = type_to_canonical_string(temporary_allocator(), e->type); + gb_printf_err("1) %.*s\n", LIT(s)); + gb_printf_err("2) %.*s\n", LIT(name)); + } return name; } else { // NOTE(bill): a nested type be required before its parameter procedure exists. Just give it a temp name for now @@ -1538,11 +1555,18 @@ gb_internal String lb_set_nested_type_name_ir_mangled_name(Entity *e, lbProcedur String name = make_string(cast(u8 *)name_text, name_len-1); e->TypeName.ir_mangled_name = name; + + { + String s = type_to_canonical_string(temporary_allocator(), e->type); + gb_printf_err("3) %.*s\n", LIT(s)); + gb_printf_err("4) %.*s\n", LIT(name)); + } return name; } +#endif } -gb_internal String lb_get_entity_name(lbModule *m, Entity *e, String default_name) { +gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { GB_ASSERT(m != nullptr); if (e != nullptr && e->kind == Entity_TypeName && e->TypeName.ir_mangled_name.len != 0) { return e->TypeName.ir_mangled_name; @@ -1553,6 +1577,13 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e, String default_nam return e->token.string; } +#if 1 + gbString w = gb_string_make(heap_allocator(), ""); + w = write_canonical_entity_name(w, e); + defer (gb_string_free(w)); + + String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); +#else if (e->kind == Entity_TypeName && (e->scope->flags & ScopeFlag_File) == 0) { return lb_set_nested_type_name_ir_mangled_name(e, nullptr, m); } @@ -1576,11 +1607,17 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e, String default_nam if (!no_name_mangle) { name = lb_mangle_name(e); + + gbString w = gb_string_make(gb_heap_allocator(), ""); + w = write_canonical_entity_name(w, e); + if (w[0] == 0) { + gb_printf_err(">> %s %.*s\n", w, LIT(name)); + } } if (name.len == 0) { name = e->token.string; } - +#endif if (e->kind == Entity_TypeName) { e->TypeName.ir_mangled_name = name; } else if (e->kind == Entity_Procedure) { @@ -2869,6 +2906,8 @@ gb_internal lbValue lb_generate_anonymous_proc_lit(lbModule *m, String const &pr pl->decl->code_gen_module = m; e->decl_info = pl->decl; pl->decl->entity = e; + e->parent_proc_decl = pl->decl->parent; + e->Procedure.is_anonymous = true; e->flags |= EntityFlag_ProcBodyChecked; lbProcedure *p = lb_create_procedure(m, e); diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index b05df0b462a..b8347207595 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -32,7 +32,8 @@ gb_internal void lb_build_constant_value_decl(lbProcedure *p, AstValueDecl *vd) continue; } - lb_set_nested_type_name_ir_mangled_name(e, p, p->module); + String name = lb_get_entity_name(p->module, e); + gb_unused(name); } for_array(i, vd->names) { diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp new file mode 100644 index 00000000000..fa09f27c0af --- /dev/null +++ b/src/name_canonicalization.cpp @@ -0,0 +1,419 @@ +gb_internal gbString write_type_to_canonical_string(gbString w, Type *type); +gb_internal gbString write_canonical_params(gbString w, Type *params) { + w = gb_string_appendc(w, "("); + if (params) { + GB_ASSERT(params->kind == Type_Tuple); + for_array(i, params->Tuple.variables) { + Entity *v = params->Tuple.variables[i]; + if (i > 0) { + w = gb_string_appendc(w, ","); + } + if (v->kind == Entity_Variable) { + if (v->flags&EntityFlag_CVarArg) { + w = gb_string_appendc(w, "#c_vararg"); + } + if (v->flags&EntityFlag_Ellipsis) { + Type *slice = base_type(v->type); + w = gb_string_appendc(w, ".."); + GB_ASSERT(v->type->kind == Type_Slice); + w = write_type_to_canonical_string(w, slice->Slice.elem); + } else { + w = write_type_to_canonical_string(w, v->type); + } + } else if (v->kind == Entity_TypeName) { + w = gb_string_appendc(w, "$"); + w = write_type_to_canonical_string(w, v->type); + } else if (v->kind == Entity_Constant) { + w = gb_string_appendc(w, "$$"); + w = write_exact_value_to_string(w, v->Constant.value); + } else { + GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); + } + } + } + return gb_string_appendc(w, ")"); +} + +gb_internal u64 type_hash_canonical_type(Type *type) { + if (type == nullptr) { + return 0; + } + TEMPORARY_ALLOCATOR_GUARD(); + gbString w = write_type_to_canonical_string(gb_string_make(temporary_allocator(), ""), type); + u64 hash = fnv64a(w, gb_string_length(w)); + return hash; +} + +gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { + gbString w = gb_string_make(allocator, ""); + w = write_type_to_canonical_string(w, type); + return make_string(cast(u8 const *)w, gb_string_length(w)); +} + +gb_internal void print_scope_flags(Scope *s) { + if (s->flags & ScopeFlag_Pkg) gb_printf_err("Pkg "); + if (s->flags & ScopeFlag_Builtin) gb_printf_err("Builtin "); + if (s->flags & ScopeFlag_Global) gb_printf_err("Global "); + if (s->flags & ScopeFlag_File) gb_printf_err("File "); + if (s->flags & ScopeFlag_Init) gb_printf_err("Init "); + if (s->flags & ScopeFlag_Proc) gb_printf_err("Proc "); + if (s->flags & ScopeFlag_Type) gb_printf_err("Type "); + if (s->flags & ScopeFlag_HasBeenImported) gb_printf_err("HasBeenImported "); + if (s->flags & ScopeFlag_ContextDefined) gb_printf_err("ContextDefined "); + gb_printf_err("\n"); +} + + + +gb_internal gbString write_canonical_parent_prefix(gbString w, Entity *e, bool ignore_final_dot=false) { + GB_ASSERT(e != nullptr); + + // auto const &parent_entity = [](Scope *s) -> Entity* { + // while ((s->flags & (ScopeFlag_Proc|ScopeFlag_File)) == 0 && s->decl_info == nullptr) { + // s = s->parent; + // } + // if (s->decl_info && s->decl_info->entity) { + // return s->decl_info->entity; + // } + // return nullptr; + // }; + + if (e->kind == Entity_Procedure) { + if (e->Procedure.is_export || e->Procedure.is_foreign) { + // no prefix + return w; + } + if (e->parent_proc_decl) { + Entity *p = e->parent_proc_decl->entity; + w = write_canonical_parent_prefix(w, p); + w = gb_string_append_length(w, p->token.string.text, p->token.string.len); + if (is_type_polymorphic(p->type)) { + w = gb_string_appendc(w, "::"); + w = write_type_to_canonical_string(w, p->type); + } + w = gb_string_appendc(w, "."); + + } else if (e->pkg && (scope_lookup_current(e->pkg->scope, e->token.string) == e)) { + w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + if (e->pkg->name == "llvm") { + gb_string_appendc(w, "$"); + } + w = gb_string_appendc(w, "."); + } else { + String file_name = filename_without_directory(e->file->fullpath); + w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + if (e->pkg->name == "llvm") { + gb_string_appendc(w, "$"); + } + w = gb_string_appendc(w, gb_bprintf(".[%.*s].", LIT(file_name))); + } + } else if (e->kind == Entity_Procedure) { + if (e->Procedure.is_export || e->Procedure.is_foreign) { + // no prefix + return w; + } + GB_PANIC("TODO(bill): handle entity kind: %d", e->kind); + } + + if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { + w = gb_string_appendc(w, gb_bprintf("$anon%d", e->token.pos.offset)); + } else { + w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + } + + if (is_type_polymorphic(e->type)) { + w = gb_string_appendc(w, "::"); + w = write_type_to_canonical_string(w, e->type); + } + if (!ignore_final_dot) { + w = gb_string_appendc(w, "."); + } + + return w; +} + +gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { + GB_ASSERT(e != nullptr); + + if (e->token.string == "_") { + GB_PANIC("_ string"); + } + if (e->token.string.len == 0) { + GB_PANIC("empty string"); + } + + if (e->kind == Entity_Variable) { + bool is_foreign = e->Variable.is_foreign; + bool is_export = e->Variable.is_export; + if (e->Variable.link_name.len > 0) { + w = gb_string_append_length(w, e->Variable.link_name.text, e->Variable.link_name.len); + return w; + } else if (is_foreign || is_export) { + w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + return w; + } + } else if (e->kind == Entity_Procedure && e->Procedure.link_name.len > 0) { + w = gb_string_append_length(w, e->Procedure.link_name.text, e->Procedure.link_name.len); + return w; + } else if (e->kind == Entity_Procedure && e->Procedure.is_export) { + w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + return w; + } + + if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || + e->flags & EntityFlag_NotExported) { + + Scope *s = e->scope; + while ((s->flags & (ScopeFlag_Proc|ScopeFlag_File)) == 0 && s->decl_info == nullptr) { + s = s->parent; + } + + if (s->decl_info != nullptr && s->decl_info->entity) { + w = write_canonical_parent_prefix(w, s->decl_info->entity); + goto write_base_name; + } else if ((s->flags & ScopeFlag_File) && s->file != nullptr) { + String file_name = filename_without_directory(s->file->fullpath); + w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + if (e->pkg->name == "llvm") { + gb_string_appendc(w, "$"); + } + w = gb_string_appendc(w, gb_bprintf(".[%.*s].", LIT(file_name))); + goto write_base_name; + } + gb_printf_err("%s HERE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); + print_scope_flags(s); + GB_PANIC("weird entity"); + } + if (e->pkg != nullptr) { + w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + w = gb_string_appendc(w, "."); + } + +write_base_name: + + switch (e->kind) { + case Entity_TypeName: + { + Type *params = nullptr; + Entity *parent = type_get_polymorphic_parent(e->type, ¶ms); + if (parent) { + w = gb_string_append_length(w, parent->token.string.text, parent->token.string.len); + w = write_canonical_params(w, params); + } else { + w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + } + } + // Handle parapoly stuff here? + return w; + + case Entity_Procedure: + case Entity_Variable: + w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + if (is_type_polymorphic(e->type)) { + w = gb_string_appendc(w, "::"); + w = write_type_to_canonical_string(w, e->type); + } + return w; + + default: + GB_PANIC("TODO(bill): entity kind %d", e->kind); + break; + } + return w; +} + +// NOTE(bill): This exists so that we deterministically hash a type by serializing it to a canonical string +gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { + if (type == nullptr) { + return gb_string_appendc(w, "<>"); // none/void type + } + + type = default_type(type); + GB_ASSERT(!is_type_untyped(type)); + + switch (type->kind) { + case Type_Basic: + return gb_string_append_length(w, type->Basic.name.text, type->Basic.name.len); + case Type_Pointer: + w = gb_string_append_rune(w, '^'); + return write_type_to_canonical_string(w, type->Pointer.elem); + case Type_MultiPointer: + w = gb_string_appendc(w, "[^]"); + return write_type_to_canonical_string(w, type->Pointer.elem); + case Type_SoaPointer: + w = gb_string_appendc(w, "#soa^"); + return write_type_to_canonical_string(w, type->Pointer.elem); + case Type_EnumeratedArray: + if (type->EnumeratedArray.is_sparse) { + w = gb_string_appendc(w, "#sparse"); + } + w = gb_string_append_rune(w, '['); + w = write_type_to_canonical_string(w, type->EnumeratedArray.index); + w = gb_string_append_rune(w, ']'); + return write_type_to_canonical_string(w, type->EnumeratedArray.elem); + case Type_Array: + w = gb_string_appendc(w, gb_bprintf("[%lld]", cast(long long)type->Array.count)); + return write_type_to_canonical_string(w, type->Array.elem); + case Type_Slice: + w = gb_string_appendc(w, "[]"); + return write_type_to_canonical_string(w, type->Array.elem); + case Type_DynamicArray: + w = gb_string_appendc(w, "[dynamic]"); + return write_type_to_canonical_string(w, type->DynamicArray.elem); + case Type_SimdVector: + w = gb_string_appendc(w, gb_bprintf("#simd[%lld]", cast(long long)type->SimdVector.count)); + return write_type_to_canonical_string(w, type->SimdVector.elem); + case Type_Matrix: + if (type->Matrix.is_row_major) { + w = gb_string_appendc(w, "#row_major "); + } + w = gb_string_appendc(w, gb_bprintf("matrix[%lld, %lld]", cast(long long)type->Matrix.row_count, cast(long long)type->Matrix.column_count)); + return write_type_to_canonical_string(w, type->Matrix.elem); + case Type_Map: + w = gb_string_appendc(w, "map["); + w = write_type_to_canonical_string(w, type->Map.key); + w = gb_string_appendc(w, "]"); + return write_type_to_canonical_string(w, type->Map.value); + + case Type_Enum: + w = gb_string_appendc(w, "enum"); + if (type->Enum.base_type != nullptr) { + w = gb_string_append_rune(w, ' '); + w = write_type_to_canonical_string(w, type->Enum.base_type); + w = gb_string_append_rune(w, ' '); + } + w = gb_string_append_rune(w, '{'); + for_array(i, type->Enum.fields) { + Entity *f = type->Enum.fields[i]; + GB_ASSERT(f->kind == Entity_Constant); + if (i > 0) { + w = gb_string_appendc(w, ","); + } + w = gb_string_append_length(w, f->token.string.text, f->token.string.len); + w = gb_string_appendc(w, "="); + w = write_exact_value_to_string(w, f->Constant.value); + } + return gb_string_append_rune(w, '}'); + case Type_BitSet: + w = gb_string_appendc(w, "bit_set["); + if (type->BitSet.elem == nullptr) { + w = write_type_to_canonical_string(w, type->BitSet.elem); + } else if (is_type_enum(type->BitSet.elem)) { + w = write_type_to_canonical_string(w, type->BitSet.elem); + } else { + w = gb_string_append_fmt(w, "%lld", type->BitSet.lower); + w = gb_string_append_fmt(w, "..="); + w = gb_string_append_fmt(w, "%lld", type->BitSet.upper); + } + if (type->BitSet.underlying != nullptr) { + w = gb_string_appendc(w, ";"); + w = write_type_to_canonical_string(w, type->BitSet.underlying); + } + return gb_string_appendc(w, "]"); + + case Type_Union: + w = gb_string_appendc(w, "union"); + + switch (type->Union.kind) { + case UnionType_no_nil: w = gb_string_appendc(w, "#no_nil"); break; + case UnionType_shared_nil: w = gb_string_appendc(w, "#shared_nil"); break; + } + if (type->Union.custom_align != 0) { + w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Union.custom_align); + } + w = gb_string_appendc(w, "{"); + for_array(i, type->Union.variants) { + Type *t = type->Union.variants[i]; + if (i > 0) w = gb_string_appendc(w, ", "); + w = write_type_to_canonical_string(w, t); + } + return gb_string_appendc(w, "}"); + case Type_Struct: + if (type->Struct.soa_kind != StructSoa_None) { + switch (type->Struct.soa_kind) { + case StructSoa_Fixed: w = gb_string_append_fmt(w, "#soa[%lld]", cast(long long)type->Struct.soa_count); break; + case StructSoa_Slice: w = gb_string_appendc(w, "#soa[]"); break; + case StructSoa_Dynamic: w = gb_string_appendc(w, "#soa[dynamic]"); break; + default: GB_PANIC("Unknown StructSoaKind"); break; + } + return write_type_to_canonical_string(w, type->Struct.soa_elem); + } + + w = gb_string_appendc(w, "struct"); + if (type->Struct.is_packed) w = gb_string_appendc(w, "#packed"); + if (type->Struct.is_raw_union) w = gb_string_appendc(w, "#raw_union"); + if (type->Struct.is_no_copy) w = gb_string_appendc(w, "#no_copy"); + if (type->Struct.custom_min_field_align != 0) w = gb_string_append_fmt(w, "#min_field_align(%lld)", cast(long long)type->Struct.custom_min_field_align); + if (type->Struct.custom_max_field_align != 0) w = gb_string_append_fmt(w, "#max_field_align(%lld)", cast(long long)type->Struct.custom_max_field_align); + if (type->Struct.custom_align != 0) w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Struct.custom_align); + w = gb_string_appendc(w, "{"); + for_array(i, type->Struct.fields) { + Entity *f = type->Struct.fields[i]; + GB_ASSERT(f->kind == Entity_Variable); + if (i > 0) { + w = gb_string_appendc(w, ","); + } + w = gb_string_append_length (w, f->token.string.text, f->token.string.len); + w = gb_string_appendc (w, ":"); + w = write_type_to_canonical_string(w, f->type); + String tag = type->Struct.tags[i]; + if (tag.len != 0) { + String s = quote_to_ascii(heap_allocator(), tag); + w = gb_string_append_length(w, s.text, s.len); + gb_free(heap_allocator(), s.text); + } + } + return gb_string_appendc(w, "}"); + + case Type_BitField: + w = gb_string_appendc(w, "bit_field"); + w = write_type_to_canonical_string(w, type->BitField.backing_type); + w = gb_string_appendc(w, " {"); + for (isize i = 0; i < type->BitField.fields.count; i++) { + Entity *f = type->BitField.fields[i]; + if (i > 0) { + w = gb_string_appendc(w, ","); + } + w = gb_string_append_length(w, f->token.string.text, f->token.string.len); + w = gb_string_appendc(w, ":"); + w = write_type_to_canonical_string(w, f->type); + w = gb_string_appendc(w, "|"); + w = gb_string_appendc(w, gb_bprintf("%u", type->BitField.bit_sizes[i])); + } + return gb_string_appendc(w, " }"); + + case Type_Proc: + w = gb_string_appendc(w, "proc"); + if (default_calling_convention() != type->Proc.calling_convention) { + w = gb_string_appendc(w, "\""); + w = gb_string_appendc(w, proc_calling_convention_strings[type->Proc.calling_convention]); + w = gb_string_appendc(w, "\""); + } + + w = write_canonical_params(w, type->Proc.params); + if (type->Proc.result_count > 0) { + w = gb_string_appendc(w, "->"); + w = write_canonical_params(w, type->Proc.results); + } + return w; + + case Type_Generic: + GB_PANIC("Type_Generic should never be hit"); + return w; + + case Type_Named: + if (type->Named.type_name != nullptr) { + return write_canonical_entity_name(w, type->Named.type_name); + } else { + w = gb_string_append_length(w, type->Named.name.text, type->Named.name.len); + } + // Handle parapoly stuff here? + return w; + + default: + GB_PANIC("unknown type kind %d", type->kind); + break; + } + + return w; +} \ No newline at end of file diff --git a/src/types.cpp b/src/types.cpp index 42530ecccfc..d6dea56ad5c 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -4872,272 +4872,3 @@ gb_internal gbString type_to_string(Type *type, bool shorthand) { gb_internal gbString type_to_string_shorthand(Type *type) { return type_to_string(type, true); } - -gb_internal gbString write_type_to_canonical_string(gbString w, Type *type); -gb_internal gbString write_canonical_params(gbString w, Type *params) { - w = gb_string_appendc(w, "("); - if (params) { - GB_ASSERT(params->kind == Type_Tuple); - for_array(i, params->Tuple.variables) { - Entity *v = params->Tuple.variables[i]; - if (i > 0) { - w = gb_string_appendc(w, ","); - } - if (v->kind == Entity_Variable) { - if (v->flags&EntityFlag_CVarArg) { - w = gb_string_appendc(w, "#c_vararg"); - } - if (v->flags&EntityFlag_Ellipsis) { - Type *slice = base_type(v->type); - w = gb_string_appendc(w, ".."); - GB_ASSERT(v->type->kind == Type_Slice); - w = write_type_to_canonical_string(w, slice->Slice.elem); - } else { - w = write_type_to_canonical_string(w, v->type); - } - } else if (v->kind == Entity_TypeName) { - w = gb_string_appendc(w, "$"); - w = write_type_to_canonical_string(w, v->type); - } else if (v->kind == Entity_Constant) { - w = gb_string_appendc(w, "$$"); - w = write_exact_value_to_string(w, v->Constant.value); - } else { - GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); - } - } - } - return gb_string_appendc(w, ")"); -} - -gb_internal u64 type_hash_canonical_type(Type *type) { - if (type == nullptr) { - return 0; - } - TEMPORARY_ALLOCATOR_GUARD(); - gbString w = write_type_to_canonical_string(gb_string_make(temporary_allocator(), ""), type); - u64 hash = fnv64a(w, gb_string_length(w)); - return hash; -} - -gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { - gbString w = gb_string_make(allocator, ""); - w = write_type_to_canonical_string(w, type); - return make_string(cast(u8 const *)w, gb_string_length(w)); -} - -// NOTE(bill): This exists so that we deterministically hash a type by serializing it to a canonical string -gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { - if (type == nullptr) { - return gb_string_appendc(w, "<>"); // none/void type - } - - type = default_type(type); - GB_ASSERT(!is_type_untyped(type)); - - switch (type->kind) { - case Type_Basic: - return gb_string_append_length(w, type->Basic.name.text, type->Basic.name.len); - case Type_Pointer: - w = gb_string_append_rune(w, '^'); - return write_type_to_canonical_string(w, type->Pointer.elem); - case Type_MultiPointer: - w = gb_string_appendc(w, "[^]"); - return write_type_to_canonical_string(w, type->Pointer.elem); - case Type_SoaPointer: - w = gb_string_appendc(w, "#soa^"); - return write_type_to_canonical_string(w, type->Pointer.elem); - case Type_EnumeratedArray: - if (type->EnumeratedArray.is_sparse) { - w = gb_string_appendc(w, "#sparse"); - } - w = gb_string_append_rune(w, '['); - w = write_type_to_canonical_string(w, type->EnumeratedArray.index); - w = gb_string_append_rune(w, ']'); - return write_type_to_canonical_string(w, type->EnumeratedArray.elem); - case Type_Array: - w = gb_string_appendc(w, gb_bprintf("[%lld]", cast(long long)type->Array.count)); - return write_type_to_canonical_string(w, type->Array.elem); - case Type_Slice: - w = gb_string_appendc(w, "[]"); - return write_type_to_canonical_string(w, type->Array.elem); - case Type_DynamicArray: - w = gb_string_appendc(w, "[dynamic]"); - return write_type_to_canonical_string(w, type->DynamicArray.elem); - case Type_SimdVector: - w = gb_string_appendc(w, gb_bprintf("#simd[%lld]", cast(long long)type->SimdVector.count)); - return write_type_to_canonical_string(w, type->SimdVector.elem); - case Type_Matrix: - if (type->Matrix.is_row_major) { - w = gb_string_appendc(w, "#row_major "); - } - w = gb_string_appendc(w, gb_bprintf("matrix[%lld, %lld]", cast(long long)type->Matrix.row_count, cast(long long)type->Matrix.column_count)); - return write_type_to_canonical_string(w, type->Matrix.elem); - case Type_Map: - w = gb_string_appendc(w, "map["); - w = write_type_to_canonical_string(w, type->Map.key); - w = gb_string_appendc(w, "]"); - return write_type_to_canonical_string(w, type->Map.value); - - case Type_Enum: - w = gb_string_appendc(w, "enum"); - if (type->Enum.base_type != nullptr) { - w = gb_string_append_rune(w, ' '); - w = write_type_to_canonical_string(w, type->Enum.base_type); - w = gb_string_append_rune(w, ' '); - } - w = gb_string_append_rune(w, '{'); - for_array(i, type->Enum.fields) { - Entity *f = type->Enum.fields[i]; - GB_ASSERT(f->kind == Entity_Constant); - if (i > 0) { - w = gb_string_appendc(w, ","); - } - w = gb_string_append_length(w, f->token.string.text, f->token.string.len); - w = gb_string_appendc(w, "="); - w = write_exact_value_to_string(w, f->Constant.value); - } - return gb_string_append_rune(w, '}'); - case Type_BitSet: - w = gb_string_appendc(w, "bit_set["); - if (type->BitSet.elem == nullptr) { - w = write_type_to_canonical_string(w, type->BitSet.elem); - } else if (is_type_enum(type->BitSet.elem)) { - w = write_type_to_canonical_string(w, type->BitSet.elem); - } else { - w = gb_string_append_fmt(w, "%lld", type->BitSet.lower); - w = gb_string_append_fmt(w, "..="); - w = gb_string_append_fmt(w, "%lld", type->BitSet.upper); - } - if (type->BitSet.underlying != nullptr) { - w = gb_string_appendc(w, ";"); - w = write_type_to_canonical_string(w, type->BitSet.underlying); - } - return gb_string_appendc(w, "]"); - - case Type_Union: - w = gb_string_appendc(w, "union"); - - switch (type->Union.kind) { - case UnionType_no_nil: w = gb_string_appendc(w, "#no_nil"); break; - case UnionType_shared_nil: w = gb_string_appendc(w, "#shared_nil"); break; - } - if (type->Union.custom_align != 0) { - w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Union.custom_align); - } - w = gb_string_appendc(w, "{"); - for_array(i, type->Union.variants) { - Type *t = type->Union.variants[i]; - if (i > 0) w = gb_string_appendc(w, ", "); - w = write_type_to_canonical_string(w, t); - } - return gb_string_appendc(w, "}"); - case Type_Struct: - if (type->Struct.soa_kind != StructSoa_None) { - switch (type->Struct.soa_kind) { - case StructSoa_Fixed: w = gb_string_append_fmt(w, "#soa[%lld]", cast(long long)type->Struct.soa_count); break; - case StructSoa_Slice: w = gb_string_appendc(w, "#soa[]"); break; - case StructSoa_Dynamic: w = gb_string_appendc(w, "#soa[dynamic]"); break; - default: GB_PANIC("Unknown StructSoaKind"); break; - } - return write_type_to_canonical_string(w, type->Struct.soa_elem); - } - - w = gb_string_appendc(w, "struct"); - if (type->Struct.is_packed) w = gb_string_appendc(w, "#packed"); - if (type->Struct.is_raw_union) w = gb_string_appendc(w, "#raw_union"); - if (type->Struct.is_no_copy) w = gb_string_appendc(w, "#no_copy"); - if (type->Struct.custom_min_field_align != 0) w = gb_string_append_fmt(w, "#min_field_align(%lld)", cast(long long)type->Struct.custom_min_field_align); - if (type->Struct.custom_max_field_align != 0) w = gb_string_append_fmt(w, "#max_field_align(%lld)", cast(long long)type->Struct.custom_max_field_align); - if (type->Struct.custom_align != 0) w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Struct.custom_align); - w = gb_string_appendc(w, "{"); - for_array(i, type->Struct.fields) { - Entity *f = type->Struct.fields[i]; - GB_ASSERT(f->kind == Entity_Variable); - if (i > 0) { - w = gb_string_appendc(w, ","); - } - w = gb_string_append_length (w, f->token.string.text, f->token.string.len); - w = gb_string_appendc (w, ":"); - w = write_type_to_canonical_string(w, f->type); - String tag = type->Struct.tags[i]; - if (tag.len != 0) { - String s = quote_to_ascii(heap_allocator(), tag); - w = gb_string_append_length(w, s.text, s.len); - gb_free(heap_allocator(), s.text); - } - } - return gb_string_appendc(w, "}"); - - case Type_BitField: - w = gb_string_appendc(w, "bit_field"); - w = write_type_to_canonical_string(w, type->BitField.backing_type); - w = gb_string_appendc(w, " {"); - for (isize i = 0; i < type->BitField.fields.count; i++) { - Entity *f = type->BitField.fields[i]; - if (i > 0) { - w = gb_string_appendc(w, ","); - } - w = gb_string_append_length(w, f->token.string.text, f->token.string.len); - w = gb_string_appendc(w, ":"); - w = write_type_to_canonical_string(w, f->type); - w = gb_string_appendc(w, "|"); - w = gb_string_appendc(w, gb_bprintf("%u", type->BitField.bit_sizes[i])); - } - return gb_string_appendc(w, " }"); - - case Type_Proc: - w = gb_string_appendc(w, "proc"); - if (default_calling_convention() != type->Proc.calling_convention) { - w = gb_string_appendc(w, "\""); - w = gb_string_appendc(w, proc_calling_convention_strings[type->Proc.calling_convention]); - w = gb_string_appendc(w, "\""); - } - - w = write_canonical_params(w, type->Proc.params); - if (type->Proc.result_count > 0) { - w = gb_string_appendc(w, "->"); - w = write_canonical_params(w, type->Proc.results); - } - return w; - - case Type_Generic: - GB_PANIC("Type_Generic should never be hit"); - return w; - - case Type_Named: - if (type->Named.type_name != nullptr) { - Entity *e = type->Named.type_name; - - if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || - e->flags & EntityFlag_NotExported) { - if (e->scope->flags & ScopeFlag_Proc) { - GB_PANIC("NESTED IN PROC\n"); - } else if (e->scope->flags & ScopeFlag_File) { - GB_PANIC("PRIVATE TO FILE\n"); - } - } - if (e->pkg != nullptr) { - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); - w = gb_string_appendc(w, "."); - } - Type *params = nullptr; - Entity *parent = type_get_polymorphic_parent(type, ¶ms); - if (parent) { - w = gb_string_append_length(w, parent->token.string.text, parent->token.string.len); - w = write_canonical_params(w, params); - } else { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); - } - } else { - w = gb_string_append_length(w, type->Named.name.text, type->Named.name.len); - } - // Handle parapoly stuff here? - return w; - - default: - GB_PANIC("unknown type kind %d", type->kind); - break; - } - - return w; -} \ No newline at end of file From b5cf776830151870730d32323502084d069668c3 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 11:48:54 +0000 Subject: [PATCH 03/19] Use new name canonicalization approach --- src/llvm_backend_general.cpp | 6 ++++-- src/name_canonicalization.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index dc212e51d8d..6ff166899d3 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1568,10 +1568,12 @@ gb_internal String lb_set_nested_type_name_ir_mangled_name(Entity *e, lbProcedur gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { GB_ASSERT(m != nullptr); - if (e != nullptr && e->kind == Entity_TypeName && e->TypeName.ir_mangled_name.len != 0) { + GB_ASSERT(e != nullptr); + if (e->kind == Entity_TypeName && e->TypeName.ir_mangled_name.len != 0) { return e->TypeName.ir_mangled_name; + } else if (e->kind == Entity_Procedure && e->Procedure.link_name.len != 0) { + return e->Procedure.link_name; } - GB_ASSERT(e != nullptr); if (e->pkg == nullptr) { return e->token.string; diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index fa09f27c0af..48f7a18cb6f 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -180,7 +180,7 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { w = gb_string_appendc(w, gb_bprintf(".[%.*s].", LIT(file_name))); goto write_base_name; } - gb_printf_err("%s HERE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); + gb_printf_err("%s WEIRD ENTITY TYPE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); print_scope_flags(s); GB_PANIC("weird entity"); } From 043f9aea614b7fe5e1f62014ea34c7d3c155b0cb Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 12:40:43 +0000 Subject: [PATCH 04/19] Clean up rules for name mangling --- src/llvm_backend_general.cpp | 162 +--------------------------------- src/name_canonicalization.cpp | 81 ++++++++++++----- 2 files changed, 64 insertions(+), 179 deletions(-) diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 6ff166899d3..7fdfa0bb2e7 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1443,129 +1443,6 @@ gb_internal void lb_clone_struct_type(LLVMTypeRef dst, LLVMTypeRef src) { LLVMStructSetBody(dst, fields, field_count, LLVMIsPackedStruct(src)); } -gb_internal String lb_mangle_name(Entity *e) { -#if 1 - String name = e->token.string; - - AstPackage *pkg = e->pkg; - GB_ASSERT_MSG(pkg != nullptr, "Missing package for '%.*s'", LIT(name)); - String pkgn = pkg->name; - GB_ASSERT(!rune_is_digit(pkgn[0])); - if (pkgn == "llvm") { - pkgn = str_lit("llvm$"); - } - - isize max_len = pkgn.len + 1 + name.len + 1; - bool require_suffix_id = is_type_polymorphic(e->type, true); - - if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0) { - require_suffix_id = true; - } else if (is_blank_ident(e->token)) { - require_suffix_id = true; - }if (e->flags & EntityFlag_NotExported) { - require_suffix_id = true; - } - - if (require_suffix_id) { - max_len += 21; - } - - char *new_name = gb_alloc_array(permanent_allocator(), char, max_len); - isize new_name_len = gb_snprintf( - new_name, max_len, - "%.*s" ABI_PKG_NAME_SEPARATOR "%.*s", LIT(pkgn), LIT(name) - ); - if (require_suffix_id) { - char *str = new_name + new_name_len-1; - isize len = max_len-new_name_len; - isize extra = gb_snprintf(str, len, "-%llu", cast(unsigned long long)e->id); - new_name_len += extra-1; - } - - String mangled_name = make_string((u8 const *)new_name, new_name_len-1); - return mangled_name; -#else - gbString w = gb_string_make(gb_heap_allocator(), ""); - w = write_canonical_entity_name(w, e); - gb_printf_err(">> %s\n", w); - - String mangled_name = make_string(cast(u8 const *)w, gb_string_length(w)); - return mangled_name; -#endif -} - -gb_internal String lb_set_nested_type_name_ir_mangled_name(Entity *e, lbProcedure *p, lbModule *module) { -#if 0 - // NOTE(bill, 2020-03-08): A polymorphic procedure may take a nested type declaration - // and as a result, the declaration does not have time to determine what it should be - - GB_ASSERT(e != nullptr && e->kind == Entity_TypeName); - if (e->TypeName.ir_mangled_name.len != 0) { - return e->TypeName.ir_mangled_name; - } - GB_ASSERT((e->scope->flags & ScopeFlag_File) == 0); - - if (p == nullptr) { - Entity *proc = nullptr; - if (e->parent_proc_decl != nullptr) { - proc = e->parent_proc_decl->entity; - } else { - Scope *scope = e->scope; - while (scope != nullptr && (scope->flags & ScopeFlag_Proc) == 0) { - scope = scope->parent; - } - GB_ASSERT(scope != nullptr); - GB_ASSERT(scope->flags & ScopeFlag_Proc); - proc = scope->procedure_entity; - } - if (proc != nullptr) { - GB_ASSERT(proc->kind == Entity_Procedure); - if (proc->code_gen_procedure != nullptr) { - p = proc->code_gen_procedure; - } - } - } - - - // NOTE(bill): Generate a new name - // parent_proc.name-guid - String ts_name = e->token.string; - - if (p != nullptr) { - isize name_len = p->name.len + 1 + ts_name.len + 1 + 10 + 1; - char *name_text = gb_alloc_array(permanent_allocator(), char, name_len); - u32 guid = 1+p->module->nested_type_name_guid.fetch_add(1); - name_len = gb_snprintf(name_text, name_len, "%.*s" ABI_PKG_NAME_SEPARATOR "%.*s-%u", LIT(p->name), LIT(ts_name), guid); - - String name = make_string(cast(u8 *)name_text, name_len-1); - e->TypeName.ir_mangled_name = name; - - { - String s = type_to_canonical_string(temporary_allocator(), e->type); - gb_printf_err("1) %.*s\n", LIT(s)); - gb_printf_err("2) %.*s\n", LIT(name)); - } - return name; - } else { - // NOTE(bill): a nested type be required before its parameter procedure exists. Just give it a temp name for now - isize name_len = 9 + 1 + ts_name.len + 1 + 10 + 1; - char *name_text = gb_alloc_array(permanent_allocator(), char, name_len); - static std::atomic guid; - name_len = gb_snprintf(name_text, name_len, "_internal" ABI_PKG_NAME_SEPARATOR "%.*s-%u", LIT(ts_name), 1+guid.fetch_add(1)); - - String name = make_string(cast(u8 *)name_text, name_len-1); - e->TypeName.ir_mangled_name = name; - - { - String s = type_to_canonical_string(temporary_allocator(), e->type); - gb_printf_err("3) %.*s\n", LIT(s)); - gb_printf_err("4) %.*s\n", LIT(name)); - } - return name; - } -#endif -} - gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { GB_ASSERT(m != nullptr); GB_ASSERT(e != nullptr); @@ -1579,51 +1456,20 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { return e->token.string; } -#if 1 gbString w = gb_string_make(heap_allocator(), ""); w = write_canonical_entity_name(w, e); defer (gb_string_free(w)); - String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); -#else - if (e->kind == Entity_TypeName && (e->scope->flags & ScopeFlag_File) == 0) { - return lb_set_nested_type_name_ir_mangled_name(e, nullptr, m); - } - - String name = {}; + gb_printf_err("%s\n", w); - bool no_name_mangle = false; - - if (e->kind == Entity_Variable) { - bool is_foreign = e->Variable.is_foreign; - bool is_export = e->Variable.is_export; - no_name_mangle = e->Variable.link_name.len > 0 || is_foreign || is_export; - if (e->Variable.link_name.len > 0) { - return e->Variable.link_name; - } - } else if (e->kind == Entity_Procedure && e->Procedure.link_name.len > 0) { - return e->Procedure.link_name; - } else if (e->kind == Entity_Procedure && e->Procedure.is_export) { - no_name_mangle = true; - } - - if (!no_name_mangle) { - name = lb_mangle_name(e); + String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); - gbString w = gb_string_make(gb_heap_allocator(), ""); - w = write_canonical_entity_name(w, e); - if (w[0] == 0) { - gb_printf_err(">> %s %.*s\n", w, LIT(name)); - } - } - if (name.len == 0) { - name = e->token.string; - } -#endif if (e->kind == Entity_TypeName) { e->TypeName.ir_mangled_name = name; } else if (e->kind == Entity_Procedure) { e->Procedure.link_name = name; + } else if (e->kind == Entity_Variable) { + e->Variable.link_name = name; } return name; diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index 48f7a18cb6f..3910c573dd8 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -1,3 +1,43 @@ +/* + General Rules for canonical name mangling + + * No spaces between any values + + * normal declarations - pkg.name + * builtin names - just their normal name e.g. `i32` or `string` + * nested - pkg.parent1.parent2.name + * file private - pkg.[file_name].name + * Example: `foo.[bar.odin].Type` + * polymorphic procedure/type - pkg.foo::TYPE + * naming convention for parameters + * type + * $typeid_based_name + * $$constant_parameter + * Example: `foo.to_thing::proc(u64)->([]u8)` + * nested decl in polymorphic procedure - pkg.foo::TYPE.name + * anonymous procedures - pkg.foo.$anon123 + * 123 is the file offset in bytes + + +*/ + +#define CANONICAL_TYPE_SEPARATOR ":" +#define CANONICAL_NAME_SEPARATOR "." + +#define CANONICAL_PARAM_SEPARATOR "," + +#define CANONICAL_PARAM_TYPEID "$" +#define CANONICAL_PARAM_CONST "$$" + +#define CANONICAL_PARAM_C_VARARG "#c_vararg" +#define CANONICAL_PARAM_VARARG ".." + +#define CANONICAL_FIELD_SEPARATOR "," + +#define CANONICAL_ANON_PREFIX "$anon" + +#define CANONICAL_NONE_TYPE "<>" + gb_internal gbString write_type_to_canonical_string(gbString w, Type *type); gb_internal gbString write_canonical_params(gbString w, Type *params) { w = gb_string_appendc(w, "("); @@ -6,25 +46,25 @@ gb_internal gbString write_canonical_params(gbString w, Type *params) { for_array(i, params->Tuple.variables) { Entity *v = params->Tuple.variables[i]; if (i > 0) { - w = gb_string_appendc(w, ","); + w = gb_string_appendc(w, CANONICAL_PARAM_SEPARATOR); } if (v->kind == Entity_Variable) { if (v->flags&EntityFlag_CVarArg) { - w = gb_string_appendc(w, "#c_vararg"); + w = gb_string_appendc(w, CANONICAL_PARAM_C_VARARG); } if (v->flags&EntityFlag_Ellipsis) { Type *slice = base_type(v->type); - w = gb_string_appendc(w, ".."); + w = gb_string_appendc(w, CANONICAL_PARAM_VARARG); GB_ASSERT(v->type->kind == Type_Slice); w = write_type_to_canonical_string(w, slice->Slice.elem); } else { w = write_type_to_canonical_string(w, v->type); } } else if (v->kind == Entity_TypeName) { - w = gb_string_appendc(w, "$"); + w = gb_string_appendc(w, CANONICAL_PARAM_TYPEID); w = write_type_to_canonical_string(w, v->type); } else if (v->kind == Entity_Constant) { - w = gb_string_appendc(w, "$$"); + w = gb_string_appendc(w, CANONICAL_PARAM_CONST); w = write_exact_value_to_string(w, v->Constant.value); } else { GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); @@ -88,24 +128,24 @@ gb_internal gbString write_canonical_parent_prefix(gbString w, Entity *e, bool i w = write_canonical_parent_prefix(w, p); w = gb_string_append_length(w, p->token.string.text, p->token.string.len); if (is_type_polymorphic(p->type)) { - w = gb_string_appendc(w, "::"); + w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); w = write_type_to_canonical_string(w, p->type); } - w = gb_string_appendc(w, "."); + w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); } else if (e->pkg && (scope_lookup_current(e->pkg->scope, e->token.string) == e)) { w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { gb_string_appendc(w, "$"); } - w = gb_string_appendc(w, "."); + w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); } else { String file_name = filename_without_directory(e->file->fullpath); w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { gb_string_appendc(w, "$"); } - w = gb_string_appendc(w, gb_bprintf(".[%.*s].", LIT(file_name))); + w = gb_string_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); } } else if (e->kind == Entity_Procedure) { if (e->Procedure.is_export || e->Procedure.is_foreign) { @@ -116,17 +156,17 @@ gb_internal gbString write_canonical_parent_prefix(gbString w, Entity *e, bool i } if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { - w = gb_string_appendc(w, gb_bprintf("$anon%d", e->token.pos.offset)); + w = gb_string_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "%d", e->token.pos.offset)); } else { w = gb_string_append_length(w, e->token.string.text, e->token.string.len); } if (is_type_polymorphic(e->type)) { - w = gb_string_appendc(w, "::"); + w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); w = write_type_to_canonical_string(w, e->type); } if (!ignore_final_dot) { - w = gb_string_appendc(w, "."); + w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); } return w; @@ -177,7 +217,7 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { if (e->pkg->name == "llvm") { gb_string_appendc(w, "$"); } - w = gb_string_appendc(w, gb_bprintf(".[%.*s].", LIT(file_name))); + w = gb_string_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); goto write_base_name; } gb_printf_err("%s WEIRD ENTITY TYPE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); @@ -186,7 +226,7 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { } if (e->pkg != nullptr) { w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); - w = gb_string_appendc(w, "."); + w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); } write_base_name: @@ -210,7 +250,7 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { case Entity_Variable: w = gb_string_append_length(w, e->token.string.text, e->token.string.len); if (is_type_polymorphic(e->type)) { - w = gb_string_appendc(w, "::"); + w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); w = write_type_to_canonical_string(w, e->type); } return w; @@ -225,7 +265,7 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { // NOTE(bill): This exists so that we deterministically hash a type by serializing it to a canonical string gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { if (type == nullptr) { - return gb_string_appendc(w, "<>"); // none/void type + return gb_string_appendc(w, CANONICAL_NONE_TYPE); // none/void type } type = default_type(type); @@ -287,7 +327,7 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { Entity *f = type->Enum.fields[i]; GB_ASSERT(f->kind == Entity_Constant); if (i > 0) { - w = gb_string_appendc(w, ","); + w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); } w = gb_string_append_length(w, f->token.string.text, f->token.string.len); w = gb_string_appendc(w, "="); @@ -324,7 +364,7 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { w = gb_string_appendc(w, "{"); for_array(i, type->Union.variants) { Type *t = type->Union.variants[i]; - if (i > 0) w = gb_string_appendc(w, ", "); + if (i > 0) w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); w = write_type_to_canonical_string(w, t); } return gb_string_appendc(w, "}"); @@ -351,7 +391,7 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { Entity *f = type->Struct.fields[i]; GB_ASSERT(f->kind == Entity_Variable); if (i > 0) { - w = gb_string_appendc(w, ","); + w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); } w = gb_string_append_length (w, f->token.string.text, f->token.string.len); w = gb_string_appendc (w, ":"); @@ -372,7 +412,7 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { for (isize i = 0; i < type->BitField.fields.count; i++) { Entity *f = type->BitField.fields[i]; if (i > 0) { - w = gb_string_appendc(w, ","); + w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); } w = gb_string_append_length(w, f->token.string.text, f->token.string.len); w = gb_string_appendc(w, ":"); @@ -407,7 +447,6 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { } else { w = gb_string_append_length(w, type->Named.name.text, type->Named.name.len); } - // Handle parapoly stuff here? return w; default: From 9b26bb2e6a1e32e17102550b481c6909549b87e5 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 13:10:38 +0000 Subject: [PATCH 05/19] Begin work on hash types --- src/checker.cpp | 44 +++++++++++++++++++++++++++++++++-- src/checker.hpp | 7 +++++- src/llvm_backend.cpp | 7 +++--- src/llvm_backend_general.cpp | 2 -- src/llvm_backend_type.cpp | 6 ++--- src/name_canonicalization.cpp | 25 ++++++++++++++++---- src/ptr_set.cpp | 10 ++++---- src/types.cpp | 36 ++++++++++++++++++++++++++-- 8 files changed, 114 insertions(+), 23 deletions(-) diff --git a/src/checker.cpp b/src/checker.cpp index c74a72a1416..054d6aeb032 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -3,7 +3,10 @@ #include "entity.cpp" #include "types.cpp" -String get_final_microarchitecture(); + +gb_internal u64 type_hash_canonical_type(Type *type); + +gb_internal String get_final_microarchitecture(); gb_internal void check_expr(CheckerContext *c, Operand *operand, Ast *expression); gb_internal void check_expr_or_type(CheckerContext *c, Operand *operand, Ast *expression, Type *type_hint=nullptr); @@ -2037,7 +2040,8 @@ gb_internal void add_type_info_type_internal(CheckerContext *c, Type *t) { // Unique entry // NOTE(bill): map entries grow linearly and in order ti_index = c->info->type_info_types.count; - array_add(&c->info->type_info_types, t); + Type_Info_Type tt = {t, type_hash_canonical_type(t)}; + array_add(&c->info->type_info_types, tt); } map_set(&c->checker->info.type_info_map, t, ti_index); @@ -6725,6 +6729,42 @@ gb_internal void check_parsed_files(Checker *c) { add_type_and_value(&c->builtin_ctx, u.expr, u.info->mode, u.info->type, u.info->value); } + TIME_SECTION("check for type hash collisions"); + { + PtrSet found = {}; + ptr_set_init(&found, c->info.type_info_types.count); + defer (ptr_set_destroy(&found)); + for (auto const &tt : c->info.type_info_types) { + if (ptr_set_update(&found, cast(uintptr)tt.hash)) { + Type *other_type = nullptr; + for (auto const &other : c->info.type_info_types) { + if (&tt == &other) { + continue; + } + if (cast(uintptr)other.hash == cast(uintptr)tt.hash && + !are_types_identical(tt.type, other.type)) { + other_type = other.type; + break; + } + } + if (other_type != nullptr) { + String ts = type_to_canonical_string(temporary_allocator(), tt.type); + String os = type_to_canonical_string(temporary_allocator(), other_type); + if (ts != os) { + compiler_error("%s found type hash collision with %s (hash = %llu)\n" + "%s vs %s\n", + type_to_string(tt.type), type_to_string(other_type), cast(unsigned long long)tt.hash, + temp_canonical_string(tt.type), + temp_canonical_string(other_type) + ); + } + } + } + } + } + + + TIME_SECTION("sort init and fini procedures"); check_sort_init_and_fini_procedures(c); diff --git a/src/checker.hpp b/src/checker.hpp index 472ab8e505c..c9a0c3302b3 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -409,6 +409,11 @@ struct Defineable { String pos_str; }; +struct Type_Info_Type { + Type *type; + u64 hash; // see: type_hash_canonical_type +}; + // CheckerInfo stores all the symbol information for a type-checked program struct CheckerInfo { Checker *checker; @@ -453,7 +458,7 @@ struct CheckerInfo { PtrMap gen_types; BlockingMutex type_info_mutex; // NOT recursive - Array type_info_types; + Array type_info_types; PtrMap type_info_map; BlockingMutex foreign_mutex; // NOT recursive diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 0896ea8c799..8cb480dd4b6 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -24,7 +24,7 @@ #include "llvm_backend_stmt.cpp" #include "llvm_backend_proc.cpp" -String get_default_microarchitecture() { +gb_internal String get_default_microarchitecture() { String default_march = str_lit("generic"); if (build_context.metrics.arch == TargetArch_amd64) { // NOTE(bill): x86-64-v2 is more than enough for everyone @@ -47,7 +47,7 @@ String get_default_microarchitecture() { return default_march; } -String get_final_microarchitecture() { +gb_internal String get_final_microarchitecture() { BuildContext *bc = &build_context; String microarch = bc->microarch; @@ -3182,7 +3182,8 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { isize count = 0; isize offsets_extra = 0; - for (Type *t : m->info->type_info_types) { + for (auto const &tt : m->info->type_info_types) { + Type *t = tt.type; isize index = lb_type_info_index(m->info, t, false); if (index < 0) { continue; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 7fdfa0bb2e7..b9ae3d2540c 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1460,8 +1460,6 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { w = write_canonical_entity_name(w, e); defer (gb_string_free(w)); - gb_printf_err("%s\n", w); - String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); if (e->kind == Entity_TypeName) { diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 6c12b37be41..6f9f94fbd55 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -12,7 +12,7 @@ gb_internal isize lb_type_info_index(CheckerInfo *info, Type *type, bool err_on_ gb_printf_err("NOT FOUND lb_type_info_index:\n\t%s\n\t@ index %td\n\tmax count: %u\nFound:\n", type_to_string(type), index, set->count); for (auto const &entry : *set) { isize type_info_index = entry.key; - gb_printf_err("\t%s\n", type_to_string(info->type_info_types[type_info_index])); + gb_printf_err("\t%s\n", type_to_string(info->type_info_types[type_info_index].type)); } GB_PANIC("NOT FOUND"); } @@ -280,7 +280,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ LLVMTypeRef *modified_types = lb_setup_modified_types_for_type_info(m, global_type_info_data_entity_count); defer (gb_free(heap_allocator(), modified_types)); for_array(type_info_type_index, info->type_info_types) { - Type *t = info->type_info_types[type_info_type_index]; + Type *t = info->type_info_types[type_info_type_index].type; if (t == nullptr || t == t_invalid) { continue; } @@ -343,7 +343,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ }; for_array(type_info_type_index, info->type_info_types) { - Type *t = info->type_info_types[type_info_type_index]; + Type *t = info->type_info_types[type_info_type_index].type; if (t == nullptr || t == t_invalid) { continue; } diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index 3910c573dd8..8edb5e96897 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -7,7 +7,7 @@ * builtin names - just their normal name e.g. `i32` or `string` * nested - pkg.parent1.parent2.name * file private - pkg.[file_name].name - * Example: `foo.[bar.odin].Type` + * Example: `pkg.[file.odin].Type` * polymorphic procedure/type - pkg.foo::TYPE * naming convention for parameters * type @@ -15,7 +15,7 @@ * $$constant_parameter * Example: `foo.to_thing::proc(u64)->([]u8)` * nested decl in polymorphic procedure - pkg.foo::TYPE.name - * anonymous procedures - pkg.foo.$anon123 + * anonymous procedures - pkg.foo.$anon[file.odin:123] * 123 is the file offset in bytes @@ -38,7 +38,12 @@ #define CANONICAL_NONE_TYPE "<>" + gb_internal gbString write_type_to_canonical_string(gbString w, Type *type); +gb_internal u64 type_hash_canonical_type(Type *type); +gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type); +gb_internal gbString temp_canonical_string(Type *type); + gb_internal gbString write_canonical_params(gbString w, Type *params) { w = gb_string_appendc(w, "("); if (params) { @@ -81,7 +86,7 @@ gb_internal u64 type_hash_canonical_type(Type *type) { TEMPORARY_ALLOCATOR_GUARD(); gbString w = write_type_to_canonical_string(gb_string_make(temporary_allocator(), ""), type); u64 hash = fnv64a(w, gb_string_length(w)); - return hash; + return hash ? hash : 1; } gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { @@ -90,6 +95,11 @@ gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { return make_string(cast(u8 const *)w, gb_string_length(w)); } +gb_internal gbString temp_canonical_string(Type *type) { + gbString w = gb_string_make(temporary_allocator(), ""); + return write_type_to_canonical_string(w, type); +} + gb_internal void print_scope_flags(Scope *s) { if (s->flags & ScopeFlag_Pkg) gb_printf_err("Pkg "); if (s->flags & ScopeFlag_Builtin) gb_printf_err("Builtin "); @@ -156,7 +166,8 @@ gb_internal gbString write_canonical_parent_prefix(gbString w, Entity *e, bool i } if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { - w = gb_string_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "%d", e->token.pos.offset)); + String file_name = filename_without_directory(e->file->fullpath); + w = gb_string_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset)); } else { w = gb_string_append_length(w, e->token.string.text, e->token.string.len); } @@ -449,8 +460,12 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { } return w; + case Type_Tuple: + w = gb_string_appendc(w, "params"); + w = write_canonical_params(w, type); + return w; default: - GB_PANIC("unknown type kind %d", type->kind); + GB_PANIC("unknown type kind %d %.*s", type->kind, LIT(type_strings[type->kind])); break; } diff --git a/src/ptr_set.cpp b/src/ptr_set.cpp index ff4befc37a0..5097e2bb632 100644 --- a/src/ptr_set.cpp +++ b/src/ptr_set.cpp @@ -42,7 +42,7 @@ gb_internal void ptr_set_destroy(PtrSet *s) { template gb_internal isize ptr_set__find(PtrSet *s, T ptr) { - GB_ASSERT(ptr != nullptr); + GB_ASSERT(ptr != 0); if (s->count != 0) { #if 0 for (usize i = 0; i < s->capacity; i++) { @@ -58,7 +58,7 @@ gb_internal isize ptr_set__find(PtrSet *s, T ptr) { T key = s->keys[hash_index]; if (key == ptr) { return hash_index; - } else if (key == nullptr) { + } else if (key == 0) { return -1; } hash_index = (hash_index+1)&mask; @@ -122,7 +122,7 @@ gb_internal bool ptr_set_update(PtrSet *s, T ptr) { // returns true if it pre for (usize i = 0; i < s->capacity; i++) { T *key = &s->keys[hash_index]; GB_ASSERT(*key != ptr); - if (*key == (T)PtrSet::TOMBSTONE || *key == nullptr) { + if (*key == (T)PtrSet::TOMBSTONE || *key == 0) { *key = ptr; s->count++; return false; @@ -169,7 +169,7 @@ struct PtrSetIterator { return *this; } T key = set->keys[index]; - if (key != nullptr && key != (T)PtrSet::TOMBSTONE) { + if (key != 0 && key != (T)PtrSet::TOMBSTONE) { return *this; } } @@ -191,7 +191,7 @@ gb_internal PtrSetIterator begin(PtrSet &set) noexcept { usize index = 0; while (index < set.capacity) { T key = set.keys[index]; - if (key != nullptr && key != (T)PtrSet::TOMBSTONE) { + if (key != 0 && key != (T)PtrSet::TOMBSTONE) { break; } index++; diff --git a/src/types.cpp b/src/types.cpp index d6dea56ad5c..15e1bcf4595 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -2774,7 +2774,37 @@ gb_internal bool are_types_identical_internal(Type *x, Type *y, bool check_tuple case Type_Enum: - return x == y; // NOTE(bill): All enums are unique + if (x == y) { + return true; + } + if (x->Enum.fields.count != y->Enum.fields.count) { + return false; + } + if (!are_types_identical(x->Enum.base_type, y->Enum.base_type)) { + return false; + } + if (x->Enum.min_value_index != y->Enum.min_value_index) { + return false; + } + if (x->Enum.max_value_index != y->Enum.max_value_index) { + return false; + } + + for (isize i = 0; i < x->Enum.fields.count; i++) { + Entity *a = x->Enum.fields[i]; + Entity *b = y->Enum.fields[i]; + if (a->token.string != b->token.string) { + return false; + } + GB_ASSERT(a->kind == b->kind); + GB_ASSERT(a->kind == Entity_Constant); + bool same = compare_exact_values(Token_CmpEq, a->Constant.value, b->Constant.value); + if (!same) { + return false; + } + } + + return true; case Type_Union: if (x->Union.variants.count == y->Union.variants.count && @@ -2832,7 +2862,9 @@ gb_internal bool are_types_identical_internal(Type *x, Type *y, bool check_tuple return false; } } - return are_types_identical(x->Struct.polymorphic_params, y->Struct.polymorphic_params); + // TODO(bill): Which is the correct logic here? + // return are_types_identical(x->Struct.polymorphic_params, y->Struct.polymorphic_params); + return true; } break; From b8f057951c47ccb07316fcd936dba9b71e052d1f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 13:46:17 +0000 Subject: [PATCH 06/19] Begin work on `TypeSet` --- src/checker.cpp | 21 +--- src/checker.hpp | 21 +++- src/name_canonicalization.cpp | 221 ++++++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+), 17 deletions(-) diff --git a/src/checker.cpp b/src/checker.cpp index 054d6aeb032..6ceb3148928 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1363,6 +1363,7 @@ gb_internal void init_checker_info(CheckerInfo *i) { map_init(&i->gen_types); array_init(&i->type_info_types, a); map_init(&i->type_info_map); + type_set_init(&i->type_info_set); string_map_init(&i->files); string_map_init(&i->packages); array_init(&i->variable_init_order, a); @@ -1397,6 +1398,7 @@ gb_internal void destroy_checker_info(CheckerInfo *i) { map_destroy(&i->gen_types); array_free(&i->type_info_types); map_destroy(&i->type_info_map); + type_set_destroy(&i->type_info_set); string_map_destroy(&i->files); string_map_destroy(&i->packages); array_free(&i->variable_init_order); @@ -2040,7 +2042,7 @@ gb_internal void add_type_info_type_internal(CheckerContext *c, Type *t) { // Unique entry // NOTE(bill): map entries grow linearly and in order ti_index = c->info->type_info_types.count; - Type_Info_Type tt = {t, type_hash_canonical_type(t)}; + TypeInfoPair tt = {t, type_hash_canonical_type(t)}; array_add(&c->info->type_info_types, tt); } map_set(&c->checker->info.type_info_map, t, ti_index); @@ -2293,22 +2295,11 @@ gb_internal void add_min_dep_type_info(Checker *c, Type *t) { return; } - auto *set = &c->info.minimum_dependency_type_info_set; - - isize ti_index = type_info_index(&c->info, t, false); - if (ti_index < 0) { - add_type_info_type(&c->builtin_ctx, t); // Missing the type information - ti_index = type_info_index(&c->info, t, false); - } - GB_ASSERT(ti_index >= 0); - // IMPORTANT NOTE(bill): this must be copied as `map_set` takes a const ref - // and effectively assigns the `+1` of the value - isize const count = set->count; - if (map_set_if_not_previously_exists(set, ti_index+1, count)) { - // Type already exists; - return; + if (type_set_update(&c->info.type_info_set, t)) { + // return; } + // Add nested types if (t->kind == Type_Named) { // NOTE(bill): Just in case diff --git a/src/checker.hpp b/src/checker.hpp index c9a0c3302b3..725c1ccf5e0 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -409,11 +409,27 @@ struct Defineable { String pos_str; }; -struct Type_Info_Type { +struct TypeInfoPair { Type *type; u64 hash; // see: type_hash_canonical_type }; +struct TypeSet { + TypeInfoPair *keys; + usize count; + usize capacity; +}; + +gb_internal void type_set_init (TypeSet *s, isize capacity = 16); +gb_internal void type_set_destroy(TypeSet *s); +gb_internal Type *type_set_add (TypeSet *s, Type *ptr); +gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed +gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed +gb_internal bool type_set_exists (TypeSet *s, Type *ptr); +gb_internal void type_set_remove (TypeSet *s, Type *ptr); +gb_internal void type_set_clear (TypeSet *s); +gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); + // CheckerInfo stores all the symbol information for a type-checked program struct CheckerInfo { Checker *checker; @@ -458,8 +474,9 @@ struct CheckerInfo { PtrMap gen_types; BlockingMutex type_info_mutex; // NOT recursive - Array type_info_types; + Array type_info_types; PtrMap type_info_map; + TypeSet type_info_set; BlockingMutex foreign_mutex; // NOT recursive StringMap foreigns; diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index 8edb5e96897..b83441b19b8 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -44,6 +44,227 @@ gb_internal u64 type_hash_canonical_type(Type *type); gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type); gb_internal gbString temp_canonical_string(Type *type); + +struct TypeInfoPair; +struct TypeSet; + +static constexpr u64 TYPE_SET_TOMBSTONE = ~(u64)(0ull); + +gb_internal void type_set_init (TypeSet *s, isize capacity); +gb_internal void type_set_destroy(TypeSet *s); +gb_internal Type *type_set_add (TypeSet *s, Type *ptr); +gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed +gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed +gb_internal bool type_set_exists (TypeSet *s, Type *ptr); +gb_internal void type_set_remove (TypeSet *s, Type *ptr); +gb_internal void type_set_clear (TypeSet *s); +gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); + +gb_internal gbAllocator type_set_allocator(void) { + return heap_allocator(); +} + +struct TypeSetIterator { + TypeSet *set; + usize index; + + TypeSetIterator &operator++() noexcept { + for (;;) { + ++index; + if (set->capacity == index) { + return *this; + } + TypeInfoPair key = set->keys[index]; + if (key.hash != 0 && key.hash != TYPE_SET_TOMBSTONE) { + return *this; + } + } + } + + bool operator==(TypeSetIterator const &other) const noexcept { + return this->set == other.set && this->index == other.index; + } + + + operator TypeInfoPair *() const { + return &set->keys[index]; + } +}; + + +gb_internal TypeSetIterator begin(TypeSet &set) noexcept { + usize index = 0; + while (index < set.capacity) { + TypeInfoPair key = set.keys[index]; + if (key.hash != 0 && key.hash != TYPE_SET_TOMBSTONE) { + break; + } + index++; + } + return TypeSetIterator{&set, index}; +} +gb_internal TypeSetIterator end(TypeSet &set) noexcept { + return TypeSetIterator{&set, set.capacity}; +} + + +gb_internal void type_set_init(TypeSet *s, isize capacity) { + GB_ASSERT(s->keys == nullptr); + if (capacity != 0) { + capacity = next_pow2_isize(gb_max(16, capacity)); + s->keys = gb_alloc_array(type_set_allocator(), TypeInfoPair, capacity); + // This memory will be zeroed, no need to explicitly zero it + } + s->count = 0; + s->capacity = capacity; +} + +gb_internal void type_set_destroy(TypeSet *s) { + gb_free(type_set_allocator(), s->keys); + s->keys = nullptr; + s->count = 0; + s->capacity = 0; +} + + +gb_internal isize type_set__find(TypeSet *s, TypeInfoPair pair) { + GB_ASSERT(pair.type != nullptr); + GB_ASSERT(pair.hash != 0); + if (s->count != 0) { + usize hash = pair.hash; + usize mask = s->capacity-1; + usize hash_index = cast(usize)hash & mask; + for (usize i = 0; i < s->capacity; i++) { + Type *key = s->keys[hash_index].type; + if (are_types_identical(key, pair.type)) { + return hash_index; + } else if (key == 0) { + return -1; + } + hash_index = (hash_index+1)&mask; + } + } + return -1; +} +gb_internal isize type_set__find(TypeSet *s, Type *ptr) { + GB_ASSERT(ptr != 0); + if (s->count != 0) { + usize hash = cast(usize)type_hash_canonical_type(ptr); + usize mask = s->capacity-1; + usize hash_index = cast(usize)hash & mask; + for (usize i = 0; i < s->capacity; i++) { + Type *key = s->keys[hash_index].type; + if (are_types_identical(key, ptr)) { + return hash_index; + } else if (key == 0) { + return -1; + } + hash_index = (hash_index+1)&mask; + } + } + return -1; +} + +gb_internal bool type_set__full(TypeSet *s) { + return 0.75f * s->capacity <= s->count; +} + +gb_internal gb_inline void type_set_grow(TypeSet *old_set) { + if (old_set->capacity == 0) { + type_set_init(old_set); + return; + } + + TypeSet new_set = {}; + type_set_init(&new_set, gb_max(old_set->capacity<<1, 16)); + + for (TypeInfoPair const &set : *old_set) { + bool was_new = type_set_update(&new_set, set); + GB_ASSERT(!was_new); + } + GB_ASSERT(old_set->count == new_set.count); + + type_set_destroy(old_set); + + *old_set = new_set; +} + + +gb_internal gb_inline bool type_set_exists(TypeSet *s, Type *ptr) { + return type_set__find(s, ptr) >= 0; +} +gb_internal gb_inline bool type_set_exists(TypeSet *s, TypeInfoPair pair) { + return type_set__find(s, pair) >= 0; +} +gb_internal gb_inline TypeInfoPair *type_set_retrieve(TypeSet *s, Type *type) { + isize index = type_set__find(s, type); + if (index >= 0) { + return &s->keys[index]; + } + return nullptr; +} + + +gb_internal bool type_set_update(TypeSet *s, TypeInfoPair pair) { // returns true if it previously existsed + if (type_set_exists(s, pair)) { + return true; + } + + if (s->keys == nullptr) { + type_set_init(s); + } else if (type_set__full(s)) { + type_set_grow(s); + } + GB_ASSERT(s->count < s->capacity); + GB_ASSERT(s->capacity >= 0); + + usize mask = s->capacity-1; + usize hash = cast(usize)pair.hash; + usize hash_index = (cast(usize)hash) & mask; + GB_ASSERT(hash_index < s->capacity); + for (usize i = 0; i < s->capacity; i++) { + TypeInfoPair *key = &s->keys[hash_index]; + GB_ASSERT(!are_types_identical(key->type, pair.type)); + if (key->hash == TYPE_SET_TOMBSTONE || key->hash == 0) { + *key = pair; + s->count++; + return false; + } + hash_index = (hash_index+1)&mask; + } + + GB_PANIC("ptr set out of memory"); + return false; +} + +gb_internal bool type_set_update(TypeSet *s, Type *ptr) { // returns true if it previously existsed + TypeInfoPair pair = {ptr, type_hash_canonical_type(ptr)}; + return type_set_update(s, pair); +} + + +gb_internal Type *type_set_add(TypeSet *s, Type *ptr) { + type_set_update(s, ptr); + return ptr; +} + + +gb_internal void type_set_remove(TypeSet *s, Type *ptr) { + isize index = type_set__find(s, ptr); + if (index >= 0) { + GB_ASSERT(s->count > 0); + s->keys[index].type = nullptr; + s->keys[index].hash = TYPE_SET_TOMBSTONE; + s->count--; + } +} + +gb_internal gb_inline void type_set_clear(TypeSet *s) { + s->count = 0; + gb_zero_size(s->keys, s->capacity*gb_size_of(*s->keys)); +} + + gb_internal gbString write_canonical_params(gbString w, Type *params) { w = gb_string_appendc(w, "("); if (params) { From 4a29d9bb845050c483e537c7a0d6b2889af0f7bc Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 17 Feb 2025 16:29:42 +0000 Subject: [PATCH 07/19] Simplify type info table construction --- src/checker.cpp | 162 +++++++++++++++++++++------------- src/checker.hpp | 17 ++-- src/llvm_backend.cpp | 4 +- src/llvm_backend_type.cpp | 25 +++--- src/name_canonicalization.cpp | 23 ++++- 5 files changed, 146 insertions(+), 85 deletions(-) diff --git a/src/checker.cpp b/src/checker.cpp index 6ceb3148928..1c7126e9ac6 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -828,9 +828,15 @@ gb_internal void add_dependency(CheckerInfo *info, DeclInfo *d, Entity *e) { rw_mutex_unlock(&d->deps_mutex); } gb_internal void add_type_info_dependency(CheckerInfo *info, DeclInfo *d, Type *type) { - if (d == nullptr) { + if (d == nullptr || type == nullptr) { return; } + if (type->kind == Type_Named) { + Entity *e = type->Named.type_name; + if (e->TypeName.is_type_alias) { + type = type->Named.base; + } + } rw_mutex_lock(&d->type_info_deps_mutex); ptr_set_add(&d->type_info_deps, type); rw_mutex_unlock(&d->type_info_deps_mutex); @@ -1361,9 +1367,12 @@ gb_internal void init_checker_info(CheckerInfo *i) { string_map_init(&i->foreigns); // map_init(&i->gen_procs); map_init(&i->gen_types); + array_init(&i->type_info_types, a); - map_init(&i->type_info_map); - type_set_init(&i->type_info_set); + type_set_init(&i->min_dep_type_info_set); + map_init(&i->minimum_dependency_type_info_index_map); + + // map_init(&i->type_info_map); string_map_init(&i->files); string_map_init(&i->packages); array_init(&i->variable_init_order, a); @@ -1396,9 +1405,11 @@ gb_internal void destroy_checker_info(CheckerInfo *i) { string_map_destroy(&i->foreigns); // map_destroy(&i->gen_procs); map_destroy(&i->gen_types); + array_free(&i->type_info_types); - map_destroy(&i->type_info_map); - type_set_destroy(&i->type_info_set); + type_set_destroy(&i->min_dep_type_info_set); + map_destroy(&i->minimum_dependency_type_info_index_map); + string_map_destroy(&i->files); string_map_destroy(&i->packages); array_free(&i->variable_init_order); @@ -1632,41 +1643,36 @@ gb_internal void check_remove_expr_info(CheckerContext *c, Ast *e) { } } - -gb_internal isize type_info_index(CheckerInfo *info, Type *type, bool error_on_failure) { - type = default_type(type); - if (type == t_llvm_bool) { - type = t_bool; - } - - mutex_lock(&info->type_info_mutex); +gb_internal isize type_info_index(CheckerInfo *info, TypeInfoPair pair, bool error_on_failure) { + mutex_lock(&info->minimum_dependency_type_info_mutex); isize entry_index = -1; - isize *found_entry_index = map_get(&info->type_info_map, type); + uintptr hash = cast(uintptr)pair.hash; + isize *found_entry_index = map_get(&info->minimum_dependency_type_info_index_map, hash); if (found_entry_index) { entry_index = *found_entry_index; } - if (entry_index < 0) { - // NOTE(bill): Do manual linear search - for (auto const &e : info->type_info_map) { - if (are_types_identical_unique_tuples(e.key, type)) { - entry_index = e.value; - // NOTE(bill): Add it to the search map - map_set(&info->type_info_map, type, entry_index); - break; - } - } - } - - mutex_unlock(&info->type_info_mutex); + mutex_unlock(&info->minimum_dependency_type_info_mutex); if (error_on_failure && entry_index < 0) { - compiler_error("Type_Info for '%s' could not be found", type_to_string(type)); + compiler_error("Type_Info for '%s' could not be found", type_to_string(pair.type)); } return entry_index; } +gb_internal isize type_info_index(CheckerInfo *info, Type *type, bool error_on_failure) { + type = default_type(type); + if (type == t_llvm_bool) { + type = t_bool; + } + + u64 hash = type_hash_canonical_type(type); + return type_info_index(info, {type, hash}, error_on_failure); +} + + + gb_internal void add_untyped(CheckerContext *c, Ast *expr, AddressingMode mode, Type *type, ExactValue const &value) { if (expr == nullptr) { return; @@ -2018,8 +2024,12 @@ gb_internal void add_type_info_type_internal(CheckerContext *c, Type *t) { } add_type_info_dependency(c->info, c->decl, t); - +#if 0 MUTEX_GUARD_BLOCK(&c->info->type_info_mutex) { + if (type_set_update(&c->info->type_info_set, t)) { + // return; + } + auto found = map_get(&c->info->type_info_map, t); if (found != nullptr) { // Types have already been added @@ -2238,6 +2248,7 @@ gb_internal void add_type_info_type_internal(CheckerContext *c, Type *t) { GB_PANIC("Unhandled type: %*.s %d", LIT(type_strings[bt->kind]), bt->kind); break; } +#endif } @@ -2295,11 +2306,10 @@ gb_internal void add_min_dep_type_info(Checker *c, Type *t) { return; } - if (type_set_update(&c->info.type_info_set, t)) { - // return; + if (type_set_update(&c->info.min_dep_type_info_set, t)) { + return; } - // Add nested types if (t->kind == Type_Named) { // NOTE(bill): Just in case @@ -2697,7 +2707,6 @@ gb_internal void generate_minimum_dependency_set(Checker *c, Entity *start) { isize min_dep_set_cap = next_pow2_isize(entity_count*4); // empirically determined factor ptr_set_init(&c->info.minimum_dependency_set, min_dep_set_cap); - map_init(&c->info.minimum_dependency_type_info_set); #define FORCE_ADD_RUNTIME_ENTITIES(condition, ...) do { \ if (condition) { \ @@ -6720,39 +6729,70 @@ gb_internal void check_parsed_files(Checker *c) { add_type_and_value(&c->builtin_ctx, u.expr, u.info->mode, u.info->type, u.info->value); } - TIME_SECTION("check for type hash collisions"); + TIME_SECTION("initilize type info array"); { - PtrSet found = {}; - ptr_set_init(&found, c->info.type_info_types.count); - defer (ptr_set_destroy(&found)); - for (auto const &tt : c->info.type_info_types) { - if (ptr_set_update(&found, cast(uintptr)tt.hash)) { - Type *other_type = nullptr; - for (auto const &other : c->info.type_info_types) { - if (&tt == &other) { - continue; - } - if (cast(uintptr)other.hash == cast(uintptr)tt.hash && - !are_types_identical(tt.type, other.type)) { - other_type = other.type; - break; - } - } - if (other_type != nullptr) { - String ts = type_to_canonical_string(temporary_allocator(), tt.type); - String os = type_to_canonical_string(temporary_allocator(), other_type); - if (ts != os) { - compiler_error("%s found type hash collision with %s (hash = %llu)\n" - "%s vs %s\n", - type_to_string(tt.type), type_to_string(other_type), cast(unsigned long long)tt.hash, - temp_canonical_string(tt.type), - temp_canonical_string(other_type) - ); + for (auto const &tt : c->info.min_dep_type_info_set) { + array_add(&c->info.type_info_types, tt); + } + array_sort(c->info.type_info_types, type_info_pair_cmp); + + map_reserve(&c->info.minimum_dependency_type_info_index_map, c->info.type_info_types.count); + + for_array(i, c->info.type_info_types) { + auto const &tt = c->info.type_info_types[i]; + bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, cast(uintptr)tt.hash, i); + if (exists) { + for (auto const &entry : c->info.minimum_dependency_type_info_index_map) { + if (entry.key == cast(uintptr)tt.hash) { + auto const &other = c->info.type_info_types[entry.value]; + if (!are_types_identical_unique_tuples(tt.type, other.type)) { + gbString t = temp_canonical_string(tt.type); + gbString o = temp_canonical_string(other.type); + GB_PANIC("%s (%s) %llu vs %s (%s) %llu", + type_to_string(tt.type, false), t, cast(unsigned long long)tt.hash, + type_to_string(other.type, false), o, cast(unsigned long long)other.hash); + } } } } } - } + + GB_ASSERT(c->info.minimum_dependency_type_info_index_map.count <= c->info.type_info_types.count); + } + + // TIME_SECTION("check for type hash collisions"); + // { + // PtrSet found = {}; + // ptr_set_init(&found, c->info.type_info_types.count); + // defer (ptr_set_destroy(&found)); + // for (auto const &tt : c->info.type_info_types) { + // if (ptr_set_update(&found, cast(uintptr)tt.hash)) { + // Type *other_type = nullptr; + // for (auto const &other : c->info.type_info_types) { + // if (&tt == &other) { + // continue; + // } + // if (cast(uintptr)other.hash == cast(uintptr)tt.hash && + // !are_types_identical(tt.type, other.type)) { + // other_type = other.type; + // break; + // } + // } + // if (other_type != nullptr) { + // String ts = type_to_canonical_string(temporary_allocator(), tt.type); + // String os = type_to_canonical_string(temporary_allocator(), other_type); + // if (ts != os) { + // compiler_error("%s found type hash collision with %s (hash = %llu)\n" + // "%s vs %s\n", + // type_to_string(tt.type), type_to_string(other_type), cast(unsigned long long)tt.hash, + // temp_canonical_string(tt.type), + // temp_canonical_string(other_type) + // ); + // } + // } + // } + // } + // } diff --git a/src/checker.hpp b/src/checker.hpp index 725c1ccf5e0..52676d4ee0a 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -222,7 +222,7 @@ struct DeclInfo { PtrSet deps; RwMutex type_info_deps_mutex; - PtrSet type_info_deps; + PtrSet type_info_deps; // TODO(bill): Use TypeSet BlockingMutex type_and_value_mutex; @@ -444,8 +444,10 @@ struct CheckerInfo { Scope * init_scope; Entity * entry_point; PtrSet minimum_dependency_set; - PtrMap minimum_dependency_type_info_set; - + BlockingMutex minimum_dependency_type_info_mutex; + PtrMap minimum_dependency_type_info_index_map; + TypeSet min_dep_type_info_set; + Array type_info_types; // sorted after filled Array testing_procedures; @@ -473,10 +475,10 @@ struct CheckerInfo { BlockingMutex gen_types_mutex; PtrMap gen_types; - BlockingMutex type_info_mutex; // NOT recursive - Array type_info_types; - PtrMap type_info_map; - TypeSet type_info_set; + // BlockingMutex type_info_mutex; // NOT recursive + // Array type_info_types; + // PtrMap type_info_map; + // TypeSet type_info_set; BlockingMutex foreign_mutex; // NOT recursive StringMap foreigns; @@ -595,6 +597,7 @@ gb_internal DeclInfo * decl_info_of_entity (Entity * e); gb_internal AstFile * ast_file_of_filename (CheckerInfo *i, String filename); // IMPORTANT: Only to use once checking is done gb_internal isize type_info_index (CheckerInfo *i, Type *type, bool error_on_failure); +gb_internal isize type_info_index (CheckerInfo *info, TypeInfoPair pair, bool error_on_failure); // Will return nullptr if not found gb_internal Entity *entity_of_node(Ast *expr); diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 8cb480dd4b6..9081175015d 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -3154,7 +3154,9 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { lbModule *m = default_module; { // Add type info data - isize max_type_info_count = info->minimum_dependency_type_info_set.count+1; + GB_ASSERT_MSG(info->minimum_dependency_type_info_index_map.count == info->type_info_types.count, "%tu vs %tu", info->minimum_dependency_type_info_index_map.count, info->type_info_types.count); + + isize max_type_info_count = info->minimum_dependency_type_info_index_map.count+1; Type *t = alloc_type_array(t_type_info_ptr, max_type_info_count); // IMPORTANT NOTE(bill): As LLVM does not have a union type, an array of unions cannot be initialized diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 6f9f94fbd55..8e0f15f35b9 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -1,16 +1,12 @@ -gb_internal isize lb_type_info_index(CheckerInfo *info, Type *type, bool err_on_not_found=true) { - auto *set = &info->minimum_dependency_type_info_set; - isize index = type_info_index(info, type, err_on_not_found); + +gb_internal isize lb_type_info_index(CheckerInfo *info, TypeInfoPair pair, bool err_on_not_found=true) { + isize index = type_info_index(info, pair, err_on_not_found); if (index >= 0) { - auto *found = map_get(set, index+1); - if (found) { - GB_ASSERT(*found >= 0); - return *found + 1; - } + return index+1; } if (err_on_not_found) { - gb_printf_err("NOT FOUND lb_type_info_index:\n\t%s\n\t@ index %td\n\tmax count: %u\nFound:\n", type_to_string(type), index, set->count); - for (auto const &entry : *set) { + gb_printf_err("NOT FOUND lb_type_info_index:\n\t%s\n\t@ index %td\n\tmax count: %u\nFound:\n", type_to_string(pair.type), index, info->minimum_dependency_type_info_index_map.count); + for (auto const &entry : info->minimum_dependency_type_info_index_map) { isize type_info_index = entry.key; gb_printf_err("\t%s\n", type_to_string(info->type_info_types[type_info_index].type)); } @@ -19,6 +15,10 @@ gb_internal isize lb_type_info_index(CheckerInfo *info, Type *type, bool err_on_ return -1; } +gb_internal isize lb_type_info_index(CheckerInfo *info, Type *type, bool err_on_not_found=true) { + return lb_type_info_index(info, {type, type_hash_canonical_type(type)}, err_on_not_found); +} + gb_internal u64 lb_typeid_kind(lbModule *m, Type *type, u64 id=0) { GB_ASSERT(!build_context.no_rtti); @@ -280,12 +280,13 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ LLVMTypeRef *modified_types = lb_setup_modified_types_for_type_info(m, global_type_info_data_entity_count); defer (gb_free(heap_allocator(), modified_types)); for_array(type_info_type_index, info->type_info_types) { - Type *t = info->type_info_types[type_info_type_index].type; + auto const &tt = info->type_info_types[type_info_type_index]; + Type *t = tt.type; if (t == nullptr || t == t_invalid) { continue; } - isize entry_index = lb_type_info_index(info, t, false); + isize entry_index = lb_type_info_index(info, tt, false); if (entry_index <= 0) { continue; } diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index b83441b19b8..de35312dab2 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -48,6 +48,15 @@ gb_internal gbString temp_canonical_string(Type *type); struct TypeInfoPair; struct TypeSet; +gb_internal GB_COMPARE_PROC(type_info_pair_cmp) { + TypeInfoPair *x = cast(TypeInfoPair *)a; + TypeInfoPair *y = cast(TypeInfoPair *)b; + if (x->hash == y->hash) { + return 0; + } + return x->hash < y->hash ? -1 : +1; +} + static constexpr u64 TYPE_SET_TOMBSTONE = ~(u64)(0ull); gb_internal void type_set_init (TypeSet *s, isize capacity); @@ -136,7 +145,7 @@ gb_internal isize type_set__find(TypeSet *s, TypeInfoPair pair) { usize hash_index = cast(usize)hash & mask; for (usize i = 0; i < s->capacity; i++) { Type *key = s->keys[hash_index].type; - if (are_types_identical(key, pair.type)) { + if (are_types_identical_unique_tuples(key, pair.type)) { return hash_index; } else if (key == 0) { return -1; @@ -154,7 +163,7 @@ gb_internal isize type_set__find(TypeSet *s, Type *ptr) { usize hash_index = cast(usize)hash & mask; for (usize i = 0; i < s->capacity; i++) { Type *key = s->keys[hash_index].type; - if (are_types_identical(key, ptr)) { + if (are_types_identical_unique_tuples(key, ptr)) { return hash_index; } else if (key == 0) { return -1; @@ -224,7 +233,7 @@ gb_internal bool type_set_update(TypeSet *s, TypeInfoPair pair) { // returns tru GB_ASSERT(hash_index < s->capacity); for (usize i = 0; i < s->capacity; i++) { TypeInfoPair *key = &s->keys[hash_index]; - GB_ASSERT(!are_types_identical(key->type, pair.type)); + GB_ASSERT(!are_types_identical_unique_tuples(key->type, pair.type)); if (key->hash == TYPE_SET_TOMBSTONE || key->hash == 0) { *key = pair; s->count++; @@ -274,6 +283,9 @@ gb_internal gbString write_canonical_params(gbString w, Type *params) { if (i > 0) { w = gb_string_appendc(w, CANONICAL_PARAM_SEPARATOR); } + w = gb_string_append_length(w, v->token.string.text, v->token.string.len); + w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); + if (v->kind == Entity_Variable) { if (v->flags&EntityFlag_CVarArg) { w = gb_string_appendc(w, CANONICAL_PARAM_C_VARARG); @@ -466,14 +478,17 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { switch (e->kind) { case Entity_TypeName: { + Type *params = nullptr; Entity *parent = type_get_polymorphic_parent(e->type, ¶ms); - if (parent) { + if (parent && (parent->token.string == e->token.string)) { w = gb_string_append_length(w, parent->token.string.text, parent->token.string.len); w = write_canonical_params(w, params); } else { w = gb_string_append_length(w, e->token.string.text, e->token.string.len); } + gb_unused(parent); + } // Handle parapoly stuff here? return w; From d69eb57cfa7a781e68b61307093e08790f95f640 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 13:18:51 +0000 Subject: [PATCH 08/19] Fix typos --- src/checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/checker.cpp b/src/checker.cpp index 1c7126e9ac6..41adf0296bd 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -6729,7 +6729,7 @@ gb_internal void check_parsed_files(Checker *c) { add_type_and_value(&c->builtin_ctx, u.expr, u.info->mode, u.info->type, u.info->value); } - TIME_SECTION("initilize type info array"); + TIME_SECTION("initialize and check for collisions in type info array"); { for (auto const &tt : c->info.min_dep_type_info_set) { array_add(&c->info.type_info_types, tt); From 721bcf2249fe2f2f6dd462833fede983205d6c5a Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 13:24:08 +0000 Subject: [PATCH 09/19] Minor code clean up --- src/checker.cpp | 62 ++++++++++++------------------------------------- 1 file changed, 15 insertions(+), 47 deletions(-) diff --git a/src/checker.cpp b/src/checker.cpp index 41adf0296bd..32e5ca40521 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -6741,60 +6741,28 @@ gb_internal void check_parsed_files(Checker *c) { for_array(i, c->info.type_info_types) { auto const &tt = c->info.type_info_types[i]; bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, cast(uintptr)tt.hash, i); - if (exists) { - for (auto const &entry : c->info.minimum_dependency_type_info_index_map) { - if (entry.key == cast(uintptr)tt.hash) { - auto const &other = c->info.type_info_types[entry.value]; - if (!are_types_identical_unique_tuples(tt.type, other.type)) { - gbString t = temp_canonical_string(tt.type); - gbString o = temp_canonical_string(other.type); - GB_PANIC("%s (%s) %llu vs %s (%s) %llu", - type_to_string(tt.type, false), t, cast(unsigned long long)tt.hash, - type_to_string(other.type, false), o, cast(unsigned long long)other.hash); - } - } + if (!exists) { + continue + } + for (auto const &entry : c->info.minimum_dependency_type_info_index_map) { + if (entry.key != cast(uintptr)tt.hash) { + continue; } + auto const &other = c->info.type_info_types[entry.value]; + if (are_types_identical_unique_tuples(tt.type, other.type)) { + continue; + } + gbString t = temp_canonical_string(tt.type); + gbString o = temp_canonical_string(other.type); + GB_PANIC("%s (%s) %llu vs %s (%s) %llu", + type_to_string(tt.type, false), t, cast(unsigned long long)tt.hash, + type_to_string(other.type, false), o, cast(unsigned long long)other.hash); } } GB_ASSERT(c->info.minimum_dependency_type_info_index_map.count <= c->info.type_info_types.count); } - // TIME_SECTION("check for type hash collisions"); - // { - // PtrSet found = {}; - // ptr_set_init(&found, c->info.type_info_types.count); - // defer (ptr_set_destroy(&found)); - // for (auto const &tt : c->info.type_info_types) { - // if (ptr_set_update(&found, cast(uintptr)tt.hash)) { - // Type *other_type = nullptr; - // for (auto const &other : c->info.type_info_types) { - // if (&tt == &other) { - // continue; - // } - // if (cast(uintptr)other.hash == cast(uintptr)tt.hash && - // !are_types_identical(tt.type, other.type)) { - // other_type = other.type; - // break; - // } - // } - // if (other_type != nullptr) { - // String ts = type_to_canonical_string(temporary_allocator(), tt.type); - // String os = type_to_canonical_string(temporary_allocator(), other_type); - // if (ts != os) { - // compiler_error("%s found type hash collision with %s (hash = %llu)\n" - // "%s vs %s\n", - // type_to_string(tt.type), type_to_string(other_type), cast(unsigned long long)tt.hash, - // temp_canonical_string(tt.type), - // temp_canonical_string(other_type) - // ); - // } - // } - // } - // } - // } - - TIME_SECTION("sort init and fini procedures"); check_sort_init_and_fini_procedures(c); From 19b59461b04f4b6b63fa24d70e9c9376b3dd3249 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 13:31:34 +0000 Subject: [PATCH 10/19] Use `TypeSet` for DeclInfo deps --- src/check_decl.cpp | 4 +- src/checker.cpp | 10 ++--- src/checker.hpp | 80 +++++++++++++++++++++++++---------- src/name_canonicalization.cpp | 37 +++------------- 4 files changed, 71 insertions(+), 60 deletions(-) diff --git a/src/check_decl.cpp b/src/check_decl.cpp index d6f8e6fa731..5607ea725c2 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -1742,8 +1742,8 @@ gb_internal void add_deps_from_child_to_parent(DeclInfo *decl) { rw_mutex_shared_lock(&decl->type_info_deps_mutex); rw_mutex_lock(&decl->parent->type_info_deps_mutex); - for (Type *t : decl->type_info_deps) { - ptr_set_add(&decl->parent->type_info_deps, t); + for (auto const &tt : decl->type_info_deps) { + type_set_add(&decl->parent->type_info_deps, tt); } rw_mutex_unlock(&decl->parent->type_info_deps_mutex); diff --git a/src/checker.cpp b/src/checker.cpp index 32e5ca40521..38da38b0c52 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -173,7 +173,7 @@ gb_internal void init_decl_info(DeclInfo *d, Scope *scope, DeclInfo *parent) { d->parent = parent; d->scope = scope; ptr_set_init(&d->deps, 0); - ptr_set_init(&d->type_info_deps, 0); + type_set_init(&d->type_info_deps, 0); d->labels.allocator = heap_allocator(); d->variadic_reuses.allocator = heap_allocator(); d->variadic_reuse_max_bytes = 0; @@ -838,7 +838,7 @@ gb_internal void add_type_info_dependency(CheckerInfo *info, DeclInfo *d, Type * } } rw_mutex_lock(&d->type_info_deps_mutex); - ptr_set_add(&d->type_info_deps, type); + type_set_add(&d->type_info_deps, type); rw_mutex_unlock(&d->type_info_deps_mutex); } @@ -2506,8 +2506,8 @@ gb_internal void add_dependency_to_set(Checker *c, Entity *entity) { if (decl == nullptr) { return; } - for (Type *t : decl->type_info_deps) { - add_min_dep_type_info(c, t); + for (TypeInfoPair const tt : decl->type_info_deps) { + add_min_dep_type_info(c, tt.type); } for (Entity *e : decl->deps) { @@ -6742,7 +6742,7 @@ gb_internal void check_parsed_files(Checker *c) { auto const &tt = c->info.type_info_types[i]; bool exists = map_set_if_not_previously_exists(&c->info.minimum_dependency_type_info_index_map, cast(uintptr)tt.hash, i); if (!exists) { - continue + continue; } for (auto const &entry : c->info.minimum_dependency_type_info_index_map) { if (entry.key != cast(uintptr)tt.hash) { diff --git a/src/checker.hpp b/src/checker.hpp index 52676d4ee0a..b8878d74563 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -167,6 +167,61 @@ typedef DECL_ATTRIBUTE_PROC(DeclAttributeProc); gb_internal void check_decl_attributes(CheckerContext *c, Array const &attributes, DeclAttributeProc *proc, AttributeContext *ac); +struct TypeInfoPair { + Type *type; + u64 hash; // see: type_hash_canonical_type +}; + +struct TypeSet { + TypeInfoPair *keys; + usize count; + usize capacity; +}; + +static constexpr u64 TYPE_SET_TOMBSTONE = ~(u64)(0ull); + +struct TypeSetIterator { + TypeSet *set; + usize index; + + TypeSetIterator &operator++() noexcept { + for (;;) { + ++index; + if (set->capacity == index) { + return *this; + } + TypeInfoPair key = set->keys[index]; + if (key.hash != 0 && key.hash != TYPE_SET_TOMBSTONE) { + return *this; + } + } + } + + bool operator==(TypeSetIterator const &other) const noexcept { + return this->set == other.set && this->index == other.index; + } + + + operator TypeInfoPair *() const { + return &set->keys[index]; + } +}; + + +gb_internal void type_set_init (TypeSet *s, isize capacity = 16); +gb_internal void type_set_destroy(TypeSet *s); +gb_internal Type *type_set_add (TypeSet *s, Type *ptr); +gb_internal Type *type_set_add (TypeSet *s, TypeInfoPair pair); +gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed +gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed +gb_internal bool type_set_exists (TypeSet *s, Type *ptr); +gb_internal void type_set_remove (TypeSet *s, Type *ptr); +gb_internal void type_set_clear (TypeSet *s); +gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); + +gb_internal TypeSetIterator begin(TypeSet &set) noexcept; +gb_internal TypeSetIterator end(TypeSet &set) noexcept; + enum ProcCheckedState : u8 { ProcCheckedState_Unchecked, @@ -221,8 +276,8 @@ struct DeclInfo { RwMutex deps_mutex; PtrSet deps; - RwMutex type_info_deps_mutex; - PtrSet type_info_deps; // TODO(bill): Use TypeSet + RwMutex type_info_deps_mutex; + TypeSet type_info_deps; BlockingMutex type_and_value_mutex; @@ -409,27 +464,6 @@ struct Defineable { String pos_str; }; -struct TypeInfoPair { - Type *type; - u64 hash; // see: type_hash_canonical_type -}; - -struct TypeSet { - TypeInfoPair *keys; - usize count; - usize capacity; -}; - -gb_internal void type_set_init (TypeSet *s, isize capacity = 16); -gb_internal void type_set_destroy(TypeSet *s); -gb_internal Type *type_set_add (TypeSet *s, Type *ptr); -gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed -gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed -gb_internal bool type_set_exists (TypeSet *s, Type *ptr); -gb_internal void type_set_remove (TypeSet *s, Type *ptr); -gb_internal void type_set_clear (TypeSet *s); -gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); - // CheckerInfo stores all the symbol information for a type-checked program struct CheckerInfo { Checker *checker; diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index de35312dab2..ef0e23f38a8 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -57,11 +57,10 @@ gb_internal GB_COMPARE_PROC(type_info_pair_cmp) { return x->hash < y->hash ? -1 : +1; } -static constexpr u64 TYPE_SET_TOMBSTONE = ~(u64)(0ull); - gb_internal void type_set_init (TypeSet *s, isize capacity); gb_internal void type_set_destroy(TypeSet *s); gb_internal Type *type_set_add (TypeSet *s, Type *ptr); +gb_internal Type *type_set_add (TypeSet *s, TypeInfoPair pair); gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed gb_internal bool type_set_exists (TypeSet *s, Type *ptr); @@ -73,34 +72,6 @@ gb_internal gbAllocator type_set_allocator(void) { return heap_allocator(); } -struct TypeSetIterator { - TypeSet *set; - usize index; - - TypeSetIterator &operator++() noexcept { - for (;;) { - ++index; - if (set->capacity == index) { - return *this; - } - TypeInfoPair key = set->keys[index]; - if (key.hash != 0 && key.hash != TYPE_SET_TOMBSTONE) { - return *this; - } - } - } - - bool operator==(TypeSetIterator const &other) const noexcept { - return this->set == other.set && this->index == other.index; - } - - - operator TypeInfoPair *() const { - return &set->keys[index]; - } -}; - - gb_internal TypeSetIterator begin(TypeSet &set) noexcept { usize index = 0; while (index < set.capacity) { @@ -257,6 +228,12 @@ gb_internal Type *type_set_add(TypeSet *s, Type *ptr) { return ptr; } +gb_internal Type *type_set_add(TypeSet *s, TypeInfoPair pair) { + type_set_update(s, pair); + return pair.type; +} + + gb_internal void type_set_remove(TypeSet *s, Type *ptr) { isize index = type_set__find(s, ptr); From cc90e0cbbf5855ec629fa433ee67f6552d0251fa Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 14:21:40 +0000 Subject: [PATCH 11/19] Add `TypeWriter` stream to allow for in-place hashing and string generation --- src/common.cpp | 4 +- src/llvm_backend_general.cpp | 3 +- src/name_canonicalization.cpp | 447 +++++++++++++++++++++------------- 3 files changed, 274 insertions(+), 180 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index 0ef39bd1081..ad1e5a851da 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -134,9 +134,9 @@ gb_internal u32 fnv32a(void const *data, isize len) { return h; } -gb_internal u64 fnv64a(void const *data, isize len) { +gb_internal u64 fnv64a(void const *data, isize len, u64 seed=0xcbf29ce484222325ull) { u8 const *bytes = cast(u8 const *)data; - u64 h = 0xcbf29ce484222325ull; + u64 h = seed; for (; len >= 8; len -= 8, bytes += 8) { h = (h ^ bytes[0]) * 0x100000001b3ull; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index b9ae3d2540c..4f6fcb88ea8 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1456,8 +1456,7 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { return e->token.string; } - gbString w = gb_string_make(heap_allocator(), ""); - w = write_canonical_entity_name(w, e); + gbString w = string_canonical_entity_name(heap_allocator(), e); defer (gb_string_free(w)); String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index ef0e23f38a8..99ff254e6dd 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -38,8 +38,10 @@ #define CANONICAL_NONE_TYPE "<>" +struct TypeWriter; -gb_internal gbString write_type_to_canonical_string(gbString w, Type *type); +gb_internal void write_type_to_canonical_string(TypeWriter *w, Type *type); +gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e); gb_internal u64 type_hash_canonical_type(Type *type); gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type); gb_internal gbString temp_canonical_string(Type *type); @@ -251,63 +253,138 @@ gb_internal gb_inline void type_set_clear(TypeSet *s) { } -gb_internal gbString write_canonical_params(gbString w, Type *params) { - w = gb_string_appendc(w, "("); +#define TYPE_WRITER_PROC(name) bool name(TypeWriter *w, void const *ptr, isize len) +typedef TYPE_WRITER_PROC(TypeWriterProc); + + +struct TypeWriter { + TypeWriterProc *proc; + void *user_data; +}; + +bool type_writer_append(TypeWriter *w, void const *ptr, isize len) { + return w->proc(w, ptr, len); +} + +bool type_writer_appendb(TypeWriter *w, char b) { + return w->proc(w, &b, 1); +} + +bool type_writer_appendc(TypeWriter *w, char const *str) { + isize len = gb_strlen(str); + return w->proc(w, str, len); +} + +bool type_writer_append_fmt(TypeWriter *w, char const *fmt, ...) { + va_list va; + char *str; + va_start(va, fmt); + str = gb_bprintf_va(fmt, va); + va_end(va); + + return type_writer_appendc(w, str); +} + + + +TYPE_WRITER_PROC(type_writer_string_writer_proc) { + gbString *s = cast(gbString *)&w->user_data; + *s = gb_string_append_length(*s, ptr, len); + return true; +} + +void type_writer_make_string(TypeWriter *w, gbAllocator allocator) { + w->user_data = gb_string_make(allocator, ""); + w->proc = type_writer_string_writer_proc; +} + +void type_writer_destroy_string(TypeWriter *w) { + gb_string_free(cast(gbString)w->user_data); +} + + +TYPE_WRITER_PROC(type_writer_hasher_writer_proc) { + u64 *seed = cast(u64 *)w->user_data; + *seed = fnv64a(ptr, len, *seed); + return true; +} + +void type_writer_make_hasher(TypeWriter *w, u64 *hash) { + w->user_data = hash; + w->proc = type_writer_hasher_writer_proc; +} + + + + +gb_internal void write_canonical_params(TypeWriter *w, Type *params) { + type_writer_appendc(w, "("); if (params) { GB_ASSERT(params->kind == Type_Tuple); for_array(i, params->Tuple.variables) { Entity *v = params->Tuple.variables[i]; if (i > 0) { - w = gb_string_appendc(w, CANONICAL_PARAM_SEPARATOR); + type_writer_appendc(w, CANONICAL_PARAM_SEPARATOR); } - w = gb_string_append_length(w, v->token.string.text, v->token.string.len); - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); + type_writer_append(w, v->token.string.text, v->token.string.len); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); if (v->kind == Entity_Variable) { if (v->flags&EntityFlag_CVarArg) { - w = gb_string_appendc(w, CANONICAL_PARAM_C_VARARG); + type_writer_appendc(w, CANONICAL_PARAM_C_VARARG); } if (v->flags&EntityFlag_Ellipsis) { Type *slice = base_type(v->type); - w = gb_string_appendc(w, CANONICAL_PARAM_VARARG); + type_writer_appendc(w, CANONICAL_PARAM_VARARG); GB_ASSERT(v->type->kind == Type_Slice); - w = write_type_to_canonical_string(w, slice->Slice.elem); + write_type_to_canonical_string(w, slice->Slice.elem); } else { - w = write_type_to_canonical_string(w, v->type); + write_type_to_canonical_string(w, v->type); } } else if (v->kind == Entity_TypeName) { - w = gb_string_appendc(w, CANONICAL_PARAM_TYPEID); - w = write_type_to_canonical_string(w, v->type); + type_writer_appendc(w, CANONICAL_PARAM_TYPEID); + write_type_to_canonical_string(w, v->type); } else if (v->kind == Entity_Constant) { - w = gb_string_appendc(w, CANONICAL_PARAM_CONST); - w = write_exact_value_to_string(w, v->Constant.value); + type_writer_appendc(w, CANONICAL_PARAM_CONST); + gbString s = exact_value_to_string(v->Constant.value, 1<<16); + type_writer_append(w, s, gb_string_length(s)); + gb_string_free(s); } else { GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); } } } - return gb_string_appendc(w, ")"); + type_writer_appendc(w, ")"); + return; } gb_internal u64 type_hash_canonical_type(Type *type) { if (type == nullptr) { return 0; } - TEMPORARY_ALLOCATOR_GUARD(); - gbString w = write_type_to_canonical_string(gb_string_make(temporary_allocator(), ""), type); - u64 hash = fnv64a(w, gb_string_length(w)); + u64 hash = fnv64a(nullptr, 0); + TypeWriter w = {}; + type_writer_make_hasher(&w, &hash); + write_type_to_canonical_string(&w, type); + return hash ? hash : 1; } gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { - gbString w = gb_string_make(allocator, ""); - w = write_type_to_canonical_string(w, type); - return make_string(cast(u8 const *)w, gb_string_length(w)); + TypeWriter w = {}; + type_writer_make_string(&w, allocator); + write_type_to_canonical_string(&w, type); + + gbString s = cast(gbString)w.user_data; + return make_string(cast(u8 const *)s, gb_string_length(s)); } gb_internal gbString temp_canonical_string(Type *type) { - gbString w = gb_string_make(temporary_allocator(), ""); - return write_type_to_canonical_string(w, type); + TypeWriter w = {}; + type_writer_make_string(&w, temporary_allocator()); + write_type_to_canonical_string(&w, type); + + return cast(gbString)w.user_data; } gb_internal void print_scope_flags(Scope *s) { @@ -323,77 +400,74 @@ gb_internal void print_scope_flags(Scope *s) { gb_printf_err("\n"); } +gb_internal gbString string_canonical_entity_name(gbAllocator allocator, Entity *e) { + TypeWriter w = {}; + type_writer_make_string(&w, allocator); + write_canonical_entity_name(&w, e); + return cast(gbString)w.user_data; +} + -gb_internal gbString write_canonical_parent_prefix(gbString w, Entity *e, bool ignore_final_dot=false) { +gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e, bool ignore_final_dot=false) { GB_ASSERT(e != nullptr); - // auto const &parent_entity = [](Scope *s) -> Entity* { - // while ((s->flags & (ScopeFlag_Proc|ScopeFlag_File)) == 0 && s->decl_info == nullptr) { - // s = s->parent; - // } - // if (s->decl_info && s->decl_info->entity) { - // return s->decl_info->entity; - // } - // return nullptr; - // }; - if (e->kind == Entity_Procedure) { if (e->Procedure.is_export || e->Procedure.is_foreign) { // no prefix - return w; + return; } if (e->parent_proc_decl) { Entity *p = e->parent_proc_decl->entity; - w = write_canonical_parent_prefix(w, p); - w = gb_string_append_length(w, p->token.string.text, p->token.string.len); + write_canonical_parent_prefix(w, p); + type_writer_append(w, p->token.string.text, p->token.string.len); if (is_type_polymorphic(p->type)) { - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); - w = write_type_to_canonical_string(w, p->type); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, p->type); } - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } else if (e->pkg && (scope_lookup_current(e->pkg->scope, e->token.string) == e)) { - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { - gb_string_appendc(w, "$"); + type_writer_appendc(w, "$"); } - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } else { String file_name = filename_without_directory(e->file->fullpath); - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { - gb_string_appendc(w, "$"); + type_writer_appendc(w, "$"); } - w = gb_string_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); + type_writer_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); } } else if (e->kind == Entity_Procedure) { if (e->Procedure.is_export || e->Procedure.is_foreign) { // no prefix - return w; + return; } GB_PANIC("TODO(bill): handle entity kind: %d", e->kind); } if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { String file_name = filename_without_directory(e->file->fullpath); - w = gb_string_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset)); + type_writer_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset)); } else { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + type_writer_append(w, e->token.string.text, e->token.string.len); } if (is_type_polymorphic(e->type)) { - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); - w = write_type_to_canonical_string(w, e->type); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, e->type); } if (!ignore_final_dot) { - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } - return w; + return; } -gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { +gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { GB_ASSERT(e != nullptr); if (e->token.string == "_") { @@ -407,18 +481,18 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { bool is_foreign = e->Variable.is_foreign; bool is_export = e->Variable.is_export; if (e->Variable.link_name.len > 0) { - w = gb_string_append_length(w, e->Variable.link_name.text, e->Variable.link_name.len); - return w; + type_writer_append(w, e->Variable.link_name.text, e->Variable.link_name.len); + return; } else if (is_foreign || is_export) { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); - return w; + type_writer_append(w, e->token.string.text, e->token.string.len); + return; } } else if (e->kind == Entity_Procedure && e->Procedure.link_name.len > 0) { - w = gb_string_append_length(w, e->Procedure.link_name.text, e->Procedure.link_name.len); - return w; + type_writer_append(w, e->Procedure.link_name.text, e->Procedure.link_name.len); + return; } else if (e->kind == Entity_Procedure && e->Procedure.is_export) { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); - return w; + type_writer_append(w, e->token.string.text, e->token.string.len); + return; } if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || @@ -430,15 +504,15 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { } if (s->decl_info != nullptr && s->decl_info->entity) { - w = write_canonical_parent_prefix(w, s->decl_info->entity); + write_canonical_parent_prefix(w, s->decl_info->entity); goto write_base_name; } else if ((s->flags & ScopeFlag_File) && s->file != nullptr) { String file_name = filename_without_directory(s->file->fullpath); - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { - gb_string_appendc(w, "$"); + type_writer_appendc(w, "$"); } - w = gb_string_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); + type_writer_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); goto write_base_name; } gb_printf_err("%s WEIRD ENTITY TYPE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); @@ -446,8 +520,8 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { GB_PANIC("weird entity"); } if (e->pkg != nullptr) { - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } write_base_name: @@ -459,37 +533,38 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { Type *params = nullptr; Entity *parent = type_get_polymorphic_parent(e->type, ¶ms); if (parent && (parent->token.string == e->token.string)) { - w = gb_string_append_length(w, parent->token.string.text, parent->token.string.len); - w = write_canonical_params(w, params); + type_writer_append(w, parent->token.string.text, parent->token.string.len); + write_canonical_params(w, params); } else { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + type_writer_append(w, e->token.string.text, e->token.string.len); } gb_unused(parent); } // Handle parapoly stuff here? - return w; + return; case Entity_Procedure: case Entity_Variable: - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + type_writer_append(w, e->token.string.text, e->token.string.len); if (is_type_polymorphic(e->type)) { - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); - w = write_type_to_canonical_string(w, e->type); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, e->type); } - return w; + return; default: GB_PANIC("TODO(bill): entity kind %d", e->kind); break; } - return w; + return; } // NOTE(bill): This exists so that we deterministically hash a type by serializing it to a canonical string -gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { +gb_internal void write_type_to_canonical_string(TypeWriter *w, Type *type) { if (type == nullptr) { - return gb_string_appendc(w, CANONICAL_NONE_TYPE); // none/void type + type_writer_appendc(w, CANONICAL_NONE_TYPE); // none/void type + return; } type = default_type(type); @@ -497,190 +572,210 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { switch (type->kind) { case Type_Basic: - return gb_string_append_length(w, type->Basic.name.text, type->Basic.name.len); + type_writer_append(w, type->Basic.name.text, type->Basic.name.len); + return; case Type_Pointer: - w = gb_string_append_rune(w, '^'); - return write_type_to_canonical_string(w, type->Pointer.elem); + type_writer_appendb(w, '^'); + write_type_to_canonical_string(w, type->Pointer.elem); + return; case Type_MultiPointer: - w = gb_string_appendc(w, "[^]"); - return write_type_to_canonical_string(w, type->Pointer.elem); + type_writer_appendc(w, "[^]"); + write_type_to_canonical_string(w, type->Pointer.elem); + return; case Type_SoaPointer: - w = gb_string_appendc(w, "#soa^"); - return write_type_to_canonical_string(w, type->Pointer.elem); + type_writer_appendc(w, "#soa^"); + write_type_to_canonical_string(w, type->Pointer.elem); + return; case Type_EnumeratedArray: if (type->EnumeratedArray.is_sparse) { - w = gb_string_appendc(w, "#sparse"); + type_writer_appendc(w, "#sparse"); } - w = gb_string_append_rune(w, '['); - w = write_type_to_canonical_string(w, type->EnumeratedArray.index); - w = gb_string_append_rune(w, ']'); - return write_type_to_canonical_string(w, type->EnumeratedArray.elem); + type_writer_appendb(w, '['); + write_type_to_canonical_string(w, type->EnumeratedArray.index); + type_writer_appendb(w, ']'); + write_type_to_canonical_string(w, type->EnumeratedArray.elem); + return; case Type_Array: - w = gb_string_appendc(w, gb_bprintf("[%lld]", cast(long long)type->Array.count)); - return write_type_to_canonical_string(w, type->Array.elem); + type_writer_appendc(w, gb_bprintf("[%lld]", cast(long long)type->Array.count)); + write_type_to_canonical_string(w, type->Array.elem); + return; case Type_Slice: - w = gb_string_appendc(w, "[]"); - return write_type_to_canonical_string(w, type->Array.elem); + type_writer_appendc(w, "[]"); + write_type_to_canonical_string(w, type->Array.elem); + return; case Type_DynamicArray: - w = gb_string_appendc(w, "[dynamic]"); - return write_type_to_canonical_string(w, type->DynamicArray.elem); + type_writer_appendc(w, "[dynamic]"); + write_type_to_canonical_string(w, type->DynamicArray.elem); + return; case Type_SimdVector: - w = gb_string_appendc(w, gb_bprintf("#simd[%lld]", cast(long long)type->SimdVector.count)); - return write_type_to_canonical_string(w, type->SimdVector.elem); + type_writer_appendc(w, gb_bprintf("#simd[%lld]", cast(long long)type->SimdVector.count)); + write_type_to_canonical_string(w, type->SimdVector.elem); + return; case Type_Matrix: if (type->Matrix.is_row_major) { - w = gb_string_appendc(w, "#row_major "); + type_writer_appendc(w, "#row_major "); } - w = gb_string_appendc(w, gb_bprintf("matrix[%lld, %lld]", cast(long long)type->Matrix.row_count, cast(long long)type->Matrix.column_count)); - return write_type_to_canonical_string(w, type->Matrix.elem); + type_writer_appendc(w, gb_bprintf("matrix[%lld, %lld]", cast(long long)type->Matrix.row_count, cast(long long)type->Matrix.column_count)); + write_type_to_canonical_string(w, type->Matrix.elem); + return; case Type_Map: - w = gb_string_appendc(w, "map["); - w = write_type_to_canonical_string(w, type->Map.key); - w = gb_string_appendc(w, "]"); - return write_type_to_canonical_string(w, type->Map.value); + type_writer_appendc(w, "map["); + write_type_to_canonical_string(w, type->Map.key); + type_writer_appendc(w, "]"); + write_type_to_canonical_string(w, type->Map.value); + return; case Type_Enum: - w = gb_string_appendc(w, "enum"); + type_writer_appendc(w, "enum"); if (type->Enum.base_type != nullptr) { - w = gb_string_append_rune(w, ' '); - w = write_type_to_canonical_string(w, type->Enum.base_type); - w = gb_string_append_rune(w, ' '); + type_writer_appendb(w, ' '); + write_type_to_canonical_string(w, type->Enum.base_type); + type_writer_appendb(w, ' '); } - w = gb_string_append_rune(w, '{'); + type_writer_appendb(w, '{'); for_array(i, type->Enum.fields) { Entity *f = type->Enum.fields[i]; GB_ASSERT(f->kind == Entity_Constant); if (i > 0) { - w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); + type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); } - w = gb_string_append_length(w, f->token.string.text, f->token.string.len); - w = gb_string_appendc(w, "="); - w = write_exact_value_to_string(w, f->Constant.value); + type_writer_append(w, f->token.string.text, f->token.string.len); + type_writer_appendc(w, "="); + + gbString s = exact_value_to_string(f->Constant.value, 1<<16); + type_writer_append(w, s, gb_string_length(s)); + gb_string_free(s); } - return gb_string_append_rune(w, '}'); + type_writer_appendb(w, '}'); + return; case Type_BitSet: - w = gb_string_appendc(w, "bit_set["); + type_writer_appendc(w, "bit_set["); if (type->BitSet.elem == nullptr) { - w = write_type_to_canonical_string(w, type->BitSet.elem); + write_type_to_canonical_string(w, type->BitSet.elem); } else if (is_type_enum(type->BitSet.elem)) { - w = write_type_to_canonical_string(w, type->BitSet.elem); + write_type_to_canonical_string(w, type->BitSet.elem); } else { - w = gb_string_append_fmt(w, "%lld", type->BitSet.lower); - w = gb_string_append_fmt(w, "..="); - w = gb_string_append_fmt(w, "%lld", type->BitSet.upper); + type_writer_append_fmt(w, "%lld", type->BitSet.lower); + type_writer_append_fmt(w, "..="); + type_writer_append_fmt(w, "%lld", type->BitSet.upper); } if (type->BitSet.underlying != nullptr) { - w = gb_string_appendc(w, ";"); - w = write_type_to_canonical_string(w, type->BitSet.underlying); + type_writer_appendc(w, ";"); + write_type_to_canonical_string(w, type->BitSet.underlying); } - return gb_string_appendc(w, "]"); + type_writer_appendc(w, "]"); + return; case Type_Union: - w = gb_string_appendc(w, "union"); + type_writer_appendc(w, "union"); switch (type->Union.kind) { - case UnionType_no_nil: w = gb_string_appendc(w, "#no_nil"); break; - case UnionType_shared_nil: w = gb_string_appendc(w, "#shared_nil"); break; + case UnionType_no_nil: type_writer_appendc(w, "#no_nil"); break; + case UnionType_shared_nil: type_writer_appendc(w, "#shared_nil"); break; } if (type->Union.custom_align != 0) { - w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Union.custom_align); + type_writer_append_fmt(w, "#align(%lld)", cast(long long)type->Union.custom_align); } - w = gb_string_appendc(w, "{"); + type_writer_appendc(w, "{"); for_array(i, type->Union.variants) { Type *t = type->Union.variants[i]; - if (i > 0) w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); - w = write_type_to_canonical_string(w, t); + if (i > 0) type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); + write_type_to_canonical_string(w, t); } - return gb_string_appendc(w, "}"); + type_writer_appendc(w, "}"); + return; case Type_Struct: if (type->Struct.soa_kind != StructSoa_None) { switch (type->Struct.soa_kind) { - case StructSoa_Fixed: w = gb_string_append_fmt(w, "#soa[%lld]", cast(long long)type->Struct.soa_count); break; - case StructSoa_Slice: w = gb_string_appendc(w, "#soa[]"); break; - case StructSoa_Dynamic: w = gb_string_appendc(w, "#soa[dynamic]"); break; + case StructSoa_Fixed: type_writer_append_fmt(w, "#soa[%lld]", cast(long long)type->Struct.soa_count); break; + case StructSoa_Slice: type_writer_appendc(w, "#soa[]"); break; + case StructSoa_Dynamic: type_writer_appendc(w, "#soa[dynamic]"); break; default: GB_PANIC("Unknown StructSoaKind"); break; } return write_type_to_canonical_string(w, type->Struct.soa_elem); } - w = gb_string_appendc(w, "struct"); - if (type->Struct.is_packed) w = gb_string_appendc(w, "#packed"); - if (type->Struct.is_raw_union) w = gb_string_appendc(w, "#raw_union"); - if (type->Struct.is_no_copy) w = gb_string_appendc(w, "#no_copy"); - if (type->Struct.custom_min_field_align != 0) w = gb_string_append_fmt(w, "#min_field_align(%lld)", cast(long long)type->Struct.custom_min_field_align); - if (type->Struct.custom_max_field_align != 0) w = gb_string_append_fmt(w, "#max_field_align(%lld)", cast(long long)type->Struct.custom_max_field_align); - if (type->Struct.custom_align != 0) w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Struct.custom_align); - w = gb_string_appendc(w, "{"); + type_writer_appendc(w, "struct"); + if (type->Struct.is_packed) type_writer_appendc(w, "#packed"); + if (type->Struct.is_raw_union) type_writer_appendc(w, "#raw_union"); + if (type->Struct.is_no_copy) type_writer_appendc(w, "#no_copy"); + if (type->Struct.custom_min_field_align != 0) type_writer_append_fmt(w, "#min_field_align(%lld)", cast(long long)type->Struct.custom_min_field_align); + if (type->Struct.custom_max_field_align != 0) type_writer_append_fmt(w, "#max_field_align(%lld)", cast(long long)type->Struct.custom_max_field_align); + if (type->Struct.custom_align != 0) type_writer_append_fmt(w, "#align(%lld)", cast(long long)type->Struct.custom_align); + type_writer_appendb(w, '{'); for_array(i, type->Struct.fields) { Entity *f = type->Struct.fields[i]; GB_ASSERT(f->kind == Entity_Variable); if (i > 0) { - w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); + type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); } - w = gb_string_append_length (w, f->token.string.text, f->token.string.len); - w = gb_string_appendc (w, ":"); - w = write_type_to_canonical_string(w, f->type); + type_writer_append(w, f->token.string.text, f->token.string.len); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, f->type); String tag = type->Struct.tags[i]; if (tag.len != 0) { String s = quote_to_ascii(heap_allocator(), tag); - w = gb_string_append_length(w, s.text, s.len); + type_writer_append(w, s.text, s.len); gb_free(heap_allocator(), s.text); } } - return gb_string_appendc(w, "}"); + type_writer_appendb(w, '}'); + return; case Type_BitField: - w = gb_string_appendc(w, "bit_field"); - w = write_type_to_canonical_string(w, type->BitField.backing_type); - w = gb_string_appendc(w, " {"); + type_writer_appendc(w, "bit_field"); + write_type_to_canonical_string(w, type->BitField.backing_type); + type_writer_appendc(w, " {"); for (isize i = 0; i < type->BitField.fields.count; i++) { Entity *f = type->BitField.fields[i]; if (i > 0) { - w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); + type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); } - w = gb_string_append_length(w, f->token.string.text, f->token.string.len); - w = gb_string_appendc(w, ":"); - w = write_type_to_canonical_string(w, f->type); - w = gb_string_appendc(w, "|"); - w = gb_string_appendc(w, gb_bprintf("%u", type->BitField.bit_sizes[i])); + type_writer_append(w, f->token.string.text, f->token.string.len); + type_writer_appendc(w, ":"); + write_type_to_canonical_string(w, f->type); + type_writer_appendc(w, "|"); + type_writer_appendc(w, gb_bprintf("%u", type->BitField.bit_sizes[i])); } - return gb_string_appendc(w, " }"); + type_writer_appendc(w, " }"); + return; case Type_Proc: - w = gb_string_appendc(w, "proc"); + type_writer_appendc(w, "proc"); if (default_calling_convention() != type->Proc.calling_convention) { - w = gb_string_appendc(w, "\""); - w = gb_string_appendc(w, proc_calling_convention_strings[type->Proc.calling_convention]); - w = gb_string_appendc(w, "\""); + type_writer_appendc(w, "\""); + type_writer_appendc(w, proc_calling_convention_strings[type->Proc.calling_convention]); + type_writer_appendc(w, "\""); } - w = write_canonical_params(w, type->Proc.params); + write_canonical_params(w, type->Proc.params); if (type->Proc.result_count > 0) { - w = gb_string_appendc(w, "->"); - w = write_canonical_params(w, type->Proc.results); + type_writer_appendc(w, "->"); + write_canonical_params(w, type->Proc.results); } - return w; + return; case Type_Generic: GB_PANIC("Type_Generic should never be hit"); - return w; + return; case Type_Named: if (type->Named.type_name != nullptr) { - return write_canonical_entity_name(w, type->Named.type_name); + write_canonical_entity_name(w, type->Named.type_name); + return; } else { - w = gb_string_append_length(w, type->Named.name.text, type->Named.name.len); + type_writer_append(w, type->Named.name.text, type->Named.name.len); } - return w; + return; case Type_Tuple: - w = gb_string_appendc(w, "params"); - w = write_canonical_params(w, type); - return w; + type_writer_appendc(w, "params"); + write_canonical_params(w, type); + return; default: GB_PANIC("unknown type kind %d %.*s", type->kind, LIT(type_strings[type->kind])); break; } - return w; + return; } \ No newline at end of file From 23efd1bd02703e12a4d97c2d7194d1175a28d56c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 14:21:40 +0000 Subject: [PATCH 12/19] Add `TypeWriter` stream to allow for in-place hashing and string generation --- src/checker.hpp | 56 +--- src/common.cpp | 4 +- src/llvm_backend_general.cpp | 4 +- src/name_canonicalization.cpp | 480 +++++++++++++++++++--------------- src/name_canonicalization.hpp | 88 +++++++ 5 files changed, 361 insertions(+), 271 deletions(-) create mode 100644 src/name_canonicalization.hpp diff --git a/src/checker.hpp b/src/checker.hpp index b8878d74563..8850d5c3f62 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -167,61 +167,7 @@ typedef DECL_ATTRIBUTE_PROC(DeclAttributeProc); gb_internal void check_decl_attributes(CheckerContext *c, Array const &attributes, DeclAttributeProc *proc, AttributeContext *ac); -struct TypeInfoPair { - Type *type; - u64 hash; // see: type_hash_canonical_type -}; - -struct TypeSet { - TypeInfoPair *keys; - usize count; - usize capacity; -}; - -static constexpr u64 TYPE_SET_TOMBSTONE = ~(u64)(0ull); - -struct TypeSetIterator { - TypeSet *set; - usize index; - - TypeSetIterator &operator++() noexcept { - for (;;) { - ++index; - if (set->capacity == index) { - return *this; - } - TypeInfoPair key = set->keys[index]; - if (key.hash != 0 && key.hash != TYPE_SET_TOMBSTONE) { - return *this; - } - } - } - - bool operator==(TypeSetIterator const &other) const noexcept { - return this->set == other.set && this->index == other.index; - } - - - operator TypeInfoPair *() const { - return &set->keys[index]; - } -}; - - -gb_internal void type_set_init (TypeSet *s, isize capacity = 16); -gb_internal void type_set_destroy(TypeSet *s); -gb_internal Type *type_set_add (TypeSet *s, Type *ptr); -gb_internal Type *type_set_add (TypeSet *s, TypeInfoPair pair); -gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed -gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed -gb_internal bool type_set_exists (TypeSet *s, Type *ptr); -gb_internal void type_set_remove (TypeSet *s, Type *ptr); -gb_internal void type_set_clear (TypeSet *s); -gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); - -gb_internal TypeSetIterator begin(TypeSet &set) noexcept; -gb_internal TypeSetIterator end(TypeSet &set) noexcept; - +#include "name_canonicalization.hpp" enum ProcCheckedState : u8 { ProcCheckedState_Unchecked, diff --git a/src/common.cpp b/src/common.cpp index 0ef39bd1081..ad1e5a851da 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -134,9 +134,9 @@ gb_internal u32 fnv32a(void const *data, isize len) { return h; } -gb_internal u64 fnv64a(void const *data, isize len) { +gb_internal u64 fnv64a(void const *data, isize len, u64 seed=0xcbf29ce484222325ull) { u8 const *bytes = cast(u8 const *)data; - u64 h = 0xcbf29ce484222325ull; + u64 h = seed; for (; len >= 8; len -= 8, bytes += 8) { h = (h ^ bytes[0]) * 0x100000001b3ull; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index b9ae3d2540c..233448ecea1 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1456,11 +1456,11 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { return e->token.string; } - gbString w = gb_string_make(heap_allocator(), ""); - w = write_canonical_entity_name(w, e); + gbString w = string_canonical_entity_name(heap_allocator(), e); defer (gb_string_free(w)); String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); + gb_printf_err("%.*s\n", LIT(name)); if (e->kind == Entity_TypeName) { e->TypeName.ir_mangled_name = name; diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index ef0e23f38a8..9c28581705f 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -21,33 +21,6 @@ */ -#define CANONICAL_TYPE_SEPARATOR ":" -#define CANONICAL_NAME_SEPARATOR "." - -#define CANONICAL_PARAM_SEPARATOR "," - -#define CANONICAL_PARAM_TYPEID "$" -#define CANONICAL_PARAM_CONST "$$" - -#define CANONICAL_PARAM_C_VARARG "#c_vararg" -#define CANONICAL_PARAM_VARARG ".." - -#define CANONICAL_FIELD_SEPARATOR "," - -#define CANONICAL_ANON_PREFIX "$anon" - -#define CANONICAL_NONE_TYPE "<>" - - -gb_internal gbString write_type_to_canonical_string(gbString w, Type *type); -gb_internal u64 type_hash_canonical_type(Type *type); -gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type); -gb_internal gbString temp_canonical_string(Type *type); - - -struct TypeInfoPair; -struct TypeSet; - gb_internal GB_COMPARE_PROC(type_info_pair_cmp) { TypeInfoPair *x = cast(TypeInfoPair *)a; TypeInfoPair *y = cast(TypeInfoPair *)b; @@ -57,16 +30,6 @@ gb_internal GB_COMPARE_PROC(type_info_pair_cmp) { return x->hash < y->hash ? -1 : +1; } -gb_internal void type_set_init (TypeSet *s, isize capacity); -gb_internal void type_set_destroy(TypeSet *s); -gb_internal Type *type_set_add (TypeSet *s, Type *ptr); -gb_internal Type *type_set_add (TypeSet *s, TypeInfoPair pair); -gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed -gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed -gb_internal bool type_set_exists (TypeSet *s, Type *ptr); -gb_internal void type_set_remove (TypeSet *s, Type *ptr); -gb_internal void type_set_clear (TypeSet *s); -gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); gb_internal gbAllocator type_set_allocator(void) { return heap_allocator(); @@ -251,63 +214,138 @@ gb_internal gb_inline void type_set_clear(TypeSet *s) { } -gb_internal gbString write_canonical_params(gbString w, Type *params) { - w = gb_string_appendc(w, "("); +#define TYPE_WRITER_PROC(name) bool name(TypeWriter *w, void const *ptr, isize len) +typedef TYPE_WRITER_PROC(TypeWriterProc); + + +struct TypeWriter { + TypeWriterProc *proc; + void *user_data; +}; + +bool type_writer_append(TypeWriter *w, void const *ptr, isize len) { + return w->proc(w, ptr, len); +} + +bool type_writer_appendb(TypeWriter *w, char b) { + return w->proc(w, &b, 1); +} + +bool type_writer_appendc(TypeWriter *w, char const *str) { + isize len = gb_strlen(str); + return w->proc(w, str, len); +} + +bool type_writer_append_fmt(TypeWriter *w, char const *fmt, ...) { + va_list va; + char *str; + va_start(va, fmt); + str = gb_bprintf_va(fmt, va); + va_end(va); + + return type_writer_appendc(w, str); +} + + + +TYPE_WRITER_PROC(type_writer_string_writer_proc) { + gbString *s = cast(gbString *)&w->user_data; + *s = gb_string_append_length(*s, ptr, len); + return true; +} + +void type_writer_make_string(TypeWriter *w, gbAllocator allocator) { + w->user_data = gb_string_make(allocator, ""); + w->proc = type_writer_string_writer_proc; +} + +void type_writer_destroy_string(TypeWriter *w) { + gb_string_free(cast(gbString)w->user_data); +} + + +TYPE_WRITER_PROC(type_writer_hasher_writer_proc) { + u64 *seed = cast(u64 *)w->user_data; + *seed = fnv64a(ptr, len, *seed); + return true; +} + +void type_writer_make_hasher(TypeWriter *w, u64 *hash) { + w->user_data = hash; + w->proc = type_writer_hasher_writer_proc; +} + + + + +gb_internal void write_canonical_params(TypeWriter *w, Type *params) { + type_writer_appendc(w, "("); if (params) { GB_ASSERT(params->kind == Type_Tuple); for_array(i, params->Tuple.variables) { Entity *v = params->Tuple.variables[i]; if (i > 0) { - w = gb_string_appendc(w, CANONICAL_PARAM_SEPARATOR); + type_writer_appendc(w, CANONICAL_PARAM_SEPARATOR); } - w = gb_string_append_length(w, v->token.string.text, v->token.string.len); - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); + type_writer_append(w, v->token.string.text, v->token.string.len); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); if (v->kind == Entity_Variable) { if (v->flags&EntityFlag_CVarArg) { - w = gb_string_appendc(w, CANONICAL_PARAM_C_VARARG); + type_writer_appendc(w, CANONICAL_PARAM_C_VARARG); } if (v->flags&EntityFlag_Ellipsis) { Type *slice = base_type(v->type); - w = gb_string_appendc(w, CANONICAL_PARAM_VARARG); + type_writer_appendc(w, CANONICAL_PARAM_VARARG); GB_ASSERT(v->type->kind == Type_Slice); - w = write_type_to_canonical_string(w, slice->Slice.elem); + write_type_to_canonical_string(w, slice->Slice.elem); } else { - w = write_type_to_canonical_string(w, v->type); + write_type_to_canonical_string(w, v->type); } } else if (v->kind == Entity_TypeName) { - w = gb_string_appendc(w, CANONICAL_PARAM_TYPEID); - w = write_type_to_canonical_string(w, v->type); + type_writer_appendc(w, CANONICAL_PARAM_TYPEID); + write_type_to_canonical_string(w, v->type); } else if (v->kind == Entity_Constant) { - w = gb_string_appendc(w, CANONICAL_PARAM_CONST); - w = write_exact_value_to_string(w, v->Constant.value); + type_writer_appendc(w, CANONICAL_PARAM_CONST); + gbString s = exact_value_to_string(v->Constant.value, 1<<16); + type_writer_append(w, s, gb_string_length(s)); + gb_string_free(s); } else { GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); } } } - return gb_string_appendc(w, ")"); + type_writer_appendc(w, ")"); + return; } gb_internal u64 type_hash_canonical_type(Type *type) { if (type == nullptr) { return 0; } - TEMPORARY_ALLOCATOR_GUARD(); - gbString w = write_type_to_canonical_string(gb_string_make(temporary_allocator(), ""), type); - u64 hash = fnv64a(w, gb_string_length(w)); + u64 hash = fnv64a(nullptr, 0); + TypeWriter w = {}; + type_writer_make_hasher(&w, &hash); + write_type_to_canonical_string(&w, type); + return hash ? hash : 1; } gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type) { - gbString w = gb_string_make(allocator, ""); - w = write_type_to_canonical_string(w, type); - return make_string(cast(u8 const *)w, gb_string_length(w)); + TypeWriter w = {}; + type_writer_make_string(&w, allocator); + write_type_to_canonical_string(&w, type); + + gbString s = cast(gbString)w.user_data; + return make_string(cast(u8 const *)s, gb_string_length(s)); } gb_internal gbString temp_canonical_string(Type *type) { - gbString w = gb_string_make(temporary_allocator(), ""); - return write_type_to_canonical_string(w, type); + TypeWriter w = {}; + type_writer_make_string(&w, temporary_allocator()); + write_type_to_canonical_string(&w, type); + + return cast(gbString)w.user_data; } gb_internal void print_scope_flags(Scope *s) { @@ -323,77 +361,74 @@ gb_internal void print_scope_flags(Scope *s) { gb_printf_err("\n"); } +gb_internal gbString string_canonical_entity_name(gbAllocator allocator, Entity *e) { + TypeWriter w = {}; + type_writer_make_string(&w, allocator); + write_canonical_entity_name(&w, e); + return cast(gbString)w.user_data; +} + -gb_internal gbString write_canonical_parent_prefix(gbString w, Entity *e, bool ignore_final_dot=false) { +gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e, bool ignore_final_dot=false) { GB_ASSERT(e != nullptr); - // auto const &parent_entity = [](Scope *s) -> Entity* { - // while ((s->flags & (ScopeFlag_Proc|ScopeFlag_File)) == 0 && s->decl_info == nullptr) { - // s = s->parent; - // } - // if (s->decl_info && s->decl_info->entity) { - // return s->decl_info->entity; - // } - // return nullptr; - // }; - if (e->kind == Entity_Procedure) { if (e->Procedure.is_export || e->Procedure.is_foreign) { // no prefix - return w; + return; } if (e->parent_proc_decl) { Entity *p = e->parent_proc_decl->entity; - w = write_canonical_parent_prefix(w, p); - w = gb_string_append_length(w, p->token.string.text, p->token.string.len); + write_canonical_parent_prefix(w, p); + type_writer_append(w, p->token.string.text, p->token.string.len); if (is_type_polymorphic(p->type)) { - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); - w = write_type_to_canonical_string(w, p->type); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, p->type); } - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } else if (e->pkg && (scope_lookup_current(e->pkg->scope, e->token.string) == e)) { - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { - gb_string_appendc(w, "$"); + type_writer_appendc(w, "$"); } - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } else { String file_name = filename_without_directory(e->file->fullpath); - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { - gb_string_appendc(w, "$"); + type_writer_appendc(w, "$"); } - w = gb_string_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); + type_writer_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); } } else if (e->kind == Entity_Procedure) { if (e->Procedure.is_export || e->Procedure.is_foreign) { // no prefix - return w; + return; } GB_PANIC("TODO(bill): handle entity kind: %d", e->kind); } if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { String file_name = filename_without_directory(e->file->fullpath); - w = gb_string_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset)); + type_writer_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset)); } else { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + type_writer_append(w, e->token.string.text, e->token.string.len); } if (is_type_polymorphic(e->type)) { - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); - w = write_type_to_canonical_string(w, e->type); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, e->type); } if (!ignore_final_dot) { - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } - return w; + return; } -gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { +gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { GB_ASSERT(e != nullptr); if (e->token.string == "_") { @@ -407,18 +442,18 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { bool is_foreign = e->Variable.is_foreign; bool is_export = e->Variable.is_export; if (e->Variable.link_name.len > 0) { - w = gb_string_append_length(w, e->Variable.link_name.text, e->Variable.link_name.len); - return w; + type_writer_append(w, e->Variable.link_name.text, e->Variable.link_name.len); + return; } else if (is_foreign || is_export) { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); - return w; + type_writer_append(w, e->token.string.text, e->token.string.len); + return; } } else if (e->kind == Entity_Procedure && e->Procedure.link_name.len > 0) { - w = gb_string_append_length(w, e->Procedure.link_name.text, e->Procedure.link_name.len); - return w; + type_writer_append(w, e->Procedure.link_name.text, e->Procedure.link_name.len); + return; } else if (e->kind == Entity_Procedure && e->Procedure.is_export) { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); - return w; + type_writer_append(w, e->token.string.text, e->token.string.len); + return; } if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || @@ -430,15 +465,15 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { } if (s->decl_info != nullptr && s->decl_info->entity) { - w = write_canonical_parent_prefix(w, s->decl_info->entity); + write_canonical_parent_prefix(w, s->decl_info->entity); goto write_base_name; } else if ((s->flags & ScopeFlag_File) && s->file != nullptr) { String file_name = filename_without_directory(s->file->fullpath); - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); if (e->pkg->name == "llvm") { - gb_string_appendc(w, "$"); + type_writer_appendc(w, "$"); } - w = gb_string_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); + type_writer_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); goto write_base_name; } gb_printf_err("%s WEIRD ENTITY TYPE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); @@ -446,8 +481,8 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { GB_PANIC("weird entity"); } if (e->pkg != nullptr) { - w = gb_string_append_length(w, e->pkg->name.text, e->pkg->name.len); - w = gb_string_appendc(w, CANONICAL_NAME_SEPARATOR); + type_writer_append(w, e->pkg->name.text, e->pkg->name.len); + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); } write_base_name: @@ -459,37 +494,38 @@ gb_internal gbString write_canonical_entity_name(gbString w, Entity *e) { Type *params = nullptr; Entity *parent = type_get_polymorphic_parent(e->type, ¶ms); if (parent && (parent->token.string == e->token.string)) { - w = gb_string_append_length(w, parent->token.string.text, parent->token.string.len); - w = write_canonical_params(w, params); + type_writer_append(w, parent->token.string.text, parent->token.string.len); + write_canonical_params(w, params); } else { - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + type_writer_append(w, e->token.string.text, e->token.string.len); } gb_unused(parent); } // Handle parapoly stuff here? - return w; + return; case Entity_Procedure: case Entity_Variable: - w = gb_string_append_length(w, e->token.string.text, e->token.string.len); + type_writer_append(w, e->token.string.text, e->token.string.len); if (is_type_polymorphic(e->type)) { - w = gb_string_appendc(w, CANONICAL_TYPE_SEPARATOR); - w = write_type_to_canonical_string(w, e->type); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, e->type); } - return w; + return; default: GB_PANIC("TODO(bill): entity kind %d", e->kind); break; } - return w; + return; } // NOTE(bill): This exists so that we deterministically hash a type by serializing it to a canonical string -gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { +gb_internal void write_type_to_canonical_string(TypeWriter *w, Type *type) { if (type == nullptr) { - return gb_string_appendc(w, CANONICAL_NONE_TYPE); // none/void type + type_writer_appendc(w, CANONICAL_NONE_TYPE); // none/void type + return; } type = default_type(type); @@ -497,190 +533,210 @@ gb_internal gbString write_type_to_canonical_string(gbString w, Type *type) { switch (type->kind) { case Type_Basic: - return gb_string_append_length(w, type->Basic.name.text, type->Basic.name.len); + type_writer_append(w, type->Basic.name.text, type->Basic.name.len); + return; case Type_Pointer: - w = gb_string_append_rune(w, '^'); - return write_type_to_canonical_string(w, type->Pointer.elem); + type_writer_appendb(w, '^'); + write_type_to_canonical_string(w, type->Pointer.elem); + return; case Type_MultiPointer: - w = gb_string_appendc(w, "[^]"); - return write_type_to_canonical_string(w, type->Pointer.elem); + type_writer_appendc(w, "[^]"); + write_type_to_canonical_string(w, type->Pointer.elem); + return; case Type_SoaPointer: - w = gb_string_appendc(w, "#soa^"); - return write_type_to_canonical_string(w, type->Pointer.elem); + type_writer_appendc(w, "#soa^"); + write_type_to_canonical_string(w, type->Pointer.elem); + return; case Type_EnumeratedArray: if (type->EnumeratedArray.is_sparse) { - w = gb_string_appendc(w, "#sparse"); + type_writer_appendc(w, "#sparse"); } - w = gb_string_append_rune(w, '['); - w = write_type_to_canonical_string(w, type->EnumeratedArray.index); - w = gb_string_append_rune(w, ']'); - return write_type_to_canonical_string(w, type->EnumeratedArray.elem); + type_writer_appendb(w, '['); + write_type_to_canonical_string(w, type->EnumeratedArray.index); + type_writer_appendb(w, ']'); + write_type_to_canonical_string(w, type->EnumeratedArray.elem); + return; case Type_Array: - w = gb_string_appendc(w, gb_bprintf("[%lld]", cast(long long)type->Array.count)); - return write_type_to_canonical_string(w, type->Array.elem); + type_writer_append_fmt(w, "[%lld]", cast(long long)type->Array.count); + write_type_to_canonical_string(w, type->Array.elem); + return; case Type_Slice: - w = gb_string_appendc(w, "[]"); - return write_type_to_canonical_string(w, type->Array.elem); + type_writer_appendc(w, "[]"); + write_type_to_canonical_string(w, type->Array.elem); + return; case Type_DynamicArray: - w = gb_string_appendc(w, "[dynamic]"); - return write_type_to_canonical_string(w, type->DynamicArray.elem); + type_writer_appendc(w, "[dynamic]"); + write_type_to_canonical_string(w, type->DynamicArray.elem); + return; case Type_SimdVector: - w = gb_string_appendc(w, gb_bprintf("#simd[%lld]", cast(long long)type->SimdVector.count)); - return write_type_to_canonical_string(w, type->SimdVector.elem); + type_writer_append_fmt(w, "#simd[%lld]", cast(long long)type->SimdVector.count); + write_type_to_canonical_string(w, type->SimdVector.elem); + return; case Type_Matrix: if (type->Matrix.is_row_major) { - w = gb_string_appendc(w, "#row_major "); + type_writer_appendc(w, "#row_major "); } - w = gb_string_appendc(w, gb_bprintf("matrix[%lld, %lld]", cast(long long)type->Matrix.row_count, cast(long long)type->Matrix.column_count)); - return write_type_to_canonical_string(w, type->Matrix.elem); + type_writer_append_fmt(w, "matrix[%lld, %lld]", cast(long long)type->Matrix.row_count, cast(long long)type->Matrix.column_count); + write_type_to_canonical_string(w, type->Matrix.elem); + return; case Type_Map: - w = gb_string_appendc(w, "map["); - w = write_type_to_canonical_string(w, type->Map.key); - w = gb_string_appendc(w, "]"); - return write_type_to_canonical_string(w, type->Map.value); + type_writer_appendc(w, "map["); + write_type_to_canonical_string(w, type->Map.key); + type_writer_appendc(w, "]"); + write_type_to_canonical_string(w, type->Map.value); + return; case Type_Enum: - w = gb_string_appendc(w, "enum"); + type_writer_appendc(w, "enum"); if (type->Enum.base_type != nullptr) { - w = gb_string_append_rune(w, ' '); - w = write_type_to_canonical_string(w, type->Enum.base_type); - w = gb_string_append_rune(w, ' '); + type_writer_appendb(w, ' '); + write_type_to_canonical_string(w, type->Enum.base_type); + type_writer_appendb(w, ' '); } - w = gb_string_append_rune(w, '{'); + type_writer_appendb(w, '{'); for_array(i, type->Enum.fields) { Entity *f = type->Enum.fields[i]; GB_ASSERT(f->kind == Entity_Constant); if (i > 0) { - w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); + type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); } - w = gb_string_append_length(w, f->token.string.text, f->token.string.len); - w = gb_string_appendc(w, "="); - w = write_exact_value_to_string(w, f->Constant.value); + type_writer_append(w, f->token.string.text, f->token.string.len); + type_writer_appendc(w, "="); + + gbString s = exact_value_to_string(f->Constant.value, 1<<16); + type_writer_append(w, s, gb_string_length(s)); + gb_string_free(s); } - return gb_string_append_rune(w, '}'); + type_writer_appendb(w, '}'); + return; case Type_BitSet: - w = gb_string_appendc(w, "bit_set["); + type_writer_appendc(w, "bit_set["); if (type->BitSet.elem == nullptr) { - w = write_type_to_canonical_string(w, type->BitSet.elem); + type_writer_appendc(w, CANONICAL_NONE_TYPE); } else if (is_type_enum(type->BitSet.elem)) { - w = write_type_to_canonical_string(w, type->BitSet.elem); + write_type_to_canonical_string(w, type->BitSet.elem); } else { - w = gb_string_append_fmt(w, "%lld", type->BitSet.lower); - w = gb_string_append_fmt(w, "..="); - w = gb_string_append_fmt(w, "%lld", type->BitSet.upper); + type_writer_append_fmt(w, "%lld", type->BitSet.lower); + type_writer_append_fmt(w, CANONICAL_RANGE_OPERATOR); + type_writer_append_fmt(w, "%lld", type->BitSet.upper); } if (type->BitSet.underlying != nullptr) { - w = gb_string_appendc(w, ";"); - w = write_type_to_canonical_string(w, type->BitSet.underlying); + type_writer_appendc(w, ";"); + write_type_to_canonical_string(w, type->BitSet.underlying); } - return gb_string_appendc(w, "]"); + type_writer_appendc(w, "]"); + return; case Type_Union: - w = gb_string_appendc(w, "union"); + type_writer_appendc(w, "union"); switch (type->Union.kind) { - case UnionType_no_nil: w = gb_string_appendc(w, "#no_nil"); break; - case UnionType_shared_nil: w = gb_string_appendc(w, "#shared_nil"); break; + case UnionType_no_nil: type_writer_appendc(w, "#no_nil"); break; + case UnionType_shared_nil: type_writer_appendc(w, "#shared_nil"); break; } if (type->Union.custom_align != 0) { - w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Union.custom_align); + type_writer_append_fmt(w, "#align(%lld)", cast(long long)type->Union.custom_align); } - w = gb_string_appendc(w, "{"); + type_writer_appendc(w, "{"); for_array(i, type->Union.variants) { Type *t = type->Union.variants[i]; - if (i > 0) w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); - w = write_type_to_canonical_string(w, t); + if (i > 0) type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); + write_type_to_canonical_string(w, t); } - return gb_string_appendc(w, "}"); + type_writer_appendc(w, "}"); + return; case Type_Struct: if (type->Struct.soa_kind != StructSoa_None) { switch (type->Struct.soa_kind) { - case StructSoa_Fixed: w = gb_string_append_fmt(w, "#soa[%lld]", cast(long long)type->Struct.soa_count); break; - case StructSoa_Slice: w = gb_string_appendc(w, "#soa[]"); break; - case StructSoa_Dynamic: w = gb_string_appendc(w, "#soa[dynamic]"); break; + case StructSoa_Fixed: type_writer_append_fmt(w, "#soa[%lld]", cast(long long)type->Struct.soa_count); break; + case StructSoa_Slice: type_writer_appendc(w, "#soa[]"); break; + case StructSoa_Dynamic: type_writer_appendc(w, "#soa[dynamic]"); break; default: GB_PANIC("Unknown StructSoaKind"); break; } return write_type_to_canonical_string(w, type->Struct.soa_elem); } - w = gb_string_appendc(w, "struct"); - if (type->Struct.is_packed) w = gb_string_appendc(w, "#packed"); - if (type->Struct.is_raw_union) w = gb_string_appendc(w, "#raw_union"); - if (type->Struct.is_no_copy) w = gb_string_appendc(w, "#no_copy"); - if (type->Struct.custom_min_field_align != 0) w = gb_string_append_fmt(w, "#min_field_align(%lld)", cast(long long)type->Struct.custom_min_field_align); - if (type->Struct.custom_max_field_align != 0) w = gb_string_append_fmt(w, "#max_field_align(%lld)", cast(long long)type->Struct.custom_max_field_align); - if (type->Struct.custom_align != 0) w = gb_string_append_fmt(w, "#align(%lld)", cast(long long)type->Struct.custom_align); - w = gb_string_appendc(w, "{"); + type_writer_appendc(w, "struct"); + if (type->Struct.is_packed) type_writer_appendc(w, "#packed"); + if (type->Struct.is_raw_union) type_writer_appendc(w, "#raw_union"); + if (type->Struct.is_no_copy) type_writer_appendc(w, "#no_copy"); + if (type->Struct.custom_min_field_align != 0) type_writer_append_fmt(w, "#min_field_align(%lld)", cast(long long)type->Struct.custom_min_field_align); + if (type->Struct.custom_max_field_align != 0) type_writer_append_fmt(w, "#max_field_align(%lld)", cast(long long)type->Struct.custom_max_field_align); + if (type->Struct.custom_align != 0) type_writer_append_fmt(w, "#align(%lld)", cast(long long)type->Struct.custom_align); + type_writer_appendb(w, '{'); for_array(i, type->Struct.fields) { Entity *f = type->Struct.fields[i]; GB_ASSERT(f->kind == Entity_Variable); if (i > 0) { - w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); + type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); } - w = gb_string_append_length (w, f->token.string.text, f->token.string.len); - w = gb_string_appendc (w, ":"); - w = write_type_to_canonical_string(w, f->type); + type_writer_append(w, f->token.string.text, f->token.string.len); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, f->type); String tag = type->Struct.tags[i]; if (tag.len != 0) { String s = quote_to_ascii(heap_allocator(), tag); - w = gb_string_append_length(w, s.text, s.len); + type_writer_append(w, s.text, s.len); gb_free(heap_allocator(), s.text); } } - return gb_string_appendc(w, "}"); + type_writer_appendb(w, '}'); + return; case Type_BitField: - w = gb_string_appendc(w, "bit_field"); - w = write_type_to_canonical_string(w, type->BitField.backing_type); - w = gb_string_appendc(w, " {"); + type_writer_appendc(w, "bit_field"); + write_type_to_canonical_string(w, type->BitField.backing_type); + type_writer_appendc(w, " {"); for (isize i = 0; i < type->BitField.fields.count; i++) { Entity *f = type->BitField.fields[i]; if (i > 0) { - w = gb_string_appendc(w, CANONICAL_FIELD_SEPARATOR); + type_writer_appendc(w, CANONICAL_FIELD_SEPARATOR); } - w = gb_string_append_length(w, f->token.string.text, f->token.string.len); - w = gb_string_appendc(w, ":"); - w = write_type_to_canonical_string(w, f->type); - w = gb_string_appendc(w, "|"); - w = gb_string_appendc(w, gb_bprintf("%u", type->BitField.bit_sizes[i])); + type_writer_append(w, f->token.string.text, f->token.string.len); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + write_type_to_canonical_string(w, f->type); + type_writer_appendc(w, CANONICAL_BIT_FIELD_SEPARATOR); + type_writer_append_fmt(w, "%u", type->BitField.bit_sizes[i]); } - return gb_string_appendc(w, " }"); + type_writer_appendc(w, " }"); + return; case Type_Proc: - w = gb_string_appendc(w, "proc"); + type_writer_appendc(w, "proc"); if (default_calling_convention() != type->Proc.calling_convention) { - w = gb_string_appendc(w, "\""); - w = gb_string_appendc(w, proc_calling_convention_strings[type->Proc.calling_convention]); - w = gb_string_appendc(w, "\""); + type_writer_appendc(w, "\""); + type_writer_appendc(w, proc_calling_convention_strings[type->Proc.calling_convention]); + type_writer_appendc(w, "\""); } - w = write_canonical_params(w, type->Proc.params); + write_canonical_params(w, type->Proc.params); if (type->Proc.result_count > 0) { - w = gb_string_appendc(w, "->"); - w = write_canonical_params(w, type->Proc.results); + type_writer_appendc(w, "->"); + write_canonical_params(w, type->Proc.results); } - return w; + return; case Type_Generic: GB_PANIC("Type_Generic should never be hit"); - return w; + return; case Type_Named: if (type->Named.type_name != nullptr) { - return write_canonical_entity_name(w, type->Named.type_name); + write_canonical_entity_name(w, type->Named.type_name); + return; } else { - w = gb_string_append_length(w, type->Named.name.text, type->Named.name.len); + type_writer_append(w, type->Named.name.text, type->Named.name.len); } - return w; + return; case Type_Tuple: - w = gb_string_appendc(w, "params"); - w = write_canonical_params(w, type); - return w; + type_writer_appendc(w, "params"); + write_canonical_params(w, type); + return; default: GB_PANIC("unknown type kind %d %.*s", type->kind, LIT(type_strings[type->kind])); break; } - return w; + return; } \ No newline at end of file diff --git a/src/name_canonicalization.hpp b/src/name_canonicalization.hpp new file mode 100644 index 00000000000..620665cd2bd --- /dev/null +++ b/src/name_canonicalization.hpp @@ -0,0 +1,88 @@ + +#define CANONICAL_TYPE_SEPARATOR ":" +#define CANONICAL_NAME_SEPARATOR "::" + +#define CANONICAL_BIT_FIELD_SEPARATOR "|" + +#define CANONICAL_PARAM_SEPARATOR "," + +#define CANONICAL_PARAM_TYPEID "$" +#define CANONICAL_PARAM_CONST "$$" + +#define CANONICAL_PARAM_C_VARARG "#c_vararg" +#define CANONICAL_PARAM_VARARG ".." + +#define CANONICAL_FIELD_SEPARATOR "," + +#define CANONICAL_ANON_PREFIX "$anon" + +#define CANONICAL_NONE_TYPE "<>" + +#define CANONICAL_RANGE_OPERATOR "..=" + +struct TypeWriter; + +gb_internal void write_type_to_canonical_string(TypeWriter *w, Type *type); +gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e); +gb_internal u64 type_hash_canonical_type(Type *type); +gb_internal String type_to_canonical_string(gbAllocator allocator, Type *type); +gb_internal gbString temp_canonical_string(Type *type); + + +gb_internal GB_COMPARE_PROC(type_info_pair_cmp); + + +struct TypeInfoPair { + Type *type; + u64 hash; // see: type_hash_canonical_type +}; + +struct TypeSet { + TypeInfoPair *keys; + usize count; + usize capacity; +}; + +static constexpr u64 TYPE_SET_TOMBSTONE = ~(u64)(0ull); + +struct TypeSetIterator { + TypeSet *set; + usize index; + + TypeSetIterator &operator++() noexcept { + for (;;) { + ++index; + if (set->capacity == index) { + return *this; + } + TypeInfoPair key = set->keys[index]; + if (key.hash != 0 && key.hash != TYPE_SET_TOMBSTONE) { + return *this; + } + } + } + + bool operator==(TypeSetIterator const &other) const noexcept { + return this->set == other.set && this->index == other.index; + } + + + operator TypeInfoPair *() const { + return &set->keys[index]; + } +}; + + +gb_internal void type_set_init (TypeSet *s, isize capacity = 16); +gb_internal void type_set_destroy(TypeSet *s); +gb_internal Type *type_set_add (TypeSet *s, Type *ptr); +gb_internal Type *type_set_add (TypeSet *s, TypeInfoPair pair); +gb_internal bool type_set_update (TypeSet *s, Type *ptr); // returns true if it previously existed +gb_internal bool type_set_update (TypeSet *s, TypeInfoPair pair); // returns true if it previously existed +gb_internal bool type_set_exists (TypeSet *s, Type *ptr); +gb_internal void type_set_remove (TypeSet *s, Type *ptr); +gb_internal void type_set_clear (TypeSet *s); +gb_internal TypeInfoPair *type_set_retrieve(TypeSet *s, Type *ptr); + +gb_internal TypeSetIterator begin(TypeSet &set) noexcept; +gb_internal TypeSetIterator end(TypeSet &set) noexcept; From 04c1ff61f43a34b9f3d0945eb02af1518cb1e230 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 15:03:49 +0000 Subject: [PATCH 13/19] Update the comments --- src/name_canonicalization.cpp | 109 ++++++++++++++++++---------------- src/name_canonicalization.hpp | 1 + 2 files changed, 59 insertions(+), 51 deletions(-) diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index 9c28581705f..ed72bb2e21d 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -3,19 +3,19 @@ * No spaces between any values - * normal declarations - pkg.name + * normal declarations - pkg::name * builtin names - just their normal name e.g. `i32` or `string` - * nested - pkg.parent1.parent2.name - * file private - pkg.[file_name].name - * Example: `pkg.[file.odin].Type` - * polymorphic procedure/type - pkg.foo::TYPE + * nested - pkg::parent1::parent2::name + * file private - pkg::[file_name]::name + * Example: `pkg::[file.odin]::Type` + * polymorphic procedure/type - pkg::foo:TYPE * naming convention for parameters * type * $typeid_based_name * $$constant_parameter - * Example: `foo.to_thing::proc(u64)->([]u8)` - * nested decl in polymorphic procedure - pkg.foo::TYPE.name - * anonymous procedures - pkg.foo.$anon[file.odin:123] + * Example: `foo::to_thing:proc(u64)->([]u8)` + * nested decl in polymorphic procedure - pkg::foo:TYPE::name + * anonymous procedures - pkg::foo::$anon[file.odin:123] * 123 is the file offset in bytes @@ -280,42 +280,51 @@ void type_writer_make_hasher(TypeWriter *w, u64 *hash) { gb_internal void write_canonical_params(TypeWriter *w, Type *params) { type_writer_appendc(w, "("); - if (params) { - GB_ASSERT(params->kind == Type_Tuple); - for_array(i, params->Tuple.variables) { - Entity *v = params->Tuple.variables[i]; - if (i > 0) { - type_writer_appendc(w, CANONICAL_PARAM_SEPARATOR); - } - type_writer_append(w, v->token.string.text, v->token.string.len); - type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + defer (type_writer_appendc(w, ")")); - if (v->kind == Entity_Variable) { - if (v->flags&EntityFlag_CVarArg) { - type_writer_appendc(w, CANONICAL_PARAM_C_VARARG); - } - if (v->flags&EntityFlag_Ellipsis) { - Type *slice = base_type(v->type); - type_writer_appendc(w, CANONICAL_PARAM_VARARG); - GB_ASSERT(v->type->kind == Type_Slice); - write_type_to_canonical_string(w, slice->Slice.elem); - } else { - write_type_to_canonical_string(w, v->type); - } - } else if (v->kind == Entity_TypeName) { - type_writer_appendc(w, CANONICAL_PARAM_TYPEID); + if (params == nullptr) { + return; + } + GB_ASSERT(params->kind == Type_Tuple); + for_array(i, params->Tuple.variables) { + Entity *v = params->Tuple.variables[i]; + if (i > 0) { + type_writer_appendc(w, CANONICAL_PARAM_SEPARATOR); + } + type_writer_append(w, v->token.string.text, v->token.string.len); + type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); + + switch (v->kind) { + case Entity_Variable: + if (v->flags&EntityFlag_CVarArg) { + type_writer_appendc(w, CANONICAL_PARAM_C_VARARG); + } + if (v->flags&EntityFlag_Ellipsis) { + Type *slice = base_type(v->type); + type_writer_appendc(w, CANONICAL_PARAM_VARARG); + GB_ASSERT(v->type->kind == Type_Slice); + write_type_to_canonical_string(w, slice->Slice.elem); + } else { write_type_to_canonical_string(w, v->type); - } else if (v->kind == Entity_Constant) { + } + break; + case Entity_TypeName: + type_writer_appendc(w, CANONICAL_PARAM_TYPEID); + write_type_to_canonical_string(w, v->type); + break; + case Entity_Constant: + { type_writer_appendc(w, CANONICAL_PARAM_CONST); gbString s = exact_value_to_string(v->Constant.value, 1<<16); type_writer_append(w, s, gb_string_length(s)); gb_string_free(s); - } else { - GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); } + break; + default: + GB_PANIC("TODO(bill): handle non type/const parapoly parameter values"); + break; } } - type_writer_appendc(w, ")"); return; } @@ -348,19 +357,6 @@ gb_internal gbString temp_canonical_string(Type *type) { return cast(gbString)w.user_data; } -gb_internal void print_scope_flags(Scope *s) { - if (s->flags & ScopeFlag_Pkg) gb_printf_err("Pkg "); - if (s->flags & ScopeFlag_Builtin) gb_printf_err("Builtin "); - if (s->flags & ScopeFlag_Global) gb_printf_err("Global "); - if (s->flags & ScopeFlag_File) gb_printf_err("File "); - if (s->flags & ScopeFlag_Init) gb_printf_err("Init "); - if (s->flags & ScopeFlag_Proc) gb_printf_err("Proc "); - if (s->flags & ScopeFlag_Type) gb_printf_err("Type "); - if (s->flags & ScopeFlag_HasBeenImported) gb_printf_err("HasBeenImported "); - if (s->flags & ScopeFlag_ContextDefined) gb_printf_err("ContextDefined "); - gb_printf_err("\n"); -} - gb_internal gbString string_canonical_entity_name(gbAllocator allocator, Entity *e) { TypeWriter w = {}; type_writer_make_string(&w, allocator); @@ -477,6 +473,20 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { goto write_base_name; } gb_printf_err("%s WEIRD ENTITY TYPE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); + + auto const print_scope_flags = [](Scope *s) { + if (s->flags & ScopeFlag_Pkg) gb_printf_err("Pkg "); + if (s->flags & ScopeFlag_Builtin) gb_printf_err("Builtin "); + if (s->flags & ScopeFlag_Global) gb_printf_err("Global "); + if (s->flags & ScopeFlag_File) gb_printf_err("File "); + if (s->flags & ScopeFlag_Init) gb_printf_err("Init "); + if (s->flags & ScopeFlag_Proc) gb_printf_err("Proc "); + if (s->flags & ScopeFlag_Type) gb_printf_err("Type "); + if (s->flags & ScopeFlag_HasBeenImported) gb_printf_err("HasBeenImported "); + if (s->flags & ScopeFlag_ContextDefined) gb_printf_err("ContextDefined "); + gb_printf_err("\n"); + }; + print_scope_flags(s); GB_PANIC("weird entity"); } @@ -499,10 +509,7 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { } else { type_writer_append(w, e->token.string.text, e->token.string.len); } - gb_unused(parent); - } - // Handle parapoly stuff here? return; case Entity_Procedure: diff --git a/src/name_canonicalization.hpp b/src/name_canonicalization.hpp index 620665cd2bd..1820a90fb1a 100644 --- a/src/name_canonicalization.hpp +++ b/src/name_canonicalization.hpp @@ -1,6 +1,7 @@ #define CANONICAL_TYPE_SEPARATOR ":" #define CANONICAL_NAME_SEPARATOR "::" +// #define CANONICAL_NAME_SEPARATOR "ยท" #define CANONICAL_BIT_FIELD_SEPARATOR "|" From 481745784f82c36b8233985768570b5a992656f5 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 15:04:02 +0000 Subject: [PATCH 14/19] Remove debug print --- src/llvm_backend_general.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 233448ecea1..4f6fcb88ea8 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1460,7 +1460,6 @@ gb_internal String lb_get_entity_name(lbModule *m, Entity *e) { defer (gb_string_free(w)); String name = copy_string(permanent_allocator(), make_string(cast(u8 const *)w, gb_string_length(w))); - gb_printf_err("%.*s\n", LIT(name)); if (e->kind == Entity_TypeName) { e->TypeName.ir_mangled_name = name; From e168cea67011f43209d752d52bad862a6416795e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 15:17:04 +0000 Subject: [PATCH 15/19] Add offset for nested things --- src/name_canonicalization.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index ed72bb2e21d..0e81ab12cd1 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -366,11 +366,10 @@ gb_internal gbString string_canonical_entity_name(gbAllocator allocator, Entity -gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e, bool ignore_final_dot=false) { +gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e) { GB_ASSERT(e != nullptr); - - if (e->kind == Entity_Procedure) { - if (e->Procedure.is_export || e->Procedure.is_foreign) { + if (e->kind == Entity_Procedure || e->kind == Entity_TypeName) { + if (e->kind == Entity_Procedure && (e->Procedure.is_export || e->Procedure.is_foreign)) { // no prefix return; } @@ -396,7 +395,7 @@ gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e, bool ig if (e->pkg->name == "llvm") { type_writer_appendc(w, "$"); } - type_writer_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); + type_writer_append_fmt(w, CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name)); } } else if (e->kind == Entity_Procedure) { if (e->Procedure.is_export || e->Procedure.is_foreign) { @@ -405,10 +404,9 @@ gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e, bool ig } GB_PANIC("TODO(bill): handle entity kind: %d", e->kind); } - if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { String file_name = filename_without_directory(e->file->fullpath); - type_writer_appendc(w, gb_bprintf(CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset)); + type_writer_append_fmt(w, CANONICAL_ANON_PREFIX "[%.*s:%d]", LIT(file_name), e->token.pos.offset); } else { type_writer_append(w, e->token.string.text, e->token.string.len); } @@ -417,9 +415,7 @@ gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e, bool ig type_writer_appendc(w, CANONICAL_TYPE_SEPARATOR); write_type_to_canonical_string(w, e->type); } - if (!ignore_final_dot) { - type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); - } + type_writer_appendc(w, CANONICAL_NAME_SEPARATOR); return; } @@ -461,7 +457,10 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { } if (s->decl_info != nullptr && s->decl_info->entity) { - write_canonical_parent_prefix(w, s->decl_info->entity); + Entity *parent = s->decl_info->entity; + write_canonical_parent_prefix(w, parent); + type_writer_append_fmt(w, CANONICAL_TYPE_SEPARATOR "[%d]", e->token.pos.offset); + goto write_base_name; } else if ((s->flags & ScopeFlag_File) && s->file != nullptr) { String file_name = filename_without_directory(s->file->fullpath); From 0482facdda4fd9257202fb89d9563d5a44e1499a Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 15:26:59 +0000 Subject: [PATCH 16/19] Fix for weird builtin types --- src/name_canonicalization.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index 0e81ab12cd1..6f092181c8d 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -448,11 +448,15 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { return; } - if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || - e->flags & EntityFlag_NotExported) { - + if (e->scope->flags & (ScopeFlag_Builtin)) { + // ignore + } else if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0) { Scope *s = e->scope; + while ((s->flags & (ScopeFlag_Proc|ScopeFlag_File)) == 0 && s->decl_info == nullptr) { + if (s->parent == nullptr) { + break; + } s = s->parent; } @@ -470,6 +474,8 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { } type_writer_appendc(w, gb_bprintf(CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name))); goto write_base_name; + } else if (s->flags & (ScopeFlag_Builtin)) { + goto write_base_name; } gb_printf_err("%s WEIRD ENTITY TYPE %s %u %p\n", token_pos_to_string(e->token.pos), type_to_string(e->type), s->flags, s->decl_info); @@ -487,7 +493,7 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { }; print_scope_flags(s); - GB_PANIC("weird entity"); + GB_PANIC("weird entity %.*s", LIT(e->token.string)); } if (e->pkg != nullptr) { type_writer_append(w, e->pkg->name.text, e->pkg->name.len); From 827cd45f04f2f86c59c67c549dbc44b33e1f2863 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 18 Feb 2025 17:12:58 +0000 Subject: [PATCH 17/19] Add to check to fix crash --- src/name_canonicalization.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index 6f092181c8d..fd4e4b50f9f 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -449,8 +449,9 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { } if (e->scope->flags & (ScopeFlag_Builtin)) { - // ignore - } else if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0) { + goto write_base_name; + } else if ((e->scope->flags & (ScopeFlag_File | ScopeFlag_Pkg)) == 0 || + e->flags & EntityFlag_NotExported) { Scope *s = e->scope; while ((s->flags & (ScopeFlag_Proc|ScopeFlag_File)) == 0 && s->decl_info == nullptr) { From 0bac34eec891080e2d0b9b4fc9e7b429a42064a3 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 19 Feb 2025 10:59:05 +0000 Subject: [PATCH 18/19] Number fields within procedures with a depth-first numbering system --- src/checker.cpp | 4 ++++ src/checker.hpp | 4 ++++ src/name_canonicalization.cpp | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/checker.cpp b/src/checker.cpp index 38da38b0c52..f1f1b2556dd 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -358,6 +358,10 @@ gb_internal void check_open_scope(CheckerContext *c, Ast *node) { scope->flags |= ScopeFlag_Type; break; } + if (c->decl && c->decl->proc_lit) { + // Number the scopes within a procedure body depth-first + scope->index = c->decl->scope_index++; + } c->scope = scope; c->state_flags |= StateFlag_bounds_check; } diff --git a/src/checker.hpp b/src/checker.hpp index 8850d5c3f62..c89a1bc9c46 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -229,6 +229,8 @@ struct DeclInfo { Array labels; + i32 scope_index; + Array variadic_reuses; i64 variadic_reuse_max_bytes; i64 variadic_reuse_max_align; @@ -273,6 +275,8 @@ struct Scope { std::atomic next; std::atomic head_child; + i32 index; // within a procedure + RwMutex mutex; StringMap elements; PtrSet imported; diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index fd4e4b50f9f..b24bf91260e 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -464,7 +464,10 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { if (s->decl_info != nullptr && s->decl_info->entity) { Entity *parent = s->decl_info->entity; write_canonical_parent_prefix(w, parent); - type_writer_append_fmt(w, CANONICAL_TYPE_SEPARATOR "[%d]", e->token.pos.offset); + if (e->scope->index > 0) { + type_writer_append_fmt(w, CANONICAL_TYPE_SEPARATOR "[%d]", e->scope->index); + } + // type_writer_append_fmt(w, CANONICAL_TYPE_SEPARATOR "[%d]", e->token.pos.offset); goto write_base_name; } else if ((s->flags & ScopeFlag_File) && s->file != nullptr) { From 29456bcdea6e9567a9655e49a948f9e57920ff7a Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 19 Feb 2025 11:12:32 +0000 Subject: [PATCH 19/19] Move docs to the header --- src/name_canonicalization.cpp | 30 +----------------------------- src/name_canonicalization.hpp | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/src/name_canonicalization.cpp b/src/name_canonicalization.cpp index b24bf91260e..043317ef0e2 100644 --- a/src/name_canonicalization.cpp +++ b/src/name_canonicalization.cpp @@ -1,26 +1,3 @@ -/* - General Rules for canonical name mangling - - * No spaces between any values - - * normal declarations - pkg::name - * builtin names - just their normal name e.g. `i32` or `string` - * nested - pkg::parent1::parent2::name - * file private - pkg::[file_name]::name - * Example: `pkg::[file.odin]::Type` - * polymorphic procedure/type - pkg::foo:TYPE - * naming convention for parameters - * type - * $typeid_based_name - * $$constant_parameter - * Example: `foo::to_thing:proc(u64)->([]u8)` - * nested decl in polymorphic procedure - pkg::foo:TYPE::name - * anonymous procedures - pkg::foo::$anon[file.odin:123] - * 123 is the file offset in bytes - - -*/ - gb_internal GB_COMPARE_PROC(type_info_pair_cmp) { TypeInfoPair *x = cast(TypeInfoPair *)a; TypeInfoPair *y = cast(TypeInfoPair *)b; @@ -397,11 +374,7 @@ gb_internal void write_canonical_parent_prefix(TypeWriter *w, Entity *e) { } type_writer_append_fmt(w, CANONICAL_NAME_SEPARATOR "[%.*s]" CANONICAL_NAME_SEPARATOR, LIT(file_name)); } - } else if (e->kind == Entity_Procedure) { - if (e->Procedure.is_export || e->Procedure.is_foreign) { - // no prefix - return; - } + } else { GB_PANIC("TODO(bill): handle entity kind: %d", e->kind); } if (e->kind == Entity_Procedure && e->Procedure.is_anonymous) { @@ -467,7 +440,6 @@ gb_internal void write_canonical_entity_name(TypeWriter *w, Entity *e) { if (e->scope->index > 0) { type_writer_append_fmt(w, CANONICAL_TYPE_SEPARATOR "[%d]", e->scope->index); } - // type_writer_append_fmt(w, CANONICAL_TYPE_SEPARATOR "[%d]", e->token.pos.offset); goto write_base_name; } else if ((s->flags & ScopeFlag_File) && s->file != nullptr) { diff --git a/src/name_canonicalization.hpp b/src/name_canonicalization.hpp index 1820a90fb1a..e289ed5b2d8 100644 --- a/src/name_canonicalization.hpp +++ b/src/name_canonicalization.hpp @@ -1,3 +1,25 @@ +/* + General Rules for canonical name mangling + + * No spaces between any values + + * normal declarations - pkg::name + * builtin names - just their normal name e.g. `i32` or `string` + * nested (zero level) - pkg::parent1::parent2::name + * nested (more scopes) - pkg::parent1::parent2::name[4] + * [4] indicates the 4th scope within a procedure numbered in depth-first order + * file private - pkg::[file_name]::name + * Example: `pkg::[file.odin]::Type` + * polymorphic procedure/type - pkg::foo:TYPE + * naming convention for parameters + * type + * $typeid_based_name + * $$constant_parameter + * Example: `foo::to_thing:proc(u64)->([]u8)` + * nested decl in polymorphic procedure - pkg::foo:TYPE::name + * anonymous procedures - pkg::foo::$anon[file.odin:123] + * 123 is the file offset in bytes +*/ #define CANONICAL_TYPE_SEPARATOR ":" #define CANONICAL_NAME_SEPARATOR "::"