Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7c6a858
radlink: enable C11 atomics so BLAKE3 skips the lock'd CPU-feature load
honkstar1 Jun 16, 2026
fbca33c
radlink: skip symbol-name string-table scan on interp-only paths
honkstar1 Jun 16, 2026
7d32fb1
radlink: fold CV type-index fixup into a single hash probe
honkstar1 Jun 16, 2026
c594e91
radlink: release copy-on-write input views in parallel before exit
honkstar1 Jun 16, 2026
9c64504
radlink: trim COFF symbol/section parse overhead
honkstar1 Jun 17, 2026
8f8f2ef
radlink: cache leaf hash per bucket to avoid deref in type-dedup probe
honkstar1 Jun 17, 2026
e7a0e5e
radlink: release the ~1GB image buffer early so reclaim overlaps the run
honkstar1 Jun 17, 2026
3e05dd2
radlink: memoize parsed COFF symbols per obj (kills #1 link hotspot)
honkstar1 Jun 17, 2026
e8d041b
radlink: size the assigned-ti table by unique types, not total (recla…
honkstar1 Jun 17, 2026
b3c0afa
radlink: slim the parsed-symbol memo (drop name, decode on demand)
honkstar1 Jun 18, 2026
a169cb5
radlink: pack LNK_ParsedSymbolLite to 16B (offset + U32 value)
honkstar1 Jun 18, 2026
91ef9ba
radlink: garbage-collect unreferenced CodeView types before PDB emit
honkstar1 Jun 19, 2026
df18024
radlink: /OPT:ICF identical COMDAT folding (code + read-only data), p…
honkstar1 Jun 19, 2026
dc50e32
radlink/pdb: coalesce DBI section contributions; stabilize + parallel…
honkstar1 Jun 19, 2026
5e2be99
radlink: frontier worklist for type-GC transitive closure (perf)
honkstar1 Jun 19, 2026
995ed6d
radlink: count ICF reloc slices in the parallel fill, not a serial re…
honkstar1 Jun 19, 2026
0ae7fab
radlink: skip the GC mark atomic on the already-reachable fast path
honkstar1 Jun 19, 2026
b62f14b
radlink: make type-GC opt-in (/OPT:GCTYPES), default off
honkstar1 Jun 19, 2026
bcf83a1
radlink: parallelize the section-contrib sort for large chunks
honkstar1 Jun 19, 2026
add288b
radlink: skip converged classes in ICF refinement
honkstar1 Jun 19, 2026
141a3c1
radlink: cache symbol interp so the lib search stops re-parsing resol…
honkstar1 Jun 19, 2026
de5b169
radlink: skip redundant library re-searches in the resolution fixpoint
honkstar1 Jun 19, 2026
8321ca0
radlink: /OPT:ICFSTATIC -- fold static (internal-linkage) COMDATs
honkstar1 Jun 20, 2026
550b1a4
radlink: header-unit IFC debug-record resolution + ICF leader-keying
honkstar1 Jun 20, 2026
5804c3b
radlink perf: parallelize ICF refine round-loop (gather/scatter/survi…
honkstar1 Jun 21, 2026
57a2c87
radlink perf: parallelize ICF refine per-round color group-scan (para…
honkstar1 Jun 21, 2026
3e1506c
radlink perf: ICF refine round-loop -> ONE persistent-worker parallel…
honkstar1 Jun 21, 2026
1a03d93
radlink perf: count-based ICF work-split (drop 5.6s reloc-weight gath…
honkstar1 Jun 21, 2026
959d7c9
radlink perf: ICF refine dirty-class worklist tail (drop full-active …
honkstar1 Jun 21, 2026
c234f3d
radlink: consolidate ICF worklist + parallel IFC 0x1522 scan
honkstar1 Jun 21, 2026
16ba936
radlink: disable ICF worklist tail by default (region_cap 8->64)
honkstar1 Jun 21, 2026
c5b8ea9
radlink ICF: parallelize cand_map build with atomic-CAS open-addressi…
honkstar1 Jun 21, 2026
3cf511c
radlink ICF: parallelize apply_ifc nonblob_complete set merge + ifc p…
honkstar1 Jun 21, 2026
1ba8b8f
radlink perf: parallelize make_code_view_input serial setup loops
honkstar1 Jun 21, 2026
3ece47c
radlink perf: ICF cand_map memset fill + image_fill single-node fast-…
honkstar1 Jun 21, 2026
952ef1e
radlink perf: pow2 hash-table caps -> mask index (kill % DIV in type-…
honkstar1 Jun 21, 2026
580ad44
radlink perf: per-lib frontier cursor in lib search (skip re-scanning…
honkstar1 Jun 21, 2026
ea55986
radlink perf: decommit idle tctx scratch pages before PDB build
honkstar1 Jun 21, 2026
20eb8a1
radlink perf: free leaf bucket_arr probe tables before merge-types peak
honkstar1 Jun 22, 2026
bd71560
radlink: env-gated RADLINK_PHASE_LOG raw per-phase micros file (diagn…
honkstar1 Jun 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ if "%spall%"=="1" set auto_compile_flags=%auto_compile_flags%
if "%asan%"=="1" set auto_compile_flags=%auto_compile_flags% -fsanitize=address && echo [asan enabled]
if "%ubsan%"=="1" set auto_compile_flags=%auto_compile_flags% -fsanitize=undefined && echo [ubsan enabled]
if "%opengl%"=="1" set auto_compile_flags=%auto_compile_flags% -DR_BACKEND=R_BACKEND_OPENGL && echo [opengl render backend]
if "%worklist_selfcheck%"=="1" set auto_compile_flags=%auto_compile_flags% -DICF_WORKLIST_SELFCHECK=1 && echo [icf worklist selfcheck enabled]
if "%dwarf%"=="1" if "%clang%"=="1" set auto_compile_flags=%auto_compile_flags% -gdwarf && echo [dwarf debug info]
if "%dwarf%"=="" if "%clang%"=="1" set auto_compile_flags=%auto_compile_flags% -gcodeview
if "%pgo%"=="1" (
Expand Down Expand Up @@ -142,7 +143,14 @@ pushd build
if "%raddbg%"=="1" set didbuild=1 && %compile% ..\src\raddbg\raddbg_main.c %compile_link% %link_icon% %out%raddbg.exe || exit /b 1
if "%raddbg_non_graphical%"=="1" set didbuild=1 && %compile% -DWM_STUB=1 -DR_BACKEND=R_BACKEND_STUB ..\src\raddbg\raddbg_main.c %compile_link% %link_icon% %out%raddbg_non_graphical.exe || exit /b 1
if "%com_shim%"=="1" set didbuild=1 && %compile% ..\src\com_shim\com_shim_main.c %compile_link% %out%com_shim.exe || exit /b 1
if "%radlink%"=="1" set didbuild=1 && %compile% ..\src\linker\lnk.c %compile_link% %linker% /NOIMPLIB %linker% /NATVIS:"%~dp0\src\linker\linker.natvis" %out%radlink.exe || exit /b 1
:: NOTE: -DBLAKE3_ATOMICS=1 makes BLAKE3 use C11 _Atomic (plain atomic load) for
:: get_cpu_features instead of MSVC's _InterlockedOr `lock or` barrier on every
:: compress dispatch (was a ~5.5s main-thread hot spot). MSVC C11 atomics require
:: /std:c11 /experimental:c11atomics. Kept external so the vendored blake3 source
:: stays pristine.
set radlink_msvc_flags=
if "%msvc%"=="1" set radlink_msvc_flags=/std:c11 /experimental:c11atomics -DBLAKE3_ATOMICS=1
if "%radlink%"=="1" set didbuild=1 && %compile% %radlink_msvc_flags% ..\src\linker\lnk.c %compile_link% %linker% /NOIMPLIB %linker% /NATVIS:"%~dp0\src\linker\linker.natvis" %out%radlink.exe || exit /b 1
if "%radbin%"=="1" set didbuild=1 && %compile% ..\src\radbin\radbin_main.c %compile_link% %out%radbin.exe || exit /b 1
if "%raddump%"=="1" set didbuild=1 && %compile% ..\src\raddump\raddump_main.c %compile_link% %out%raddump.exe || exit /b 1
if "%ryan_scratch%"=="1" set didbuild=1 && %compile% ..\src\scratch\ryan_scratch.c %compile_link% %out%ryan_scratch.exe || exit /b 1
Expand Down
48 changes: 48 additions & 0 deletions src/base/base_arena.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,54 @@ arena_pop_to(Arena *arena, U64 pos)

}

//- rjf: arena decommit of unused (rewound/free) pages

internal void
arena_decommit_unused(Arena *arena)
{
  // NOTE(perf): hands committed-but-unused pages back to the OS so they no
  // longer count against working set; the address reservation itself is kept.
  // For every block in the active chain only the page-aligned region above the
  // block's `pos` is released; for free-list blocks everything above the
  // page-aligned header is released. Bytes at or below `pos` are never touched.
  // `cmt` is lowered to match, so this relies on the push path growing `cmt`
  // again on demand (see arena_push) for later reuse to be transparent.
  if((arena->flags & ArenaFlag_LargePages) != 0)
  {
    // large pages cannot be partially decommitted safely; nothing to do.
    return;
  }
  U64 page_size = get_system_info()->page_size;

  // rjf: active chain -- walk from the current block back through `prev`
  Arena *block = arena->current;
  while(block != 0)
  {
    // first page boundary at or above the block's live position
    U64 live_end = AlignPow2(block->pos, page_size);
    if(block->cmt > live_end)
    {
      U64 tail_size = block->cmt - live_end;
      U8 *tail_base = (U8 *)block + live_end;
      AsanPoisonMemoryRegion(tail_base, tail_size);
      decommit_memory(tail_base, tail_size);
      block->cmt = live_end;
    }
    block = block->prev;
  }

#if ARENA_FREE_LIST
  // rjf: free chain -- keep only the page(s) holding the block header
  U64 header_span = AlignPow2(ARENA_HEADER_SIZE, page_size);
  for(Arena *free_block = arena->free_last; free_block != 0; free_block = free_block->prev)
  {
    if(free_block->cmt <= header_span)
    {
      continue;
    }
    U64 body_size = free_block->cmt - header_span;
    U8 *body_base = (U8 *)free_block + header_span;
    AsanPoisonMemoryRegion(body_base, body_size);
    decommit_memory(body_base, body_size);
    free_block->cmt = header_span;
  }
#endif
}

//- rjf: arena push/pop helpers

internal void
Expand Down
3 changes: 3 additions & 0 deletions src/base/base_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ internal void *arena_push(Arena *arena, U64 size, U64 align, B32 zero);
internal U64 arena_pos(Arena *arena);
internal void arena_pop_to(Arena *arena, U64 pos);

//- rjf: arena decommit of unused (rewound/free) pages
// Decommits committed pages above each block's page-aligned `pos` (and the
// bodies of free-list blocks) and lowers `cmt` accordingly; returns early
// without doing anything for arenas flagged ArenaFlag_LargePages.
internal void arena_decommit_unused(Arena *arena);

//- rjf: arena push/pop helpers
internal void arena_clear(Arena *arena);
internal void arena_pop(Arena *arena, U64 amt);
Expand Down
15 changes: 15 additions & 0 deletions src/base/base_thread_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,21 @@ tctx_get_scratch(Arena **conflicts, U64 count)
return result;
}

//- rjf: scratch decommit (release committed-but-unused scratch pages back to OS)

internal void
tctx_scratch_decommit(void)
{
TCTX *tctx = tctx_selected();
for(U64 i = 0; i < ArrayCount(tctx->arenas); i += 1)
{
if(tctx->arenas[i] != 0)
{
arena_decommit_unused(tctx->arenas[i]);
}
}
}

//- rjf: lane metadata

internal LaneCtx
Expand Down
1 change: 1 addition & 0 deletions src/base/base_thread_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ internal TCTX *tctx_selected(void);

//- rjf: scratch arenas
internal Arena *tctx_get_scratch(Arena **conflicts, U64 count);
// decommits unused pages of every non-null scratch arena in the selected thread context
internal void tctx_scratch_decommit(void);
#define scratch_begin(conflicts, count) temp_begin(tctx_get_scratch((conflicts), (count)))
#define scratch_end(scratch) temp_end(scratch)

Expand Down
28 changes: 24 additions & 4 deletions src/coff/coff_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,16 @@ coff_section_header_array_from_name(Arena *arena, String8 string_table, COFF_Sec
}


// NOTE: name-skipping variants. coff_read_symbol_name does a cstr scan over the
// memory-mapped string table, which is the dominant cost when parsing symbols in
// bulk; callers that only need the scalar fields (value/section/storage_class/aux,
// e.g. symbol-value interpretation) should use these to avoid that scan. The full
// coff_parse_symbol{16,32} below are these plus the name read, so the scalar-field
// logic lives in exactly one place.
internal COFF_ParsedSymbol
coff_parse_symbol32(String8 string_table, COFF_Symbol32 *sym32)
coff_parse_symbol32_no_name(COFF_Symbol32 *sym32)
{
COFF_ParsedSymbol result = {0};
result.name = coff_read_symbol_name(string_table, &sym32->name);
result.value = sym32->value;
result.section_number = sym32->section_number;
result.type = sym32->type;
Expand All @@ -181,10 +186,9 @@ coff_parse_symbol32(String8 string_table, COFF_Symbol32 *sym32)
}

internal COFF_ParsedSymbol
coff_parse_symbol16(String8 string_table, COFF_Symbol16 *sym16)
coff_parse_symbol16_no_name(COFF_Symbol16 *sym16)
{
COFF_ParsedSymbol result = {0};
result.name = coff_read_symbol_name(string_table, &sym16->name);
result.value = sym16->value;
if (sym16->section_number == COFF_Symbol_DebugSection16) {
result.section_number = COFF_Symbol_DebugSection32;
Expand All @@ -200,6 +204,22 @@ coff_parse_symbol16(String8 string_table, COFF_Symbol16 *sym16)
return result;
}

// Full parse of a 32-bit-section-number COFF symbol: the scalar fields come from
// coff_parse_symbol32_no_name, then the name is resolved against `string_table`.
internal COFF_ParsedSymbol
coff_parse_symbol32(String8 string_table, COFF_Symbol32 *sym32)
{
  COFF_ParsedSymbol parsed = coff_parse_symbol32_no_name(sym32);
  String8 symbol_name = coff_read_symbol_name(string_table, &sym32->name);
  parsed.name = symbol_name;
  return parsed;
}

// Full parse of a 16-bit-section-number COFF symbol: the scalar fields come from
// coff_parse_symbol16_no_name, then the name is resolved against `string_table`.
internal COFF_ParsedSymbol
coff_parse_symbol16(String8 string_table, COFF_Symbol16 *sym16)
{
  COFF_ParsedSymbol parsed = coff_parse_symbol16_no_name(sym16);
  String8 symbol_name = coff_read_symbol_name(string_table, &sym16->name);
  parsed.name = symbol_name;
  return parsed;
}

internal COFF_ParsedSymbol
coff_parse_symbol(COFF_FileHeaderInfo header, String8 string_table, String8 symbol_table, U32 symbol_idx)
{
Expand Down
2 changes: 2 additions & 0 deletions src/coff/coff_parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ internal String8 coff_name_from_section_header (String8 str

internal COFF_ParsedSymbol coff_parse_symbol32(String8 string_table, COFF_Symbol32 *sym32);
internal COFF_ParsedSymbol coff_parse_symbol16(String8 string_table, COFF_Symbol16 *sym16);
// name-skipping variants: take no string table, so the symbol name is not read;
// intended for callers that only need the scalar fields.
internal COFF_ParsedSymbol coff_parse_symbol32_no_name(COFF_Symbol32 *sym32);
internal COFF_ParsedSymbol coff_parse_symbol16_no_name(COFF_Symbol16 *sym16);
internal COFF_ParsedSymbol coff_parse_symbol (COFF_FileHeaderInfo header, String8 string_table, String8 symbol_table, U32 symbol_idx);

internal COFF_Symbol32Array coff_symbol_array_from_data_16(Arena *arena, String8 data, U64 symbol_array_off, U64 symbol_count);
Expand Down
107 changes: 107 additions & 0 deletions src/linker/codeview_ext/ifc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright (c) Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)

// First four bytes of a raw MSVC .ifc file.
read_only global U8 g_ifc_signature[4] = { 0x54, 0x51, 0x45, 0x1A }; // "TQE\x1a"
// First four bytes of a UBA-compressed input; ifc_file_read rejects these.
read_only global U8 g_uba_signature[4] = { 0x55, 0x42, 0x41, 0x01 }; // "UBA\x01"

// Reads the .ifc file at `path` into `arena`, validates signature, version
// (0.44) and architecture (x64), and locates the `.msvc.trait.debug-records`
// partition.
//
// arena     - receives the file bytes, the path copy and any error string.
// path      - file to read.
// error_out - must be non-null; written only on failure.
//
// Returns an IFC_File whose `is_valid` is 1 only when the partition was found
// and lies inside the file; on every failure path `is_valid` stays 0 and
// *error_out holds a message. `debug_records` is a view into `data`, not a copy.
internal IFC_File
ifc_file_read(Arena *arena, String8 path, String8 *error_out)
{
  IFC_File ifc = {0};
  ifc.path = push_str8_copy(arena, path);

  String8 data = lnk_read_data_from_file_path(arena, 0, path);
  ifc.data = data;
  if (data.size < 4) {
    *error_out = push_str8f(arena, "IFC '%S' is too small (%llu bytes)", path, data.size);
    return ifc;
  }

  // detect UBA-compressed input (out of scope)
  if (MemoryMatch(data.str, g_uba_signature, sizeof(g_uba_signature))) {
    *error_out = push_str8f(arena, "IFC '%S' is UBA-compressed (magic 'UBA\\x01'); materialize a raw .ifc (UBA decompress unsupported)", path);
    return ifc;
  }

  // validate signature
  if ( ! MemoryMatch(data.str, g_ifc_signature, sizeof(g_ifc_signature))) {
    *error_out = push_str8f(arena, "IFC '%S' has bad signature (expected 54 51 45 1A)", path);
    return ifc;
  }

  // --- parse header ---
  U64 off = 4;
  if (off + 32 > data.size) { goto truncated; }
  MemoryCopy(ifc.content_hash, data.str + off, 32);
  off += 32;

  if (off + 4 > data.size) { goto truncated; }
  U8 major = data.str[off+0];
  U8 minor = data.str[off+1];
  U8 abi   = data.str[off+2]; (void)abi;
  U8 arch  = data.str[off+3];
  off += 4;

  // assert version 0.44 + x64; error otherwise (encoding proven only for these)
  if ( ! (major == 0 && minor == 44)) {
    *error_out = push_str8f(arena, "IFC '%S' unsupported version %u.%u (expected 0.44)", path, major, minor);
    return ifc;
  }
  if (arch != 2) {
    *error_out = push_str8f(arena, "IFC '%S' unsupported architecture %u (expected 2 == x64)", path, arch);
    return ifc;
  }

  // The remaining fixed header is eight u32 fields. Bounds-check all of them up
  // front instead of inferring truncation from `off` afterwards: `off` is only
  // advanced by what the reads report, so a short read would leave it in range
  // and the fields below unwritten. They are also zero-initialized so a short
  // read can never expose indeterminate values.
  if (off + 8*sizeof(U32) > data.size) { goto truncated; }
  U32 cplusplus          = 0; off += str8_deserial_read_struct(data, off, &cplusplus); (void)cplusplus;
  U32 string_table_bytes = 0; off += str8_deserial_read_struct(data, off, &string_table_bytes);
  U32 string_table_size  = 0; off += str8_deserial_read_struct(data, off, &string_table_size);
  U32 unit               = 0; off += str8_deserial_read_struct(data, off, &unit); (void)unit;
  U32 src_path           = 0; off += str8_deserial_read_struct(data, off, &src_path); (void)src_path;
  U32 global_scope       = 0; off += str8_deserial_read_struct(data, off, &global_scope); (void)global_scope;
  U32 toc                = 0; off += str8_deserial_read_struct(data, off, &toc);
  U32 partition_count    = 0; off += str8_deserial_read_struct(data, off, &partition_count);

  // string table (widened to U64 so offset+size cannot wrap)
  if ((U64)string_table_bytes + string_table_size > data.size) {
    *error_out = push_str8f(arena, "IFC '%S' string table out of bounds", path);
    return ifc;
  }
  String8 string_table = str8(data.str + string_table_bytes, string_table_size);

  // --- partition summary table ---
  // each entry is 16 bytes: { name text offset, partition offset, count, entity size }
  String8 needle = str8_lit(".msvc.trait.debug-records");
  U64 po = toc;
  for (U32 i = 0; i < partition_count; ++i, po += 16) {
    if (po + 16 > data.size) { goto truncated; }
    U32 name_off = 0, p_off = 0, count = 0, entity_size = 0;
    str8_deserial_read_struct(data, po +  0, &name_off);
    str8_deserial_read_struct(data, po +  4, &p_off);
    str8_deserial_read_struct(data, po +  8, &count);
    str8_deserial_read_struct(data, po + 12, &entity_size);

    if (name_off >= string_table.size) { continue; }

    // Measure the name with a scan bounded by the string table: the table comes
    // from the file, so a missing NUL terminator must not walk the scan past it.
    U8 *name_ptr = string_table.str + name_off;
    U64 name_cap = string_table.size - name_off;
    U64 name_len = 0;
    while (name_len < name_cap && name_ptr[name_len] != 0) {
      name_len += 1;
    }
    String8 name = str8(name_ptr, name_len);

    if (str8_match(name, needle, 0)) {
      // entity_size == 1 -> count is a byte length
      // NOTE(review): entity_size is read but not validated here -- confirm
      // that a value other than 1 cannot occur for this partition.
      if ((U64)p_off + count > data.size) {
        *error_out = push_str8f(arena, "IFC '%S' debug-records partition out of bounds", path);
        return ifc;
      }
      ifc.debug_records = str8(data.str + p_off, count);
      break;
    }
  }

  // an empty partition is reported the same way as a missing one
  if (ifc.debug_records.size == 0) {
    *error_out = push_str8f(arena, "IFC '%S' has no '.msvc.trait.debug-records' partition", path);
    return ifc;
  }

  ifc.is_valid = 1;
  return ifc;

truncated:
  *error_out = push_str8f(arena, "IFC '%S' is truncated", path);
  return ifc;
}
43 changes: 43 additions & 0 deletions src/linker/codeview_ext/ifc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) Epic Games Tools
// Licensed under the MIT license (https://opensource.org/license/mit/)

#pragma once

////////////////////////////////
// MSVC IFC (header-unit module interface) reader
//
// radlink consumes the `.msvc.trait.debug-records` partition embedded in an
// MSVC `.ifc` (C++20 module / header-unit interface) file. That partition is a
// raw CodeView type-leaf stream (entity_size == 1, NO u32 signature, first leaf
// at offset 0, TI base 0x1000). A consuming `.obj` references this stream via
// LF_IFC_RECORD (0x1522) leaves -- see lnk_debug_info.c.
//
// File layout (microsoft/ifc-spec, FileHeader):
// u8[4] signature = { 0x54,0x51,0x45,0x1A } ("TQE\x1a")
// u8[32] content_hash (sha256) -- record.GUID(16)++record.hash(16) == first 32 bytes here
// u8 major, minor -- assert 0.44
// u8 abi
// u8 arch -- 2 == x64
// u32 cplusplus
// u32 string_table_bytes (off), u32 string_table_size
// u32 unit
// u32 src_path (textoffset)
// u32 global_scope
// u32 toc -- offset to partition summary table
// u32 partition_count
// u8 internal_partition
// Partition summary entry (16 bytes): { u32 name(textoffset); u32 offset; u32 count; u32 entity_size }

// Result of ifc_file_read. The String8 members point at memory pushed onto the
// arena passed to ifc_file_read; nothing here is freed individually.
typedef struct IFC_File
{
String8 data; // whole .ifc bytes as read from `path`
String8 path; // .ifc path (copied)
U8 content_hash[32]; // the 32 header bytes that follow the 4-byte signature
String8 debug_records; // .msvc.trait.debug-records blob {ptr,size}, a view into `data`; size 0 if absent
B32 is_valid; // 1 only when every check passed and debug_records was located
} IFC_File;

// Reads `path`, validates magic/version/arch, locates `.msvc.trait.debug-records`.
// On error fills *error_out and returns is_valid=0; `error_out` is written
// unconditionally on every failure path, so it must be non-null. On success
// *error_out is left untouched. Detects UBA-compressed inputs (magic "UBA\x01")
// and reports them (decompression is out of scope). The returned strings and
// any error message are pushed onto `arena`.
internal IFC_File ifc_file_read(Arena *arena, String8 path, String8 *error_out);
Loading
Loading