From 83a223a44404ecd3bc7b491f7a36202f8803a25b Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 14:57:40 +0000 Subject: [PATCH 1/4] Split imports and link_obj --- reloc.ml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/reloc.ml b/reloc.ml index 93a8d7f..a807302 100644 --- a/reloc.ml +++ b/reloc.ml @@ -879,7 +879,6 @@ let build_dll link_exe output_file files exts extra_args = let imports obj = let n = needed imported defined resolve_alias resolve_alternate obj in - imported_from_implib := StrSet.union !imported_from_implib (StrSet.inter n from_imports); let undefs = StrSet.diff n defined in StrSet.filter (fun s -> @@ -887,7 +886,7 @@ let build_dll link_exe output_file files exts extra_args = | Some _ -> false | None -> s <> "environ" (* special hack for Cygwin64 *) ) - undefs + undefs, StrSet.inter n from_imports in (* Second step: transitive closure, starting from given objects *) @@ -935,7 +934,7 @@ let build_dll link_exe output_file files exts extra_args = in let dll_exports = ref StrSet.empty in - let rec link_obj fn obj = + let record_exports obj = List.iter (fun sym -> if Symbol.is_defin sym && exportable sym.sym_name @@ -944,7 +943,9 @@ let build_dll link_exe output_file files exts extra_args = obj.symbols; dll_exports := List.fold_left (fun accu x -> StrSet.add x accu) - !dll_exports (collect_dllexports obj); + !dll_exports (collect_dllexports obj) + in + let rec link_obj fn obj = StrSet.iter (fun s -> if StrSet.mem s !exported then () @@ -962,15 +963,20 @@ let build_dll link_exe output_file files exts extra_args = and link_libobj (libname,objname,obj) = if Hashtbl.mem libobjects (libname,objname) then () - else (Hashtbl.replace libobjects (libname,objname) (obj,imports obj); + else (let imports, from_imports = imports obj in + Hashtbl.replace libobjects (libname,objname) (obj,imports); + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj; link_obj (Printf.sprintf "%s(%s)" libname objname) obj) in let redirect = Hashtbl.create 16 in List.iter (fun (fn, obj) -> + let imps, from_imports = imports obj in + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj; link_obj fn obj; - let imps = imports obj in if StrSet.is_empty imps then () else Hashtbl.replace redirect fn (close_obj fn imps obj); ) objs; From 338ff13a47429f381fd3b6f3cca8e1cefa72cafa Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 15:33:13 +0000 Subject: [PATCH 2/4] Include default libraries in defined_in The defined_in function is used when determining which objects from the command line will actually be linked. However, this doesn't include the default libraries, which creates an issue for modules which are linked as a result of main. defined_in is augmented to include the mapping of symbols found in the default libraries, but some additional care is required to ensure that this doesn't cause imports from those to leak into the export table of the executable. --- reloc.ml | 56 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/reloc.ml b/reloc.ml index a807302..cd2338d 100644 --- a/reloc.ml +++ b/reloc.ml @@ -739,7 +739,7 @@ let build_dll link_exe output_file files exts extra_args = in (* Collect all the available symbols, including those defined in default libraries *) - let defined, from_imports, resolve_alias, resolve_alternate = + let defined, from_imports, resolve_alias, resolve_alternate, collected = let aliases = Hashtbl.create 16 in let alternates = Hashtbl.create 16 in let defined = ref StrSet.empty in @@ -808,14 +808,16 @@ let build_dll link_exe output_file files exts extra_args = and collect_file fn = if not (Hashtbl.mem collected (String.lowercase_ascii fn)) then begin - Hashtbl.replace collected (String.lowercase_ascii fn) (); debug 2 "** open: %s" fn; collect_defined fn (Lib.read fn) end and collect_defined fn = function - | `Obj obj -> collect_defined_obj obj + | `Obj obj -> + Hashtbl.replace collected (String.lowercase_ascii fn) [fn, obj]; + collect_defined_obj obj | `Lib (objs,imports) -> + Hashtbl.replace collected (String.lowercase_ascii fn) objs; List.iter (fun (_, obj) -> collect_defined_obj obj) objs; List.iter (fun (s,_) -> @@ -826,12 +828,7 @@ let build_dll link_exe output_file files exts extra_args = ) imports in - List.iter - (fun (fn,x) -> - Hashtbl.replace collected (String.lowercase_ascii fn) (); - collect_defined fn x - ) - files; + List.iter (fun (fn,x) -> collect_defined fn x) files; if !use_default_libs then List.iter (fun fn -> collect_file (find_file fn)) !default_libs; List.iter (fun fn -> collect_file (find_file fn)) exts; @@ -841,13 +838,13 @@ let build_dll link_exe output_file files exts extra_args = if !machine = `x64 then add_def "__ImageBase" else add_def "___ImageBase"; - !defined, !from_imports, (Hashtbl.find aliases), (Hashtbl.find alternates) + !defined, !from_imports, (Hashtbl.find aliases), (Hashtbl.find alternates), collected in (* Determine which objects from the given libraries should be linked in. First step: find the mapping (symbol -> object) for these objects. *) - let defined_in = + let defined_in, default_libs = let defined_in = Hashtbl.create 16 in let def_in_obj fn (objname, obj) = List.iter @@ -865,13 +862,21 @@ let build_dll link_exe output_file files exts extra_args = ) obj.symbols in - List.iter - (fun (fn,objs) -> - if !explain then Printf.printf "Scanning lib %s\n%!" fn; - List.iter (def_in_obj fn) objs - ) - libs; - Hashtbl.find defined_in + let scan (fn,objs) = + if !explain then Printf.printf "Scanning lib %s\n%!" fn; + List.iter (def_in_obj fn) objs + in + let default_libs = + List.fold_right + (fun fn acc -> + let fn = find_file fn in + scan (fn, Hashtbl.find collected (String.lowercase_ascii fn)); + StrSet.add fn acc) + (if !use_default_libs then !default_libs else []) + StrSet.empty + in + List.iter scan libs; + Hashtbl.find defined_in, default_libs in let imported_from_implib = ref StrSet.empty in @@ -962,20 +967,27 @@ let build_dll link_exe output_file files exts extra_args = (needed imported defined resolve_alias resolve_alternate obj) and link_libobj (libname,objname,obj) = + let default = StrSet.mem libname default_libs in if Hashtbl.mem libobjects (libname,objname) then () else (let imports, from_imports = imports obj in + let imports = if default then StrSet.empty else imports in Hashtbl.replace libobjects (libname,objname) (obj,imports); - imported_from_implib := StrSet.union !imported_from_implib from_imports; - record_exports obj; + if not default then begin + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj + end; link_obj (Printf.sprintf "%s(%s)" libname objname) obj) in let redirect = Hashtbl.create 16 in List.iter (fun (fn, obj) -> + let default = StrSet.mem fn default_libs in let imps, from_imports = imports obj in - imported_from_implib := StrSet.union !imported_from_implib from_imports; - record_exports obj; + if not default then begin + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj + end; link_obj fn obj; if StrSet.is_empty imps then () else Hashtbl.replace redirect fn (close_obj fn imps obj); From 39c90a2b7ffa5642384b427ff3e8e936d71768d2 Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 20:41:43 +0000 Subject: [PATCH 3/4] Choose the correct startup object for mingw-w64 If -municode is passed to mingw-w64's gcc, then crt2u.o (which causes wmain to be selected) should be linked rather than crt2.o. In passing, the code now sniffs for -nostartfiles, which causes neither crt2.o nor crt2u.o to be considered. --- CHANGES | 3 ++- reloc.ml | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index da6c1b6..df5e82f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,7 +1,8 @@ Next version - GPR#127: Recognise hyphens in option names in the COFF .drectve section. Fixes #126 (Reza Barazesh) - GPR#136: Fix parallel access crashes and misbehavior (David Allsopp, Jan Midtgaard, Antonin Décimo) - +- GPR#146: For mingw-w64, select crt2u.o instead of crt2.o if -link -municode + is specified (David Allsopp) Version 0.43 - GPR#108: Add -lgcc_s to Cygwin's link libraries, upstreaming a patch from the diff --git a/reloc.ml b/reloc.ml index cd2338d..10fcfa7 100644 --- a/reloc.ml +++ b/reloc.ml @@ -1378,8 +1378,14 @@ let setup_toolchain () = default_libs := ["-lmingw32"; "-lgcc"; "-lgcc_eh"; "-lmoldname"; "-lmingwex"; "-lmsvcrt"; "-luser32"; "-lkernel32"; "-ladvapi32"; "-lshell32" ]; - if !exe_mode = `EXE then default_libs := "crt2.o" :: !default_libs - else default_libs := "dllcrt2.o" :: !default_libs + if not (List.mem "-nostartfiles" !extra_args) then begin + if !exe_mode = `EXE then + if List.mem "-municode" !extra_args then + default_libs := "crt2u.o" :: !default_libs + else + default_libs := "crt2.o" :: !default_libs + else default_libs := "dllcrt2.o" :: !default_libs + end in match !toolchain with | _ when !builtin_linker -> From 58db0d1ecfb54a409ebd129b92ec2d9066b18cbb Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 20:44:12 +0000 Subject: [PATCH 4/4] Consider the entrypoint when determining exports The export table is determined by looping over the objects included on the command line. If the main symbol is included in a library (e.g. libcamlrun or libasmrun) then there may not be an object on the commandline which causes it, and any transitive dependencies, to be linked. The entrypoint symbol for the Cygwin/mingw-w64/MSVC toolchains is now resolved, and may cause additional objects to be linked. --- CHANGES | 5 ++-- reloc.ml | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/CHANGES b/CHANGES index df5e82f..2b1bc61 100644 --- a/CHANGES +++ b/CHANGES @@ -1,8 +1,9 @@ Next version - GPR#127: Recognise hyphens in option names in the COFF .drectve section. Fixes #126 (Reza Barazesh) - GPR#136: Fix parallel access crashes and misbehavior (David Allsopp, Jan Midtgaard, Antonin Décimo) -- GPR#146: For mingw-w64, select crt2u.o instead of crt2.o if -link -municode - is specified (David Allsopp) +- GPR#146: Take the entrypoint (main, wmainCRTStartup, etc.) into account when + determining the modules which will be linked. For mingw-w64, select crt2u.o + instead of crt2.o if -link -municode is specified (David Allsopp) Version 0.43 - GPR#108: Add -lgcc_s to Cygwin's link libraries, upstreaming a patch from the diff --git a/reloc.ml b/reloc.ml index 10fcfa7..fa3cf02 100644 --- a/reloc.ml +++ b/reloc.ml @@ -697,7 +697,7 @@ let needed imported defined resolve_alias resolve_alternate obj = StrSet.empty obj.symbols -let build_dll link_exe output_file files exts extra_args = +let build_dll link_exe output_file files exts extra_args_string = let main_pgm = link_exe <> `DLL in (* fully resolve filenames, eliminate duplicates *) @@ -979,6 +979,53 @@ let build_dll link_exe output_file files exts extra_args = link_obj (Printf.sprintf "%s(%s)" libname objname) obj) in + let entrypoint = + if not main_pgm then + None + else + match !toolchain with + | `CYGWIN64 -> + Some "main" + | `MINGW | `MINGW64 -> begin + let entry_point s = + String.length s > 7 && String.sub s 0 7 = "-Wl,-e," + in + try + let s = List.find entry_point !extra_args in + Some (String.sub s 7 (String.length s - 7)) + with Not_found -> + Some "mainCRTStartup" + end + | `MSVC | `MSVC64 -> begin + let entry_point s = + String.length s > 7 && String.lowercase_ascii (String.sub s 0 7) = "/entry:" + in + try + let s = List.find entry_point !extra_args in + Some (String.sub s 7 (String.length s - 7)) + with Not_found -> + if !subsystem = "windows" then + Some "WinMainCRTStartup" + else + Some "mainCRTStartup" + end + | `LIGHTLD | `GNAT | `GNAT64 -> + None + in + let () = + match entrypoint with + | None -> () + | Some entrypoint -> + try + let (libname, objname, _) as o = defined_in entrypoint in + if !explain then + Printf.printf "%s(%s) because of entrypoint %s\n%!" libname objname + entrypoint; + link_libobj o + with Not_found -> + if !explain then + Printf.printf "Entrypoint %s not found\n%!" entrypoint + in let redirect = Hashtbl.create 16 in List.iter (fun (fn, obj) -> @@ -1113,21 +1160,21 @@ let build_dll link_exe output_file files exts extra_args = being an empty file. *) let c = open_out implib in output_string c "x"; close_out c; let _impexp = add_temp (Filename.chop_suffix implib ".lib" ^ ".exp") in - let extra_args = - if !custom_crt then "/nodefaultlib:LIBCMT /nodefaultlib:MSVCRT " ^ extra_args - else "msvcrt.lib " ^ extra_args + let extra_args_string = + if !custom_crt then "/nodefaultlib:LIBCMT /nodefaultlib:MSVCRT " ^ extra_args_string + else "msvcrt.lib " ^ extra_args_string in - let extra_args = - if !machine = `x64 then (Printf.sprintf "/base:%s " !base_addr) ^ extra_args else extra_args + let extra_args_string = + if !machine = `x64 then (Printf.sprintf "/base:%s " !base_addr) ^ extra_args_string else extra_args_string in - let extra_args = + let extra_args_string = (* FlexDLL doesn't process .voltbl sections correctly, so don't allow the linker to process them. *) if Sys.command "link | findstr EMITVOLATILEMETADATA > nul" = 0 then - "/EMITVOLATILEMETADATA:NO " ^ extra_args - else extra_args + "/EMITVOLATILEMETADATA:NO " ^ extra_args_string + else extra_args_string in (* Flexdll requires that all images (main programs and all the DLLs) are @@ -1154,7 +1201,7 @@ let build_dll link_exe output_file files exts extra_args = (Filename.quote output_file) !subsystem files descr - extra_args + extra_args_string | `CYGWIN64 -> let def_file = if main_pgm then "" @@ -1175,7 +1222,7 @@ let build_dll link_exe output_file files exts extra_args = descr files def_file - extra_args + extra_args_string | `MINGW | `MINGW64 | `GNAT | `GNAT64 -> let def_file = if main_pgm then "" @@ -1198,7 +1245,7 @@ let build_dll link_exe output_file files exts extra_args = files def_file (if !implib then "-Wl,--out-implib=" ^ Filename.quote (Filename.chop_extension output_file ^ ".a") else "") - extra_args + extra_args_string | `LIGHTLD -> no_merge_manifest := true; Printf.sprintf @@ -1209,7 +1256,7 @@ let build_dll link_exe output_file files exts extra_args = descr files (if !implib then "--out-implib " ^ Filename.quote (Filename.chop_extension output_file ^ ".a") else "") - extra_args + extra_args_string in debug ~dry_mode 1 "+ %s" cmd; if not !dry_mode then begin