Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Unicode support to flexdll (was: Add wide-character version of flexdll_dlopen) #34

Merged
merged 3 commits into from
Jul 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,9 @@ Here is the content of the **`flexdll.h`** file:
#define FLEXDLL_RTLD_NOEXEC 0x0002

void *flexdll_dlopen(const char *, int);
#ifndef CYGWIN
void *flexdll_wdlopen(const wchar_t *, int);
#endif
void *flexdll_dlsym(void *, const char *);
void flexdll_dlclose(void *);
char *flexdll_dlerror(void);
Expand Down Expand Up @@ -472,6 +475,11 @@ error occurs during the call to **`flexdll_dlopen`**, the functions
returns **`NULL`** and the error message can be retrieved using
**`flexdll_dlerror`**.

The function **`flexdll_wdlopen`** is a wide-character version of
**`flexdll_dlopen`**. The filename argument to **`flexdll_wdlopen`** is a
wide-character string. **`flexdll_wdlopen`** and **`flexdll_dlopen`** behave
identically otherwise.

The second most important function is **`flexdll_dlsym`** which looks
for a symbol whose name is the second argument. The first argument can
be either a regular handle returned by **`flexdll_dlopen`** (the symbol
Expand Down
32 changes: 28 additions & 4 deletions flexdll.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,16 @@ static char * ll_dlerror(void)

#else

static void *ll_dlopen(const char *libname, int for_execution) {
static void *ll_dlopen(const wchar_t *libname, int for_execution) {
HMODULE m;
m = LoadLibraryEx(libname, NULL,
for_execution ? 0 : DONT_RESOLVE_DLL_REFERENCES);
m = LoadLibraryExW(libname, NULL,
for_execution ? 0 : DONT_RESOLVE_DLL_REFERENCES);
/* See https://blogs.msdn.microsoft.com/oldnewthing/20050214-00/?p=36463
Should use LOAD_LIBRARY_AS_DATAFILE instead of DONT_RESOLVE_DLL_REFERENCES? */

/* Under Win 95/98/ME, LoadLibraryEx can fail in cases where LoadLibrary
would succeed. Just try again with LoadLibrary for good measure. */
if (m == NULL) m = LoadLibrary(libname);
if (m == NULL) m = LoadLibraryW(libname);
return (void *) m;
}

Expand Down Expand Up @@ -349,7 +349,11 @@ int flexdll_relocate(void *tbl) {
return 1;
}

#ifdef CYGWIN
void *flexdll_dlopen(const char *file, int mode) {
#else
void *flexdll_wdlopen(const wchar_t *file, int mode) {
#endif
void *handle;
dlunit *unit;
char flexdll_relocate_env[256];
Expand Down Expand Up @@ -403,6 +407,26 @@ void *flexdll_dlopen(const char *file, int mode) {
return unit;
}

#ifndef CYGWIN

void *flexdll_dlopen(const char *file, int mode)
{
wchar_t * p;
int nbr;
void * handle;

nbr = MultiByteToWideChar(CP_THREAD_ACP, 0, file, -1, NULL, 0);
if (nbr == 0) { if (!error) error = 1; return NULL; }
p = malloc(nbr*sizeof(*p));
MultiByteToWideChar(CP_THREAD_ACP, 0, file, -1, p, nbr);
handle = flexdll_wdlopen(p, mode);
free(p);

return handle;
}

#endif

void flexdll_dlclose(void *u) {
dlunit *unit = u;

Expand Down
6 changes: 6 additions & 0 deletions flexdll.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#ifndef FLEXDLL_H
#define FLEXDLL_H

#include <wchar.h>

#define FLEXDLL_RTLD_GLOBAL 0x0001
#define FLEXDLL_RTLD_LOCAL 0x0000
#define FLEXDLL_RTLD_NOEXEC 0x0002
Expand All @@ -22,6 +24,10 @@ extern "C"
#endif

void *flexdll_dlopen(const char *, int);
#ifndef CYGWIN
void *flexdll_wdlopen(const wchar_t *, int);
#endif

void *flexdll_dlsym(void *, const char *);
void flexdll_dlclose(void *);
char *flexdll_dlerror(void);
Expand Down
83 changes: 76 additions & 7 deletions reloc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -94,21 +94,90 @@ let get_output1 ?use_bash cmd =

let mk_dirs_opt pr = String.concat " " (List.map (fun s -> pr ^ (Filename.quote s)) !dirs)

exception Not_utf8

let utf8_next s i =
let fail () = raise Not_utf8 in
let check i =
if i >= String.length s then fail ();
let n = Char.code s.[i] in
if n lsr 6 <> 0b10 then fail () else n
in
if !i >= String.length s then fail ();
match s.[!i] with
| '\000'..'\127' as c ->
let n = Char.code c in
i := !i + 1;
n
| '\192'..'\223' as c ->
let n1 = Char.code c in
let n2 = check (!i+1) in
let n =
((n1 land 0b11111) lsl 6) lor
((n2 land 0b111111))
in
i := !i + 2;
n
| '\224'..'\239' as c ->
let n1 = Char.code c in
let n2 = check (!i+1) in
let n3 = check (!i+2) in
let n =
((n1 land 0b1111) lsl 12) lor
((n2 land 0b111111) lsl 6) lor
((n3 land 0b111111))
in
i := !i + 3;
n
| '\240'..'\247' as c ->
let n1 = Char.code c in
let n2 = check (!i+1) in
let n3 = check (!i+2) in
let n4 = check (!i+3) in
let n =
((n1 land 0b111) lsl 18) lor
((n2 land 0b111111) lsl 12) lor
((n3 land 0b111111) lsl 6) lor
((n4 land 0b111111))
in
i := !i + 4;
n
| _ ->
fail ()

let toutf16 s =
let i = ref 0 in
let b = Buffer.create (String.length s) in
let cp n = Buffer.add_char b (Char.chr (n land 0xFF)); Buffer.add_char b (Char.chr ((n lsr 8) land 0xFF)) in
while !i < String.length s do
let n = utf8_next s i in
if n <= 0xFFFF then cp n else (cp (0xD7C0 + (n lsl 10)); cp (0xDC00 + (n land 0x3FF)))
Copy link
Member

@dra27 dra27 Oct 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this supposed to be UTF-16 surrogate encoding? The formula for the high surrogate is (very) wrong, I'm afraid - although sufficiently so that I'm wondering if I'm missing something else.

else let n = n - 0x10000 in (cp (0xD800 + (n lsr 10)); ...)

is allowing me to run make world from a directory called 🐫

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooops, did I make a mistake? I will take a closer look in a bit, thanks!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, the first term seems to be really wrong - I can't understand what I must have been thinking to be honest... And it did work in the few tests I did -- pure luck I guess!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please make a PR with this fix? Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doh! Just the for proverbial record, it's described on Wikipedia.

Over my ☕, I couldn't resist the puzzle: your code works for the high surrogates for exactly 16 of the non-BMP characters (precisely ones where you get the extra 1 required to correct the base bit pattern for 0xD800)... perhaps very appropriately, lots of them are from Linear-B 😄 𐀁 𐁁 𐂁 𐃁 𐄁 𐅁 𐆁 𐊁 𐋁 𐌁 𐍁 𐎁 𐏁

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #47

done;
Buffer.contents b

(* Build @responsefile to work around Windows limitations on
command-line length *)
let build_diversion lst =
let (responsefile, oc) = open_temp_file "camlresp" "" in
List.iter
(fun f ->
if f <> "" then begin
let responsefile = temp_file "camlresp" "" in
let oc = open_out_bin responsefile in
let lst =
List.map (fun f ->
let s = Bytes.of_string (Filename.quote f) in
for i = 0 to Bytes.length s - 1 do
if Bytes.get s i = '\\' then Bytes.set s i '/'
done;
output_bytes oc s; output_char oc '\n'
end)
lst;
Bytes.to_string s ^ "\r\n"
) (List.filter (fun f -> f <> "") lst)
in
let utf16, lst =
match List.map toutf16 lst with
| lst ->
true, lst
| exception Not_utf8 ->
false, lst
in
if utf16 then output_string oc "\xFF\xFE"; (* LE BOM *)
List.iter (fun s -> output_string oc s) lst;
close_out oc;
"@" ^ responsefile

Expand Down