Skip to content

Commit 3e0a46b

Browse files
committed
Add support for shortened mnemonic elements
Fix for utf8 multibyte characters
1 parent 97524bd commit 3e0a46b

File tree

1 file changed

+41
-4
lines changed

1 file changed

+41
-4
lines changed

src/wordlist.c

+41-4
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,42 @@ static int bstrcmp(const void *l, const void *r)
66
return strcmp(l, (*(const char **)r));
77
}
88

9+
/*
 * Returns non-zero when the byte at s begins a character that counts
 * towards the length limit of utf_strncmp. The following do NOT count:
 *   - UTF-8 continuation bytes (0x80-0xbf),
 *   - the lead bytes 0xcc and 0xcd (used by combining accent marks),
 *   - the Hiragana sound marks encoded as e3 82 99 .. e3 82 9f.
 * s must point at a non-NUL byte of a NUL-terminated string; the
 * short-circuit evaluation below never reads past the terminator.
 */
static int utf_starts_counted_char(const unsigned char *s)
{
    if (s[0] >= 0x80 && s[0] <= 0xbf)
        return 0;
    if (s[0] == 0xcc || s[0] == 0xcd)
        return 0;
    if (s[0] == 0xe3 && s[1] == 0x82 && s[2] >= 0x99 && s[2] <= 0x9f)
        return 0;
    return 1;
}

/**
 * Does a strncmp on utf8 strings. This mostly works by comparing bytes
 * but not counting continuation bytes & accent/sound mark characters.
 * This is not guaranteed to work for all utf8 strings but is supposed
 * to work for the bip39 word lists in libwally.
 *
 * s1, s2: NUL-terminated strings to compare.
 * n:      maximum number of counted characters to compare.
 *
 * Returns 0 when the first n counted characters (including any
 * uncounted marks attached to them) are byte-wise equal, otherwise the
 * difference between the first pair of differing bytes, taken as
 * unsigned char. As with strncmp, n == 0 always yields 0.
 */
int utf_strncmp(const char *s1, const char *s2, size_t n)
{
    const unsigned char *a = (const unsigned char *)s1;
    const unsigned char *b = (const unsigned char *)s2;
    size_t pos = 0;
    size_t counted = 0;

    /* Nothing to compare; also avoids stepping back from index 0. */
    if (n == 0)
        return 0;

    while (a[pos] != '\0' && b[pos] != '\0') {
        /* Reaching character n + 1 of s1 means the first n characters
         * (all bytes before pos) have already compared equal. */
        if (utf_starts_counted_char(a + pos) && ++counted > n)
            return 0;
        if (a[pos] != b[pos])
            return (int)a[pos] - (int)b[pos];
        ++pos;
    }

    /* Both strings ended together. */
    if (a[pos] == b[pos])
        return 0;

    /* Exactly one string ended. If n characters were already matched
     * and the longer string goes on with a new counted character, the
     * two strings agree on their first n characters. If it goes on
     * with a continuation byte or mark instead, its n-th character
     * differs and the byte difference below is returned. */
    if (counted == n) {
        const unsigned char *rest = (a[pos] != '\0') ? a + pos : b + pos;
        if (utf_starts_counted_char(rest))
            return 0;
    }

    return (int)a[pos] - (int)b[pos];
}
40+
41+
/*
 * bsearch() comparator: l is the key string being searched for, r points
 * at an array slot holding a string pointer. The two are compared with
 * utf_strncmp limited to 4 counted characters.
 */
static int bstr4cmp(const void *l, const void *r)
{
    const char *key = l;
    const char *candidate = *(const char **)r;

    return utf_strncmp(key, candidate, 4);
}
44+
945
/* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious */
1046
static int get_bits(size_t n)
1147
{
@@ -72,13 +108,14 @@ size_t wordlist_lookup_word(const struct words *w, const char *word)
72108
const size_t size = sizeof(const char *);
73109
const char **found = NULL;
74110

75-
if (w->sorted)
76-
found = (const char **)bsearch(word, w->indices, w->len, size, bstrcmp);
77-
else {
111+
if (w->sorted) {
112+
found = (const char **)bsearch(word, w->indices, w->len, size, bstr4cmp);
113+
} else {
78114
size_t i;
79115
for (i = 0; i < w->len && !found; ++i)
80-
if (!strcmp(word, w->indices[i]))
116+
if (!utf_strncmp(word, w->indices[i], 4)) {
81117
found = w->indices + i;
118+
}
82119
}
83120
return found ? found - w->indices + 1u : 0u;
84121
}

0 commit comments

Comments
 (0)