Skip to content

Commit 75617ef

Browse files
committed
fix 5 unicode 15 non-case mapping exceptions
Fixes GH #145. Fix up the Perl cross-checks with 5.40. Regenerate the Unicode tables (no changes with 15.0.0, only whitespace).
1 parent 8bd6568 commit 75617ef

13 files changed

+2390
-2374
lines changed

Diff for: src/extwchar/towfc_s.c

+7-4
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
* returning the number of new wide character codepoints needed.
4545
* The usual \c iswupper(wc) case returns 1, and the special 104 full
4646
* folding cases as specified in Unicode 10.0 \c CaseFolding.txt return either
47-
* 2 or 3. This implements Unicode 14.0
47+
* 2 or 3. This implements Unicode 14.0. (5 errors for Unicode 15)
4848
*
4949
* @param[in] wc unicode character codepoint
5050
*
@@ -229,7 +229,7 @@ static const struct {
229229

230230
/* Return the number of wide lower-case characters needed to full fold-case
231231
the given uppercase character. Returns 0, 1, 2 or 3.
232-
0 if the charcater stays the same, 1 if one character changes,
232+
0 if the character stays the same, 1 if one character changes,
233233
2 or 3 if the character will be replaced with 2 or 3.
234234
235235
Note that accents expand to more characters than 1 via NFD decomposition.
@@ -239,8 +239,11 @@ static const struct {
239239
int iswfc(const uint32_t wc) {
240240
/* the slow variant would walk the 2 loops */
241241
if (likely((wc < 0xdf) || (wc > 0x0587 && wc < 0x1e96) ||
242-
(wc > 0x1FFC && wc < 0xFB00) || (wc > 0xFB17)))
242+
(wc > 0x1FFC && wc < 0xFB00) || (wc > 0xFB17))) {
243+
if (wc == 0x1cbb || wc == 0x1cbc)
244+
return 0;
243245
goto single;
246+
}
244247
if (wc < 0x1e96) {
245248
if (wc == 0xdf || wc == 0x130 || wc == 0x149 || wc == 0x1f0 ||
246249
wc == 0x587)
@@ -305,7 +308,7 @@ int iswfc(const uint32_t wc) {
305308
May return 2 on sizeof(wchar_t)==2 if >0xffff, i.e. converted to surrogate
306309
pair
307310
308-
perl5.27.3 -E'no warnings; for (0..0x10ffff){
311+
perl -E'no warnings; for (0..0x10ffff){
309312
my ($lc,$fc) = (lc(pack"W",$_), fc(pack"W",$_));
310313
printf "U+%04X: fc: %X, lc: %X\n", $_, unpack("W",$fc), unpack("W",$lc)
311314
if $lc ne $fc and length($fc)==1;

Diff for: src/extwchar/unw16ifcan.h

+15-15
Original file line number | Diff line number | Diff line change
@@ -283,24 +283,24 @@ typedef struct { const uint32_t cp; const wchar_t* v; } UNWIF_canon_exc_t;
283283
/* sorted for binary search */
284284
#define UNWIF_canon_exc_size 9
285285
static const UNWIF_canon_exc_t UNWIF_canon_exc [9] = {
286-
{ 0x1d160, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6e" },
287-
{ 0x1d161, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6f" },
288-
{ 0x1d162, L"\xd834\xdd58\xd834\xdd65\xd834\xdd70" },
289-
{ 0x1d163, L"\xd834\xdd58\xd834\xdd65\xd834\xdd71" },
290-
{ 0x1d164, L"\xd834\xdd58\xd834\xdd65\xd834\xdd72" },
291-
{ 0x1d1bd, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6e" },
292-
{ 0x1d1be, L"\xd834\xddba\xd834\xdd65\xd834\xdd6e" },
293-
{ 0x1d1bf, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6f" },
294-
{ 0x1d1c0, L"\xd834\xddba\xd834\xdd65\xd834\xdd6f" }
286+
{ 0x1d160, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6e" },
287+
{ 0x1d161, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6f" },
288+
{ 0x1d162, L"\xd834\xdd58\xd834\xdd65\xd834\xdd70" },
289+
{ 0x1d163, L"\xd834\xdd58\xd834\xdd65\xd834\xdd71" },
290+
{ 0x1d164, L"\xd834\xdd58\xd834\xdd65\xd834\xdd72" },
291+
{ 0x1d1bd, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6e" },
292+
{ 0x1d1be, L"\xd834\xddba\xd834\xdd65\xd834\xdd6e" },
293+
{ 0x1d1bf, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6f" },
294+
{ 0x1d1c0, L"\xd834\xddba\xd834\xdd65\xd834\xdd6f" }
295295
};
296296

297297
static const wchar_t* UNWIF_canon_tbl [6] = {
298-
(const wchar_t*) UNWIF_canon_tbl_1,
299-
(const wchar_t*) UNWIF_canon_tbl_2,
300-
(const wchar_t*) UNWIF_canon_tbl_3,
301-
(const wchar_t*) UNWIF_canon_tbl_4,
302-
NULL,
303-
(const wchar_t*) UNWIF_canon_tbl_6
298+
(const wchar_t*) UNWIF_canon_tbl_1,
299+
(const wchar_t*) UNWIF_canon_tbl_2,
300+
(const wchar_t*) UNWIF_canon_tbl_3,
301+
(const wchar_t*) UNWIF_canon_tbl_4,
302+
NULL,
303+
(const wchar_t*) UNWIF_canon_tbl_6
304304
};
305305

306306
/* the rows */

0 commit comments

Comments
 (0)