Skip to content

Commit

Permalink
Fix 5 Unicode 15 non-case mapping exceptions
Browse files Browse the repository at this point in the history
Fixes GH #145
Fix up the Perl cross-checks to work with Perl 5.40.
Regenerate the Unicode tables (no content changes with 15.0.0, only whitespace).
  • Loading branch information
rurban committed Dec 27, 2024
1 parent 8bd6568 commit fef1aa2
Show file tree
Hide file tree
Showing 14 changed files with 2,396 additions and 2,379 deletions.
5 changes: 3 additions & 2 deletions src/extwchar/towctrans.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
{ (u1), (l) - (u1), (u2) - (u1) + 1 }
#define CASELACE(u1, u2) CASEMAP((u1), (u2), (u1) + 1)

/* Unicode 14.0 */
/* Unicode 15.0 */

/* must be sorted */
static const struct {
Expand Down Expand Up @@ -167,7 +167,8 @@ static const unsigned short pairs[][2] = {
/* upper - lower */
{'I', 0x0131},
{'S', 0x017f},
{0x00b5, 0x03bc},
{0x00b5, 0x3bc},
{0x0101, 0x100}, // Unicode 15
{0x0130, 'i'},
{0x0178, 0x00ff},
{0x017f, 0x73},
Expand Down
11 changes: 7 additions & 4 deletions src/extwchar/towfc_s.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
* returning the number of new wide character codepoints needed.
* The usual \c iswupper(wc) case returns 1, and the special 104 full
* folding cases as specified in Unicode 10.0 \c CaseFolding.txt return either
* 2 or 3. This implements Unicode 14.0
* 2 or 3. This implements Unicode 14.0. (5 errors for Unicode 15)
*
* @param[in] wc unicode character codepoint
*
Expand Down Expand Up @@ -229,7 +229,7 @@ static const struct {

/* Return the number of wide lower-case characters needed to full fold-case
the given uppercase character. Returns 0, 1, 2 or 3.
0 if the charcater stays the same, 1 if one character changes,
0 if the character stays the same, 1 if one character changes,
2 or 3 if the character will be replaced with 2 or 3.
Note that accents expand to more characters than 1 via NFD decomposition.
Expand All @@ -239,8 +239,11 @@ static const struct {
int iswfc(const uint32_t wc) {
/* the slow variant would walk the 2 loops */
if (likely((wc < 0xdf) || (wc > 0x0587 && wc < 0x1e96) ||
(wc > 0x1FFC && wc < 0xFB00) || (wc > 0xFB17)))
(wc > 0x1FFC && wc < 0xFB00) || (wc > 0xFB17))) {
if (wc == 0x1cbb || wc == 0x1cbc)
return 0;
goto single;
}
if (wc < 0x1e96) {
if (wc == 0xdf || wc == 0x130 || wc == 0x149 || wc == 0x1f0 ||
wc == 0x587)
Expand Down Expand Up @@ -305,7 +308,7 @@ int iswfc(const uint32_t wc) {
May return 2 on sizeof(wchar_t)==2 if >0xffff, i.e. converted to surrogate
pair
perl5.27.3 -E'no warnings; for (0..0x10ffff){
perl -E'no warnings; for (0..0x10ffff){
my ($lc,$fc) = (lc(pack"W",$_), fc(pack"W",$_));
printf "U+%04X: fc: %X, lc: %X\n", $_, unpack("W",$fc), unpack("W",$lc)
if $lc ne $fc and length($fc)==1;
Expand Down
30 changes: 15 additions & 15 deletions src/extwchar/unw16ifcan.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,24 +283,24 @@ typedef struct { const uint32_t cp; const wchar_t* v; } UNWIF_canon_exc_t;
/* sorted for binary search */
#define UNWIF_canon_exc_size 9
static const UNWIF_canon_exc_t UNWIF_canon_exc [9] = {
{ 0x1d160, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6e" },
{ 0x1d161, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6f" },
{ 0x1d162, L"\xd834\xdd58\xd834\xdd65\xd834\xdd70" },
{ 0x1d163, L"\xd834\xdd58\xd834\xdd65\xd834\xdd71" },
{ 0x1d164, L"\xd834\xdd58\xd834\xdd65\xd834\xdd72" },
{ 0x1d1bd, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6e" },
{ 0x1d1be, L"\xd834\xddba\xd834\xdd65\xd834\xdd6e" },
{ 0x1d1bf, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6f" },
{ 0x1d1c0, L"\xd834\xddba\xd834\xdd65\xd834\xdd6f" }
{ 0x1d160, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6e" },
{ 0x1d161, L"\xd834\xdd58\xd834\xdd65\xd834\xdd6f" },
{ 0x1d162, L"\xd834\xdd58\xd834\xdd65\xd834\xdd70" },
{ 0x1d163, L"\xd834\xdd58\xd834\xdd65\xd834\xdd71" },
{ 0x1d164, L"\xd834\xdd58\xd834\xdd65\xd834\xdd72" },
{ 0x1d1bd, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6e" },
{ 0x1d1be, L"\xd834\xddba\xd834\xdd65\xd834\xdd6e" },
{ 0x1d1bf, L"\xd834\xddb9\xd834\xdd65\xd834\xdd6f" },
{ 0x1d1c0, L"\xd834\xddba\xd834\xdd65\xd834\xdd6f" }
};

static const wchar_t* UNWIF_canon_tbl [6] = {
(const wchar_t*) UNWIF_canon_tbl_1,
(const wchar_t*) UNWIF_canon_tbl_2,
(const wchar_t*) UNWIF_canon_tbl_3,
(const wchar_t*) UNWIF_canon_tbl_4,
NULL,
(const wchar_t*) UNWIF_canon_tbl_6
(const wchar_t*) UNWIF_canon_tbl_1,
(const wchar_t*) UNWIF_canon_tbl_2,
(const wchar_t*) UNWIF_canon_tbl_3,
(const wchar_t*) UNWIF_canon_tbl_4,
NULL,
(const wchar_t*) UNWIF_canon_tbl_6
};

/* the rows */
Expand Down
Loading

0 comments on commit fef1aa2

Please sign in to comment.