Skip to content

Commit 9109e95

Browse files
authored
Compound tone diacritics iii (#956)
* UnicodeData.txt lines from L2/24-232 * lb=CM * Inherited * Regenerate UCD * Diacritic * a test * Regenerate UCD * Ignore IDNA2008_Category
1 parent a714c56 commit 9109e95

20 files changed

+127
-83
lines changed

unicodetools/data/ucd/dev/DerivedAge.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# DerivedAge-18.0.0.txt
2-
# Date: 2025-11-21, 17:54:09 GMT
2+
# Date: 2025-11-21, 19:14:36 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -2126,6 +2126,8 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG
21262126
058B..058C ; 18.0 # [2] MODIFIER LETTER ARMENIAN SMALL INI..MODIFIER LETTER ARMENIAN SMALL YI
21272127
05C8 ; 18.0 # HEBREW POINT SHEVA NA MUDGASH
21282128
0984 ; 18.0 # BENGALI SIGN COMBINING ANUSVARA ABOVE
2129+
1ADE..1ADF ; 18.0 # [2] COMBINING GRAVE-DOT..COMBINING DOT-ACUTE
2130+
1AEC..1AF0 ; 18.0 # [5] COMBINING CARON-ACUTE..COMBINING DOUBLE COMMA ABOVE
21292131
20C2..20C3 ; 18.0 # [2] RUFIYAA SIGN..UAE DIRHAM SIGN
21302132
10ED9..10EEE ; 18.0 # [22] ARABIC CROWN LETTER BEH..ARABIC CROWN LETTER YEH
21312133
10EF9 ; 18.0 # ARABIC MARK CROWN
@@ -2144,6 +2146,6 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG
21442146
2B81E ; 18.0 # CJK UNIFIED IDEOGRAPH-2B81E
21452147
3D000..3FC3F ; 18.0 # [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
21462148

2147-
# Total code points: 11775
2149+
# Total code points: 11782
21482150

21492151
# EOF

unicodetools/data/ucd/dev/DerivedCoreProperties.txt

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# DerivedCoreProperties-18.0.0.txt
2-
# Date: 2025-11-21, 17:54:30 GMT
2+
# Date: 2025-11-21, 19:14:57 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -3255,8 +3255,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
32553255
1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK
32563256
1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
32573257
1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY
3258-
1ABF..1ADD ; Case_Ignorable # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
3259-
1AE0..1AEB ; Case_Ignorable # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
3258+
1ABF..1AF0 ; Case_Ignorable # Mn [50] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE COMMA ABOVE
32603259
1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
32613260
1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN
32623261
1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
@@ -3581,7 +3580,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG
35813580
E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG
35823581
E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
35833582

3584-
# Total code points: 2808
3583+
# Total code points: 2815
35853584

35863585
# ================================================
35873586

@@ -7603,8 +7602,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
76037602
1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
76047603
1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK
76057604
1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
7606-
1ABF..1ADD ; ID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
7607-
1AE0..1AEB ; ID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
7605+
1ABF..1AF0 ; ID_Continue # Mn [50] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE COMMA ABOVE
76087606
1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
76097607
1B04 ; ID_Continue # Mc BALINESE SIGN BISAH
76107608
1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
@@ -8553,7 +8551,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
85538551
3D000..3FC3F ; ID_Continue # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
85548552
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
85558553

8556-
# Total code points: 160997
8554+
# Total code points: 161004
85578555

85588556
# ================================================
85598557

@@ -9847,8 +9845,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
98479845
1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
98489846
1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK
98499847
1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
9850-
1ABF..1ADD ; XID_Continue # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
9851-
1AE0..1AEB ; XID_Continue # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
9848+
1ABF..1AF0 ; XID_Continue # Mn [50] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE COMMA ABOVE
98529849
1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
98539850
1B04 ; XID_Continue # Mc BALINESE SIGN BISAH
98549851
1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
@@ -10802,7 +10799,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
1080210799
3D000..3FC3F ; XID_Continue # Lo [11328] SEAL CHARACTER-3D000..SEAL CHARACTER-3FC3F
1080310800
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1080410801

10805-
# Total code points: 160978
10802+
# Total code points: 160985
1080610803

1080710804
# ================================================
1080810805

@@ -11024,8 +11021,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese
1102411021
1A7F ; Grapheme_Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1102511022
1AB0..1ABD ; Grapheme_Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1102611023
1ABE ; Grapheme_Extend # Me COMBINING PARENTHESES OVERLAY
11027-
1ABF..1ADD ; Grapheme_Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
11028-
1AE0..1AEB ; Grapheme_Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
11024+
1ABF..1AF0 ; Grapheme_Extend # Mn [50] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE COMMA ABOVE
1102911025
1B00..1B03 ; Grapheme_Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1103011026
1B34 ; Grapheme_Extend # Mn BALINESE SIGN REREKAN
1103111027
1B35 ; Grapheme_Extend # Mc BALINESE VOWEL SIGN TEDUNG
@@ -11285,7 +11281,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK.
1128511281
E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG
1128611282
E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1128711283

11288-
# Total code points: 2237
11284+
# Total code points: 2244
1128911285

1129011286
# ================================================
1129111287

@@ -13465,8 +13461,7 @@ ABC0..ABDA ; InCB; Consonant # Lo [27] MEETEI MAYEK LETTER KOK..MEETEI MAYEK
1346513461
1A7F ; InCB; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1346613462
1AB0..1ABD ; InCB; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1346713463
1ABE ; InCB; Extend # Me COMBINING PARENTHESES OVERLAY
13468-
1ABF..1ADD ; InCB; Extend # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
13469-
1AE0..1AEB ; InCB; Extend # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
13464+
1ABF..1AF0 ; InCB; Extend # Mn [50] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE COMMA ABOVE
1347013465
1B00..1B03 ; InCB; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1347113466
1B34 ; InCB; Extend # Mn BALINESE SIGN REREKAN
1347213467
1B35 ; InCB; Extend # Mc BALINESE VOWEL SIGN TEDUNG
@@ -13721,6 +13716,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA
1372113716
E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG
1372213717
E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1372313718

13724-
# Total code points: 2222
13719+
# Total code points: 2229
1372513720

1372613721
# EOF

unicodetools/data/ucd/dev/EastAsianWidth.txt

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# EastAsianWidth-18.0.0.txt
2-
# Date: 2025-11-21, 17:54:37 GMT
2+
# Date: 2025-11-21, 19:15:02 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -808,8 +808,7 @@
808808
1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
809809
1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
810810
1ABE ; N # Me COMBINING PARENTHESES OVERLAY
811-
1ABF..1ADD ; N # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
812-
1AE0..1AEB ; N # Mn [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
811+
1ABF..1AF0 ; N # Mn [50] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOUBLE COMMA ABOVE
813812
1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
814813
1B04 ; N # Mc BALINESE SIGN BISAH
815814
1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA

unicodetools/data/ucd/dev/LineBreak.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# LineBreak-18.0.0.txt
2-
# Date: 2025-11-21, 17:54:40 GMT
2+
# Date: 2025-11-21, 19:15:04 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -776,9 +776,9 @@
776776
1AA8..1AAD ; SA # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
777777
1AB0..1ABD ; CM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
778778
1ABE ; CM # Me COMBINING PARENTHESES OVERLAY
779-
1ABF..1ADD ; CM # Mn [31] COMBINING LATIN SMALL LETTER W BELOW..COMBINING DOT-AND-RING BELOW
780-
1AE0..1AEA ; CM # Mn [11] COMBINING LEFT TACK ABOVE..COMBINING UPWARDS ARROW ABOVE
779+
1ABF..1AEA ; CM # Mn [44] COMBINING LATIN SMALL LETTER W BELOW..COMBINING UPWARDS ARROW ABOVE
781780
1AEB ; GL # Mn COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
781+
1AEC..1AF0 ; CM # Mn [5] COMBINING CARON-ACUTE..COMBINING DOUBLE COMMA ABOVE
782782
1B00..1B03 ; CM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
783783
1B04 ; CM # Mc BALINESE SIGN BISAH
784784
1B05..1B33 ; AK # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA

0 commit comments

Comments
 (0)