@@ -175,8 +175,11 @@ class WidthState(enum.IntEnum):
175
175
- 4th bit: whether to set top bit on emoji presentation.
176
176
If this is set but 3rd is not, the width mode is related to zwj sequences
177
177
- 5th from top: whether this is unaffected by ligature-transparent
178
+ (if set, should also set 3rd and 4th)
178
179
- 6th bit: if 4th is set but this one is not, then this is a ZWJ ligature state
179
- where no ZWJ has been encountered yet; encountering one flips this on"""
180
+ where no ZWJ has been encountered yet; encountering one flips this on
181
+ - 7th bit: set for VS1 (U+FE00) if CJK, or for VS2 (U+FE01) if not CJK
182
+ """
180
183
181
184
# BASIC WIDTHS
182
185
@@ -272,6 +275,9 @@ class WidthState(enum.IntEnum):
272
275
273
276
# VARIATION SELECTORS
274
277
278
+ VARIATION_SELECTOR_1_OR_2 = 0b0000_0010_0000_0000
279
+ "\\ uFE00 if CJK, or \\ uFE01 otherwise"
280
+
275
281
# Text presentation sequences (not CJK)
276
282
VARIATION_SELECTOR_15 = 0b0100_0000_0000_0000
277
283
"\\ uFE0E (text presentation sequences)"
@@ -367,6 +373,7 @@ def width_alone(self) -> int:
367
373
| WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
368
374
| WidthState .VARIATION_SELECTOR_15
369
375
| WidthState .VARIATION_SELECTOR_16
376
+ | WidthState .VARIATION_SELECTOR_1_OR_2
370
377
):
371
378
return 0
372
379
case (
@@ -656,9 +663,11 @@ def load_width_maps() -> tuple[list[WidthState], list[WidthState]]:
656
663
ea [cp ] = width
657
664
658
665
# East-Asian only
666
+ ea [0xFE00 ] = WidthState .VARIATION_SELECTOR_1_OR_2
659
667
ea [0x0338 ] = WidthState .COMBINING_LONG_SOLIDUS_OVERLAY
660
668
661
669
# Not East Asian only
670
+ not_ea [0xFE01 ] = WidthState .VARIATION_SELECTOR_1_OR_2
662
671
not_ea [0xFE0E ] = WidthState .VARIATION_SELECTOR_15
663
672
664
673
return (not_ea , ea )
@@ -724,7 +733,7 @@ def load_solidus_transparent(
724
733
cjk_width_map : list [WidthState ],
725
734
) -> list [tuple [Codepoint , Codepoint ]]:
726
735
"""Characters expanding to a canonical combining class above 1, plus `ligature_transparent`s from above.
727
- Ranges matching ones in `ligature_transparent` exactly are excluded (for compression), so it needs to bechecked also.
736
+ Ranges matching ones in `ligature_transparent` exactly are excluded (for compression), so it needs to be checked also.
728
737
"""
729
738
730
739
ccc_above_1 = set ()
@@ -756,7 +765,7 @@ def load_solidus_transparent(
756
765
num_chars = len (ccc_above_1 )
757
766
758
767
for cp in ccc_above_1 :
759
- if cp != 0xFE0F :
768
+ if cp not in [ 0xFE00 , 0xFE0F ] :
760
769
assert (
761
770
cjk_width_map [cp ].table_width () != CharWidthInTable .SPECIAL
762
771
), f"U+{ cp :X} "
@@ -1312,8 +1321,17 @@ def lookup_fns(
1312
1321
return (0, next_info.set_emoji_presentation());
1313
1322
}"""
1314
1323
1315
- if not is_cjk :
1324
+ if is_cjk :
1325
+ s += """
1326
+ if c == '\\ u{FE00}' {
1327
+ return (0, next_info.set_vs1_2());
1328
+ }
1329
+ """
1330
+ else :
1316
1331
s += """
1332
+ if c == '\\ u{FE01}' {
1333
+ return (0, next_info.set_vs1_2());
1334
+ }
1317
1335
if c == '\\ u{FE0E}' {
1318
1336
return (0, next_info.set_text_presentation());
1319
1337
}
@@ -1323,9 +1341,19 @@ def lookup_fns(
1323
1341
} else {
1324
1342
next_info = next_info.unset_text_presentation();
1325
1343
}
1326
- }"""
1344
+ } else """
1327
1345
1328
- s += """
1346
+ s += """if next_info.is_vs1_2() {
1347
+ if matches!(c, '\\ u{2018}' | '\\ u{2019}' | '\\ u{201C}' | '\\ u{201D}') {
1348
+ return ("""
1349
+
1350
+ s += str (2 - is_cjk )
1351
+
1352
+ s += """, WidthInfo::DEFAULT);
1353
+ } else {
1354
+ next_info = next_info.unset_vs1_2();
1355
+ }
1356
+ }
1329
1357
if next_info.is_ligature_transparent() {
1330
1358
if c == '\\ u{200D}' {
1331
1359
return (0, next_info.set_zwj_bit());
@@ -1586,6 +1614,8 @@ def emit_module(
1586
1614
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1587
1615
struct WidthInfo(u16);
1588
1616
1617
+ const LIGATURE_TRANSPARENT_MASK: u16 = 0b0010_0000_0000_0000;
1618
+
1589
1619
impl WidthInfo {
1590
1620
/// No special handling necessary
1591
1621
const DEFAULT: Self = Self(0);
@@ -1615,51 +1645,84 @@ def emit_module(
1615
1645
1616
1646
/// Has top bit set
1617
1647
fn is_emoji_presentation(self) -> bool {{
1618
- (self.0 & 0b1000_0000_0000_0000 ) == 0b1000_0000_0000_0000
1648
+ (self.0 & WidthInfo::VARIATION_SELECTOR_16.0 ) == WidthInfo::VARIATION_SELECTOR_16.0
1619
1649
}}
1620
1650
1621
- /// Has top bit set
1622
1651
fn is_zwj_emoji_presentation(self) -> bool {{
1623
1652
(self.0 & 0b1011_0000_0000_0000) == 0b1001_0000_0000_0000
1624
1653
}}
1625
1654
1626
1655
/// Set top bit
1627
1656
fn set_emoji_presentation(self) -> Self {{
1628
- if (self.0 & 0b0010_0000_0000_0000 ) == 0b0010_0000_0000_0000
1657
+ if (self.0 & LIGATURE_TRANSPARENT_MASK ) == LIGATURE_TRANSPARENT_MASK
1629
1658
|| (self.0 & 0b1001_0000_0000_0000) == 0b0001_0000_0000_0000
1630
1659
{{
1631
- Self(self.0 | 0b1000_0000_0000_0000)
1660
+ Self(
1661
+ self.0
1662
+ | WidthInfo::VARIATION_SELECTOR_16.0
1663
+ & !WidthInfo::VARIATION_SELECTOR_15.0
1664
+ & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0,
1665
+ )
1632
1666
}} else {{
1633
1667
Self::VARIATION_SELECTOR_16
1634
1668
}}
1635
1669
}}
1636
1670
1637
1671
/// Clear top bit
1638
1672
fn unset_emoji_presentation(self) -> Self {{
1639
- if (self.0 & 0b0010_0000_0000_0000 ) == 0b0010_0000_0000_0000 {{
1640
- Self(self.0 & 0b0111_1111_1111_1111 )
1673
+ if (self.0 & LIGATURE_TRANSPARENT_MASK ) == LIGATURE_TRANSPARENT_MASK {{
1674
+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_16.0 )
1641
1675
}} else {{
1642
1676
Self::DEFAULT
1643
1677
}}
1644
1678
}}
1645
1679
1646
1680
/// Has 2nd bit set
1647
1681
fn is_text_presentation(self) -> bool {{
1648
- (self.0 & 0b0100_0000_0000_0000 ) == 0b0100_0000_0000_0000
1682
+ (self.0 & WidthInfo::VARIATION_SELECTOR_15.0 ) == WidthInfo::VARIATION_SELECTOR_15.0
1649
1683
}}
1650
1684
1651
1685
/// Set 2nd bit
1652
1686
fn set_text_presentation(self) -> Self {{
1653
- if (self.0 & 0b0010_0000_0000_0000) == 0b0010_0000_0000_0000 {{
1654
- Self(self.0 | 0b0100_0000_0000_0000)
1687
+ if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
1688
+ Self(
1689
+ self.0
1690
+ | WidthInfo::VARIATION_SELECTOR_15.0
1691
+ & !WidthInfo::VARIATION_SELECTOR_16.0
1692
+ & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0,
1693
+ )
1655
1694
}} else {{
1656
- Self(0b0100_0000_0000_0000 )
1695
+ Self(WidthInfo::VARIATION_SELECTOR_15.0 )
1657
1696
}}
1658
1697
}}
1659
1698
1660
1699
/// Clear 2nd bit
1661
1700
fn unset_text_presentation(self) -> Self {{
1662
- Self(self.0 & 0b1011_1111_1111_1111)
1701
+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_15.0)
1702
+ }}
1703
+
1704
+ /// Has 7th bit set
1705
+ fn is_vs1_2(self) -> bool {{
1706
+ (self.0 & WidthInfo::VARIATION_SELECTOR_1_OR_2.0) == WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1707
+ }}
1708
+
1709
+ /// Set 7th bit
1710
+ fn set_vs1_2(self) -> Self {{
1711
+ if (self.0 & LIGATURE_TRANSPARENT_MASK) == LIGATURE_TRANSPARENT_MASK {{
1712
+ Self(
1713
+ self.0
1714
+ | WidthInfo::VARIATION_SELECTOR_1_OR_2.0
1715
+ & !WidthInfo::VARIATION_SELECTOR_15.0
1716
+ & !WidthInfo::VARIATION_SELECTOR_16.0,
1717
+ )
1718
+ }} else {{
1719
+ Self(WidthInfo::VARIATION_SELECTOR_1_OR_2.0)
1720
+ }}
1721
+ }}
1722
+
1723
+ /// Clear 7th bit
1724
+ fn unset_vs1_2(self) -> Self {{
1725
+ Self(self.0 & !WidthInfo::VARIATION_SELECTOR_1_OR_2.0)
1663
1726
}}
1664
1727
}}
1665
1728
0 commit comments