Skip to content

Commit 1d737e4

Browse files
content: Handle multiple math blocks in <p>
Fixes: #1130
1 parent 6b40f36 commit 1d737e4

File tree

2 files changed

+144
-43
lines changed

2 files changed

+144
-43
lines changed

Diff for: lib/model/content.dart

+68-43
Original file line numberDiff line numberDiff line change
@@ -1055,13 +1055,6 @@ class _ZulipContentParser {
10551055
return inlineParser.parseBlockInline(nodes);
10561056
}
10571057

1058-
BlockContentNode parseMathBlock(dom.Element element) {
1059-
final debugHtmlNode = kDebugMode ? element : null;
1060-
final texSource = _parseMath(element, block: true);
1061-
if (texSource == null) return UnimplementedBlockContentNode(htmlNode: element);
1062-
return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
1063-
}
1064-
10651058
BlockContentNode parseListNode(dom.Element element) {
10661059
ListStyle? listStyle;
10671060
switch (element.localName) {
@@ -1453,6 +1446,64 @@ class _ZulipContentParser {
14531446
return tableNode ?? UnimplementedBlockContentNode(htmlNode: tableElement);
14541447
}
14551448

1449+
/// Parses the children of a `<p>` that starts with a math block, appending
/// the resulting block nodes to [result].
///
/// [nodes] must be non-empty and its first node must be a
/// `<span class="katex-display">`; both conditions are checked by asserts.
///
/// Each `katex-display` span for which `_parseMath` returns a TeX source
/// becomes a [MathBlockNode]. After the first child, a `'\n\n'` text node
/// is skipped, as is a trailing `<br>` followed by `'\n'`. Any other child,
/// including a span for which `_parseMath` returns null, becomes an
/// [UnimplementedBlockContentNode].
void parseMathBlocks(dom.NodeList nodes, List<BlockContentNode> result) {
1450+
assert(nodes.isNotEmpty);
1451+
assert((() {
1452+
final first = nodes.first;
1453+
return first is dom.Element
1454+
&& first.localName == 'span'
1455+
&& first.className == 'katex-display';
1456+
})());
1457+
1458+
// The first child is handled on its own, before the loop below,
// which starts at index 1.
final firstChild = nodes.first as dom.Element;
1459+
final texSource = _parseMath(firstChild, block: true);
1460+
if (texSource != null) {
1461+
result.add(MathBlockNode(
1462+
texSource: texSource,
1463+
debugHtmlNode: kDebugMode ? firstChild : null));
1464+
} else {
1465+
result.add(UnimplementedBlockContentNode(htmlNode: firstChild));
1466+
}
1467+
1468+
// Skip further checks if there was only a single child.
1469+
if (nodes.length == 1) return;
1470+
1471+
// The case with the `<br>\n` can happen when at the end of a quote;
1472+
// it seems like a glitch in the server's Markdown processing,
1473+
// so hopefully there just aren't any further such glitches.
1474+
bool hasTrailingBreakNewline = false;
1475+
if (nodes case [..., dom.Element(localName: 'br'), dom.Text(text: '\n')]) {
1476+
hasTrailingBreakNewline = true;
1477+
}
1478+
1479+
// When present, the trailing `<br>` and '\n' are excluded from the scan,
// so they produce no output node.
final length = hasTrailingBreakNewline
1480+
? nodes.length - 2
1481+
: nodes.length;
1482+
for (int i = 1; i < length; i++) {
1483+
final child = nodes[i];
1484+
final debugHtmlNode = kDebugMode ? child : null;
1485+
1486+
// If there are multiple <span class="katex-display"> nodes in a <p>,
1487+
// the nodes are separated by '\n\n'. Whitespace is ignored in HTML
1488+
// on web, but each node has `display: block`, which renders each node
1489+
// on a new line. Since the emitted MathBlockNodes are BlockContentNodes,
1490+
// we skip these newlines here to replicate the same behavior as on web.
1491+
if (child case dom.Text(text: '\n\n')) continue;
1492+
1493+
if (child case dom.Element(localName: 'span', className: 'katex-display')) {
1494+
final texSource = _parseMath(child, block: true);
1495+
if (texSource != null) {
1496+
result.add(MathBlockNode(
1497+
texSource: texSource,
1498+
debugHtmlNode: debugHtmlNode));
1499+
continue;
1500+
}
1501+
}
1502+
1503+
// Reached for any child that is not a parseable math block, including a
// katex-display span for which _parseMath returned null.
result.add(UnimplementedBlockContentNode(htmlNode: child));
1504+
}
1505+
}
1506+
14561507
BlockContentNode parseBlockContent(dom.Node node) {
14571508
final debugHtmlNode = kDebugMode ? node : null;
14581509
if (node is! dom.Element) {
@@ -1584,27 +1635,14 @@ class _ZulipContentParser {
15841635
for (final node in nodes) {
15851636
if (node is dom.Text && (node.text == '\n')) continue;
15861637

1587-
// Oddly, the way a math block gets encoded in Zulip HTML is inside a <p>.
1638+
// Oddly, the way math blocks get encoded in Zulip HTML is inside a <p>.
1639+
// And there can be multiple math blocks inside the paragraph node, so
1640+
// handle it explicitly here.
15881641
if (node case dom.Element(localName: 'p', className: '', nodes: [
1589-
dom.Element(
1590-
localName: 'span',
1591-
className: 'katex-display') && final child, ...])) {
1592-
final BlockContentNode parsed;
1593-
if (node.nodes case [_]
1594-
|| [_, dom.Element(localName: 'br'),
1595-
dom.Text(text: "\n")]) {
1596-
// This might be too specific; we'll find out when we do #190.
1597-
// The case with the `<br>\n` can happen when at the end of a quote;
1598-
// it seems like a glitch in the server's Markdown processing,
1599-
// so hopefully there just aren't any further such glitches.
1600-
parsed = parseMathBlock(child);
1601-
} else {
1602-
parsed = UnimplementedBlockContentNode(htmlNode: node);
1603-
}
1604-
1642+
dom.Element(localName: 'span', className: 'katex-display'), ...])) {
16051643
if (currentParagraph.isNotEmpty) consumeParagraph();
16061644
if (imageNodes.isNotEmpty) consumeImageNodes();
1607-
result.add(parsed);
1645+
parseMathBlocks(node.nodes, result);
16081646
continue;
16091647
}
16101648

@@ -1651,26 +1689,13 @@ class _ZulipContentParser {
16511689
continue;
16521690
}
16531691

1654-
// Oddly, the way a math block gets encoded in Zulip HTML is inside a <p>.
1692+
// Oddly, the way math blocks get encoded in Zulip HTML is inside a <p>.
1693+
// And there can be multiple math blocks inside the paragraph node, so
1694+
// handle it explicitly here.
16551695
if (node case dom.Element(localName: 'p', className: '', nodes: [
1656-
dom.Element(
1657-
localName: 'span',
1658-
className: 'katex-display') && final child, ...])) {
1659-
final BlockContentNode parsed;
1660-
if (node.nodes case [_]
1661-
|| [_, dom.Element(localName: 'br'),
1662-
dom.Text(text: "\n")]) {
1663-
// This might be too specific; we'll find out when we do #190.
1664-
// The case with the `<br>\n` can happen when at the end of a quote;
1665-
// it seems like a glitch in the server's Markdown processing,
1666-
// so hopefully there just aren't any further such glitches.
1667-
parsed = parseMathBlock(child);
1668-
} else {
1669-
parsed = UnimplementedBlockContentNode(htmlNode: node);
1670-
}
1671-
1696+
dom.Element(localName: 'span', className: 'katex-display'), ...])) {
16721697
if (imageNodes.isNotEmpty) consumeImageNodes();
1673-
result.add(parsed);
1698+
parseMathBlocks(node.nodes, result);
16741699
continue;
16751700
}
16761701

Diff for: test/model/content_test.dart

+76
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,23 @@ class ContentExample {
506506
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord mathnormal">λ</span></span></span></span></span></p>',
507507
[MathBlockNode(texSource: r'\lambda')]);
508508

509+
static const mathBlocksMultipleInParagraph = ContentExample(
510+
'math blocks, multiple in paragraph',
511+
'```math\na\n\nb\n```',
512+
// https://chat.zulip.org/#narrow/channel/7-test-here/topic/.E2.9C.94.20Rajesh/near/2001490
513+
'<p>'
514+
'<span class="katex-display"><span class="katex">'
515+
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>a</mi></mrow>'
516+
'<annotation encoding="application/x-tex">a</annotation></semantics></math></span>'
517+
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">a</span></span></span></span></span>\n\n'
518+
'<span class="katex-display"><span class="katex">'
519+
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>b</mi></mrow>'
520+
'<annotation encoding="application/x-tex">b</annotation></semantics></math></span>'
521+
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord mathnormal">b</span></span></span></span></span></p>', [
522+
MathBlockNode(texSource: 'a'),
523+
MathBlockNode(texSource: 'b'),
524+
]);
525+
509526
static const mathBlockInQuote = ContentExample(
510527
'math block in quote',
511528
// There's sometimes a quirky extra `<br>\n` at the end of the `<p>` that
@@ -522,6 +539,62 @@ class ContentExample {
522539
'<br>\n</p>\n</blockquote>',
523540
[QuotationNode([MathBlockNode(texSource: r'\lambda')])]);
524541

542+
static const mathBlocksMultipleInQuote = ContentExample(
543+
'math blocks, multiple in quote',
544+
"````quote\n```math\na\n\nb\n```\n````",
545+
// https://chat.zulip.org/#narrow/channel/7-test-here/topic/.E2.9C.94.20Rajesh/near/2029236
546+
'<blockquote>\n<p>'
547+
'<span class="katex-display"><span class="katex">'
548+
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>a</mi></mrow>'
549+
'<annotation encoding="application/x-tex">a</annotation></semantics></math></span>'
550+
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">a</span></span></span></span></span>'
551+
'\n\n'
552+
'<span class="katex-display"><span class="katex">'
553+
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>b</mi></mrow>'
554+
'<annotation encoding="application/x-tex">b</annotation></semantics></math></span>'
555+
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord mathnormal">b</span></span></span></span></span>'
556+
'<br>\n</p>\n</blockquote>',
557+
[QuotationNode([
558+
MathBlockNode(texSource: 'a'),
559+
MathBlockNode(texSource: 'b'),
560+
])]);
561+
562+
static const mathBlockBetweenImages = ContentExample(
563+
'math block between images',
564+
// https://chat.zulip.org/#narrow/channel/7-test-here/topic/Greg/near/2035891
565+
'https://upload.wikimedia.org/wikipedia/commons/7/78/Verregende_bloem_van_een_Helenium_%27El_Dorado%27._22-07-2023._%28d.j.b%29.jpg\n```math\na\n```\nhttps://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg/1280px-Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg',
566+
'<div class="message_inline_image">'
567+
'<a href="https://upload.wikimedia.org/wikipedia/commons/7/78/Verregende_bloem_van_een_Helenium_%27El_Dorado%27._22-07-2023._%28d.j.b%29.jpg">'
568+
'<img src="/external_content/de28eb3abf4b7786de4545023dc42d434a2ea0c2/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f372f37382f566572726567656e64655f626c6f656d5f76616e5f65656e5f48656c656e69756d5f253237456c5f446f7261646f2532372e5f32322d30372d323032332e5f253238642e6a2e622532392e6a7067"></a></div>'
569+
'<p>'
570+
'<span class="katex-display"><span class="katex">'
571+
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>a</mi></mrow>'
572+
'<annotation encoding="application/x-tex">a</annotation></semantics></math></span>'
573+
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">a</span></span></span></span></span>'
574+
'</p>\n'
575+
'<div class="message_inline_image">'
576+
'<a href="https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg/1280px-Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg">'
577+
'<img src="/external_content/58b0ef9a06d7bb24faec2b11df2f57f476e6f6bb/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f7468756d622f372f37312f5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a70672f3132383070782d5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a7067"></a></div>',
578+
[
579+
ImageNodeList([
580+
ImageNode(
581+
srcUrl: '/external_content/de28eb3abf4b7786de4545023dc42d434a2ea0c2/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f372f37382f566572726567656e64655f626c6f656d5f76616e5f65656e5f48656c656e69756d5f253237456c5f446f7261646f2532372e5f32322d30372d323032332e5f253238642e6a2e622532392e6a7067',
582+
thumbnailUrl: null,
583+
loading: false,
584+
originalWidth: null,
585+
originalHeight: null),
586+
]),
587+
MathBlockNode(texSource: 'a'),
588+
ImageNodeList([
589+
ImageNode(
590+
srcUrl: '/external_content/58b0ef9a06d7bb24faec2b11df2f57f476e6f6bb/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f7468756d622f372f37312f5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a70672f3132383070782d5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a7067',
591+
thumbnailUrl: null,
592+
loading: false,
593+
originalWidth: null,
594+
originalHeight: null),
595+
]),
596+
]);
597+
525598
static const imageSingle = ContentExample(
526599
'single image',
527600
// https://chat.zulip.org/#narrow/stream/7-test-here/topic/Thumbnails/near/1900103
@@ -1470,7 +1543,10 @@ void main() {
14701543
testParseExample(ContentExample.codeBlockFollowedByMultipleLineBreaks);
14711544

14721545
testParseExample(ContentExample.mathBlock);
1546+
testParseExample(ContentExample.mathBlocksMultipleInParagraph);
14731547
testParseExample(ContentExample.mathBlockInQuote);
1548+
testParseExample(ContentExample.mathBlocksMultipleInQuote);
1549+
testParseExample(ContentExample.mathBlockBetweenImages);
14741550

14751551
testParseExample(ContentExample.imageSingle);
14761552
testParseExample(ContentExample.imageSingleNoDimensions);

0 commit comments

Comments
 (0)