Skip to content

content: Handle multiple math blocks in <p> #1156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 14, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 79 additions & 22 deletions lib/model/content.dart
Original file line number Diff line number Diff line change
@@ -1055,13 +1055,6 @@ class _ZulipContentParser {
return inlineParser.parseBlockInline(nodes);
}

BlockContentNode parseMathBlock(dom.Element element) {
final debugHtmlNode = kDebugMode ? element : null;
final texSource = _parseMath(element, block: true);
if (texSource == null) return UnimplementedBlockContentNode(htmlNode: element);
return MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode);
}

BlockContentNode parseListNode(dom.Element element) {
ListStyle? listStyle;
switch (element.localName) {
@@ -1453,6 +1446,64 @@ class _ZulipContentParser {
return tableNode ?? UnimplementedBlockContentNode(htmlNode: tableElement);
}

void parseMathBlocks(dom.NodeList nodes, List<BlockContentNode> result) {
assert(nodes.isNotEmpty);
assert((() {
final first = nodes.first;
return first is dom.Element
&& first.localName == 'span'
&& first.className == 'katex-display';
})());

final firstChild = nodes.first as dom.Element;
final texSource = _parseMath(firstChild, block: true);
if (texSource != null) {
result.add(MathBlockNode(
texSource: texSource,
debugHtmlNode: kDebugMode ? firstChild : null));
} else {
result.add(UnimplementedBlockContentNode(htmlNode: firstChild));
}

// Skip further checks if there was only a single child.
if (nodes.length == 1) return;

// The case with the `<br>\n` can happen when at the end of a quote;
// it seems like a glitch in the server's Markdown processing,
// so hopefully there just aren't any further such glitches.
bool hasTrailingBreakNewline = false;
if (nodes case [..., dom.Element(localName: 'br'), dom.Text(text: '\n')]) {
hasTrailingBreakNewline = true;
}

final length = hasTrailingBreakNewline
? nodes.length - 2
: nodes.length;
for (int i = 1; i < length; i++) {
final child = nodes[i];
final debugHtmlNode = kDebugMode ? child : null;

// If there are multiple <span class="katex-display"> nodes in a <p>
// each node is interleaved by '\n\n'. Whitespaces are ignored in HTML
// on web but each node has `display: block`, which renders each node
// on a new line. Since the emitted MathBlockNode are BlockContentNode,
// we skip these newlines here to replicate the same behavior as on web.
if (child case dom.Text(text: '\n\n')) continue;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see how the first sentence is relevant. Regarding "do the same here", continue itself does not seem to address the part about rendering each node with display: block, as that is more relevant to blocks.add(MathBlockNode(texSource: texSource, debugHtmlNode: debugHtmlNode));.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added more info about how it tries to replicate web behavior.


if (child case dom.Element(localName: 'span', className: 'katex-display')) {
final texSource = _parseMath(child, block: true);
if (texSource != null) {
result.add(MathBlockNode(
texSource: texSource,
debugHtmlNode: debugHtmlNode));
continue;
}
}

result.add(UnimplementedBlockContentNode(htmlNode: child));
}
}

BlockContentNode parseBlockContent(dom.Node node) {
final debugHtmlNode = kDebugMode ? node : null;
if (node is! dom.Element) {
@@ -1471,21 +1522,6 @@ class _ZulipContentParser {
}

if (localName == 'p' && className.isEmpty) {
// Oddly, the way a math block gets encoded in Zulip HTML is inside a <p>.
if (element.nodes case [dom.Element(localName: 'span') && var child, ...]) {
if (child.className == 'katex-display') {
if (element.nodes case [_]
|| [_, dom.Element(localName: 'br'),
dom.Text(text: "\n")]) {
// This might be too specific; we'll find out when we do #190.
// The case with the `<br>\n` can happen when at the end of a quote;
// it seems like a glitch in the server's Markdown processing,
// so hopefully there just aren't any further such glitches.
return parseMathBlock(child);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this the only call site to parseMathBlock? That can be removed, then.

(Doing so in the same commit as the other changes is helpful for understanding those other changes, even, because it makes visible in the same diff the old code that the new code corresponds to.)

}
}
}

final parsed = parseBlockInline(element.nodes);
return ParagraphNode(debugHtmlNode: debugHtmlNode,
links: parsed.links,
@@ -1599,6 +1635,17 @@ class _ZulipContentParser {
for (final node in nodes) {
if (node is dom.Text && (node.text == '\n')) continue;

// Oddly, the way math blocks get encoded in Zulip HTML is inside a <p>.
// And there can be multiple math blocks inside the paragraph node, so
// handle it explicitly here.
if (node case dom.Element(localName: 'p', className: '', nodes: [
dom.Element(localName: 'span', className: 'katex-display'), ...])) {
if (currentParagraph.isNotEmpty) consumeParagraph();
if (imageNodes.isNotEmpty) consumeImageNodes();
parseMathBlocks(node.nodes, result);
continue;
}

if (_isPossibleInlineNode(node)) {
if (imageNodes.isNotEmpty) {
consumeImageNodes();
@@ -1642,6 +1689,16 @@ class _ZulipContentParser {
continue;
}

// Oddly, the way math blocks get encoded in Zulip HTML is inside a <p>.
// And there can be multiple math blocks inside the paragraph node, so
// handle it explicitly here.
if (node case dom.Element(localName: 'p', className: '', nodes: [
dom.Element(localName: 'span', className: 'katex-display'), ...])) {
if (imageNodes.isNotEmpty) consumeImageNodes();
parseMathBlocks(node.nodes, result);
continue;
}

final block = parseBlockContent(node);
if (block is ImageNode) {
imageNodes.add(block);
76 changes: 76 additions & 0 deletions test/model/content_test.dart
Original file line number Diff line number Diff line change
@@ -506,6 +506,23 @@ class ContentExample {
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord mathnormal">λ</span></span></span></span></span></p>',
[MathBlockNode(texSource: r'\lambda')]);

static const mathBlocksMultipleInParagraph = ContentExample(
'math blocks, multiple in paragraph',
'```math\na\n\nb\n```',
// https://chat.zulip.org/#narrow/channel/7-test-here/topic/.E2.9C.94.20Rajesh/near/2001490
'<p>'
'<span class="katex-display"><span class="katex">'
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>a</mi></mrow>'
'<annotation encoding="application/x-tex">a</annotation></semantics></math></span>'
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">a</span></span></span></span></span>\n\n'
'<span class="katex-display"><span class="katex">'
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>b</mi></mrow>'
'<annotation encoding="application/x-tex">b</annotation></semantics></math></span>'
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord mathnormal">b</span></span></span></span></span></p>', [
MathBlockNode(texSource: 'a'),
MathBlockNode(texSource: 'b'),
]);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also have a test with multiple math blocks in a quote.


static const mathBlockInQuote = ContentExample(
'math block in quote',
// There's sometimes a quirky extra `<br>\n` at the end of the `<p>` that
@@ -522,6 +539,62 @@ class ContentExample {
'<br>\n</p>\n</blockquote>',
[QuotationNode([MathBlockNode(texSource: r'\lambda')])]);

static const mathBlocksMultipleInQuote = ContentExample(
'math blocks, multiple in quote',
"````quote\n```math\na\n\nb\n```\n````",
// https://chat.zulip.org/#narrow/channel/7-test-here/topic/.E2.9C.94.20Rajesh/near/2029236
'<blockquote>\n<p>'
'<span class="katex-display"><span class="katex">'
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>a</mi></mrow>'
'<annotation encoding="application/x-tex">a</annotation></semantics></math></span>'
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">a</span></span></span></span></span>'
'\n\n'
'<span class="katex-display"><span class="katex">'
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>b</mi></mrow>'
'<annotation encoding="application/x-tex">b</annotation></semantics></math></span>'
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord mathnormal">b</span></span></span></span></span>'
'<br>\n</p>\n</blockquote>',
[QuotationNode([
MathBlockNode(texSource: 'a'),
MathBlockNode(texSource: 'b'),
])]);

static const mathBlockBetweenImages = ContentExample(
'math block between images',
// https://chat.zulip.org/#narrow/channel/7-test-here/topic/Greg/near/2035891
'https://upload.wikimedia.org/wikipedia/commons/7/78/Verregende_bloem_van_een_Helenium_%27El_Dorado%27._22-07-2023._%28d.j.b%29.jpg\n```math\na\n```\nhttps://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg/1280px-Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg',
'<div class="message_inline_image">'
'<a href="https://upload.wikimedia.org/wikipedia/commons/7/78/Verregende_bloem_van_een_Helenium_%27El_Dorado%27._22-07-2023._%28d.j.b%29.jpg">'
'<img src="/external_content/de28eb3abf4b7786de4545023dc42d434a2ea0c2/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f372f37382f566572726567656e64655f626c6f656d5f76616e5f65656e5f48656c656e69756d5f253237456c5f446f7261646f2532372e5f32322d30372d323032332e5f253238642e6a2e622532392e6a7067"></a></div>'
'<p>'
'<span class="katex-display"><span class="katex">'
'<span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>a</mi></mrow>'
'<annotation encoding="application/x-tex">a</annotation></semantics></math></span>'
'<span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.4306em;"></span><span class="mord mathnormal">a</span></span></span></span></span>'
'</p>\n'
'<div class="message_inline_image">'
'<a href="https://upload.wikimedia.org/wikipedia/commons/thumb/7/71/Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg/1280px-Zaadpluizen_van_een_Clematis_texensis_%27Princess_Diana%27._18-07-2023_%28actm.%29_02.jpg">'
'<img src="/external_content/58b0ef9a06d7bb24faec2b11df2f57f476e6f6bb/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f7468756d622f372f37312f5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a70672f3132383070782d5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a7067"></a></div>',
[
ImageNodeList([
ImageNode(
srcUrl: '/external_content/de28eb3abf4b7786de4545023dc42d434a2ea0c2/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f372f37382f566572726567656e64655f626c6f656d5f76616e5f65656e5f48656c656e69756d5f253237456c5f446f7261646f2532372e5f32322d30372d323032332e5f253238642e6a2e622532392e6a7067',
thumbnailUrl: null,
loading: false,
originalWidth: null,
originalHeight: null),
]),
MathBlockNode(texSource: 'a'),
ImageNodeList([
ImageNode(
srcUrl: '/external_content/58b0ef9a06d7bb24faec2b11df2f57f476e6f6bb/68747470733a2f2f75706c6f61642e77696b696d656469612e6f72672f77696b6970656469612f636f6d6d6f6e732f7468756d622f372f37312f5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a70672f3132383070782d5a616164706c75697a656e5f76616e5f65656e5f436c656d617469735f746578656e7369735f2532375072696e636573735f4469616e612532372e5f31382d30372d323032335f2532386163746d2e2532395f30322e6a7067',
thumbnailUrl: null,
loading: false,
originalWidth: null,
originalHeight: null),
]),
]);

static const imageSingle = ContentExample(
'single image',
// https://chat.zulip.org/#narrow/stream/7-test-here/topic/Thumbnails/near/1900103
@@ -1470,7 +1543,10 @@ void main() {
testParseExample(ContentExample.codeBlockFollowedByMultipleLineBreaks);

testParseExample(ContentExample.mathBlock);
testParseExample(ContentExample.mathBlocksMultipleInParagraph);
testParseExample(ContentExample.mathBlockInQuote);
testParseExample(ContentExample.mathBlocksMultipleInQuote);
testParseExample(ContentExample.mathBlockBetweenImages);

testParseExample(ContentExample.imageSingle);
testParseExample(ContentExample.imageSingleNoDimensions);