Skip to content

Commit eb5b719

Browse files
authored
Merge pull request commonmark#229 from github/fix-footnotes-nested-linkrefs-autolinker
Fix footnotes: nested, confused for link refs & mangled by the autolinker
2 parents 0f98e8a + de6feae commit eb5b719

File tree

5 files changed

+194
-13
lines changed

5 files changed

+194
-13
lines changed

src/blocks.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,6 @@ static void process_footnotes(cmark_parser *parser) {
468468
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
469469
cur = cmark_iter_get_node(iter);
470470
if (ev_type == CMARK_EVENT_EXIT && cur->type == CMARK_NODE_FOOTNOTE_DEFINITION) {
471-
cmark_node_unlink(cur);
472471
cmark_footnote_create(map, cur);
473472
}
474473
}
@@ -515,13 +514,16 @@ static void process_footnotes(cmark_parser *parser) {
515514
qsort(map->sorted, map->size, sizeof(cmark_map_entry *), sort_footnote_by_ix);
516515
for (unsigned int i = 0; i < map->size; ++i) {
517516
cmark_footnote *footnote = (cmark_footnote *)map->sorted[i];
518-
if (!footnote->ix)
517+
if (!footnote->ix) {
518+
cmark_node_unlink(footnote->node);
519519
continue;
520+
}
520521
cmark_node_append_child(parser->root, footnote->node);
521522
footnote->node = NULL;
522523
}
523524
}
524525

526+
cmark_unlink_footnotes_map(map);
525527
cmark_map_free(map);
526528
}
527529

src/footnotes.c

+23
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,26 @@ void cmark_footnote_create(cmark_map *map, cmark_node *node) {
3838
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
3939
return cmark_map_new(mem, footnote_free);
4040
}
41+
42+
// Before calling `cmark_map_free` on a map with `cmark_footnotes`, first
43+
// unlink all of the footnote nodes before freeing their memory.
44+
//
45+
// Sometimes, two (unused) footnote nodes can end up referencing each other,
46+
// which as they get freed up by calling `cmark_map_free` -> `footnote_free` ->
47+
// etc, can lead to a use-after-free error.
48+
//
49+
// Better to `unlink` every footnote node first, setting their next, prev, and
50+
// parent pointers to NULL, and only then walk thru & free them up.
51+
void cmark_unlink_footnotes_map(cmark_map *map) {
52+
cmark_map_entry *ref;
53+
cmark_map_entry *next;
54+
55+
ref = map->refs;
56+
while(ref) {
57+
next = ref->next;
58+
if (((cmark_footnote *)ref)->node) {
59+
cmark_node_unlink(((cmark_footnote *)ref)->node);
60+
}
61+
ref = next;
62+
}
63+
}

src/footnotes.h

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ typedef struct cmark_footnote cmark_footnote;
1818
void cmark_footnote_create(cmark_map *map, cmark_node *node);
1919
cmark_map *cmark_footnote_map_new(cmark_mem *mem);
2020

21+
void cmark_unlink_footnotes_map(cmark_map *map);
22+
2123
#ifdef __cplusplus
2224
}
2325
#endif

src/inlines.c

+69-11
Original file line numberDiff line numberDiff line change
@@ -1137,19 +1137,77 @@ static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) {
11371137
// What if we're a footnote link?
11381138
if (parser->options & CMARK_OPT_FOOTNOTES &&
11391139
opener->inl_text->next &&
1140-
opener->inl_text->next->type == CMARK_NODE_TEXT &&
1141-
!opener->inl_text->next->next) {
1140+
opener->inl_text->next->type == CMARK_NODE_TEXT) {
1141+
11421142
cmark_chunk *literal = &opener->inl_text->next->as.literal;
1143-
if (literal->len > 1 && literal->data[0] == '^') {
1144-
inl = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1145-
inl->as.literal = cmark_chunk_dup(literal, 1, literal->len - 1);
1146-
inl->start_line = inl->end_line = subj->line;
1147-
inl->start_column = opener->inl_text->start_column;
1148-
inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1149-
cmark_node_insert_before(opener->inl_text, inl);
1150-
cmark_node_free(opener->inl_text->next);
1151-
cmark_node_free(opener->inl_text);
1143+
1144+
// look back to the opening '[', and skip ahead to the next character
1145+
// if we're looking at a '[^' sequence, and there is other text or nodes
1146+
// after the ^, let's call it a footnote reference.
1147+
if ((literal->len > 0 && literal->data[0] == '^') && (literal->len > 1 || opener->inl_text->next->next)) {
1148+
1149+
// Before we got this far, the `handle_close_bracket` function may have
1150+
// advanced the current state beyond our footnote's actual closing
1151+
// bracket, ie if it went looking for a `link_label`.
1152+
// Let's just rewind the subject's position:
1153+
subj->pos = initial_pos;
1154+
1155+
cmark_node *fnref = make_simple(subj->mem, CMARK_NODE_FOOTNOTE_REFERENCE);
1156+
1157+
// the start and end of the footnote ref is the opening and closing brace
1158+
// i.e. the subject's current position, and the opener's start_column
1159+
int fnref_end_column = subj->pos + subj->column_offset + subj->block_offset;
1160+
int fnref_start_column = opener->inl_text->start_column;
1161+
1162+
// any given node delineates a substring of the line being processed,
1163+
// with the remainder of the line being pointed to thru its 'literal'
1164+
// struct member.
1165+
// here, we copy the literal's pointer, moving it past the '^' character
1166+
// for a length equal to the size of footnote reference text.
1167+
// i.e. end_col minus start_col, minus the [ and the ^ characters
1168+
//
1169+
// this copies the footnote reference string, even if between the
1170+
// `opener` and the subject's current position there are other nodes
1171+
//
1172+
// (first, check for underflows)
1173+
if ((fnref_start_column + 2) <= fnref_end_column) {
1174+
fnref->as.literal = cmark_chunk_dup(literal, 1, (fnref_end_column - fnref_start_column) - 2);
1175+
} else {
1176+
fnref->as.literal = cmark_chunk_dup(literal, 1, 0);
1177+
}
1178+
1179+
fnref->start_line = fnref->end_line = subj->line;
1180+
fnref->start_column = fnref_start_column;
1181+
fnref->end_column = fnref_end_column;
1182+
1183+
// we then replace the opener with this new fnref node, the net effect
1184+
// being replacing the opening '[' text node with a `^footnote-ref]` node.
1185+
cmark_node_insert_before(opener->inl_text, fnref);
1186+
11521187
process_emphasis(parser, subj, opener->previous_delimiter);
1188+
// sometimes, the footnote reference text gets parsed into multiple nodes
1189+
// i.e. '[^example]' parsed into '[', '^exam', 'ple]'.
1190+
// this happens for ex with the autolink extension. when the autolinker
1191+
// finds the 'w' character, it will split the text into multiple nodes
1192+
// in hopes of being able to match a 'www.' substring.
1193+
//
1194+
// because this function is called one character at a time via the
1195+
// `parse_inlines` function, and the current subj->pos is pointing at the
1196+
// closing ] brace, and because we copy all the text between the [ ]
1197+
// braces, we should be able to safely ignore and delete any nodes after
1198+
// the opener->inl_text->next.
1199+
//
1200+
// therefore, here we walk thru the list and free them all up
1201+
cmark_node *next_node;
1202+
cmark_node *current_node = opener->inl_text->next;
1203+
while(current_node) {
1204+
next_node = current_node->next;
1205+
cmark_node_free(current_node);
1206+
current_node = next_node;
1207+
}
1208+
1209+
cmark_node_free(opener->inl_text);
1210+
11531211
pop_bracket(subj);
11541212
return NULL;
11551213
}

test/regression.txt

+96
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,99 @@ Pull request #128 - Buffer overread in tables extension
269269
<p>|
270270
-|</p>
271271
````````````````````````````````
272+
273+
Footnotes may be nested inside other footnotes.
274+
275+
```````````````````````````````` example footnotes
276+
This is some text. It has a citation.[^citation]
277+
278+
[^another-citation]: My second citation.
279+
280+
[^citation]: This is a long winded parapgraph that also has another citation.[^another-citation]
281+
.
282+
<p>This is some text. It has a citation.<sup class="footnote-ref"><a href="#fn1" id="fnref1">1</a></sup></p>
283+
<section class="footnotes">
284+
<ol>
285+
<li id="fn1">
286+
<p>This is a long winded parapgraph that also has another citation.<sup class="footnote-ref"><a href="#fn2" id="fnref2">2</a></sup> <a href="#fnref1" class="footnote-backref">↩</a></p>
287+
</li>
288+
<li id="fn2">
289+
<p>My second citation. <a href="#fnref2" class="footnote-backref">↩</a></p>
290+
</li>
291+
</ol>
292+
</section>
293+
````````````````````````````````
294+
295+
Footnotes are similar to, but should not be confused with, link references
296+
297+
```````````````````````````````` example footnotes
298+
This is some text. It has two footnotes references, side-by-side without any spaces,[^footnote1][^footnote2] which are definitely not link references.
299+
300+
[^footnote1]: Hello.
301+
302+
[^footnote2]: Goodbye.
303+
.
304+
<p>This is some text. It has two footnotes references, side-by-side without any spaces,<sup class="footnote-ref"><a href="#fn1" id="fnref1">1</a></sup><sup class="footnote-ref"><a href="#fn2" id="fnref2">2</a></sup> which are definitely not link references.</p>
305+
<section class="footnotes">
306+
<ol>
307+
<li id="fn1">
308+
<p>Hello. <a href="#fnref1" class="footnote-backref">↩</a></p>
309+
</li>
310+
<li id="fn2">
311+
<p>Goodbye. <a href="#fnref2" class="footnote-backref">↩</a></p>
312+
</li>
313+
</ol>
314+
</section>
315+
````````````````````````````````
316+
317+
Footnotes may begin with or have a 'w' or a '_' in their reference label.
318+
319+
```````````````````````````````` example footnotes autolink
320+
This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a `w`.[^widely-cited]
321+
322+
It has another footnote that contains many different characters (the autolinker was also breaking on `_`).[^sphinx-of-black-quartz_judge-my-vow-0123456789]
323+
324+
[^sphinx-of-black-quartz_judge-my-vow-0123456789]: so does this.
325+
326+
[^widely-cited]: this renders properly.
327+
.
328+
<p>This is some text. Sometimes the autolinker splits up text into multiple nodes, hoping it will find a hyperlink, so this text has a footnote whose reference label begins with a <code>w</code>.<sup class="footnote-ref"><a href="#fn1" id="fnref1">1</a></sup></p>
329+
<p>It has another footnote that contains many different characters (the autolinker was also breaking on <code>_</code>).<sup class="footnote-ref"><a href="#fn2" id="fnref2">2</a></sup></p>
330+
<section class="footnotes">
331+
<ol>
332+
<li id="fn1">
333+
<p>this renders properly. <a href="#fnref1" class="footnote-backref">↩</a></p>
334+
</li>
335+
<li id="fn2">
336+
<p>so does this. <a href="#fnref2" class="footnote-backref">↩</a></p>
337+
</li>
338+
</ol>
339+
</section>
340+
````````````````````````````````
341+
342+
Footnotes interacting with strikethrough should not lead to a use-after-free
343+
344+
```````````````````````````````` example footnotes autolink strikethrough table
345+
|Tot.....[^_a_]|
346+
.
347+
<p>|Tot.....[^_a_]|</p>
348+
````````````````````````````````
349+
350+
Footnotes interacting with strikethrough should not lead to a use-after-free pt2
351+
352+
```````````````````````````````` example footnotes autolink strikethrough table
353+
[^~~is~~1]
354+
.
355+
<p>[^~~is~~1]</p>
356+
````````````````````````````````
357+
358+
Adjacent unused footnotes definitions should not lead to a use after free
359+
360+
```````````````````````````````` example footnotes autolink strikethrough table
361+
Hello world
362+
363+
364+
[^a]:[^b]:
365+
.
366+
<p>Hello world</p>
367+
````````````````````````````````

0 commit comments

Comments
 (0)