Skip to content

allow extensions to use ! and ^ as special characters #30

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ mingw:
cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\
$(MAKE) && $(MAKE) install

man/man3/cmark-gfm.3: src/cmark-gfm.h | $(CMARK)
man/man3/cmark-gfm.3: src/include/cmark-gfm.h | $(CMARK)
python man/make_man_page.py $< > $@ \

archive:
Expand Down
82 changes: 45 additions & 37 deletions src/inlines.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ static int scan_delims(cmark_parser *parser, subject *subj, unsigned char c,
before_char = 10;
} else {
before_char_pos = subj->pos - 1;

// walk back to the beginning of the UTF_8 sequence:
while ((peek_at(subj, before_char_pos) >> 6 == 2 || parser->skip_chars[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
before_char_pos -= 1;
Expand All @@ -437,7 +437,7 @@ static int scan_delims(cmark_parser *parser, subject *subj, unsigned char c,
after_char = 10;
} else {
after_char_pos = subj->pos;

while (parser->skip_chars[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
after_char_pos += 1;
}
Expand Down Expand Up @@ -944,7 +944,7 @@ static int link_label(subject *subj, cmark_chunk *raw_label, bool parse_attribut
bufsize_t startpos = subj->pos;
int length = 0;
unsigned char c;

// If we are parsing attribute label, advance past ^
if (parse_attribute_label) {
if (peek_char(subj) == '^') {
Expand Down Expand Up @@ -1130,14 +1130,14 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject
}
}
}

// If we can't match direct link, look for [link label] that matches in refmap
raw_label = cmark_chunk_literal("");
found_label = link_label(subj, &raw_label, false);
if (found_label) {
ref = (cmark_reference *)cmark_map_lookup(subj->refmap, &raw_label);
cmark_chunk_free(subj->mem, &raw_label);

if (ref && ref->is_attributes_reference) {
isAttributesNode = true;
attributes = chunk_clone(subj->mem, &ref->attributes);
Expand All @@ -1149,7 +1149,7 @@ static cmark_node *handle_close_bracket_attribute(cmark_parser *parser, subject
pop_bracket(subj);
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
}

inl = make_simple(subj->mem, CMARK_NODE_ATTRIBUTE);
inl->as.attribute.attributes = attributes;
inl->start_line = inl->end_line = subj->line;
Expand Down Expand Up @@ -1516,6 +1516,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
cmark_chunk contents;
unsigned char c;
bufsize_t startpos, endpos;
bufsize_t initpos = subj->pos;
c = peek_char(subj);
if (c == 0) {
return 0;
Expand Down Expand Up @@ -1563,43 +1564,50 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
new_inl = handle_close_bracket(parser, subj);
break;
case '!':
advance(subj);
if (peek_char(subj) == '[' && peek_char_n(subj, 1) != '^') {
// specifically check for '![' not followed by '^'
if (peek_char_n(subj, 1) == '[' && peek_char_n(subj, 2) != '^') {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment saying that this case is now effectively "![" not followed by "^".

advance(subj);
advance(subj);
new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
push_bracket(subj, IMAGE, new_inl);
} else {
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
}
break;
case '^':
advance(subj);
// TODO: Support a name between ^ and [
if (peek_char(subj) == '[') {
// specifically check for '^['
if (peek_char_n(subj, 1) == '[') {
advance(subj);
advance(subj);
new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("^["));
push_bracket(subj, ATTRIBUTE, new_inl);
} else {
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("^"));
}
break;
default:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happened to the default case? Did you just push it outside the switch?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I had to move the try_extensions call outside of this switch so that it could be tried with characters that are part of the built-in grammar (specifically ^ and [). That's also why I modified the ! and ^ branches to peek for their syntax instead of eagerly advancing the pointer.

new_inl = try_extensions(parser, parent, c, subj);
if (new_inl != NULL)
break;
}

endpos = subject_find_special_char(parser, subj, options);
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
startpos = subj->pos;
subj->pos = endpos;
if (subj->pos == initpos) {
if (!new_inl)
new_inl = try_extensions(parser, parent, c, subj);

// if we're at a newline, strip trailing spaces.
if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0 && S_is_line_end_char(peek_char(subj))) {
cmark_chunk_rtrim(&contents);
}
if (!new_inl) {
endpos = subject_find_special_char(parser, subj, options);
if (endpos == subj->pos) {
advance(subj);
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
} else {
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
startpos = subj->pos;
subj->pos = endpos;

// if we're at a newline, strip trailing spaces.
if ((options & CMARK_OPT_PRESERVE_WHITESPACE) == 0 && S_is_line_end_char(peek_char(subj))) {
cmark_chunk_rtrim(&contents);
}

new_inl = make_str(subj, startpos, endpos - 1, contents);
new_inl = make_str(subj, startpos, endpos - 1, contents);
}
}
}

if (new_inl != NULL) {
cmark_node_append_child(parent, new_inl);
}
Expand Down Expand Up @@ -1708,27 +1716,27 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
bufsize_t cmark_parse_reference_attributes_inline(cmark_mem *mem, cmark_chunk *input,
cmark_map *refmap) {
subject subj;

cmark_chunk lab;
cmark_chunk attributes;

bufsize_t matchlen = 0;
unsigned char c;

subject_from_buf(mem, -1, 0, &subj, input, NULL);

// parse attribute label:
if (!link_label(&subj, &lab, true) || lab.len == 0) {
return 0;
}

// Colon:
if (peek_char(&subj) == ':') {
advance(&subj);
} else {
return 0;
}

// parse attributes
spnl(&subj);
// read until next newline
Expand All @@ -1737,19 +1745,19 @@ bufsize_t cmark_parse_reference_attributes_inline(cmark_mem *mem, cmark_chunk *i
advance(&subj);
matchlen++;
}

if (matchlen == 0) {
return 0;
}

attributes = cmark_chunk_dup(&subj.input, startpos, matchlen);

// parse final spaces and newline:
skip_spaces(&subj);
if (!skip_line_end(&subj)) {
return 0;
}

// insert reference into refmap
cmark_reference_create_attributes(refmap, &lab, &attributes);
return subj.pos;
Expand Down