Merge #27 from justinmk/fixes

justinmk · web-flow · commit d5950211e725 · 2022-10-05T10:11:44.000-04:00
feat: keycodes, fixes: taglink, column_heading, argument
diff --git a/corpus/arguments.txt b/corpus/arguments.txt
@@ -55,6 +55,11 @@ NOT an argument
 ================================================================================
 {foo "{bar}" `{baz}` |{baz| } {}
 
+===============
+3. Netrw *netrw-ref* {{{1
+
+EXTERNAL *netrw-externapp* {{{2
+
 
 --------------------------------------------------------------------------------
 
@@ -70,4 +75,19 @@ NOT an argument
       (taglink
         (word))
       (word)
-      (word))))
+      (word)))
+  (block
+    (line
+      (h1
+        (word)
+        (word)
+        (tag
+          (word))
+        (word))))
+  (block
+    (line
+      (h3
+        (uppercase_name)
+        (tag
+          (word))
+        (ERROR)))))
diff --git a/corpus/codespan.txt b/corpus/codespan.txt
@@ -40,11 +40,9 @@ an error`.
         (word))
       (word)
       (word)
-      (codespan
+      (ERROR
         (word)
-        (MISSING "`")))
-    (line
-      (word)
+        (word))
       (word))))
 
 ================================================================================
diff --git a/corpus/heading3-column_heading.txt b/corpus/heading3-column_heading.txt
@@ -234,6 +234,7 @@ ABC not-h3
 NOT column_heading
 ================================================================================
 tilde ~/foo/bar foo~ bar
+trailing-space-is-NOT-column_heading ~ 
 
 
 --------------------------------------------------------------------------------
@@ -243,5 +244,8 @@ tilde ~/foo/bar foo~ bar
     (line
       (word)
       (word)
+      (word)
+      (word))
+    (line
       (word)
       (word))))
diff --git a/corpus/optionlink.txt b/corpus/optionlink.txt
@@ -99,9 +99,13 @@ x `after_codespan`'s
   (ERROR)
   (block
     (line
-      (codespan
+      (argument
         (word))
-      (ERROR))))
+      (word))
+    (ERROR
+      (word)
+      (codespan
+        (word)))))
 
 ================================================================================
 NOT optionlink 3 (FIXME)
@@ -116,6 +120,8 @@ foo '"\ '.  Notice
   (block
     (line
       (word)
-      (word
-        (ERROR)
-        (MISSING "'")))))
+      (ERROR)
+      (word))
+    (line
+      (taglink
+        (word)))))
diff --git a/corpus/taglink.txt b/corpus/taglink.txt
@@ -17,6 +17,11 @@ taglink in text
 ================================================================================
 Hello |world| hello
 
+|-+|	+[num]	line
+|-e|	-e	Ex
+|-|	-	minus
+
+
 
 --------------------------------------------------------------------------------
 
@@ -26,6 +31,22 @@ Hello |world| hello
       (word)
       (taglink
         (word))
+      (word)))
+  (block
+    (line
+      (taglink
+        (word))
+      (word)
+      (word))
+    (line
+      (taglink
+        (word))
+      (word)
+      (word))
+    (line
+      (taglink
+        (word))
+      (word)
       (word))))
 
 ================================================================================
@@ -78,8 +99,10 @@ Note: ":autocmd" can...
       (word))
     (line
       (word)
-      (word)
-      (word)))
+      (taglink
+        (word))
+      (taglink
+        (word))))
   (block
     (line_li
       (line
diff --git a/corpus/text.txt b/corpus/text.txt
@@ -2,6 +2,8 @@
 simple file with text
 ================================================================================
 Simple text
+<Del> <CSI> <C-S-x> <C-x> <M-^> <x-y>
+CTRL-{char} ctr-z CTRL-SHIFT-\ CTRL-+ CTRL-Break ALT-?
 
 
 --------------------------------------------------------------------------------
@@ -10,7 +12,21 @@ Simple text
   (block
     (line
       (word)
-      (word))))
+      (word))
+    (line
+      (keycode)
+      (keycode)
+      (keycode)
+      (keycode)
+      (keycode)
+      (keycode))
+    (line
+      (keycode)
+      (word)
+      (keycode)
+      (keycode)
+      (keycode)
+      (keycode))))
 
 ================================================================================
 multiline text
diff --git a/grammar.js b/grammar.js
@@ -1,3 +1,9 @@
+// https://tree-sitter.github.io/tree-sitter/creating-parsers#conflicting-tokens
+// - Match Specificity: Tree-sitter will prefer a token that is specified in
+//   the grammar as a String instead of a RegExp.
+// - Rule Order: Tree-sitter will prefer the token that appears earlier in the
+//   grammar.
+//
 // https://tree-sitter.github.io/tree-sitter/creating-parsers
 // - Rules starting with underscore are hidden in the syntax tree.
 
@@ -50,12 +56,11 @@ module.exports = grammar({
         $.taglink,
         $.codespan,
         $.argument,
+        $.keycode,
       ),
 
     // Explicit special cases: these are plaintext, not errors.
     _word_common: () => choice(
-      // "|====|" and "|----|" are (plain text) table borders, not taglinks.
-      /\|(([+=][+=][+=][+=]+)|([+-][+-][+-][+-]+))\|/,
       // NOT optionlink: single "'".
       /[\t ]'[\t ]/,
       // NOT optionlink: contains any non-lowercase char.
@@ -68,10 +73,22 @@ module.exports = grammar({
       /\|\|*/,
       // NOT argument: "{}".
       /\{\}/,
+      /\{\{+[0-9]*/,
       '(',
       /\w+\(/,
     ),
 
+    keycode: () => choice(
+      /<[-a-zA-Z0-9_]+>/,
+      /<[SCMAD]-.>/,
+      /CTRL-./,
+      /CTRL-SHIFT-./,
+      /CTRL-(Break|PageUp|PageDown|Insert|Del)/,
+      /CTRL-\{char\}/,
+      /META-./,
+      /ALT-./,
+    ),
+
     // First part (minus tags) of h3 or column_heading.
     uppercase_name: () => seq(
       token.immediate(_uppercase_word),  // No whitespace before heading.
@@ -114,7 +131,7 @@ module.exports = grammar({
       $.codeblock,
       $._line_noli,
     ),
-    // Listitem line: consumes "*" line and all adjacent non-list lines.
+    // Listitem: consumes a list-marker-prefixed line and all adjacent lines that have no list marker.
     line_li: ($) => prec.right(1, seq(
       optional(token.immediate('<')),  // Treat codeblock-terminating "<" as whitespace.
       _li_token,
@@ -135,12 +152,10 @@ module.exports = grammar({
 
     // "Column heading": plaintext followed by "~".
     // Intended for table column names per `:help help-writing`.
+    // TODO: children should be $.word (plaintext), not $._atom.
     column_heading: ($) => seq(
-      field('name', seq(choice($._atom_noli, $._uppercase_words), repeat($._atom))),  // TODO: should be $.word (plaintext).
-      choice(
-        token.immediate(/~[\t ]*\n/),
-        /~[\t ]*\n/,
-      ),
+      field('name', seq(choice($._atom_noli, $._uppercase_words), repeat($._atom))),
+      /~\n/,
     ),
 
     h1: ($) =>
diff --git a/src/grammar.json b/src/grammar.json
@@ -157,16 +157,16 @@
         {
           "type": "SYMBOL",
           "name": "argument"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "keycode"
         }
       ]
     },
     "_word_common": {
       "type": "CHOICE",
       "members": [
-        {
-          "type": "PATTERN",
-          "value": "\\|(([+=][+=][+=][+=]+)|([+-][+-][+-][+-]+))\\|"
-        },
         {
           "type": "PATTERN",
           "value": "[\\t ]'[\\t ]"
@@ -229,6 +229,10 @@
           "type": "PATTERN",
           "value": "\\{\\}"
         },
+        {
+          "type": "PATTERN",
+          "value": "\\{\\{+[0-9]*"
+        },
         {
           "type": "STRING",
           "value": "("
@@ -239,6 +243,43 @@
         }
       ]
     },
+    "keycode": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "PATTERN",
+          "value": "<[-a-zA-Z0-9_]+>"
+        },
+        {
+          "type": "PATTERN",
+          "value": "<[SCMAD]-.>"
+        },
+        {
+          "type": "PATTERN",
+          "value": "CTRL-."
+        },
+        {
+          "type": "PATTERN",
+          "value": "CTRL-SHIFT-."
+        },
+        {
+          "type": "PATTERN",
+          "value": "CTRL-(Break|PageUp|PageDown|Insert|Del)"
+        },
+        {
+          "type": "PATTERN",
+          "value": "CTRL-\\{char\\}"
+        },
+        {
+          "type": "PATTERN",
+          "value": "META-."
+        },
+        {
+          "type": "PATTERN",
+          "value": "ALT-."
+        }
+      ]
+    },
     "uppercase_name": {
       "type": "SEQ",
       "members": [
@@ -590,20 +631,8 @@
           }
         },
         {
-          "type": "CHOICE",
-          "members": [
-            {
-              "type": "IMMEDIATE_TOKEN",
-              "content": {
-                "type": "PATTERN",
-                "value": "~[\\t ]*\\n"
-              }
-            },
-            {
-              "type": "PATTERN",
-              "value": "~[\\t ]*\\n"
-            }
-          ]
+          "type": "PATTERN",
+          "value": "~\\n"
         }
       ]
     },
diff --git a/src/node-types.json b/src/node-types.json
@@ -81,6 +81,10 @@
             "type": "codespan",
             "named": true
           },
+          {
+            "type": "keycode",
+            "named": true
+          },
           {
             "type": "optionlink",
             "named": true
@@ -121,6 +125,10 @@
           "type": "codespan",
           "named": true
         },
+        {
+          "type": "keycode",
+          "named": true
+        },
         {
           "type": "optionlink",
           "named": true
@@ -160,6 +168,10 @@
           "type": "codespan",
           "named": true
         },
+        {
+          "type": "keycode",
+          "named": true
+        },
         {
           "type": "optionlink",
           "named": true
@@ -224,6 +236,11 @@
       ]
     }
   },
+  {
+    "type": "keycode",
+    "named": true,
+    "fields": {}
+  },
   {
     "type": "line",
     "named": true,
@@ -260,6 +277,10 @@
           "type": "h3",
           "named": true
         },
+        {
+          "type": "keycode",
+          "named": true
+        },
         {
           "type": "optionlink",
           "named": true
diff --git a/src/parser.c b/src/parser.c