Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/kdl-overlay.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,14 @@ jobs:
- name: Install tree-sitter CLI
run: npm install --global tree-sitter-cli

- name: Regenerate parser from latest grammar
- name: Regenerate generated parser artifacts from grammar source
working-directory: tools/tree-sitter-arco-kdl
run: |
npm install
tree-sitter generate
# parser.c / grammar.json / node-types.json are normal generated outputs.
# src/tree_sitter/parser.h is treated as a vendored, pinned runtime
# header and should only change when the Tree-sitter toolchain moves.
npx tree-sitter generate

- name: Run filtered prek hook
run: uvx --from prek==0.3.6 prek run --all-files --hook-stage manual
Expand Down
34 changes: 33 additions & 1 deletion tools/tree-sitter-arco-kdl/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,35 @@ Use `examples/highlight_demo.kdl` as the visual fixture when tuning colors.
If your editor supports capture inspection (e.g. Neovim `:Inspect`), open the
fixture and verify these captures line-by-line before taking screenshots.

## Source of truth

Authored files:

- `grammar.js`: overlay grammar source of truth
- `src/scanner.c`: thin Arco wrapper for external tokens
- `src/vendor/tree_sitter_kdl_external_scanner.inc`: vendored upstream KDL scanner
- `queries/*.scm`: editor queries
- `test/corpus/arco_math.txt`: parser regression corpus

Generated files:

- `src/parser.c`
- `src/grammar.json`
- `src/node-types.json`
- `src/tree_sitter/parser.h` (written by `tree-sitter generate`, but treated as a vendored Tree-sitter runtime header pinned to the current toolchain)

When `grammar.js` changes, regenerate the parser artifacts with:

```sh
npm install
npx tree-sitter generate
```

Do not hand-edit `src/parser.c`. It is generated code.
Do not hand-edit `src/tree_sitter/parser.h` either. Treat it as a vendored,
mostly frozen header that only changes when we intentionally bump the
Tree-sitter CLI/runtime version.

## Installation

### Neovim
Expand Down Expand Up @@ -146,7 +175,10 @@ instead of generic KDL highlighting.

## Files

- `grammar.js`: KDL overlay grammar.
- `grammar.js`: KDL overlay grammar, source of truth.
- `src/scanner.c`: thin Arco-specific external scanner shim.
- `src/vendor/tree_sitter_kdl_external_scanner.inc`: vendored upstream KDL scanner implementation.
- `src/parser.c`: generated parser output.
- `queries/injections.scm`: marks `arco_math_text` for language injection.
- `examples/highlight_demo.kdl`: semantic highlight fixture for theme tuning.
- `test/corpus/arco_math.txt`: corpus examples for algebra-body parsing.
Expand Down
6 changes: 3 additions & 3 deletions tools/tree-sitter-arco-kdl/examples/highlight_demo.kdl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// Predicates/keywords should share one color
set gen alias=g
set time alias=t {
1
2
3
member 1
member 2
member 3
}

data generators from="data/generators.csv" {
Expand Down
120 changes: 67 additions & 53 deletions tools/tree-sitter-arco-kdl/grammar.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
const kdl = require("tree-sitter-kdl/grammar");

const nodeShape = ($, nameRule, childrenRule) =>
/**
 * Names of the nodes whose `{ }` body is always parsed as algebra text.
 *
 * The list is spread into the `choice(...)` that forms the `name` field of
 * `arco_pure_math_node`. It is frozen so that this shared, module-level list
 * cannot be mutated by accident after the module has loaded.
 */
const PURE_MATH_NODE_NAMES = Object.freeze([
  "expression",
  "minimize",
  "maximize",
  "expr",
  "filter",
  "if",
  "lower",
  "upper",
]);

const nodeShape = ($, { nameRule, childrenRule, fieldRule = $.node_field }) =>
seq(
alias(optional(seq("/-", repeat($._node_space))), $.node_comment),
optional($.type),
nameRule,
repeat(seq(repeat1($._node_space), $.node_field)),
repeat(seq(repeat1($._node_space), fieldRule)),
optional(
seq(
repeat($._node_space),
Expand All @@ -17,6 +28,19 @@ const nodeShape = ($, nameRule, childrenRule) =>
$._node_terminator,
);

/**
 * Builds the brace-delimited children rule for a node whose body is math text.
 *
 * Shape, in order:
 *   1. an optional `/-` marker (aliased to `node_children_comment`) followed
 *      by any number of node spaces,
 *   2. a literal `{`,
 *   3. either `mathRule` exposed under the `math` field, or a run of
 *      linespace (an empty body),
 *   4. a literal `}`.
 *
 * The whole sequence is wrapped in `prec(2, ...)`.
 *
 * @param $ - grammar rule accessor passed in by the rule callback.
 * @param mathRule - rule that matches the math text inside the braces.
 * @returns the composed children rule.
 */
const mathChildren = ($, mathRule) => {
  const commentedOutPrefix = optional(
    seq(alias("/-", $.node_children_comment), repeat($._node_space)),
  );
  const mathOrBlankBody = choice(
    field("math", mathRule),
    seq(repeat($._linespace)),
  );

  return prec(2, seq(commentedOutPrefix, "{", mathOrBlankBody, "}"));
};

module.exports = grammar(kdl, {
name: "arco_kdl",

Expand All @@ -27,6 +51,22 @@ module.exports = grammar(kdl, {
_node_terminator: ($, previous) =>
choice(previous, $._implicit_terminator),

string: ($, previous) =>
choice(previous, $._multiline_string),

_multiline_string: ($) =>
seq(
'"""',
optional($._newline),
repeat(
choice(
alias(token.immediate(prec(1, /[^"]+/)), $.string_fragment),
alias(token.immediate('"'), $.string_fragment),
),
),
'"""',
),

value: ($) =>
seq(
optional($.type),
Expand All @@ -36,83 +76,57 @@ module.exports = grammar(kdl, {
bare_identifier: ($) => $._bare_identifier,

node: ($) =>
choice($.arco_pure_math_node, $.arco_constraint_node, $.kdl_node),
choice(
$.arco_pure_math_node,
$.arco_constraint_node,
$.kdl_node,
),

kdl_node: ($) => prec(1, nodeShape($, $.identifier, $.node_children)),
kdl_node: ($) =>
prec(
1,
nodeShape($, {
nameRule: $.identifier,
childrenRule: $.node_children,
}),
),

// Nodes whose { } body is always algebra text.
arco_pure_math_node: ($) =>
prec(
2,
nodeShape(
$,
field(
"name",
choice(
"expression",
"minimize",
"maximize",
"expr",
"filter",
"if",
"lower",
"upper",
),
),
$.arco_pure_math_children,
),
nodeShape($, {
nameRule: field("name", choice(...PURE_MATH_NODE_NAMES)),
childrenRule: $.arco_pure_math_children,
}),
),

// Constraint nodes can have either KDL children or a math body.
arco_constraint_node: ($) =>
prec(
2,
nodeShape(
$,
field("name", "constraint"),
choice($.arco_constraint_math_children, $.node_children),
),
nodeShape($, {
nameRule: field("name", "constraint"),
childrenRule: choice($.arco_constraint_math_children, $.node_children),
}),
),

// Math body for nodes whose braces are always algebra text.
arco_pure_math_children: ($) =>
prec(
2,
seq(
optional(
seq(alias("/-", $.node_children_comment), repeat($._node_space)),
),
"{",
choice(field("math", $.arco_math_text), seq(repeat($._linespace))),
"}",
),
),
mathChildren($, $.arco_math_text),

// Constraint math body remains stricter so child-node bodies keep parsing
// as KDL instead of being swallowed as free-form math text.
arco_constraint_math_children: ($) =>
prec(
2,
seq(
optional(
seq(alias("/-", $.node_children_comment), repeat($._node_space)),
),
"{",
choice(
field("math", $.arco_constraint_math_text),
seq(repeat($._linespace)),
),
"}",
),
),
mathChildren($, $.arco_constraint_math_text),

// Single opaque token for free-form algebra text in the pure-math nodes:
// expression/minimize/maximize/expr/filter/if/lower/upper.
arco_math_text: (_) => token(prec(10, /[^{}"']+/)),
arco_math_text: (_) => token(prec(10, /[^{}]+/)),

// Constraint math must include an operator or bracket so bare KDL child
// nodes like `if { ... }` still parse through node_children.
arco_constraint_math_text: (_) =>
token(prec(10, /[^{}"']*[<>=!+\-*\/\[\]][^{}"']*/)),
token(prec(10, /[^{}]*[<>=!+\-*\/\[\]][^{}]*/)),
},
});
80 changes: 75 additions & 5 deletions tools/tree-sitter-arco-kdl/src/grammar.json
Original file line number Diff line number Diff line change
Expand Up @@ -630,12 +630,21 @@
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_raw_string"
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_raw_string"
},
{
"type": "SYMBOL",
"name": "_escaped_string"
}
]
},
{
"type": "SYMBOL",
"name": "_escaped_string"
"name": "_multiline_string"
}
]
},
Expand Down Expand Up @@ -1214,6 +1223,67 @@
}
]
},
"_multiline_string": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\"\"\""
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_newline"
},
{
"type": "BLANK"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "ALIAS",
"content": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "PREC",
"value": 1,
"content": {
"type": "PATTERN",
"value": "[^\"]+"
}
}
},
"named": true,
"value": "string_fragment"
},
{
"type": "ALIAS",
"content": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "\""
}
},
"named": true,
"value": "string_fragment"
}
]
}
},
{
"type": "STRING",
"value": "\"\"\""
}
]
},
"bare_identifier": {
"type": "SYMBOL",
"name": "_bare_identifier"
Expand Down Expand Up @@ -1774,7 +1844,7 @@
"value": 10,
"content": {
"type": "PATTERN",
"value": "[^{}\"']+"
"value": "[^{}]+"
}
}
},
Expand All @@ -1785,7 +1855,7 @@
"value": 10,
"content": {
"type": "PATTERN",
"value": "[^{}\"']*[<>=!+\\-*\\/\\[\\]][^{}\"']*"
"value": "[^{}]*[<>=!+\\-*\\/\\[\\]][^{}]*"
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion tools/tree-sitter-arco-kdl/src/node-types.json
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@
"named": true,
"fields": {},
"children": {
"multiple": false,
"multiple": true,
"required": false,
"types": [
{
Expand Down Expand Up @@ -573,6 +573,10 @@
"type": "\"",
"named": false
},
{
"type": "\"\"\"",
"named": false
},
{
"type": "#",
"named": false
Expand Down
Loading
Loading