Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

g4 generator & antlr4rust #4

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,11 @@ Cargo.lock

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb


# Added by cargo

/target

src/gen/*
!src/gen/mod.rs
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"rust-analyzer.linkedProjects": [
"./Cargo.toml",
"./Cargo.toml"
]
}
16 changes: 16 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// {
// "version": "2.0.0",
// "configurations": [
// {
// "name": "Debug ANTLR4 grammar",
// "type": "antlr-debug",
// "request": "launch",
// "input": "input.txt",
// "grammar": "grammars/Example.g4",
// "actionFile": "grammars/exampleActions.js",
// "startRule": "start",
// "printParseTree": true,
// "visualParseTree": true
// }
// ]
// }
11 changes: 11 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "lightsaber"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
antlr-rust = "0.3.0-beta"
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
10 changes: 8 additions & 2 deletions docs/proposal.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ let b = 1;
c = 1;
```

##### Var declaration
##### Code block

```yaml
lightsaber:
ident:
block:
style: ident # Choose ident, block, begin-end here
```

Expand All @@ -62,4 +62,10 @@ if (flag) {
if flag begin
print "Hello"
end
```

```yaml



```
15 changes: 15 additions & 0 deletions gen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Regenerates the ANTLR grammars and the Rust parser, then runs the lightsaber binary.
#
# Pipeline:
#   1. Given lightsaber.yaml, run the g4 generator.
#   2. Run antlr4 (antlr4rust.jar) with java to generate parser.rs.
#   3. parser.rs + lib.rs = a crate that users can use.
set -e

# Step 1: write src/gen/lsLexer.g4 and src/gen/lsParser.g4 from lightsaber.yaml.
cargo run --bin g4gen
cd src/gen
# -f: on a fresh checkout there are no generated files yet; a plain `rm` would
# fail on the unmatched glob and `set -e` would abort the whole script.
rm -f ls*.rs
# Step 2: generate the Rust lexer/parser (plus listener and visitor) from the grammars.
java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -visitor -Dlanguage=Rust lsLexer.g4
java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -visitor -Dlanguage=Rust lsParser.g4
# Remove the ANTLR by-products that the Rust build does not need.
rm -f ls*.interp ls*.tokens
cd ../..
# Step 3: build and run the binary that uses the generated parser.
cargo run --bin lightsaber
4 changes: 4 additions & 0 deletions lightsaber.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
grammar:
block: brace # brace, begin-end
func_def: fn # func, fn, def
var_decl: let # let, var, empty
182 changes: 182 additions & 0 deletions src/bin/g4gen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use serde_yaml;
use std::fs::File;
use std::io::{Read, Write};

/// Writes the lexer grammar to `src/gen/lsLexer.g4`.
///
/// The lexer is not configurable: it always defines every keyword that any
/// parser configuration may reference (`LET`/`VAR`, `BEGIN`/`END`,
/// `FUNC`/`FN`/`DEF`), so the parser grammar can pick the ones it needs.
///
/// # Panics
///
/// Panics if the output file cannot be created or written.
fn gen_lexer_g4() {
    // `VAR` is required by the `var_decl: var` parser variant, which emits
    // `varDecl: VAR varDef SEMICOLON;`.
    let lexer = r##"lexer grammar lsLexer;

LET: 'let';
VAR: 'var';
BEGIN: 'begin';
END: 'end';

FUNC: 'func';
FN: 'fn';
DEF: 'def';

CONST: 'const';
INT: 'int';
VOID: 'void';
IF: 'if';
ELSE: 'else';
WHILE: 'while';
BREAK: 'break';
CONTINUE: 'continue';
RETURN: 'return';
PLUS: '+';
MINUS: '-';
MUL: '*';
DIV: '/';
MOD: '%';
ASSIGN: '=';
EQ: '==';
NEQ: '!=';
LT: '<';
GT: '>';
LE: '<=';
GE: '>=';
NOT: '!';
AND: '&&';
OR: '||';
L_PAREN: '(';
R_PAREN: ')';
L_BRACE: '{';
R_BRACE: '}';
L_BRACKT: '[';
R_BRACKT: ']';
COMMA: ',';
SEMICOLON: ';';

IDENT: [a-zA-Z_] [a-zA-Z0-9_]*;
INTEGER_CONST: ('0x' | '0X') [0-9a-fA-F]+
| '0' [0-7]*
| [1-9] [0-9]*;
WS: [ \r\n\t]+ -> skip;
LINE_COMMENT: '//' ~[\r\n]* -> skip;
MULTILINE_COMMENT: '/*' .*? '*/' -> skip;"##;

    let mut file = File::create("src/gen/lsLexer.g4").expect("Failed to create file");
    file.write_all(lexer.as_bytes())
        .expect("Failed to write to file");
}

/// Writes the parser grammar to `src/gen/lsParser.g4`, specialised by `config`.
///
/// `config` is the `grammar` section of the configuration file. Three string
/// keys are read from it and each selects one rule that is substituted into
/// the grammar template:
///
/// * `var_decl` — `let`, `var` or `empty` (declaration without a keyword);
/// * `block` — `brace` or `begin-end`;
/// * `func_def` — `func`, `fn` or `def`.
///
/// Token names used by the substituted rules (`LET`, `VAR`, `BEGIN`, ...) must
/// be provided by the lexer grammar named in `tokenVocab`.
///
/// # Panics
///
/// Panics if any of the three keys is missing, is not a string, or holds an
/// unsupported value, and if the output file cannot be created or written.
fn gen_parser_g4(config: &serde_yaml::Value) {
    // A missing or non-string key is mapped to a sentinel that matches no
    // supported value, so it ends up in the panic arm with a telling message.
    let var_decl = match config["var_decl"].as_str().unwrap_or("var_decl missing") {
        "let" => "varDecl: LET varDef SEMICOLON;",
        "var" => "varDecl: VAR varDef SEMICOLON;",
        // The rule must still be defined because `decl: varDecl;` references
        // it; "empty" only means that no introducing keyword is required.
        "empty" => "varDecl: varDef SEMICOLON;",
        other => panic!("Invalid value for var_decl: {}", other),
    };

    let block = match config["block"].as_str().unwrap_or("block missing") {
        "begin-end" => "block: BEGIN (blockItem)* END;",
        "brace" => "block: L_BRACE (blockItem)* R_BRACE;",
        other => panic!("Invalid value for block: {}", other),
    };

    let func_def = match config["func_def"].as_str().unwrap_or("func_def missing") {
        "func" => "funcDef: FUNC IDENT L_PAREN (funcFParams)? R_PAREN block;",
        "fn" => "funcDef: FN IDENT L_PAREN (funcFParams)? R_PAREN block;",
        "def" => "funcDef: DEF IDENT L_PAREN (funcFParams)? R_PAREN block;",
        other => panic!("Invalid value for func_def: {}", other),
    };

    // `#var_decl#`, `#func_def#` and `#block#` are placeholders replaced below.
    let parser_template = r##"parser grammar lsParser;

options {
tokenVocab = lsLexer;
}

program: compUnit;
compUnit: (funcDef | decl)+ EOF;

decl: varDecl;

bType: INT;

#var_decl#

varDef: (IDENT (L_BRACKT constExp R_BRACKT)*)
| (IDENT (L_BRACKT constExp R_BRACKT)* ASSIGN initVal);

initVal: exp | (L_BRACE (initVal (COMMA initVal)*)? R_BRACE);

#func_def#

funcType: VOID | INT;

funcFParams: funcFParam (COMMA funcFParam)*;

funcFParam:
bType IDENT (L_BRACKT R_BRACKT (L_BRACKT exp R_BRACKT)*)?;

#block#

blockItem: decl | stmt;

stmt:
(lVal ASSIGN exp SEMICOLON)
| (exp? SEMICOLON)
| block
| (IF L_PAREN cond R_PAREN stmt (ELSE stmt)?)
| (WHILE L_PAREN cond R_PAREN stmt)
| (BREAK SEMICOLON)
| (CONTINUE SEMICOLON)
| (RETURN exp? SEMICOLON);

exp:
L_PAREN exp R_PAREN
| lVal
| number
| IDENT L_PAREN funcRParams? R_PAREN
| unaryOp exp
| exp (MUL | DIV | MOD) exp
| exp (PLUS | MINUS) exp;

cond:
exp
| cond (LT | GT | LE | GE) cond
| cond (EQ | NEQ) cond
| cond AND cond
| cond OR cond;

lVal: IDENT (L_BRACKT exp R_BRACKT)*;

number: INTEGER_CONST;

unaryOp: PLUS | MINUS | NOT;

funcRParams: param (COMMA param)*;

param: exp;

constExp: exp;"##;

    let parser = parser_template
        .replace("#var_decl#", var_decl)
        .replace("#block#", block)
        .replace("#func_def#", func_def);

    let mut file = File::create("src/gen/lsParser.g4").expect("Failed to create file");
    file.write_all(parser.as_bytes())
        .expect("Failed to write to file");
}

/// Entry point of the grammar generator.
///
/// Reads `lightsaber.yaml`, extracts its `grammar` section, prints that
/// section for inspection, and then writes the lexer and parser grammars.
///
/// # Panics
///
/// Panics if the configuration file cannot be opened, read or parsed, or if it
/// has no `grammar` key.
fn main() {
    // Load the whole configuration file into memory.
    let config_text = {
        let mut handle = File::open("lightsaber.yaml").expect("Failed to open file");
        let mut buffer = String::new();
        handle
            .read_to_string(&mut buffer)
            .expect("Failed to read file");
        buffer
    };

    let document: serde_yaml::Value =
        serde_yaml::from_str(&config_text).expect("Failed to parse YAML");

    // Only the `grammar` section drives generation.
    let grammar_section: &serde_yaml::Value = document
        .get("grammar")
        .expect("no grammar part in config file");

    println!("{:?}", grammar_section);

    gen_lexer_g4();
    gen_parser_g4(grammar_section);
}
4 changes: 4 additions & 0 deletions src/gen/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Modules expected to be produced in `src/gen` by the ANTLR run in `gen.sh`
// (lexer, parser, listener and visitor). Only this file is tracked in git;
// NOTE(review): the module names presumably match the generated `ls*.rs` file
// names — confirm after running `gen.sh`.
pub mod lslexer;
pub mod lsparser;
pub mod lsparserlistener;
pub mod lsparservisitor;
40 changes: 40 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
mod gen;
use std::fs;

use antlr_rust::token_factory::CommonTokenFactory;
use antlr_rust::tree::ParseTreeListener;
use antlr_rust::InputStream;
use antlr_rust::{common_token_stream::CommonTokenStream, tree::ParseTree};
use gen::lslexer::lsLexer;
use gen::lsparser::lsParser;

use crate::gen::lsparser::ruleNames;
use crate::gen::{
lsparser::{lsParserContext, lsParserContextType},
lsparserlistener::lsParserListener,
};

fn main() {
let src_file = fs::read_to_string("test.ls").expect("cannot read file");

let tf = CommonTokenFactory::default();
let mut _lexer = lsLexer::new_with_token_factory(InputStream::new(src_file.as_str()), &tf);
let token_source = CommonTokenStream::new(_lexer);

let mut parser = lsParser::new(token_source);

struct Listener {}
impl<'input> ParseTreeListener<'input, lsParserContextType> for Listener {
fn enter_every_rule(&mut self, ctx: &dyn lsParserContext<'input>) {
println!(
"rule entered {}",
ruleNames.get(ctx.get_rule_index()).unwrap_or(&"error")
)
}
}
impl<'input> lsParserListener<'input> for Listener {}

parser.add_parse_listener(Box::new(Listener {}));
let result = parser.program();
println!("{}", result.unwrap().to_string_tree(&*parser));
}
4 changes: 4 additions & 0 deletions test.ls
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn main() {
let a = 1;
return a + b + 1;
}