Skip to content

Commit

Permalink
antlr4rust & g4 gen
Browse files Browse the repository at this point in the history
Signed-off-by: lyc8503 <[email protected]>
  • Loading branch information
lyc8503 committed Mar 26, 2024
1 parent 351158c commit dcf3573
Show file tree
Hide file tree
Showing 12 changed files with 266 additions and 62 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ Cargo.lock
# Added by cargo

/target

src/gen/*
!src/gen/mod.rs
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
    "rust-analyzer.linkedProjects": [
        "./Cargo.toml"
    ]
}
16 changes: 16 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// {
// "version": "2.0.0",
// "configurations": [
// {
// "name": "Debug ANTLR4 grammar",
// "type": "antlr-debug",
// "request": "launch",
// "input": "input.txt",
// "grammar": "grammars/Example.g4",
// "actionFile": "grammars/exampleActions.js",
// "startRule": "start",
// "printParseTree": true,
// "visualParseTree": true
// }
// ]
// }
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
pest = "2.7.8"
pest_derive = "2.7.8"
antlr-rust = "0.3.0-beta"
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
11 changes: 0 additions & 11 deletions docs/proposal.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,6 @@ lightsaber:
var a = 1;
let b = 1;
c = 1;


123 => int
"123" => string
true => bool




d = "123" & 123

```

##### Code block
Expand Down
15 changes: 15 additions & 0 deletions gen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Regenerate the ANTLR grammars and run the demo:
#   1. run the g4 generator against lightsaber.yaml (emits src/gen/ls*.g4)
#   2. run ANTLR (the antlr4rust snapshot jar) to produce the Rust lexer/parser
#   3. run the lightsaber binary, which uses the generated crate sources
set -e

cargo run --bin g4gen
cd src/gen
# -f: on a fresh checkout src/gen contains only mod.rs (the rest is
# gitignored), so an unguarded `rm ls*.rs` would abort the script via `set -e`.
rm -f ls*.rs
# NOTE(review): the jar is expected to live in src/gen — confirm, since that
# directory is gitignored.
java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -visitor -Dlanguage=Rust lsLexer.g4
java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -visitor -Dlanguage=Rust lsParser.g4
rm -f ls*.interp ls*.tokens
cd ../..
cargo run --bin lightsaber
4 changes: 4 additions & 0 deletions lightsaber.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
grammar:
block: brace # brace, begin-end
func_def: fn # func, fn, def
var_decl: let # let, var, empty
182 changes: 182 additions & 0 deletions src/bin/g4gen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
use serde_yaml;
use std::fs::File;
use std::io::{Read, Write};

/// Writes the static ANTLR lexer grammar to `src/gen/lsLexer.g4`.
///
/// Every keyword alternative that `gen_parser_g4` can emit must have a token
/// rule here; in particular `VAR` is required because the `var_decl: var`
/// config option generates `varDecl: VAR varDef SEMICOLON;` (previously the
/// token was missing, so ANTLR would reject that parser grammar).
///
/// # Panics
/// Panics if `src/gen` cannot be created or the grammar file cannot be
/// written.
fn gen_lexer_g4() {
    let lexer = r##"lexer grammar lsLexer;
LET: 'let';
VAR: 'var';
BEGIN: 'begin';
END: 'end';
FUNC: 'func';
FN: 'fn';
DEF: 'def';
CONST: 'const';
INT: 'int';
VOID: 'void';
IF: 'if';
ELSE: 'else';
WHILE: 'while';
BREAK: 'break';
CONTINUE: 'continue';
RETURN: 'return';
PLUS: '+';
MINUS: '-';
MUL: '*';
DIV: '/';
MOD: '%';
ASSIGN: '=';
EQ: '==';
NEQ: '!=';
LT: '<';
GT: '>';
LE: '<=';
GE: '>=';
NOT: '!';
AND: '&&';
OR: '||';
L_PAREN: '(';
R_PAREN: ')';
L_BRACE: '{';
R_BRACE: '}';
L_BRACKT: '[';
R_BRACKT: ']';
COMMA: ',';
SEMICOLON: ';';
IDENT: [a-zA-Z_] [a-zA-Z0-9_]*;
INTEGER_CONST: ('0x' | '0X') [0-9a-fA-F]+
| '0' [0-7]*
| [1-9] [0-9]*;
WS: [ \r\n\t]+ -> skip;
LINE_COMMENT: '//' ~[\r\n]* -> skip;
MULTILINE_COMMENT: '/*' .*? '*/' -> skip;"##;

    // src/gen is gitignored generated output; make sure it exists so a fresh
    // checkout can run the generator.
    std::fs::create_dir_all("src/gen").expect("Failed to create src/gen");
    std::fs::write("src/gen/lsLexer.g4", lexer).expect("Failed to write to file");
}

fn gen_parser_g4(config: &serde_yaml::Value) {
let var_decl = config["var_decl"].as_str().unwrap_or("var_decl missing");
let var_decl = match var_decl {
"let" => "varDecl: LET varDef SEMICOLON;",
"var" => "varDecl: VAR varDef SEMICOLON;",
"empty" => "",
_ => panic!("Invalid value for var_decl"),
};

let block = config["block"].as_str().unwrap_or("block missing");
let block = match block {
"begin-end" => "block: BEGIN (blockItem)* END;",
"brace" => "block: L_BRACE (blockItem)* R_BRACE;",
_ => panic!("Invalid value for block"),
};

let func_def = config["func_def"].as_str().unwrap_or("func_def missing");
let func_def = match func_def {
"func" => "funcDef: FUNC IDENT L_PAREN (funcFParams)? R_PAREN block;",
"fn" => "funcDef: FN IDENT L_PAREN (funcFParams)? R_PAREN block;",
"def" => "funcDef: DEF IDENT L_PAREN (funcFParams)? R_PAREN block;",
_ => panic!("Invalid value for func_def"),
};

let parser_template = r##"parser grammar lsParser;
options {
tokenVocab = lsLexer;
}
program: compUnit;
compUnit: (funcDef | decl)+ EOF;
decl: varDecl;
bType: INT;
#var_decl#
varDef: (IDENT (L_BRACKT constExp R_BRACKT)*)
| (IDENT (L_BRACKT constExp R_BRACKT)* ASSIGN initVal);
initVal: exp | (L_BRACE (initVal (COMMA initVal)*)? R_BRACE);
#func_def#
funcType: VOID | INT;
funcFParams: funcFParam (COMMA funcFParam)*;
funcFParam:
bType IDENT (L_BRACKT R_BRACKT (L_BRACKT exp R_BRACKT)*)?;
#block#
blockItem: decl | stmt;
stmt:
(lVal ASSIGN exp SEMICOLON)
| (exp? SEMICOLON)
| block
| (IF L_PAREN cond R_PAREN stmt (ELSE stmt)?)
| (WHILE L_PAREN cond R_PAREN stmt)
| (BREAK SEMICOLON)
| (CONTINUE SEMICOLON)
| (RETURN exp? SEMICOLON);
exp:
L_PAREN exp R_PAREN
| lVal
| number
| IDENT L_PAREN funcRParams? R_PAREN
| unaryOp exp
| exp (MUL | DIV | MOD) exp
| exp (PLUS | MINUS) exp;
cond:
exp
| cond (LT | GT | LE | GE) cond
| cond (EQ | NEQ) cond
| cond AND cond
| cond OR cond;
lVal: IDENT (L_BRACKT exp R_BRACKT)*;
number: INTEGER_CONST;
unaryOp: PLUS | MINUS | NOT;
funcRParams: param (COMMA param)*;
param: exp;
constExp: exp;"##;

let parser = parser_template
.replace("#var_decl#", &var_decl)
.replace("#block#", &block)
.replace("#func_def#", &func_def);

let mut file = File::create("src/gen/lsParser.g4").expect("Failed to create file");
file.write_all(parser.as_bytes())
.expect("Failed to write to file");
}

/// Entry point of the `g4gen` binary: reads `lightsaber.yaml` from the
/// current directory, extracts its top-level `grammar` section, and generates
/// both ANTLR grammar files under `src/gen`.
///
/// # Panics
/// Panics when the config file is missing or unreadable, is not valid YAML,
/// or has no top-level `grammar` key.
fn main() {
    // fs::read_to_string replaces the manual File::open + read_to_string pair.
    let contents = std::fs::read_to_string("lightsaber.yaml").expect("Failed to open file");

    let yaml: serde_yaml::Value = serde_yaml::from_str(&contents).expect("Failed to parse YAML");

    let grammar_yaml: &serde_yaml::Value =
        yaml.get("grammar").expect("no grammar part in config file");

    println!("{:?}", grammar_yaml);

    gen_lexer_g4();
    gen_parser_g4(grammar_yaml);
}
4 changes: 4 additions & 0 deletions src/gen/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// ANTLR-generated parser modules. Everything in src/gen except this mod.rs is
// produced by gen.sh (g4gen + the ANTLR jar) and is gitignored, so these
// declarations are the only hand-maintained part of the module.
pub mod lslexer;
pub mod lsparser;
pub mod lsparserlistener;
pub mod lsparservisitor;
11 changes: 0 additions & 11 deletions src/ini.pest

This file was deleted.

67 changes: 29 additions & 38 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,49 +1,40 @@
use pest::Parser;
use pest_derive::Parser;

#[derive(Parser)]
#[grammar = "ini.pest"]
pub struct INIParser;

// pub mod ini;
// use ini::INIParser;

use std::collections::HashMap;
mod gen;
use std::fs;

fn main() {
let unparsed_file = fs::read_to_string("config.ini").expect("cannot read file");

let file = INIParser::parse(Rule::file, &unparsed_file)
.expect("unsuccessful parse") // unwrap the parse result
.next()
.unwrap(); // get and unwrap the `file` rule; never fails
use antlr_rust::token_factory::CommonTokenFactory;
use antlr_rust::tree::ParseTreeListener;
use antlr_rust::InputStream;
use antlr_rust::{common_token_stream::CommonTokenStream, tree::ParseTree};
use gen::lslexer::lsLexer;
use gen::lsparser::lsParser;

let mut properties: HashMap<&str, HashMap<&str, &str>> = HashMap::new();
use crate::gen::lsparser::ruleNames;
use crate::gen::{
lsparser::{lsParserContext, lsParserContextType},
lsparserlistener::lsParserListener,
};

let mut current_section_name = "";
fn main() {
let src_file = fs::read_to_string("test.ls").expect("cannot read file");

for line in file.into_inner() {
match line.as_rule() {
Rule::section => {
let mut inner_rules = line.into_inner(); // { name }
current_section_name = inner_rules.next().unwrap().as_str();
}
Rule::property => {
let mut inner_rules = line.into_inner(); // { name ~ "=" ~ value }
let tf = CommonTokenFactory::default();
let mut _lexer = lsLexer::new_with_token_factory(InputStream::new(src_file.as_str()), &tf);
let token_source = CommonTokenStream::new(_lexer);

let name: &str = inner_rules.next().unwrap().as_str();
let value: &str = inner_rules.next().unwrap().as_str();
let mut parser = lsParser::new(token_source);

// Insert an empty inner hash map if the outer hash map hasn't
// seen this section name before.
let section = properties.entry(current_section_name).or_default();
section.insert(name, value);
}
Rule::EOI => (),
_ => unreachable!(),
struct Listener {}
impl<'input> ParseTreeListener<'input, lsParserContextType> for Listener {
fn enter_every_rule(&mut self, ctx: &dyn lsParserContext<'input>) {
println!(
"rule entered {}",
ruleNames.get(ctx.get_rule_index()).unwrap_or(&"error")
)
}
}
impl<'input> lsParserListener<'input> for Listener {}

println!("{:#?}", properties);
parser.add_parse_listener(Box::new(Listener {}));
let result = parser.program();
println!("{}", result.unwrap().to_string_tree(&*parser));
}
4 changes: 4 additions & 0 deletions test.ls
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn main() {
let a = 1;
return a + b + 1;
}

0 comments on commit dcf3573

Please sign in to comment.