From b29f2f16d76b2b1a9243d24f47097da7e8d93945 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Mon, 4 Mar 2024 16:07:26 +0800 Subject: [PATCH 1/3] Update proposal.md Signed-off-by: lyc8503 --- docs/proposal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/proposal.md b/docs/proposal.md index 0230dd8..2535f17 100644 --- a/docs/proposal.md +++ b/docs/proposal.md @@ -40,11 +40,11 @@ let b = 1; c = 1; ``` -##### Var declaration +##### Code block ```yaml lightsaber: - ident: + block: style: ident # Choose ident, block, begin-end here ``` From 351158cab41e03555504d3ac425040d78fcde2ef Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Tue, 26 Mar 2024 00:10:33 +0800 Subject: [PATCH 2/3] pest impl Signed-off-by: lyc8503 --- .gitignore | 5 +++++ Cargo.toml | 10 ++++++++++ docs/proposal.md | 17 +++++++++++++++++ src/ini.pest | 11 +++++++++++ src/main.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+) create mode 100644 Cargo.toml create mode 100644 src/ini.pest create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore index 6985cf1..196e176 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,8 @@ Cargo.lock # MSVC Windows builds of rustc generate these, which store debugging information *.pdb + + +# Added by cargo + +/target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a0182db --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "lightsaber" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pest = "2.7.8" +pest_derive = "2.7.8" diff --git a/docs/proposal.md b/docs/proposal.md index 2535f17..0359abb 100644 --- a/docs/proposal.md +++ b/docs/proposal.md @@ -38,6 +38,17 @@ lightsaber: var a = 1; let b = 1; c = 1; + + +123 => int +"123" => string +true => bool + + + + +d = "123" & 123 + ``` ##### Code block @@ -62,4 +73,10 @@ if (flag) { if flag begin print "Hello" end +``` + +```yaml + + + ``` diff --git a/src/ini.pest b/src/ini.pest new file mode 100644 index 0000000..3f6cfd1 --- /dev/null +++ b/src/ini.pest @@ -0,0 +1,11 @@ +WHITESPACE = _{ " " } +char = { ASCII_ALPHANUMERIC | "." | "_" | "/" } +name = @{ char+ } +value = @{ char* } +section = { "{" ~ name ~ "}" } +property = { name ~ "=" ~ value } +file = { + SOI ~ + ((section | property)? ~ NEWLINE)* ~ + EOI +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..67e1c2d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,49 @@ +use pest::Parser; +use pest_derive::Parser; + +#[derive(Parser)] +#[grammar = "ini.pest"] +pub struct INIParser; + +// pub mod ini; +// use ini::INIParser; + +use std::collections::HashMap; +use std::fs; + +fn main() { + let unparsed_file = fs::read_to_string("config.ini").expect("cannot read file"); + + let file = INIParser::parse(Rule::file, &unparsed_file) + .expect("unsuccessful parse") // unwrap the parse result + .next() + .unwrap(); // get and unwrap the `file` rule; never fails + + let mut properties: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); + + let mut current_section_name = ""; + + for line in file.into_inner() { + match line.as_rule() { + Rule::section => { + let mut inner_rules = line.into_inner(); // { name } + current_section_name = inner_rules.next().unwrap().as_str(); + } + Rule::property => { + let mut inner_rules = line.into_inner(); // { name ~ "=" ~ value } + + let name: &str = inner_rules.next().unwrap().as_str(); + let value: &str = inner_rules.next().unwrap().as_str(); + + // Insert an empty inner hash map if the outer hash map hasn't + // seen this section name before. + let section = properties.entry(current_section_name).or_default(); + section.insert(name, value); + } + Rule::EOI => (), + _ => unreachable!(), + } + } + + println!("{:#?}", properties); +} From dcf3573012ff068c0055788b688f0f1c973983f2 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Tue, 26 Mar 2024 23:18:39 +0800 Subject: [PATCH 3/3] antlr4rust & g4 gen Signed-off-by: lyc8503 --- .gitignore | 3 + .vscode/settings.json | 6 ++ .vscode/tasks.json | 16 ++++ Cargo.toml | 5 +- docs/proposal.md | 11 --- gen.sh | 15 ++++ lightsaber.yaml | 4 + src/bin/g4gen.rs | 182 ++++++++++++++++++++++++++++++++++++++++++ src/gen/mod.rs | 4 + src/ini.pest | 11 --- src/main.rs | 67 +++++++--------- test.ls | 4 + 12 files changed, 266 insertions(+), 62 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 .vscode/tasks.json create mode 100755 gen.sh create mode 100644 lightsaber.yaml create mode 100644 src/bin/g4gen.rs create mode 100644 src/gen/mod.rs delete mode 100644 src/ini.pest create mode 100644 test.ls diff --git a/.gitignore b/.gitignore index 196e176..e930e54 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,6 @@ Cargo.lock # Added by cargo /target + +src/gen/* +!src/gen/mod.rs diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e47d9bb --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "rust-analyzer.linkedProjects": [ + "./Cargo.toml", + "./Cargo.toml" + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..73cd325 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,16 @@ +// { +// "version": "2.0.0", +// "configurations": [ +// { +// "name": "Debug ANTLR4 grammar", +// "type": "antlr-debug", +// "request": "launch", +// "input": "input.txt", +// "grammar": "grammars/Example.g4", +// "actionFile": "grammars/exampleActions.js", +// "startRule": "start", +// "printParseTree": true, +// "visualParseTree": true +// } +// ] +// } \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index a0182db..42e0568 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -pest = "2.7.8" -pest_derive = "2.7.8" +antlr-rust = "0.3.0-beta" +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" diff --git a/docs/proposal.md b/docs/proposal.md index 0359abb..ece8d8e 100644 --- a/docs/proposal.md +++ b/docs/proposal.md @@ -38,17 +38,6 @@ lightsaber: var a = 1; let b = 1; c = 1; - - -123 => int -"123" => string -true => bool - - - - -d = "123" & 123 - ``` ##### Code block diff --git a/gen.sh b/gen.sh new file mode 100755 index 0000000..65b6a2b --- /dev/null +++ b/gen.sh @@ -0,0 +1,15 @@ +set -e + +cargo run --bin g4gen +cd src/gen +rm ls*.rs +java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -visitor -Dlanguage=Rust lsLexer.g4 +java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -visitor -Dlanguage=Rust lsParser.g4 +rm ls*.interp ls*.tokens +cd ../.. +cargo run --bin lightsaber + + +# 1. 给出 lightsaber.yaml 运行 g4 generator +# 2. java 运行 antlr4 (antlr4rust.jar) 生成 parser.rs +# 3. parser.rs + lib.rs = 用户可以使用的 crate diff --git a/lightsaber.yaml b/lightsaber.yaml new file mode 100644 index 0000000..ceeef89 --- /dev/null +++ b/lightsaber.yaml @@ -0,0 +1,4 @@ +grammar: + block: brace # brace, begin-end + func_def: fn # func, fn, def + var_decl: let # let, var, empty diff --git a/src/bin/g4gen.rs b/src/bin/g4gen.rs new file mode 100644 index 0000000..95fda99 --- /dev/null +++ b/src/bin/g4gen.rs @@ -0,0 +1,182 @@ +use serde_yaml; +use std::fs::File; +use std::io::{Read, Write}; + +fn gen_lexer_g4() { + let lexer = r##"lexer grammar lsLexer; + +LET: 'let'; +BEGIN: 'begin'; +END: 'end'; + +FUNC: 'func'; +FN: 'fn'; +DEF: 'def'; + +CONST: 'const'; +INT: 'int'; +VOID: 'void'; +IF: 'if'; +ELSE: 'else'; +WHILE: 'while'; +BREAK: 'break'; +CONTINUE: 'continue'; +RETURN: 'return'; +PLUS: '+'; +MINUS: '-'; +MUL: '*'; +DIV: '/'; +MOD: '%'; +ASSIGN: '='; +EQ: '=='; +NEQ: '!='; +LT: '<'; +GT: '>'; +LE: '<='; +GE: '>='; +NOT: '!'; +AND: '&&'; +OR: '||'; +L_PAREN: '('; +R_PAREN: ')'; +L_BRACE: '{'; +R_BRACE: '}'; +L_BRACKT: '['; +R_BRACKT: ']'; +COMMA: ','; +SEMICOLON: ';'; + +IDENT: [a-zA-Z_] [a-zA-Z0-9_]*; +INTEGER_CONST: ('0x' | '0X') [0-9a-fA-F]+ + | '0' [0-7]* + | [1-9] [0-9]*; +WS: [ \r\n\t]+ -> skip; +LINE_COMMENT: '//' ~[\r\n]* -> skip; +MULTILINE_COMMENT: '/*' .*? '*/' -> skip;"##; + + let mut file = File::create("src/gen/lsLexer.g4").expect("Failed to create file"); + file.write_all(lexer.as_bytes()) + .expect("Failed to write to file"); +} + +fn gen_parser_g4(config: &serde_yaml::Value) { + let var_decl = config["var_decl"].as_str().unwrap_or("var_decl missing"); + let var_decl = match var_decl { + "let" => "varDecl: LET varDef SEMICOLON;", + "var" => "varDecl: VAR varDef SEMICOLON;", + "empty" => "", + _ => panic!("Invalid value for var_decl"), + }; + + let block = config["block"].as_str().unwrap_or("block missing"); + let block = match block { + "begin-end" => "block: BEGIN (blockItem)* END;", + "brace" => "block: L_BRACE (blockItem)* R_BRACE;", + _ => panic!("Invalid value for block"), + }; + + let func_def = config["func_def"].as_str().unwrap_or("func_def missing"); + let func_def = match func_def { + "func" => "funcDef: FUNC IDENT L_PAREN (funcFParams)? R_PAREN block;", + "fn" => "funcDef: FN IDENT L_PAREN (funcFParams)? R_PAREN block;", + "def" => "funcDef: DEF IDENT L_PAREN (funcFParams)? R_PAREN block;", + _ => panic!("Invalid value for func_def"), + }; + + let parser_template = r##"parser grammar lsParser; + +options { + tokenVocab = lsLexer; +} + +program: compUnit; +compUnit: (funcDef | decl)+ EOF; + +decl: varDecl; + +bType: INT; + +#var_decl# + +varDef: (IDENT (L_BRACKT constExp R_BRACKT)*) + | (IDENT (L_BRACKT constExp R_BRACKT)* ASSIGN initVal); + +initVal: exp | (L_BRACE (initVal (COMMA initVal)*)? R_BRACE); + +#func_def# + +funcType: VOID | INT; + +funcFParams: funcFParam (COMMA funcFParam)*; + +funcFParam: + bType IDENT (L_BRACKT R_BRACKT (L_BRACKT exp R_BRACKT)*)?; + +#block# + +blockItem: decl | stmt; + +stmt: + (lVal ASSIGN exp SEMICOLON) + | (exp? SEMICOLON) + | block + | (IF L_PAREN cond R_PAREN stmt (ELSE stmt)?) + | (WHILE L_PAREN cond R_PAREN stmt) + | (BREAK SEMICOLON) + | (CONTINUE SEMICOLON) + | (RETURN exp? SEMICOLON); + +exp: + L_PAREN exp R_PAREN + | lVal + | number + | IDENT L_PAREN funcRParams? R_PAREN + | unaryOp exp + | exp (MUL | DIV | MOD) exp + | exp (PLUS | MINUS) exp; + +cond: + exp + | cond (LT | GT | LE | GE) cond + | cond (EQ | NEQ) cond + | cond AND cond + | cond OR cond; + +lVal: IDENT (L_BRACKT exp R_BRACKT)*; + +number: INTEGER_CONST; + +unaryOp: PLUS | MINUS | NOT; + +funcRParams: param (COMMA param)*; + +param: exp; + +constExp: exp;"##; + + let parser = parser_template + .replace("#var_decl#", &var_decl) + .replace("#block#", &block) + .replace("#func_def#", &func_def); + + let mut file = File::create("src/gen/lsParser.g4").expect("Failed to create file"); + file.write_all(parser.as_bytes()) + .expect("Failed to write to file"); +} + +fn main() { + let mut file = File::open("lightsaber.yaml").expect("Failed to open file"); + let mut contents = String::new(); + file.read_to_string(&mut contents) + .expect("Failed to read file"); + + let yaml: serde_yaml::Value = serde_yaml::from_str(&contents).expect("Failed to parse YAML"); + + let grammar_yaml: &serde_yaml::Value = + yaml.get("grammar").expect("no grammar part in config file"); + + println!("{:?}", grammar_yaml); + + gen_lexer_g4(); + gen_parser_g4(grammar_yaml); +} diff --git a/src/gen/mod.rs b/src/gen/mod.rs new file mode 100644 index 0000000..13d5e39 --- /dev/null +++ b/src/gen/mod.rs @@ -0,0 +1,4 @@ +pub mod lslexer; +pub mod lsparser; +pub mod lsparserlistener; +pub mod lsparservisitor; diff --git a/src/ini.pest b/src/ini.pest deleted file mode 100644 index 3f6cfd1..0000000 --- a/src/ini.pest +++ /dev/null @@ -1,11 +0,0 @@ -WHITESPACE = _{ " " } -char = { ASCII_ALPHANUMERIC | "." | "_" | "/" } -name = @{ char+ } -value = @{ char* } -section = { "{" ~ name ~ "}" } -property = { name ~ "=" ~ value } -file = { - SOI ~ - ((section | property)? ~ NEWLINE)* ~ - EOI -} diff --git a/src/main.rs b/src/main.rs index 67e1c2d..9ee7e24 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,49 +1,40 @@ -use pest::Parser; -use pest_derive::Parser; - -#[derive(Parser)] -#[grammar = "ini.pest"] -pub struct INIParser; - -// pub mod ini; -// use ini::INIParser; - -use std::collections::HashMap; +mod gen; use std::fs; -fn main() { - let unparsed_file = fs::read_to_string("config.ini").expect("cannot read file"); - - let file = INIParser::parse(Rule::file, &unparsed_file) - .expect("unsuccessful parse") // unwrap the parse result - .next() - .unwrap(); // get and unwrap the `file` rule; never fails +use antlr_rust::token_factory::CommonTokenFactory; +use antlr_rust::tree::ParseTreeListener; +use antlr_rust::InputStream; +use antlr_rust::{common_token_stream::CommonTokenStream, tree::ParseTree}; +use gen::lslexer::lsLexer; +use gen::lsparser::lsParser; - let mut properties: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); +use crate::gen::lsparser::ruleNames; +use crate::gen::{ + lsparser::{lsParserContext, lsParserContextType}, + lsparserlistener::lsParserListener, +}; - let mut current_section_name = ""; +fn main() { + let src_file = fs::read_to_string("test.ls").expect("cannot read file"); - for line in file.into_inner() { - match line.as_rule() { - Rule::section => { - let mut inner_rules = line.into_inner(); // { name } - current_section_name = inner_rules.next().unwrap().as_str(); - } - Rule::property => { - let mut inner_rules = line.into_inner(); // { name ~ "=" ~ value } + let tf = CommonTokenFactory::default(); + let mut _lexer = lsLexer::new_with_token_factory(InputStream::new(src_file.as_str()), &tf); + let token_source = CommonTokenStream::new(_lexer); - let name: &str = inner_rules.next().unwrap().as_str(); - let value: &str = inner_rules.next().unwrap().as_str(); + let mut parser = lsParser::new(token_source); - // Insert an empty inner hash map if the outer hash map hasn't - // seen this section name before. - let section = properties.entry(current_section_name).or_default(); - section.insert(name, value); - } - Rule::EOI => (), - _ => unreachable!(), + struct Listener {} + impl<'input> ParseTreeListener<'input, lsParserContextType> for Listener { + fn enter_every_rule(&mut self, ctx: &dyn lsParserContext<'input>) { + println!( + "rule entered {}", + ruleNames.get(ctx.get_rule_index()).unwrap_or(&"error") + ) } } + impl<'input> lsParserListener<'input> for Listener {} - println!("{:#?}", properties); + parser.add_parse_listener(Box::new(Listener {})); + let result = parser.program(); + println!("{}", result.unwrap().to_string_tree(&*parser)); } diff --git a/test.ls b/test.ls new file mode 100644 index 0000000..70dbe72 --- /dev/null +++ b/test.ls @@ -0,0 +1,4 @@ +fn main() { + let a = 1; + return a + b + 1; +}