Add Kirikiri TJS language grammar

keweishang · May 24, 2019 · 4f40566 · 4f40566
1 parent 4fd23d0
commit 4f40566
Show file tree

Hide file tree

Showing 177 changed files with 80,387 additions and 0 deletions.
diff --git a/kirikiri-tjs/.gitignore b/kirikiri-tjs/.gitignore
@@ -0,0 +1,9 @@
+/TJS*.java
+/TJS*.js
+/TJS*.ts
+/TJS*.cs
+/TJS*.go
+*.tokens
+*.interp
+*.class
+.antlr
diff --git a/kirikiri-tjs/CSharp/TJSBaseLexer.cs b/kirikiri-tjs/CSharp/TJSBaseLexer.cs
@@ -0,0 +1,75 @@
+using Antlr4.Runtime;
+using System.Collections.Generic;
+using static PT.PM.TJSParseTreeUst.TJSParser;
+
+/// <summary>
+/// Abstract lexer base class. Helper methods referenced from the grammar
+/// (such as <see cref="IsRegexPossible"/>) start with an upper-case
+/// character, similar to lexer rules.
+/// </summary>
+public abstract class TJSBaseLexer : Lexer
+{
+    // Most recent token seen on the default channel; null until one is seen.
+    private IToken _lastToken = null;
+
+    public TJSBaseLexer(ICharStream input)
+        : base(input)
+    {
+    }
+
+    /// <summary>
+    /// Fetches the next token from the base lexer and, when that token is on
+    /// the default channel, remembers it so that
+    /// <see cref="IsRegexPossible"/> can inspect it later.
+    /// </summary>
+    /// <returns>
+    /// The token produced by the base lexer, unchanged.
+    /// </returns>
+    public override IToken NextToken()
+    {
+        IToken token = base.NextToken();
+
+        // Tokens on any other channel must not overwrite the remembered one.
+        if (token.Channel != DefaultTokenChannel)
+        {
+            return token;
+        }
+
+        _lastToken = token;
+        return token;
+    }
+
+    /// <summary>
+    /// Decides whether the lexer may match a regex literal, based on the
+    /// type of the last default-channel token.
+    /// </summary>
+    /// <returns>
+    /// <c>true</c> when no default-channel token has been recorded yet, or
+    /// when the recorded token is not one of the types listed in the switch
+    /// below; <c>false</c> otherwise.
+    /// </returns>
+    protected bool IsRegexPossible()
+    {
+        // Start of input: nothing precedes, so a regex literal is allowed.
+        if (_lastToken == null)
+        {
+            return true;
+        }
+
+        bool regexExcluded;
+        switch (_lastToken.Type)
+        {
+            // After any of these token types no regex literal can follow.
+            case Identifier:
+            case NullLiteral:
+            case BooleanLiteral:
+            case This:
+            case CloseBracket:
+            case CloseParen:
+            case OctalIntegerLiteral:
+            case DecimalLiteral:
+            case HexIntegerLiteral:
+            case StringLiteral:
+            case PlusPlus:
+            case MinusMinus:
+                regexExcluded = true;
+                break;
+            default:
+                regexExcluded = false;
+                break;
+        }
+
+        return !regexExcluded;
+    }
+}
diff --git a/kirikiri-tjs/CSharp/TJSBaseParser.cs b/kirikiri-tjs/CSharp/TJSBaseParser.cs
@@ -0,0 +1,65 @@
+using Antlr4.Runtime;
+using static PT.PM.TJSParseTreeUst.TJSParser;
+
+/// <summary>
+/// Abstract parser base class. Helper methods referenced from the grammar
+/// (notOpenBraceAndNotFunction, closeBrace, lineTerminatorAhead) start with
+/// a lower-case character, similar to parser rules.
+/// </summary>
+public abstract class TJSBaseParser : Parser
+{
+    public TJSBaseParser(ITokenStream input)
+        : base(input)
+    {
+    }
+
+    /// <summary>
+    /// Returns true if the next token (lookahead 1) is neither an
+    /// OpenBrace nor a Function token.
+    /// </summary>
+    protected bool notOpenBraceAndNotFunction()
+    {
+        int nextTokenType = _input.Lt(1).Type;
+        return nextTokenType != OpenBrace && nextTokenType != Function;
+    }
+
+    /// <summary>
+    /// Returns true if the next token (lookahead 1) is a CloseBrace token.
+    /// </summary>
+    protected bool closeBrace()
+    {
+        return _input.Lt(1).Type == CloseBrace;
+    }
+
+    /// <summary>
+    /// Returns true if the token directly before the current token is on
+    /// the Hidden channel and is a line terminator, or if that token (or,
+    /// when it is whitespace, the token before the whitespace) is a line
+    /// terminator or a multi line comment that contains a line terminator.
+    /// </summary>
+    /// <returns>
+    /// <c>false</c> when there is no token before the current one, or when
+    /// the preceding token is not on the Hidden channel.
+    /// </returns>
+    protected bool lineTerminatorAhead()
+    {
+        // Get the token ahead of the current index.
+        int currentIndex = CurrentToken.TokenIndex;
+        int possibleIndexEosToken = currentIndex - 1;
+
+        if (possibleIndexEosToken < 0)
+        {
+            // The current token is the first one in the stream (or has no
+            // valid index): nothing precedes it, and Get() would throw.
+            return false;
+        }
+
+        IToken ahead = _input.Get(possibleIndexEosToken);
+
+        if (ahead.Channel != Lexer.Hidden)
+        {
+            // We're only interested in tokens on the Hidden channel.
+            return false;
+        }
+
+        if (ahead.Type == LineTerminator)
+        {
+            // There is definitely a line terminator ahead.
+            return true;
+        }
+
+        if (ahead.Type == WhiteSpaces)
+        {
+            // Get the token ahead of the current whitespaces.
+            possibleIndexEosToken = currentIndex - 2;
+
+            if (possibleIndexEosToken < 0)
+            {
+                // The whitespace is the very first token: nothing before it.
+                return false;
+            }
+
+            ahead = _input.Get(possibleIndexEosToken);
+        }
+
+        // Get the token's text and type.
+        string text = ahead.Text;
+        int type = ahead.Type;
+
+        // Check if the token is, or contains a line terminator.
+        return (type == MultiLineComment && (text.Contains("\r") || text.Contains("\n"))) ||
+                (type == LineTerminator);
+    }
+}
diff --git a/kirikiri-tjs/Go/tjs_base_lexer.go b/kirikiri-tjs/Go/tjs_base_lexer.go
@@ -0,0 +1,39 @@
+package parser
+
+import "github.com/antlr/antlr4/runtime/Go/antlr"
+
+// TJSBaseLexer embeds antlr.BaseLexer and additionally remembers the last
+// token seen on the default channel.
+type TJSBaseLexer struct {
+	*antlr.BaseLexer
+
+	// lastToken is the most recent default-channel token returned by
+	// NextToken; it stays nil until one has been seen. IsRegexPossible
+	// reads it.
+	lastToken        antlr.Token
+}
+
+// NextToken returns the next token produced by the embedded BaseLexer.
+// When that token is on the default channel it is also stored in
+// lastToken so IsRegexPossible can inspect it later.
+func (l *TJSBaseLexer) NextToken() antlr.Token {
+	tok := l.BaseLexer.NextToken()
+	if tok.GetChannel() != antlr.TokenDefaultChannel {
+		// Tokens on other channels do not replace the remembered token.
+		return tok
+	}
+	l.lastToken = tok
+	return tok
+}
+
+// IsRegexPossible reports whether the lexer can match a regex literal,
+// judged by the type of the last default-channel token. It returns true
+// when no such token has been recorded yet.
+func (l *TJSBaseLexer) IsRegexPossible() bool {
+	previous := l.lastToken
+	if previous == nil {
+		// Nothing precedes: a regex literal is possible.
+		return true
+	}
+	switch previous.GetTokenType() {
+	case TJSLexerIdentifier,
+		TJSLexerNullLiteral,
+		TJSLexerBooleanLiteral,
+		TJSLexerThis,
+		TJSLexerCloseBracket,
+		TJSLexerCloseParen,
+		TJSLexerOctalIntegerLiteral,
+		TJSLexerDecimalLiteral,
+		TJSLexerHexIntegerLiteral,
+		TJSLexerStringLiteral,
+		TJSLexerPlusPlus,
+		TJSLexerMinusMinus:
+		// After any of these token types no regex literal can follow.
+		return false
+	}
+	return true
+}
diff --git a/kirikiri-tjs/Go/tjs_base_parser.go b/kirikiri-tjs/Go/tjs_base_parser.go
@@ -0,0 +1,55 @@
+package parser
+
+import (
+	"strings"
+
+	"github.com/antlr/antlr4/runtime/Go/antlr"
+)
+
+// TJSBaseParser embeds antlr.BaseParser; the helper predicates
+// notOpenBraceAndNotFunction, closeBrace and lineTerminatorAhead are
+// defined on it.
+type TJSBaseParser struct {
+	*antlr.BaseParser
+}
+
+// notOpenBraceAndNotFunction reports whether the next token (LT(1)) is
+// neither an OpenBrace nor a Function token.
+func (p *TJSBaseParser) notOpenBraceAndNotFunction() bool {
+	switch p.GetTokenStream().LT(1).GetTokenType() {
+	case TJSParserOpenBrace, TJSParserFunction:
+		return false
+	default:
+		return true
+	}
+}
+
+// closeBrace reports whether the next token (LT(1)) is a CloseBrace token.
+func (p *TJSBaseParser) closeBrace() bool {
+	upcoming := p.GetTokenStream().LT(1)
+	return upcoming.GetTokenType() == TJSParserCloseBrace
+}
+
+// lineTerminatorAhead reports whether the token directly before the
+// current token is on the hidden channel and is a line terminator, or
+// whether that token (or, when it is whitespace, the token before the
+// whitespace) is a line terminator or a multi line comment that contains
+// a line terminator.
+//
+// It returns false when there is no preceding token or when the preceding
+// token is not on the hidden channel.
+func (p *TJSBaseParser) lineTerminatorAhead() bool {
+	// Get the token ahead of the current index.
+	currentIndex := p.GetCurrentToken().GetTokenIndex()
+	possibleIndexEosToken := currentIndex - 1
+	if possibleIndexEosToken < 0 {
+		// The current token is the first one in the stream (or has no
+		// valid index): nothing precedes it, and Get would panic.
+		return false
+	}
+	ahead := p.GetTokenStream().Get(possibleIndexEosToken)
+
+	if ahead.GetChannel() != antlr.LexerHidden {
+		// We're only interested in tokens on the HIDDEN channel, so a
+		// preceding token on any other channel is not a line terminator.
+		return false
+	}
+
+	if ahead.GetTokenType() == TJSParserLineTerminator {
+		// There is definitely a line terminator ahead.
+		return true
+	}
+
+	if ahead.GetTokenType() == TJSParserWhiteSpaces {
+		// Get the token ahead of the current whitespaces.
+		possibleIndexEosToken = currentIndex - 2
+		if possibleIndexEosToken < 0 {
+			// The whitespace is the very first token: nothing before it.
+			return false
+		}
+		ahead = p.GetTokenStream().Get(possibleIndexEosToken)
+	}
+
+	// Get the token's text and type.
+	text := ahead.GetText()
+	_type := ahead.GetTokenType()
+
+	// Check if the token is, or contains a line terminator.
+	return (_type == TJSParserMultiLineComment && (strings.Contains(text, "\r") || strings.Contains(text, "\n"))) ||
+		(_type == TJSParserLineTerminator)
+}
diff --git a/kirikiri-tjs/Java/TJSBaseLexer.java b/kirikiri-tjs/Java/TJSBaseLexer.java
@@ -0,0 +1,71 @@
+import org.antlr.v4.runtime.*;
+
+import java.util.Stack;
+
+/**
+ * All lexer methods that used in grammar (IsStrictMode)
+ * should start with Upper Case Char similar to Lexer rules.
+ */
+public abstract class TJSBaseLexer extends Lexer
+{
+    /**
+     * Stores values of nested modes. By default mode is strict or
+     * defined externally (useStrictDefault)
+     */
+    private Token lastToken = null;
+
+    public TJSBaseLexer(CharStream input) {
+        super(input);
+    }
+    /**
+     * Return the next token from the character stream and records this last
+     * token in case it resides on the default channel. This recorded token
+     * is used to determine when the lexer could possibly match a regex
+     * literal.
+     *
+     * @return the next token from the character stream.
+     */
+    @Override
+    public Token nextToken() {
+        Token next = super.nextToken();
+
+        if (next.getChannel() == Token.DEFAULT_CHANNEL) {
+            // Keep track of the last token on the default channel.
+            this.lastToken = next;
+        }
+
+        return next;
+    }
+
+    /**
+     * Returns {@code true} if the lexer can match a regex literal.
+     */
+    protected boolean IsRegexPossible() {
+
+        if (this.lastToken == null) {
+            // No token has been produced yet: at the start of the input,
+            // no division is possible, so a regex literal _is_ possible.
+            return true;
+        }
+
+        switch (this.lastToken.getType()) {
+            case TJSLexer.Identifier:
+            case TJSLexer.NullLiteral:
+            case TJSLexer.BooleanLiteral:
+            case TJSLexer.This:
+            case TJSLexer.CloseBracket:
+            case TJSLexer.CloseParen:
+            case TJSLexer.OctalIntegerLiteral:
+            case TJSLexer.DecimalLiteral:
+            case TJSLexer.HexIntegerLiteral:
+            case TJSLexer.StringLiteral:
+            case TJSLexer.PlusPlus:
+            case TJSLexer.MinusMinus:
+                // After any of the tokens above, no regex literal can follow.
+                return false;
+            default:
+                // In all other cases, a regex literal _is_ possible.
+                return true;
+        }
+    }
+}
diff --git a/kirikiri-tjs/Java/TJSBaseParser.java b/kirikiri-tjs/Java/TJSBaseParser.java
@@ -0,0 +1,63 @@
+import org.antlr.v4.runtime.*;
+
+/**
+ * Abstract parser base class. Helper methods referenced from the grammar
+ * (notOpenBraceAndNotFunction, closeBrace, lineTerminatorAhead) start with
+ * a lower-case character, similar to parser rules.
+ */
+public abstract class TJSBaseParser extends Parser
+{
+    public TJSBaseParser(TokenStream input) {
+        super(input);
+    }
+
+    /**
+     * @return {@code true} if the next token (lookahead 1) is neither an
+     * {@code OpenBrace} nor a {@code Function} token.
+     */
+    protected boolean notOpenBraceAndNotFunction() {
+        int nextTokenType = _input.LT(1).getType();
+        return nextTokenType != TJSParser.OpenBrace && nextTokenType != TJSParser.Function;
+    }
+
+    /**
+     * @return {@code true} if the next token (lookahead 1) is a
+     * {@code CloseBrace} token.
+     */
+    protected boolean closeBrace() {
+        return _input.LT(1).getType() == TJSParser.CloseBrace;
+    }
+
+    /**
+     * Checks whether the token directly before the current token is on the
+     * {@code HIDDEN} channel and is a line terminator, or whether that token
+     * (or, when it is whitespace, the token before the whitespace) is a line
+     * terminator or a multi line comment that contains a line terminator.
+     *
+     * @return {@code true} in the cases described above; {@code false}
+     * otherwise, including when there is no token before the current one or
+     * when the preceding token is not on the {@code HIDDEN} channel.
+     */
+    protected boolean lineTerminatorAhead() {
+
+        // Get the token ahead of the current index.
+        final int currentIndex = this.getCurrentToken().getTokenIndex();
+        int possibleIndexEosToken = currentIndex - 1;
+
+        if (possibleIndexEosToken < 0) {
+            // The current token is the first one in the stream (or has no
+            // valid index): nothing precedes it, and get() would throw.
+            return false;
+        }
+
+        Token ahead = _input.get(possibleIndexEosToken);
+
+        if (ahead.getChannel() != Lexer.HIDDEN) {
+            // We're only interested in tokens on the HIDDEN channel.
+            return false;
+        }
+
+        if (ahead.getType() == TJSParser.LineTerminator) {
+            // There is definitely a line terminator ahead.
+            return true;
+        }
+
+        if (ahead.getType() == TJSParser.WhiteSpaces) {
+            // Get the token ahead of the current whitespaces.
+            possibleIndexEosToken = currentIndex - 2;
+
+            if (possibleIndexEosToken < 0) {
+                // The whitespace is the very first token: nothing before it.
+                return false;
+            }
+
+            ahead = _input.get(possibleIndexEosToken);
+        }
+
+        // Get the token's text and type.
+        String text = ahead.getText();
+        int type = ahead.getType();
+
+        // Check if the token is, or contains a line terminator.
+        return (type == TJSParser.MultiLineComment && (text.contains("\r") || text.contains("\n"))) ||
+                (type == TJSParser.LineTerminator);
+    }
+}