Skip to content

Latest commit

 

History

History
370 lines (289 loc) · 9.54 KB

Tutorial-parser.md

File metadata and controls

370 lines (289 loc) · 9.54 KB

Writing your parser using YAPB

How to configure a haskell Stack project using YAPB

In the beginning, you initialize a haskell Stack project.

  $ stack new parser
  $ cd parser

To use the function of automatic computation of syntax completion candidates, the project must use YAPB >=0.1.1.

[package.yaml of arith]

executables:
  arith-exe:
    main:                Main.hs
    source-dirs:
    - app/ast         <====== To include ast/Expr.hs
    - app
    ghc-options:
    - -threaded
    - -rtsopts
    - -with-rtsopts=-N
    dependencies:
    - arith
    - yapb >= 0.1.1    <====== Add a dependency on yapb-0.1.1!

Note that stack.yaml of arith needs a change to download yapb-0.1.1 from Hackage as follows.

[stack.yaml in arith]

extra-deps:
  - yapb-0.1.1

How to write and run a parser using YAPB

For your own parser, you have to write Token.hs, Lexer.hs, ast/Expr.hs, Parser.hs, and Main.hs.

  $ ls app/*.hs app/ast/*.hs
  app/Lexer.hs  app/Main.hs  app/Parser.hs  app/Token.hs app/ast/Expr.hs

How to write those files are explained in the followin.

[app/Token.hs]

  module Token(Token(..)) where

  import Prelude hiding(EQ)
  import TokenInterface

  -----------------------------------------------
  -- What you need to write for your own parser!!
  -----------------------------------------------
  data Token =
      END_OF_TOKEN
    | OPEN_PAREN  | CLOSE_PAREN
    | IDENTIFIER  | INTEGER_NUMBER
    | ADD  | SUB  | MUL  | DIV
    | EQ  | SEMICOLON
    deriving (Eq, Show)

  tokenStrList :: [(Token,String)]
  tokenStrList =
    [ (END_OF_TOKEN, "$"),
      (OPEN_PAREN, "("), (CLOSE_PAREN, ")"),
      (IDENTIFIER, "identifier"), (INTEGER_NUMBER, "integer_number"),
      (ADD, "+"), (SUB, "-"), (MUL, "*"), (DIV, "/"),
      (EQ, "="), (SEMICOLON, ";")  
    ]

  -------------------------------------
  -- Just copy the following as it is!!
  -------------------------------------
  findTok tok [] = Nothing
  findTok tok ((tok_,str):list)
    | tok == tok_ = Just str
    | otherwise   = findTok tok list

  findStr str [] = Nothing
  findStr str ((tok,str_):list)
    | str == str_ = Just tok
    | otherwise   = findStr str list
  
  instance TokenInterface Token where
    toToken str   =
      case findStr str tokenStrList of
        Nothing  -> error ("toToken: " ++ str)
        Just tok -> tok
    fromToken tok =
      case findTok tok tokenStrList of
        Nothing  -> error ("fromToken: " ++ show tok)
        Just str -> str

[app/Lexer.hs]

  module Lexer(lexerSpec) where

  import Prelude hiding (EQ)
  import CommonParserUtil
  import Token

  ----------------------------
  -- Utility functions
  ----------------------------
  
  mkFn :: Token -> (String -> Maybe Token)
  mkFn tok = \text -> Just tok

  skip :: String -> Maybe Token
  skip = \text -> Nothing

  ----------------------------
  -- Your lexer specification
  ----------------------------
  
  lexerSpec :: LexerSpec Token
  lexerSpec = LexerSpec
    {
      endOfToken    = END_OF_TOKEN,
      lexerSpecList = 
        [ ("[ \t\n]", skip),
          ("[0-9]+" , mkFn INTEGER_NUMBER),
          ("\\("    , mkFn OPEN_PAREN),
          ("\\)"    , mkFn CLOSE_PAREN),
          ("\\+"    , mkFn ADD),
          ("\\-"    , mkFn SUB),
          ("\\*"    , mkFn MUL),
          ("\\/"    , mkFn DIV),
          ("\\="    , mkFn EQ),
          ("\\;"    , mkFn SEMICOLON),
          ("[a-zA-Z][a-zA-Z0-9]*"    , mkFn IDENTIFIER)
        ]
    } 

[app/ast/Expr.hs]

  module Expr where

  -----------------------------------------------
  -- What you need to write for your own parser!!
  -----------------------------------------------

  data AST =
      ASTSeq  { fromAstSeq  :: [Expr] } -- Expr Sequence: Expr1; ... ; Exprn
    | ASTExpr { fromAstExpr :: Expr   }

  instance Show AST where
    showsPrec p _ = (++) "AST ..."

  toAstSeq :: [Expr] -> AST
  toAstSeq exprs = ASTSeq exprs

  toAstExpr :: Expr -> AST
  toAstExpr expr = ASTExpr expr

  data Expr =
      Lit { fromLit :: Int }
    | Var { fromVar :: String }
    | BinOp { kindFromBinOp :: BinOpKind,
              leftOpFromBinOp :: Expr,
              rightOpFromBinOp :: Expr }
    | Assign { lhsFromAssign :: String,
               rhsFromAssign :: Expr  }

  data BinOpKind = ADD | SUB | MUL | DIV

  ---------------------------------------
  -- Optional: for pretty printing an AST
  ---------------------------------------
  pprintAst :: AST -> String
  pprintAst (ASTSeq exprs) =
    let insSemicolon []         = ""
        insSemicolon [str]      = str
        insSemicolon (str:strs) = str ++ "; " ++ insSemicolon strs
    in insSemicolon (map pprint exprs)
    
  pprintAst (ASTExpr expr) = pprint expr

  pprint :: Expr -> String
  pprint (Lit i) = show i
  pprint (Var v) = v
  pprint (BinOp Expr.ADD left right) =
    "(" ++ pprint left ++ " + " ++ pprint right ++ ")"
  pprint (BinOp Expr.SUB left right) =
    "(" ++ pprint left ++ " - " ++ pprint right ++ ")"
  pprint (BinOp Expr.MUL left right) =
    "(" ++ pprint left ++ " * " ++ pprint right ++ ")"
  pprint (BinOp Expr.DIV left right) =
    "(" ++ pprint left ++ " / " ++ pprint right ++ ")"
  pprint (Assign x expr) =   
    "(" ++ x ++ " = " ++ pprint expr ++ ")"

[app/Parser.hs]

  module Parser where

  import CommonParserUtil
  import Token
  import Expr


  -- | Utility
  rule prodRule action              = (prodRule, action, Nothing  )
  ruleWithPrec prodRule action prec = (prodRule, action, Just prec)

  ----------------------------
  -- Your parser specification
  ----------------------------
  
  parserSpec :: ParserSpec Token AST
  parserSpec = ParserSpec
    {
      startSymbol = "SeqExpr'",
    
      parserSpecList =
      [
        rule "SeqExpr' -> SeqExpr" (\rhs -> get rhs 1),
      
        rule "SeqExpr -> SeqExpr ; AssignExpr"
          (\rhs -> toAstSeq (
            fromAstSeq (get rhs 1) ++ [fromAstExpr (get rhs 3)]) ),
      
        rule "SeqExpr -> AssignExpr" (\rhs -> toAstSeq [fromAstExpr (get rhs 1)]),
      
        rule "AssignExpr -> identifier = AssignExpr"
          (\rhs -> toAstExpr (Assign (getText rhs 1) (fromAstExpr (get rhs 3))) ),
      
        rule "AssignExpr -> AdditiveExpr" (\rhs -> get rhs 1),

        rule "AdditiveExpr -> AdditiveExpr + MultiplicativeExpr"
          (\rhs -> toAstExpr (
            BinOp Expr.ADD (fromAstExpr (get rhs 1)) (fromAstExpr (get rhs 3))) ),

        rule "AdditiveExpr -> AdditiveExpr - MultiplicativeExpr"
          (\rhs -> toAstExpr (
            BinOp Expr.SUB (fromAstExpr (get rhs 1)) (fromAstExpr (get rhs 3))) ),

        rule "AdditiveExpr -> MultiplicativeExpr" (\rhs -> get rhs 1),

        rule "MultiplicativeExpr -> MultiplicativeExpr * PrimaryExpr"
          (\rhs -> toAstExpr (
            BinOp Expr.MUL (fromAstExpr (get rhs 1)) (fromAstExpr (get rhs 3))) ),

        rule "MultiplicativeExpr -> MultiplicativeExpr / PrimaryExpr"
          (\rhs -> toAstExpr (
            BinOp Expr.DIV (fromAstExpr (get rhs 1)) (fromAstExpr (get rhs 3))) ),

        rule "MultiplicativeExpr -> PrimaryExpr" (\rhs -> get rhs 1),
      
        rule "PrimaryExpr -> identifier" (\rhs -> toAstExpr (Var (getText rhs 1)) ),

        rule "PrimaryExpr -> integer_number"
          (\rhs -> toAstExpr (Lit (read (getText rhs 1))) ),

        rule "PrimaryExpr -> ( AssignExpr )" (\rhs -> get rhs 2)
      ],
    
      baseDir = "./",
      actionTblFile = "action_table.txt",  
      gotoTblFile = "goto_table.txt",
      grammarFile = "prod_rules.txt",
      parserSpecFile = "mygrammar.grm",
      genparserexe = "yapb-exe"
    }

[app/Main.hs]

  module Main where

  import CommonParserUtil

  import Lexer
  import Terminal
  import Parser
  import Expr

  import System.IO

  main :: IO ()
  main = do
    fileName <- readline "Enter your file: "
    case fileName of
      "exit" -> return ()
      line -> doProcess line

  doProcess line = do
    text <- readFile line 
    putStrLn "Lexing..."
    terminalList <- lexing lexerSpec text
    putStrLn "Parsing..."
    exprSeqAst <- parsing parserSpec terminalList
    putStrLn "Pretty Printing..."
    putStrLn (pprintAst exprSeqAst)
  
  
  readline msg = do
    putStr msg
    hFlush stdout
    readline'

  readline' = do
    ch <- getChar
    if ch == '\n' then
      return ""
    else
      do line <- readline'
         return (ch:line)

Now you are ready to run the arith parser. For testing, assume you write two examples, oneline.arith and multiline.arith.

[app/example/oneline.arith]

  1 + 2 - 3 * 4 / 5

[app/example/multiline.arith]

  x = 123;
  x = x + 1;
  y = x; 
  y = y - 1 * 2 / 3;
  z = y = x

Run the arith parser by stack.

  $ stack exec parser-exe
  Enter your file: app/example/oneline.arith
  Lexing...
  Parsing...
  done.
  Pretty Printing...
  ((1 + 2) - ((3 * 4) / 5))
  
  $ stack exec parser-exe
  Enter your file: app/example/multiline.arith
  Lexing...
  Parsing...
  done.
  Pretty Printing...
  (x = 123); (x = (x + 1)); (y = x); (y = (y - ((1 * 2) / 3))); (z = (y = x))

When your parser looks not working

This parser builder system leaves a .hash file after an automaton is generated from a grammar. This is useful for not generating the automation repetitedly for the same grammar.

However, the presence of the hash file could mislead the system in case, for example, a given grammar is not LALR(1) because of such as shift/reduce confict, so no automaton is generated, but the hash file is generated.

A simple work around is to just delete the hash file and return your parser (possibly after fixing your grammar to make it LALR(1)). Then the system will generate an automaton.