-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLexer.mll
180 lines (164 loc) · 5.55 KB
/
Lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
{
open Parser
open Printf
let int_of_hex digit1 digit2 = int_of_string (sprintf "0x%c%c" digit1 digit2)
let incrementNumLines (lexbuf : Lexing.lexbuf) =
let lcp : Lexing.position = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <-
{
pos_fname = lcp.pos_fname;
pos_lnum = lcp.pos_lnum + 1;
pos_bol = lcp.pos_bol;
pos_cnum = lcp.pos_cnum
}
let get_location_msg (lexbuf : Lexing.lexbuf) =
sprintf "File \"%s\", line %d:\n" lexbuf.lex_curr_p.pos_fname lexbuf.lex_curr_p.pos_lnum
let char_list_in_string = ref []
let add_in_list c = char_list_in_string := c :: !char_list_in_string
let string_of_rev_char_list revCharList =
revCharList |> List.rev_map (String.make 1) |> String.concat ""
let multiline_string_msg = "Strings must close in the same line they start."
let handle_error lexbuf infoMsg =
Error.(handle_error lexing_error_msg (get_location_msg lexbuf ^ infoMsg))
let handle_error_fatal lexbuf infoMsg =
Error.(handle_error_fatal lexing_error_msg (get_location_msg lexbuf ^ infoMsg))
}
let digit = ['0'-'9']
let letter = ['a'-'z' 'A'-'Z']
(** [digit_hex] are the letters/digits that can be written in hex escape character. *)
let digit_hex = ['a'-'f' 'A'-'F' '0'-'9']
let char_hex = "\\x" digit_hex digit_hex
let white = [' ' '\t' '\r']
let char_common = [^ '\\' '\'' '"']
let char_escape = '\\' ['n' 't' 'r' '0' '\\' '\'' '"'] | char_hex
(** [char_not_escape] are the characters that, if written next to a front-slash,
the front-slash is considered redundant. For example '\a', '\1' and '\@'. *)
let char_not_escape = '\\' [^ 'n' 't' 'r' '0' '\\' '\'' '"' 'x']
let char_const = char_common | char_escape
(** [char_string] are the characters that can exist inside a string. *)
let char_string = char_common # ['"' '\n' '\\'] | char_escape
let identifier = letter (letter | digit | '_')*
let integer = digit+
let character = '\'' char_const '\''
let string = '"' (char_string | char_not_escape)* '"'
rule lexer = parse
| "and" { T_and }
| "char" { T_char }
| "div" { T_div }
| "do" { T_do }
| "else" { T_else }
| "fun" { T_fun }
| "if" { T_if }
| "int" { T_int }
| "mod" { T_mod }
| "not" { T_not }
| "nothing" { T_nothing }
| "or" { T_or }
| "ref" { T_ref }
| "return" { T_return }
| "then" { T_then }
| "var" { T_var }
| "while" { T_while }
| '+' { T_plus }
| '-' { T_minus }
| '*' { T_mul }
| '=' { T_equal }
| '#' { T_not_equal }
| '<' { T_less }
| '>' { T_greater }
| "<=" { T_less_eq }
| ">=" { T_greater_eq }
| '(' { T_left_par }
| ')' { T_right_par }
| '[' { T_left_sqr }
| ']' { T_right_sqr }
| '{' { T_left_br }
| '}' { T_right_br }
| ',' { T_comma }
| ';' { T_semicolon }
| ':' { T_colon}
| "<-" { T_assignment }
| "$$" { multi_comments lexbuf }
| '$' { comment lexbuf }
| identifier { T_identifier (Lexing.lexeme lexbuf) }
| integer { T_integer (int_of_string (Lexing.lexeme lexbuf)) }
| '\n' { incrementNumLines lexbuf; lexer lexbuf }
| white+ { lexer lexbuf }
| '\'' { characters lexbuf }
| '"' { char_list_in_string := []; strings lexbuf }
| '"' char_string* eof { handle_error_fatal lexbuf multiline_string_msg }
| eof { T_eof }
| _ as chr {
handle_error lexbuf (sprintf "Unknown character '%c'.\n" chr);
lexer lexbuf
}
and strings = parse
| "\n" {
handle_error lexbuf multiline_string_msg;
add_in_list '\n';
incrementNumLines lexbuf;
strings lexbuf
}
| "'" {
handle_error lexbuf
"Single quotes are not permitted in strings (maybe you forgot a \'\\\'?).";
add_in_list '\'';
strings lexbuf
}
| "\\x" (digit_hex as d1) (digit_hex as d2) {
add_in_list (Char.chr (int_of_hex d1 d2));
strings lexbuf
}
| char_escape as c {
add_in_list (
match c with
| "\\n" -> '\n'
| "\\t" -> '\t'
| "\\r" -> '\r'
| "\\0" -> '\000'
| "\\\\" -> '\\'
| "\\\'" -> '\''
| "\\\"" -> '\"'
| _ -> assert false
(* hex case is caught in previous rule *));
strings lexbuf
}
| char_not_escape as c { add_in_list (String.get c 1); strings lexbuf }
| char_common as c { add_in_list c; strings lexbuf }
| '"' { T_string (string_of_rev_char_list !char_list_in_string) }
and characters = parse
| (char_common as c) '\'' { T_chr c }
| "\\x" (digit_hex as d1) (digit_hex as d2) '\'' { T_chr (Char.chr (int_of_hex d1 d2)) }
| (char_escape as c) '\'' {
T_chr (
match c with
| "\\n" -> '\n'
| "\\t" -> '\t'
| "\\r" -> '\r'
| "\\0" -> '\000'
| "\\\\" -> '\\'
| "\\\'" -> '\''
| "\\\"" -> '\"'
| _ -> assert false
(* hex case is caught in previous rule *))
}
| (char_not_escape as c) '\'' { T_chr (String.get c 1) }
| '\'' {
handle_error lexbuf "Single quotes contain no character on the insides.";
T_chr '\000'
}
| char_common* '\'' {
handle_error_fatal lexbuf
"Multiple characters found inside single quotes(\'\')."
}
| _ { handle_error_fatal lexbuf "Single quotes opened and didn't close." }
and multi_comments = parse
| '\n' { incrementNumLines lexbuf; multi_comments lexbuf }
| "$$" { lexer lexbuf }
| eof { handle_error lexbuf "Unclosed multi-line comment."; T_eof }
| _ { multi_comments lexbuf }
and comment = parse
| '\n' { incrementNumLines lexbuf; lexer lexbuf }
| eof { T_eof }
| _ { comment lexbuf }
{}