-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy pathXMLLexer.g4
63 lines (53 loc) · 1.89 KB
/
XMLLexer.g4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
lexer grammar XMLLexer;
// Default "mode": Everything OUTSIDE of a tag
// XML comment: "<!--" up to the first following "-->" (the loop is non-greedy).
// The {true}? semantic predicate always evaluates to true, so it never rejects a match.
// NOTE(review): presumably kept to exercise predicate support in the target runtime — confirm before removing.
COMMENT : '<!--' .*? '-->' {true}? ;
// CDATA section: "<![CDATA[" up to the first following "]]>" (non-greedy).
CDATA : '<![CDATA[' .*? ']]>' ;
/** Consume all remaining "<!" ... ">" markup: DTD stuff, Entity Declarations
* like <!ENTITY ...>, and Notation Declarations <!NOTATION ...>.
* The matched text is discarded (skip), so no DTD token reaches the parser.
* This rule is listed after COMMENT and CDATA, which share the "<!" prefix.
*/
DTD : '<!' .*? '>' -> skip ;
// Named entity reference such as &amp; — an ampersand, a Name, and a semicolon.
EntityRef
    : '&' Name ';'
    ;
// Numeric character reference: decimal (&#10;) or hexadecimal (&#xA;).
// Both forms share the "&#" opener and ";" terminator, so only the digits differ.
CharRef
    : '&#' ( DIGIT+ | 'x' HEXDIGIT+ ) ';'
    ;
// Whitespace between markup: one or more spaces, tabs, or line breaks (LF or CRLF).
// The '+' matters: with a single-character rule, any run of two or more whitespace
// characters was matched by the longer TEXT rule instead, so the token type depended
// on how many whitespace characters happened to be present. With '+', a
// whitespace-only run ties with TEXT in length and SEA_WS wins because it is
// declared first.
SEA_WS : (' ' | '\t' | '\r'? '\n')+ ;
// Start of a tag: emit OPEN and push INSIDE mode for the tag's contents.
OPEN : '<' -> pushMode(INSIDE) ;
// XML declaration opener: "<?xml" followed by one whitespace character (S);
// the rest of the declaration is lexed in INSIDE mode.
XMLDeclOpen : '<?xml' S -> pushMode(INSIDE) ;
// Processing-instruction opener: "<?" followed by a Name. `more` keeps the matched
// text and continues the same token in PROC_INSTR mode, so no token is emitted here.
SPECIAL_OPEN: '<?' Name -> more, pushMode(PROC_INSTR) ;
// Character data: one or more characters, none of which is '<' or '&'.
TEXT : ~[<&]+ ; // match any 16 bit char other than < and &
// ----------------- Everything INSIDE of a tag ---------------------
mode INSIDE;
// End of a tag. The mode is popped from an embedded action rather than the
// `-> popMode` lexer command that SPECIAL_CLOSE and SLASH_CLOSE use.
// NOTE(review): recog.pop_mode() is target-language code (looks like the Rust
// runtime API), which ties this grammar to one target — confirm this is intentional.
CLOSE : '>' {recog.pop_mode();} ;
SPECIAL_CLOSE: '?>' -> popMode ; // close <?xml...?>
// End of a self-closing tag; also leaves INSIDE mode.
SLASH_CLOSE : '/>' -> popMode ;
// A lone '/' (not followed by '>', which SLASH_CLOSE matches as the longer token).
SLASH : '/' ;
// Separator between an attribute name and its value.
EQUALS : '=' ;
// Quoted value in double or single quotes. The body may not contain '<'
// or the quote character that opened it.
STRING
    : '"'  ~["<]* '"'
    | '\'' ~['<]* '\''
    ;
// A name: one name-start character followed by any number of name characters.
Name
    : NameStartChar NameChar*
    ;
// A single whitespace character inside a tag; dropped from the token stream.
S
    : (' ' | '\t' | '\r' | '\n') -> skip
    ;
// One hexadecimal digit: a decimal digit or a letter a-f in either case.
fragment
HEXDIGIT
    : DIGIT
    | [a-fA-F]
    ;
// One decimal digit.
fragment
DIGIT
    : '0'..'9'
    ;
// Any character allowed after the first character of a Name: every name-start
// character, plus digits, '-', '.', U+00B7, U+0300..U+036F and U+203F..U+2040.
fragment
NameChar
    : NameStartChar
    | DIGIT
    | '-'
    | '.'
    | [\u00B7\u0300-\u036F\u203F-\u2040]
    ;
// Any character allowed as the first character of a Name.
// Follows the XML 1.0 NameStartChar production within the Basic Multilingual Plane:
// ':' , '_' , ASCII letters, and the listed Unicode ranges. The underscore and the
// ranges below U+2070 were previously missing, so names such as "_id" or "xml_ns"
// (and names using Latin-1 / Greek / Cyrillic letters) could not be lexed.
// The supplementary range U+10000..U+EFFFF from the specification is not included.
fragment
NameStartChar
    : [:_a-zA-Z]
    | '\u00C0'..'\u00D6'
    | '\u00D8'..'\u00F6'
    | '\u00F8'..'\u02FF'
    | '\u0370'..'\u037D'
    | '\u037F'..'\u1FFF'
    | '\u200C'..'\u200D'
    | '\u2070'..'\u218F'
    | '\u2C00'..'\u2FEF'
    | '\u3001'..'\uD7FF'
    | '\uF900'..'\uFDCF'
    | '\uFDF0'..'\uFFFD'
    ;
// ----------------- Handle <? ... ?> ---------------------
mode PROC_INSTR;
// "?>" ends the processing instruction, emits the PI token, and pops back to the
// mode that was active before SPECIAL_OPEN. Because SPECIAL_OPEN and IGNORE both
// use `more`, the PI token's text accumulates everything from "<?" through "?>".
PI : '?>' -> popMode ; // close <?...?>
// Any other single character: keep it and continue building the same token.
IGNORE : . -> more ;