8
8
*/
9
9
namespace nicoSWD \Rules ;
10
10
11
- /**
12
- * Class Tokenizer
13
- * @package nicoSWD\Rules
14
- */
11
+ use SplPriorityQueue ;
12
+ use stdClass ;
13
+
15
14
final class Tokenizer implements TokenizerInterface
16
15
{
17
- /**
18
- * @var string
19
- */
20
- private $ tokens = '
21
- ~(
22
- (?<And>&&)
23
- | (?<Or>\|\|)
24
- | (?<NotEqualStrict>!==)
25
- | (?<NotEqual><>|!=)
26
- | (?<EqualStrict>===)
27
- | (?<Equal>==)
28
- | (?<In>\bin\b)
29
- | (?<Bool>\b(?:true|false)\b)
30
- | (?<Null>\bnull\b)
31
- | (?<Method>\.\s*[a-zA-Z_]\w*\s*\()
32
- | (?<Function>[a-zA-Z_]\w*\s*\()
33
- | (?<Variable>[a-zA-Z_]\w*)
34
- | (?<Float>-?\d+(?:\.\d+))
35
- | (?<Integer>-?\d+)
36
- | (?<EncapsedString>"[^"]*"| \'[^ \']* \')
37
- | (?<SmallerEqual><=)
38
- | (?<GreaterEqual>>=)
39
- | (?<Smaller><)
40
- | (?<Greater>>)
41
- | (?<OpeningParentheses>\()
42
- | (?<ClosingParentheses>\))
43
- | (?<OpeningArray>\[)
44
- | (?<ClosingArray>\])
45
- | (?<Comma>,)
46
- | (?<Regex>/[^/\*].*/[igm]{0,3})
47
- | (?<Comment>//[^\r\n]*|/\*.*?\*/)
48
- | (?<Newline>\r?\n)
49
- | (?<Space>\s+)
50
- | (?<Unknown>.)
51
- )~xAs ' ;
16
/*
 * Token type names.
 *
 * Each value is used verbatim as the name of a PCRE named group when the
 * tokenizer pattern is assembled, and is appended to the token class
 * namespace prefix in tokenize(). It must therefore be a bare identifier
 * with no surrounding whitespace.
 */
const TOKEN_AND = 'And';
const TOKEN_OR = 'Or';
const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict';
const TOKEN_NOT_EQUAL = 'NotEqual';
const TOKEN_EQUAL_STRICT = 'EqualStrict';
const TOKEN_EQUAL = 'Equal';
const TOKEN_IN = 'In';
const TOKEN_BOOL = 'Bool';
const TOKEN_NULL = 'Null';
const TOKEN_METHOD = 'Method';
const TOKEN_FUNCTION = 'Function';
const TOKEN_VARIABLE = 'Variable';
const TOKEN_FLOAT = 'Float';
const TOKEN_INTEGER = 'Integer';
const TOKEN_ENCAPSED_STRING = 'EncapsedString';
const TOKEN_SMALLER_EQUAL = 'SmallerEqual';
const TOKEN_GREATER_EQUAL = 'GreaterEqual';
const TOKEN_SMALLER = 'Smaller';
const TOKEN_GREATER = 'Greater';
const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses';
const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses';
const TOKEN_OPENING_ARRAY = 'OpeningArray';
const TOKEN_CLOSING_ARRAY = 'ClosingArray';
const TOKEN_COMMA = 'Comma';
const TOKEN_REGEX = 'Regex';
const TOKEN_COMMENT = 'Comment';
const TOKEN_NEWLINE = 'Newline';
const TOKEN_SPACE = 'Space';
const TOKEN_UNKNOWN = 'Unknown';

/**
 * Registered token definitions, keyed by token name.
 *
 * @var \stdClass[]
 */
private $internalTokens = [];

/**
 * Cached combined pattern; empty until it has been assembled.
 *
 * @var string
 */
private $regex = '';

/**
 * Set whenever a token is registered so the cached pattern is rebuilt.
 *
 * @var bool
 */
private $regexRequiresReassambly = false;
51
+
52
/**
 * Registers the built-in token definitions.
 *
 * Each entry is [token name, PCRE fragment, priority]. The fragments are
 * later joined into one alternation that is compiled without the "x"
 * modifier, so they must not contain incidental whitespace: any space
 * would have to be matched literally in the input.
 */
public function __construct()
{
    $defaults = [
        [self::TOKEN_AND, '&&', 145],
        [self::TOKEN_OR, '\|\|', 140],
        [self::TOKEN_NOT_EQUAL_STRICT, '!==', 135],
        [self::TOKEN_NOT_EQUAL, '<>|!=', 130],
        [self::TOKEN_EQUAL_STRICT, '===', 125],
        [self::TOKEN_EQUAL, '==', 120],
        [self::TOKEN_IN, '\bin\b', 115],
        [self::TOKEN_BOOL, '\b(?:true|false)\b', 110],
        [self::TOKEN_NULL, '\bnull\b', 105],
        [self::TOKEN_METHOD, '\.\s*[a-zA-Z_]\w*\s*\(', 100],
        [self::TOKEN_FUNCTION, '[a-zA-Z_]\w*\s*\(', 95],
        [self::TOKEN_FLOAT, '-?\d+(?:\.\d+)', 90],
        [self::TOKEN_INTEGER, '-?\d+', 85],
        [self::TOKEN_ENCAPSED_STRING, '"[^"]*"|\'[^\']*\'', 80],
        [self::TOKEN_SMALLER_EQUAL, '<=', 75],
        [self::TOKEN_GREATER_EQUAL, '>=', 70],
        [self::TOKEN_SMALLER, '<', 65],
        [self::TOKEN_GREATER, '>', 60],
        [self::TOKEN_OPENING_PARENTHESIS, '\(', 55],
        [self::TOKEN_CLOSING_PARENTHESIS, '\)', 50],
        [self::TOKEN_OPENING_ARRAY, '\[', 45],
        [self::TOKEN_CLOSING_ARRAY, '\]', 40],
        [self::TOKEN_COMMA, ',', 35],
        [self::TOKEN_REGEX, '/[^/\*].*/[igm]{0,3}', 30],
        [self::TOKEN_COMMENT, '//[^\r\n]*|/\*.*?\*/', 25],
        [self::TOKEN_NEWLINE, '\r?\n', 20],
        [self::TOKEN_SPACE, '\s+', 15],
        // Plain identifiers rank below keywords and function/method calls,
        // so "in", "true", "null" and "name(" are claimed by those first.
        [self::TOKEN_VARIABLE, '[a-zA-Z_]\w*', 10],
        // Lowest priority: single-character catch-all.
        [self::TOKEN_UNKNOWN, '.', 5],
    ];

    foreach ($defaults as $definition) {
        list($class, $regex, $priority) = $definition;
        $this->registerToken($class, $regex, $priority);
    }
}
52
84
53
85
/**
54
- * @param string $string
55
- * @return Stack
86
+ * {@inheritdoc}
56
87
*/
57
88
public function tokenize ($ string )
58
89
{
59
90
$ stack = new Stack ();
91
+ $ regex = $ this ->getRegex ();
60
92
$ baseNameSpace = __NAMESPACE__ . '\\Tokens \\Token ' ;
61
93
$ offset = 0 ;
62
94
63
- while (preg_match ($ this -> tokens , $ string , $ matches , 0 , $ offset )) {
95
+ while (preg_match ($ regex , $ string , $ matches , 0 , $ offset )) {
64
96
$ token = $ this ->getMatchedToken ($ matches );
65
97
$ className = $ baseNameSpace . $ token ;
66
98
@@ -77,8 +109,22 @@ public function tokenize($string)
77
109
}
78
110
79
111
/**
80
- * @param string[] $matches
81
- * @return string
112
+ * {@inheritdoc}
113
+ */
114
public function registerToken($class, $regex, $priority = null)
{
    // When no priority is given, reuse the one already stored for this
    // token name, or the fallback supplied by getPriority().
    if ($priority === null) {
        $priority = $this->getPriority($class);
    }

    // Registering an existing name replaces its definition.
    $this->internalTokens[$class] = (object) [
        'class'    => $class,
        'regex'    => $regex,
        'priority' => $priority,
    ];

    // The cached combined pattern no longer reflects the token set.
    $this->regexRequiresReassambly = true;
}
124
+
125
+ /**
126
+ * @param array $matches
127
+ * @return int|string
82
128
*/
83
129
private function getMatchedToken (array $ matches )
84
130
{
@@ -90,4 +136,48 @@ private function getMatchedToken(array $matches)
90
136
91
137
return 'Unknown ' ;
92
138
}
93
- }
139
+
140
/**
 * Returns the combined tokenizer pattern, rebuilding it when it has not
 * been assembled yet or a token was registered since the last build.
 *
 * Every registered token becomes one named group, and the groups are
 * joined as alternatives in the order produced by getQueue().
 *
 * @return string
 */
private function getRegex()
{
    if ($this->regex && !$this->regexRequiresReassambly) {
        return $this->regex;
    }

    $groups = [];

    foreach ($this->getQueue() as $token) {
        // No padding inside or around the group: the pattern is compiled
        // without the "x" modifier, so whitespace would be matched
        // literally, and "(?< name >" is not valid named-group syntax.
        $groups[] = '(?<' . $token->class . '>' . $token->regex . ')';
    }

    // "A" anchors each match at the given offset; "s" lets "." match newlines.
    $this->regex = '~(' . implode('|', $groups) . ')~As';
    $this->regexRequiresReassambly = false;

    return $this->regex;
}
158
+
159
/**
 * Builds a priority queue of the registered tokens, highest priority first.
 *
 * SplPriorityQueue gives no ordering guarantee for elements of equal
 * priority, which would make the order of alternatives in the combined
 * pattern unpredictable whenever two tokens share a priority. A
 * decreasing serial number is therefore used as a secondary key, so that
 * tokens with equal priority are dequeued in registration order.
 *
 * @return SplPriorityQueue
 */
private function getQueue()
{
    $queue = new SplPriorityQueue();
    $serial = PHP_INT_MAX;

    foreach ($this->internalTokens as $token) {
        // Array priorities are compared element by element: the token
        // priority decides first, the serial breaks ties.
        $queue->insert($token, [$token->priority, $serial--]);
    }

    return $queue;
}
172
+
173
/**
 * Looks up the priority stored for a token name.
 *
 * @param string $class Token name used as the registry key.
 * @return int The stored priority, or 10 when the token is not registered.
 */
private function getPriority($class)
{
    if (!isset($this->internalTokens[$class])) {
        return 10;
    }

    return $this->internalTokens[$class]->priority;
}
183
+ }
0 commit comments