Skip to content

Commit f5dface

Browse files
author
Nico
committed
Implement custom tokens
1 parent 94029e3 commit f5dface

13 files changed

+255
-45
lines changed

phpunit.xml.dist

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@
1414
</testsuite>
1515
</testsuites>
1616

17+
<filter>
18+
<whitelist processUncoveredFilesFromWhitelist="true">
19+
<directory suffix=".php">src</directory>
20+
</whitelist>
21+
</filter>
22+
1723
<!--<logging>
1824
<log type="coverage-html"
1925
target="tests/log/report"

src/nicoSWD/Rules/Parser.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,16 @@ public function assignVariables(array $variables)
127127
$this->variables = $variables;
128128
}
129129

130+
/**
* Registers a token definition by forwarding all three arguments, unchanged,
* to the tokenizer held by this parser.
*
131+
* @param string $class Token name handed to the tokenizer.
132+
* @param string $regex Pattern fragment handed to the tokenizer.
133+
* @param int|null $priority Optional priority; null (the default) is passed through as-is.
134+
*/
135+
public function registerToken($class, $regex, $priority = null)
136+
{
137+
$this->tokenizer->registerToken($class, $regex, $priority);
138+
}
139+
130140
/**
131141
* @param Tokens\BaseToken $token
132142
* @throws Exceptions\ParserException

src/nicoSWD/Rules/Rule.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,16 @@ public function isValid()
8888
return \true;
8989
}
9090

91+
/**
* Registers a token definition by forwarding all three arguments, unchanged,
* to the parser held by this rule.
*
92+
* @param string $class Token name handed to the parser.
93+
* @param string $regex Pattern fragment handed to the parser.
94+
* @param int|null $priority Optional priority; null (the default) is passed through as-is.
95+
*/
96+
public function registerToken($class, $regex, $priority = null)
97+
{
98+
$this->parser->registerToken($class, $regex, $priority);
99+
}
100+
91101
/**
92102
* @return string
93103
*/

src/nicoSWD/Rules/Tokenizer.php

Lines changed: 135 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,59 +8,91 @@
88
*/
99
namespace nicoSWD\Rules;
1010

11-
/**
12-
* Class Tokenizer
13-
* @package nicoSWD\Rules
14-
*/
11+
use SplPriorityQueue;
12+
use stdClass;
13+
1514
final class Tokenizer implements TokenizerInterface
1615
{
17-
/**
18-
* @var string
19-
*/
20-
private $tokens = '
21-
~(
22-
(?<And>&&)
23-
| (?<Or>\|\|)
24-
| (?<NotEqualStrict>!==)
25-
| (?<NotEqual><>|!=)
26-
| (?<EqualStrict>===)
27-
| (?<Equal>==)
28-
| (?<In>\bin\b)
29-
| (?<Bool>\b(?:true|false)\b)
30-
| (?<Null>\bnull\b)
31-
| (?<Method>\.\s*[a-zA-Z_]\w*\s*\()
32-
| (?<Function>[a-zA-Z_]\w*\s*\()
33-
| (?<Variable>[a-zA-Z_]\w*)
34-
| (?<Float>-?\d+(?:\.\d+))
35-
| (?<Integer>-?\d+)
36-
| (?<EncapsedString>"[^"]*"|\'[^\']*\')
37-
| (?<SmallerEqual><=)
38-
| (?<GreaterEqual>>=)
39-
| (?<Smaller><)
40-
| (?<Greater>>)
41-
| (?<OpeningParentheses>\()
42-
| (?<ClosingParentheses>\))
43-
| (?<OpeningArray>\[)
44-
| (?<ClosingArray>\])
45-
| (?<Comma>,)
46-
| (?<Regex>/[^/\*].*/[igm]{0,3})
47-
| (?<Comment>//[^\r\n]*|/\*.*?\*/)
48-
| (?<Newline>\r?\n)
49-
| (?<Space>\s+)
50-
| (?<Unknown>.)
51-
)~xAs';
16+
// Names of the built-in tokens. The constructor passes each one to
// registerToken() as the token's $class, and getRegex() emits that value as
// the name of the token's capture group, so these strings must remain valid
// regex group names.
// NOTE(review): TOKEN_OPENING_PARENTHESIS / TOKEN_CLOSING_PARENTHESIS use the
// singular in the constant name but the plural "Parentheses" in the value —
// presumably to match existing token class names; confirm before renaming.
const TOKEN_AND = 'And';
17+
const TOKEN_OR = 'Or';
18+
const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict';
19+
const TOKEN_NOT_EQUAL = 'NotEqual';
20+
const TOKEN_EQUAL_STRICT = 'EqualStrict';
21+
const TOKEN_EQUAL = 'Equal';
22+
const TOKEN_IN = 'In';
23+
const TOKEN_BOOL = 'Bool';
24+
const TOKEN_NULL = 'Null';
25+
const TOKEN_METHOD = 'Method';
26+
const TOKEN_FUNCTION = 'Function';
27+
const TOKEN_VARIABLE = 'Variable';
28+
const TOKEN_FLOAT = 'Float';
29+
const TOKEN_INTEGER = 'Integer';
30+
const TOKEN_ENCAPSED_STRING = 'EncapsedString';
31+
const TOKEN_SMALLER_EQUAL = 'SmallerEqual';
32+
const TOKEN_GREATER_EQUAL = 'GreaterEqual';
33+
const TOKEN_SMALLER = 'Smaller';
34+
const TOKEN_GREATER = 'Greater';
35+
const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses';
36+
const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses';
37+
const TOKEN_OPENING_ARRAY = 'OpeningArray';
38+
const TOKEN_CLOSING_ARRAY = 'ClosingArray';
39+
const TOKEN_COMMA = 'Comma';
40+
const TOKEN_REGEX = 'Regex';
41+
const TOKEN_COMMENT = 'Comment';
42+
const TOKEN_NEWLINE = 'Newline';
43+
const TOKEN_SPACE = 'Space';
44+
const TOKEN_UNKNOWN = 'Unknown';
45+
46+
/**
* Registered token definitions, keyed by token name. Each value is an object
* carrying "class", "regex" and "priority" properties (see registerToken()).
*
* @var \stdClass[]
*/
private $internalTokens = [];
47+
48+
/**
* Cached combined pattern produced by getRegex(); empty until first built.
*
* @var string
*/
private $regex = '';
49+
50+
/**
* Set to true by registerToken() and reset to false by getRegex() once the
* cached pattern has been rebuilt.
*
* @var bool
*/
private $regexRequiresReassambly = false;
51+
52+
/**
 * Registers every built-in token.
 *
 * Each row is [token name, pattern fragment, priority]. The rows are
 * registered in the order listed; the priority value (not the row order)
 * decides where a token ends up in the combined pattern.
 */
public function __construct()
{
    $builtIns = [
        [self::TOKEN_AND, '&&', 145],
        [self::TOKEN_OR, '\|\|', 140],
        [self::TOKEN_NOT_EQUAL_STRICT, '!==', 135],
        [self::TOKEN_NOT_EQUAL, '<>|!=', 130],
        [self::TOKEN_EQUAL_STRICT, '===', 125],
        [self::TOKEN_EQUAL, '==', 120],
        [self::TOKEN_IN, '\bin\b', 115],
        [self::TOKEN_BOOL, '\b(?:true|false)\b', 110],
        [self::TOKEN_NULL, '\bnull\b', 105],
        [self::TOKEN_METHOD, '\.\s*[a-zA-Z_]\w*\s*\(', 100],
        [self::TOKEN_FUNCTION, '[a-zA-Z_]\w*\s*\(', 95],
        [self::TOKEN_FLOAT, '-?\d+(?:\.\d+)', 90],
        [self::TOKEN_INTEGER, '-?\d+', 85],
        [self::TOKEN_ENCAPSED_STRING, '"[^"]*"|\'[^\']*\'', 80],
        [self::TOKEN_SMALLER_EQUAL, '<=', 75],
        [self::TOKEN_GREATER_EQUAL, '>=', 70],
        [self::TOKEN_SMALLER, '<', 65],
        [self::TOKEN_GREATER, '>', 60],
        [self::TOKEN_OPENING_PARENTHESIS, '\(', 55],
        [self::TOKEN_CLOSING_PARENTHESIS, '\)', 50],
        [self::TOKEN_OPENING_ARRAY, '\[', 45],
        [self::TOKEN_CLOSING_ARRAY, '\]', 40],
        [self::TOKEN_COMMA, ',', 35],
        [self::TOKEN_REGEX, '/[^/\*].*/[igm]{0,3}', 30],
        [self::TOKEN_COMMENT, '//[^\r\n]*|/\*.*?\*/', 25],
        [self::TOKEN_NEWLINE, '\r?\n', 20],
        [self::TOKEN_SPACE, '\s+', 15],
        [self::TOKEN_VARIABLE, '[a-zA-Z_]\w*', 10],
        [self::TOKEN_UNKNOWN, '.', 5],
    ];

    foreach ($builtIns as $row) {
        list($name, $pattern, $weight) = $row;
        $this->registerToken($name, $pattern, $weight);
    }
}
5284

5385
/**
54-
* @param string $string
55-
* @return Stack
86+
* {@inheritdoc}
5687
*/
5788
public function tokenize($string)
5889
{
5990
$stack = new Stack();
91+
$regex = $this->getRegex();
6092
$baseNameSpace = __NAMESPACE__ . '\\Tokens\\Token';
6193
$offset = 0;
6294

63-
while (preg_match($this->tokens, $string, $matches, 0, $offset)) {
95+
while (preg_match($regex, $string, $matches, 0, $offset)) {
6496
$token = $this->getMatchedToken($matches);
6597
$className = $baseNameSpace . $token;
6698

@@ -77,8 +109,22 @@ public function tokenize($string)
77109
}
78110

79111
/**
80-
* @param string[] $matches
81-
* @return string
112+
* {@inheritdoc}
113+
*/
114+
public function registerToken($class, $regex, $priority = null)
{
    // Without an explicit priority, reuse the one already stored for this
    // token name (or the fallback supplied by getPriority()).
    if ($priority === null) {
        $priority = $this->getPriority($class);
    }

    // A later registration under the same name replaces the earlier one.
    $this->internalTokens[$class] = (object) [
        'class'    => $class,
        'regex'    => $regex,
        'priority' => $priority,
    ];

    // Make getRegex() rebuild the combined pattern on its next call.
    $this->regexRequiresReassambly = true;
}
124+
125+
/**
126+
* @param array $matches
127+
* @return int|string
82128
*/
83129
private function getMatchedToken(array $matches)
84130
{
@@ -90,4 +136,48 @@ private function getMatchedToken(array $matches)
90136

91137
return 'Unknown';
92138
}
93-
}
139+
140+
/**
 * Returns the combined tokenizer pattern, rebuilding it only when no pattern
 * is cached yet or a token was registered since the last build.
 *
 * Every registered token contributes one named group, "(?<name>pattern)";
 * the groups are joined with "|" in the order delivered by getQueue().
 *
 * @return string
 */
private function getRegex()
{
    // Cached pattern is still valid: nothing to do.
    if ($this->regex && !$this->regexRequiresReassambly) {
        return $this->regex;
    }

    $groups = [];

    foreach ($this->getQueue() as $definition) {
        $groups[] = '(?<' . $definition->class . '>' . $definition->regex . ')';
    }

    $this->regex = '~(' . implode('|', $groups) . ')~As';
    $this->regexRequiresReassambly = false;

    return $this->regex;
}
158+
159+
/**
 * Builds a queue of all registered tokens, highest priority first.
 *
 * SplPriorityQueue returns elements of equal priority in no particular
 * order. A token registered without an explicit priority can share its
 * priority with another token, so a decreasing sequence number is added as
 * a secondary sort key: tokens with equal priority are returned in the
 * order in which they appear in the internal token list.
 *
 * @return SplPriorityQueue
 */
private function getQueue()
{
    $queue = new SplPriorityQueue();
    $sequence = PHP_INT_MAX;

    foreach ($this->internalTokens as $token) {
        // Array priorities are compared element by element: the token's own
        // priority decides first, the sequence number only breaks ties.
        $queue->insert($token, [$token->priority, $sequence--]);
    }

    return $queue;
}
172+
173+
/**
 * Looks up the priority stored for an already registered token name.
 *
 * @param string $class Token name to look up.
 * @return int The stored priority, or 10 when the name is not registered.
 */
private function getPriority($class)
{
    if (!isset($this->internalTokens[$class])) {
        return 10;
    }

    return $this->internalTokens[$class]->priority;
}
183+
}

src/nicoSWD/Rules/TokenizerInterface.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,11 @@ interface TokenizerInterface
2020
* @throws \Exception
2121
*/
2222
public function tokenize($string);
23+
24+
/**
* Registers a token definition with the tokenizer.
*
25+
* @param string $class Name identifying the token.
26+
* @param string $regex Pattern used to recognise the token.
27+
* @param int|null $priority Optional priority; how null is treated is left to the implementation.
28+
*/
29+
public function registerToken($class, $regex, $priority = null);
2330
}

src/nicoSWD/Rules/Tokens/TokenEqual.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,12 @@ public function getGroup()
2323
{
2424
return Constants::GROUP_OPERATOR;
2525
}
26+
27+
/**
* Returns the fixed operator string '=='.
*
28+
* @return string
29+
*/
30+
public function getValue()
31+
{
32+
return '==';
33+
}
2634
}

src/nicoSWD/Rules/Tokens/TokenEqualStrict.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,12 @@ public function getGroup()
2323
{
2424
return Constants::GROUP_OPERATOR;
2525
}
26+
27+
/**
* Returns the fixed operator string '==='.
*
28+
* @return string
29+
*/
30+
public function getValue()
31+
{
32+
return '===';
33+
}
2634
}

src/nicoSWD/Rules/Tokens/TokenGreater.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,12 @@ public function getGroup()
2323
{
2424
return Constants::GROUP_OPERATOR;
2525
}
26+
27+
/**
* Returns the fixed operator string '>'.
*
28+
* @return string
29+
*/
30+
public function getValue()
31+
{
32+
return '>';
33+
}
2634
}

src/nicoSWD/Rules/Tokens/TokenGreaterEqual.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,12 @@ public function getGroup()
2323
{
2424
return Constants::GROUP_OPERATOR;
2525
}
26+
27+
/**
* Returns the fixed operator string '>='.
*
28+
* @return string
29+
*/
30+
public function getValue()
31+
{
32+
return '>=';
33+
}
2634
}

src/nicoSWD/Rules/Tokens/TokenNotEqualStrict.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,12 @@ public function getGroup()
2323
{
2424
return Constants::GROUP_OPERATOR;
2525
}
26+
27+
/**
* Returns the fixed operator string '!=='.
*
28+
* @return string
29+
*/
30+
public function getValue()
31+
{
32+
return '!==';
33+
}
2634
}

0 commit comments

Comments
 (0)