Skip to content

Commit 285543d

Browse files
committed
Added inclusive and exclusive range tokenization
1 parent 6af5e8b commit 285543d

File tree

5 files changed

+119
-6
lines changed

5 files changed

+119
-6
lines changed

lib/Languages/Galach/TokenExtractor/Full.php

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ final class Full extends TokenExtractor
3636
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
3737
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
3838
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
39-
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?\[(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)\])/Aus' => Tokenizer::TOKEN_TERM,
39+
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<rangeStartSymbol>[\[\{])(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)[\]\}])/Aus' => Tokenizer::TOKEN_TERM,
4040
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
4141
];
4242

@@ -50,12 +50,13 @@ protected function createTermToken($position, array $data)
5050
$lexeme = $data['lexeme'];
5151

5252
switch (true) {
53-
case isset($data['rangeFrom']) && isset($data['rangeTo']):
53+
case isset($data['rangeStartSymbol']):
5454
return new Range(
5555
$lexeme,
5656
$position,
5757
$data['domain'],
58-
$data['rangeFrom'], $data['rangeTo']
58+
$data['rangeFrom'], $data['rangeTo'],
59+
Range::getTypeByStart($data['rangeStartSymbol'])
5960
);
6061
case isset($data['word']):
6162
return new Word(

lib/Languages/Galach/Values/Token/Range.php

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
*/
1313
final class Range extends Token
1414
{
15+
const TYPE_INCLUSIVE = 'inclusive';
16+
const TYPE_EXCLUSIVE = 'exclusive';
17+
1518
/**
1619
* Holds domain string.
1720
*
@@ -29,19 +32,50 @@ final class Range extends Token
2932
*/
3033
public $rangeTo;
3134

35+
/**
36+
* @var string
37+
*/
38+
public $type;
39+
3240
/**
 * Builds a range token and validates its range type.
 *
 * The token is always registered with the parent under
 * Tokenizer::TOKEN_TERM.
 *
 * @param string $lexeme
 * @param int $position
 * @param string $domain
 * @param string $rangeFrom
 * @param string $rangeTo
 * @param string $type one of self::TYPE_INCLUSIVE or self::TYPE_EXCLUSIVE
 *
 * @throws \InvalidArgumentException when $type is not one of the TYPE_* constants
 */
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $type)
{
    // The strict flag matters: with in_array()'s default loose comparison,
    // non-string values such as `true` (and `0` before PHP 8) compare equal
    // to the type strings and would slip through validation.
    if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE], true)) {
        throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
    }

    parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position);

    $this->domain = $domain;
    $this->rangeFrom = $rangeFrom;
    $this->rangeTo = $rangeTo;
    $this->type = $type;
}
61+
62+
/**
 * Maps a range opening symbol to the matching range type.
 *
 * '[' yields self::TYPE_INCLUSIVE and '{' yields self::TYPE_EXCLUSIVE;
 * any other value is rejected.
 *
 * @param string $startSymbol the start symbol, either '[' or '{'
 *
 * @return string
 *
 * @throws \InvalidArgumentException when the symbol is neither '[' nor '{'
 */
public static function getTypeByStart($startSymbol)
{
    $typesBySymbol = [
        '[' => self::TYPE_INCLUSIVE,
        '{' => self::TYPE_EXCLUSIVE,
    ];

    // The is_string() guard keeps the lookup equivalent to a strict (===)
    // comparison: non-string values are never coerced into array keys.
    if (!is_string($startSymbol) || !isset($typesBySymbol[$startSymbol])) {
        throw new \InvalidArgumentException(sprintf('Invalid range start symbol: %s', $startSymbol));
    }

    return $typesBySymbol[$startSymbol];
}
4781
}

tests/Galach/Tokenizer/FullTokenizerTest.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,13 @@ public function providerForTestTokenize()
116116
[
117117
'[a TO b]',
118118
[
119-
new RangeToken('[a TO b]', 0, '', 'a', 'b'),
119+
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'),
120+
],
121+
],
122+
[
123+
'{a TO b}',
124+
[
125+
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'),
120126
],
121127
],
122128
[

tests/Galach/Tokenizer/TextTokenizerTest.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,13 @@ public static function setUpBeforeClass()
102102
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
103103
new WordToken('b]', 6, '', 'b]'),
104104
],
105+
'{a TO b}' => [
106+
new WordToken('{a', 0, '', '{a'),
107+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
108+
new WordToken('TO', 3, '', 'TO'),
109+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
110+
new WordToken('b}', 6, '', 'b}'),
111+
],
105112
'domain:domain:' => [
106113
new WordToken('domain:domain:', 0, '', 'domain:domain:'),
107114
],
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
namespace QueryTranslator\Tests\Galach\Values\Token;
4+
5+
use PHPUnit\Framework\TestCase;
6+
use QueryTranslator\Languages\Galach\Values\Token\Range;
7+
8+
/**
 * Unit tests for the Range token: start-symbol to type mapping and
 * constructor validation of the range type.
 */
class RangeTest extends TestCase
{
    /**
     * Start symbols that Range::getTypeByStart() must reject.
     *
     * Declared static so the provider also works on PHPUnit versions that
     * no longer accept non-static data providers.
     *
     * @return array
     */
    public static function failingStartSymbolDataprovider()
    {
        return [
            'empty string' => [''],
            'slash' => ['/'],
            'opening parenthesis' => ['('],
        ];
    }

    /**
     * @dataProvider failingStartSymbolDataprovider
     * @param string $startSymbol
     */
    public function testGetTypeByStartFails($startSymbol)
    {
        $this->expectException(\InvalidArgumentException::class);
        Range::getTypeByStart($startSymbol);
    }

    /**
     * Pairs of expected range type and the start symbol producing it.
     *
     * @return array
     */
    public static function successfulStartSymbolDataprovider()
    {
        return [
            'square bracket is inclusive' => ['inclusive', '['],
            'curly brace is exclusive' => ['exclusive', '{'],
        ];
    }

    /**
     * @dataProvider successfulStartSymbolDataprovider
     * @param string $expectedType
     * @param string $startSymbol
     */
    public function testGetTypeByStartSucceeds($expectedType, $startSymbol)
    {
        $this->assertSame($expectedType, Range::getTypeByStart($startSymbol));
    }

    /**
     * Range type values that the Range constructor must reject.
     *
     * @return array
     */
    public static function failingTypeDataprovider()
    {
        return [
            'empty string' => [''],
            'null' => [null],
            'unknown type' => ['other'],
        ];
    }

    /**
     * @dataProvider failingTypeDataprovider
     * @param string|null $type
     */
    public function testConstructorFailsWrongType($type)
    {
        $this->expectException(\InvalidArgumentException::class);
        new Range('[a TO b]', 0, '', 'a', 'b', $type);
    }
}

0 commit comments

Comments
 (0)