-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathText.php
More file actions
76 lines (67 loc) · 2.66 KB
/
Text.php
File metadata and controls
76 lines (67 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
<?php
namespace QueryTranslator\Languages\Galach\TokenExtractor;
use QueryTranslator\Languages\Galach\TokenExtractor;
use QueryTranslator\Languages\Galach\Tokenizer;
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin;
use QueryTranslator\Languages\Galach\Values\Token\Phrase;
use QueryTranslator\Languages\Galach\Values\Token\Word;
use RuntimeException;
/**
 * Galach token extractor restricted to the plain-text part of the language.
 *
 * Recognizes whitespace, the unary operators (+, -, !), the logical keywords
 * (NOT, AND / &&, OR / ||), parentheses, quoted phrases and bare words.
 * Every term token is created with an empty string as its third constructor
 * argument (presumably the domain, which this flavor does not parse -
 * confirm against the Word and Phrase value objects).
 */
final class Text extends TokenExtractor
{
    /**
     * Regular expressions keyed to the Tokenizer token type they produce.
     *
     * All expressions use the "A" (anchored) and "u" (UTF-8) modifiers and
     * expose the matched token text through the named group "lexeme". The two
     * term expressions additionally expose "quote" and "phrase", or "word",
     * which createTermToken() relies on to pick the concrete token class.
     *
     * NOTE(review): entry order is kept exactly as originally declared; the
     * base extractor presumably evaluates the entries in this order - confirm
     * before reordering anything.
     *
     * @var array
     */
    private static $expressionTypeMap = [
        // One or more whitespace characters.
        '/(?<lexeme>[\s]+)/Au' => Tokenizer::TOKEN_WHITESPACE,

        // Single-character unary operators.
        '/(?<lexeme>\+)/Au' => Tokenizer::TOKEN_MANDATORY,
        '/(?<lexeme>-)/Au' => Tokenizer::TOKEN_PROHIBITED,
        '/(?<lexeme>!)/Au' => Tokenizer::TOKEN_LOGICAL_NOT_2,

        // Closing parenthesis.
        '/(?<lexeme>\))/Au' => Tokenizer::TOKEN_GROUP_END,

        // Logical keywords: only matched when followed by whitespace, a quote,
        // a parenthesis, one of + - ! or the end of the subject. The trailing
        // character is outside of the "lexeme" group.
        '/(?<lexeme>NOT)(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_NOT,
        '/(?<lexeme>(?:AND|&&))(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_AND,
        '/(?<lexeme>(?:OR|\|\|))(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_OR,

        // Opening parenthesis.
        '/(?<lexeme>\()/Au' => Tokenizer::TOKEN_GROUP_BEGIN,

        // Phrase: a double quote not preceded by a backslash, lazily matched
        // content (the "s" modifier lets it span newlines) and the same quote
        // again, also not preceded by a backslash.
        '/(?<lexeme>(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,

        // Word: a lazy run of backslash-escaped backslashes, spaces,
        // parentheses and quotes, or any character that is not a quote,
        // parenthesis or whitespace; it ends before a quote not preceded by a
        // backslash, a parenthesis, whitespace or the end of the subject.
        '/(?<lexeme>(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
    ];

    /**
     * Returns the expression-to-token-type map declared by this extractor.
     *
     * @return array
     */
    protected function getExpressionTypeMap()
    {
        return self::$expressionTypeMap;
    }

    /**
     * Builds a Word or Phrase token out of the groups matched by a term expression.
     *
     * A set "word" key takes precedence over a set "phrase" key.
     *
     * @param mixed $position Handed to the token constructor unchanged (presumably
     *                        the offset of the match in the input - confirm with the caller)
     * @param array $data Matched groups; "lexeme" is always read, plus either "word",
     *                    or "phrase" together with "quote"
     *
     * @throws \RuntimeException When $data has neither a "word" nor a "phrase" entry
     *
     * @return \QueryTranslator\Languages\Galach\Values\Token\Word|\QueryTranslator\Languages\Galach\Values\Token\Phrase
     */
    protected function createTermToken($position, array $data)
    {
        $matchedText = $data['lexeme'];

        if (isset($data['word'])) {
            // Drop the backslash in front of an escaped backslash, quote,
            // +, -, !, parenthesis or space, keeping the character itself.
            $plainWord = preg_replace('/(?:\\\\(\\\\|(["+\-!() ])))/', '$1', $data['word']);

            return new Word($matchedText, $position, '', $plainWord);
        }

        if (isset($data['phrase'])) {
            $delimiter = $data['quote'];

            // Inside a phrase only the quote character itself gets un-escaped.
            $plainPhrase = preg_replace('/(?:\\\\([' . $delimiter . ']))/', '$1', $data['phrase']);

            return new Phrase($matchedText, $position, '', $delimiter, $plainPhrase);
        }

        throw new RuntimeException('Could not extract term token from the given data');
    }

    /**
     * Builds the token for an opening parenthesis.
     *
     * The matched lexeme is passed both as the first and as the third
     * constructor argument, and an empty string as the fourth (presumably
     * the group delimiter and an unused domain - confirm against GroupBegin).
     *
     * @param mixed $position Handed to the token constructor unchanged
     * @param array $data Matched groups; only "lexeme" is read
     *
     * @return \QueryTranslator\Languages\Galach\Values\Token\GroupBegin
     */
    protected function createGroupBeginToken($position, array $data)
    {
        return new GroupBegin(
            $data['lexeme'],
            $position,
            $data['lexeme'],
            ''
        );
    }
}