Skip to content

Commit ab62b4e

Browse files
committed
parser: add separate grammar for > and < date operators
ref: cern-sis/issues-inspire#134
1 parent c8e1a35 commit ab62b4e

File tree

7 files changed

+154
-89
lines changed

7 files changed

+154
-89
lines changed

inspire_query_parser/ast.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,14 @@ class GreaterThanOp(UnaryOp):
168168
pass
169169

170170

171+
class GreaterThanDateOp(UnaryOp):
172+
pass
173+
174+
175+
class LessThanDateOp(UnaryOp):
176+
pass
177+
178+
171179
class LessThanOp(UnaryOp):
172180
pass
173181

inspire_query_parser/parser.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ class SimpleValueWithColonUnit(SimpleValueUnit):
367367

368368

369369
class SimpleDateValueUnit(LeafRule):
370-
grammar = re.compile(r"[\d*\-\.\/]{4,10}(?=($|\s|\)))", re.UNICODE)
370+
grammar = re.compile(r"[\d*\-\.\/\_]{1,10}(?=($|\s|\)))", re.UNICODE)
371371
date_specifiers_regex = re.compile(r"({})\s*(-\s*\d+)?".format('|'.join(DATE_SPECIFIERS_COLLECTION)), re.UNICODE)
372372
string_month_date_regex = re.compile(MONTH_REGEX, re.IGNORECASE)
373373

@@ -555,6 +555,7 @@ def parse(cls, parser, text, pos):
555555
GreaterEqualOp,
556556
LessEqualOp,
557557
GreaterThanOp,
558+
GreaterThanDateOp,
558559
LessThanOp,
559560
ComplexValue
560561
]
@@ -600,7 +601,6 @@ def parse(cls, parser, text, pos):
600601
SimpleValueNegation,
601602
SimpleValue,
602603
SimpleDateValueNegation,
603-
SimpleDateValue,
604604
]
605605
)
606606

@@ -652,7 +652,15 @@ class GreaterThanOp(UnaryRule):
652652
653653
Supports queries like author-count > 2000; date comparisons use GreaterThanDateOp.
654654
"""
655-
grammar = omit(re.compile(r"after|>", re.IGNORECASE)), attr('op', [SimpleDateValue, SimpleValue])
655+
grammar = omit(re.compile(r">", re.IGNORECASE)), attr('op', [SimpleValue])
656+
657+
658+
class GreaterThanDateOp(UnaryRule):
659+
"""Greater than operator.
660+
661+
Supports date queries like date > 2000-10 or date after 10-2000.
662+
"""
663+
grammar = omit(re.compile(r"after|>", re.IGNORECASE)), attr('op', [SimpleDateValue])
656664

657665

658666
class GreaterEqualOp(UnaryRule):
@@ -673,7 +681,15 @@ class LessThanOp(UnaryRule):
673681
674682
Supports queries like author-count < 100; date comparisons use LessThanDateOp.
675683
"""
676-
grammar = omit(re.compile(r"before|<", re.IGNORECASE)), attr('op', [SimpleDateValue, SimpleValue])
684+
grammar = omit(re.compile(r"<", re.IGNORECASE)), attr('op', [SimpleValue])
685+
686+
687+
class LessThanDateOp(UnaryRule):
688+
"""Less than operator.
689+
690+
Supports date queries like date < 2000-12 or date before 1984.
691+
"""
692+
grammar = omit(re.compile(r"before|<", re.IGNORECASE)), attr('op', [SimpleDateValue])
677693

678694

679695
class LessEqualOp(UnaryRule):
@@ -740,8 +756,8 @@ class DateValue(UnaryRule):
740756
(optional(omit(Literal("="))), RangeOp),
741757
GreaterEqualOp,
742758
LessEqualOp,
743-
GreaterThanOp,
744-
LessThanOp,
759+
GreaterThanDateOp,
760+
LessThanDateOp,
745761
(
746762
optional(omit(Literal("="))),
747763
[

inspire_query_parser/visitors/elastic_search_visitor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,12 +649,18 @@ def visit_range_op(self, node, fieldnames):
649649
def visit_greater_than_op(self, node, fieldnames):
650650
return self._generate_range_queries(force_list(fieldnames), {'gt': node.op.value})
651651

652+
def visit_greater_than_date_op(self, node, fieldnames):
653+
return self._generate_range_queries(force_list(fieldnames), {'gt': node.op.value})
654+
652655
def visit_greater_equal_than_op(self, node, fieldnames):
653656
return self._generate_range_queries(force_list(fieldnames), {'gte': node.op.value})
654657

655658
def visit_less_than_op(self, node, fieldnames):
656659
return self._generate_range_queries(force_list(fieldnames), {'lt': node.op.value})
657660

661+
def visit_less_than_date_op(self, node, fieldnames):
662+
return self._generate_range_queries(force_list(fieldnames), {'lt': node.op.value})
663+
658664
def visit_less_equal_than_op(self, node, fieldnames):
659665
return self._generate_range_queries(force_list(fieldnames), {'lte': node.op.value})
660666

inspire_query_parser/visitors/restructuring_visitor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,9 @@ def visit_value(self, node):
268268
def visit_range_op(self, node):
269269
return ast.RangeOp(node.left.accept(self), node.right.accept(self))
270270

271+
def visit_greater_than_date_op(self, node):
272+
return ast.GreaterThanDateOp(node.op.accept(self))
273+
271274
def visit_greater_than_op(self, node):
272275
return ast.GreaterThanOp(node.op.accept(self))
273276

@@ -279,6 +282,9 @@ def visit_greater_equal_op(self, node):
279282
return ast.GreaterEqualThanOp(value)
280283

281284
def visit_less_than_op(self, node):
285+
return ast.LessThanDateOp(node.op.accept(self))
286+
287+
def visit_less_than_date_op(self, node):
282288
return ast.LessThanOp(node.op.accept(self))
283289

284290
def visit_less_equal_op(self, node):

tests/test_elastic_search_visitor.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3235,3 +3235,21 @@ def test_elastic_search_visitor_complex_query():
32353235
}
32363236
generated_es_query = _parse_query(query_str)
32373237
assert generated_es_query == expected_es_query
3238+
3239+
3240+
def test_elastic_search_visitor_regression_greater_than_for_non_date():
3241+
query_str = "t after something"
3242+
expected_es_query = {
3243+
"match": {"titles.full_title": {"query": "after something", "operator": "and"}}
3244+
}
3245+
generated_es_query = _parse_query(query_str)
3246+
assert generated_es_query == expected_es_query
3247+
3248+
3249+
def test_elastic_search_visitor_regression_less_than_for_non_date():
3250+
query_str = "t before something"
3251+
expected_es_query = {
3252+
"match": {"titles.full_title": {"query": "before something", "operator": "and"}}
3253+
}
3254+
generated_es_query = _parse_query(query_str)
3255+
assert generated_es_query == expected_es_query

tests/test_parser_functionality.py

Lines changed: 76 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from inspire_query_parser.parser import (And, BooleanQuery, ComplexValue,
2828
DateValue, EmptyQuery, Expression,
2929
GreaterEqualOp, GreaterThanOp,
30+
GreaterThanDateOp,
3031
InspireDateKeyword, InspireKeyword,
3132
InvenioKeywordQuery, LessEqualOp,
3233
LessThanOp, MalformedQueryWords,
@@ -1762,64 +1763,6 @@
17621763
("", Query([EmptyQuery()])),
17631764
(" ", Query([EmptyQuery()])),
17641765
# G, GE, LT, LE, E queries
1765-
(
1766-
"date > 2000-10 and < 2000-12",
1767-
Query(
1768-
[
1769-
Statement(
1770-
BooleanQuery(
1771-
Expression(
1772-
SimpleQuery(
1773-
SpiresDateKeywordQuery(
1774-
InspireDateKeyword("date"),
1775-
DateValue(
1776-
GreaterThanOp(SimpleDateValue("2000-10"))
1777-
),
1778-
)
1779-
)
1780-
),
1781-
And(),
1782-
Statement(
1783-
Expression(
1784-
SimpleQuery(
1785-
Value(LessThanOp(SimpleDateValue("2000-12")))
1786-
)
1787-
)
1788-
),
1789-
)
1790-
)
1791-
]
1792-
),
1793-
),
1794-
(
1795-
"date after 10/2000 and before 2000-12",
1796-
Query(
1797-
[
1798-
Statement(
1799-
BooleanQuery(
1800-
Expression(
1801-
SimpleQuery(
1802-
SpiresDateKeywordQuery(
1803-
InspireDateKeyword("date"),
1804-
DateValue(
1805-
GreaterThanOp(SimpleDateValue("10/2000"))
1806-
),
1807-
)
1808-
)
1809-
),
1810-
And(),
1811-
Statement(
1812-
Expression(
1813-
SimpleQuery(
1814-
Value(LessThanOp(SimpleDateValue("2000-12")))
1815-
)
1816-
)
1817-
),
1818-
)
1819-
)
1820-
]
1821-
),
1822-
),
18231766
(
18241767
"date >= nov 2000 and d<=2005",
18251768
Query(
@@ -2070,7 +2013,7 @@
20702013
SpiresDateKeywordQuery(
20712014
InspireDateKeyword("date-updated"),
20722015
DateValue(
2073-
GreaterThanOp(SimpleDateValue("yesterday - 2"))
2016+
GreaterThanDateOp(SimpleDateValue("yesterday - 2"))
20742017
),
20752018
)
20762019
)
@@ -2112,7 +2055,7 @@
21122055
SpiresDateKeywordQuery(
21132056
InspireDateKeyword("date"),
21142057
DateValue(
2115-
GreaterThanOp(
2058+
GreaterThanDateOp(
21162059
SimpleDateValue("2013")
21172060
)
21182061
),
@@ -2335,3 +2278,76 @@ def test_parser_functionality(query_str, expected_parse_tree):
23352278
parser = StatefulParser()
23362279
_, parse_tree = parser.parse(query_str, Query)
23372280
assert parse_tree == expected_parse_tree
2281+
2282+
2283+
@pytest.mark.parametrize(
2284+
["query_str", "expected_parse_tree"],
2285+
{
2286+
(
2287+
"date > 2000-10 and < 2000-12",
2288+
Query(
2289+
[
2290+
Statement(
2291+
BooleanQuery(
2292+
Expression(
2293+
SimpleQuery(
2294+
SpiresDateKeywordQuery(
2295+
InspireDateKeyword("date"),
2296+
DateValue(
2297+
GreaterThanOp(SimpleDateValue("2000-10"))
2298+
),
2299+
)
2300+
)
2301+
),
2302+
And(),
2303+
Statement(
2304+
Expression(
2305+
SimpleQuery(
2306+
Value(LessThanOp(SimpleDateValue("2000-12")))
2307+
)
2308+
)
2309+
),
2310+
)
2311+
)
2312+
]
2313+
),
2314+
),
2315+
(
2316+
"date after 10/2000 and before 2000-12",
2317+
Query(
2318+
[
2319+
Statement(
2320+
BooleanQuery(
2321+
Expression(
2322+
SimpleQuery(
2323+
SpiresDateKeywordQuery(
2324+
InspireDateKeyword("date"),
2325+
DateValue(
2326+
GreaterThanOp(SimpleDateValue("10/2000"))
2327+
),
2328+
)
2329+
)
2330+
),
2331+
And(),
2332+
Statement(
2333+
Expression(
2334+
SimpleQuery(
2335+
Value(LessThanOp(SimpleDateValue("2000-12")))
2336+
)
2337+
)
2338+
),
2339+
)
2340+
)
2341+
]
2342+
),
2343+
),
2344+
},
2345+
)
2346+
@pytest.mark.xfail(
2347+
reason="the queries are not correct, should be fixed by https://github.com/cern-sis/issues-inspire/issues/150 "
2348+
)
2349+
def test_parser_functionality_regressions(query_str, expected_parse_tree):
2350+
print("Parsing: " + query_str)
2351+
parser = StatefulParser()
2352+
_, parse_tree = parser.parse(query_str, Query)
2353+
assert parse_tree == expected_parse_tree

tests/test_restructuring_visitor.py

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from inspire_query_parser import parser
3131
from inspire_query_parser.ast import (AndOp, EmptyQuery, ExactMatchValue,
3232
GreaterEqualThanOp, GreaterThanOp,
33+
GreaterThanDateOp,LessThanDateOp,
3334
Keyword, KeywordOp, LessEqualThanOp,
3435
LessThanOp, MalformedQuery,
3536
NestedKeywordOp, NotOp, OrOp,
@@ -354,17 +355,11 @@
354355
# G, GE, LT, LE, E queries
355356
(
356357
'date > 2000-10 and date < 2000-12',
357-
AndOp(
358-
KeywordOp(Keyword('date'), GreaterThanOp(Value('2000-10'))),
359-
KeywordOp(Keyword('date'), LessThanOp(Value('2000-12')))
360-
)
358+
AndOp(KeywordOp(Keyword('date'), GreaterThanDateOp(Value('2000-10'))), KeywordOp(Keyword('date'), LessThanOp(Value('2000-12'))))
361359
),
362360
(
363361
'date after 10/2000 and date before 2000-12',
364-
AndOp(
365-
KeywordOp(Keyword('date'), GreaterThanOp(Value('10/2000'))),
366-
KeywordOp(Keyword('date'), LessThanOp(Value('2000-12')))
367-
)
362+
AndOp(KeywordOp(Keyword('date'), GreaterThanDateOp(Value('10/2000'))), KeywordOp(Keyword('date'), LessThanOp(Value('2000-12'))))
368363
),
369364
(
370365
'date >= nov 2000 and d<=2005',
@@ -445,23 +440,11 @@
445440
'du > yesterday - 2',
446441
KeywordOp(
447442
Keyword('date-updated'),
448-
GreaterThanOp(Value(str((date.today() - relativedelta(days=3)))))
443+
GreaterThanDateOp(Value(str((date.today() - relativedelta(days=3)))))
449444
)
450445
),
451446
452447
# Wildcard queries
453-
(
454-
'find a \'o*aigh\' and t "alge*" and date >2013',
455-
AndOp(
456-
KeywordOp(Keyword('author'), PartialMatchValue('o*aigh', contains_wildcard=True)),
457-
AndOp(
458-
KeywordOp(Keyword('title'), ExactMatchValue('alge*'
459-
460-
)),
461-
KeywordOp(Keyword('date'), GreaterThanOp(Value('2013')))
462-
)
463-
)
464-
),
465448
(
466449
'a *alge | a alge* | a o*aigh',
467450
OrOp(
@@ -476,7 +459,19 @@
476459
'find texkey Hirata:1992*',
477460
KeywordOp(Keyword('texkeys'), Value('Hirata:1992*', contains_wildcard=True))
478461
),
479-
462+
(
463+
"find a 'o*aigh' and t \"alge*\" and date >2013",
464+
AndOp(
465+
KeywordOp(
466+
Keyword("author"),
467+
PartialMatchValue("o*aigh", contains_wildcard=True),
468+
),
469+
AndOp(
470+
KeywordOp(Keyword("title"), ExactMatchValue("alge*")),
471+
KeywordOp(Keyword("date"), GreaterThanDateOp(Value("2013"))),
472+
),
473+
),
474+
),
480475
# Queries for implicit "and" removal
481476
('title and foo', AndOp(ValueOp(Value('title')), ValueOp(Value('foo')))),
482477
('author takumi doi', KeywordOp(Keyword('author'), Value('takumi doi'))),
@@ -711,7 +706,7 @@ def test_foo_bar():
711706
)
712707
),
713708
('find cc italy', KeywordOp(Keyword('country'), Value('italy'))),
714-
('fin date > today', KeywordOp(Keyword('date'), GreaterThanOp(Value(str(date.today()))))),
709+
('fin date > today', KeywordOp(Keyword('date'), GreaterThanDateOp(Value(str(date.today()))))),
715710
('find r atlas-conf-*', KeywordOp(Keyword('reportnumber'), Value('atlas-conf-*', contains_wildcard=True))),
716711
(
717712
'find caption "Diagram for the fermion flow violating process"',

0 commit comments

Comments
 (0)