Skip to content

Commit 76ed2b8

Browse files
committed
[#4] Fix clobbered boolean rules
This requires sharing the BooleanInputParser among all the rules within the rules-rewriter and adding the input literals all at once at the end.
1 parent 550b126 commit 76ed2b8

File tree

5 files changed

+184
-237
lines changed

5 files changed

+184
-237
lines changed

dev/user.clj

+1-53
Original file line numberDiff line numberDiff line change
@@ -1,53 +1 @@
1-
(ns user
2-
(:require
3-
[com.nytimes.querqy :as q]
4-
[com.nytimes.querqy.commonrules :as c]))
5-
6-
(defn pin
7-
"Pin the given IDs to the top of the result set in the order given. Should only
8-
be used once within a given match rule."
9-
[& ids]
10-
(map-indexed
11-
(fn [idx id]
12-
(c/boost (- Float/MAX_VALUE idx) {:ids {:values [id]}}))
13-
ids))
14-
15-
;; Let's assume we have some special thanksgiving related content that editorial
16-
;; wants highly promoted, the documents with IDs 12345 and 5678. Rather than
17-
;; tweak scores or sprinkle boosts around our rule set, we can instead use our
18-
;; new pin rule which makes clear the intent of the boost and serves as
19-
;; documentation for what we're trying to achieve with this rule.
20-
(def rules
21-
(c/rules-rewriter
22-
(c/match "thanksgiving"
23-
(pin "12345" "5678"))))
24-
25-
;; We can now emit a query which pins results to the top.
26-
27-
(def opts {:match/fields ["headline"]})
28-
29-
(q/emit (q/rewrite rules "thanksgiving recipes") opts)
30-
31-
{:function_score
32-
{:query
33-
{:bool
34-
{:must [],
35-
:should
36-
[{:match {"headline" {:query "thanksgiving"}}}
37-
{:match {"headline" {:query "recipes"}}}],
38-
:must_not [],
39-
:filter []}},
40-
:functions
41-
[{:filter {:ids {:values ["5678"]}}, :weight 1.0E37}
42-
{:filter {:ids {:values ["12345"]}}, :weight 1.0E38}]}}
43-
44-
(def base
45-
{:function_score
46-
{:query
47-
{:bool {:must [],
48-
:should [],
49-
:must_not [],
50-
:filter []}},
51-
:functions
52-
[{:filter {:ids {:values ["5678"]}}, :weight 1.0E37}
53-
{:filter {:ids {:values ["12345"]}}, :weight 1.0E38}]}})
1+
(ns user)

src/com/nytimes/querqy/commonrules.clj

+99-86
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,24 @@
22
"CommonRules based rewriter"
33
(:refer-clojure :exclude [filter])
44
(:require
5-
[clojure.java.io :as io]
6-
[clojure.string :as str]
7-
[com.nytimes.querqy.model :as model]
8-
[com.nytimes.querqy.parser :as parser])
5+
[clojure.java.io :as io]
6+
[clojure.string :as str]
7+
[com.nytimes.querqy.model :as model]
8+
[com.nytimes.querqy.parser :as parser])
99
(:import
10-
(java.io Reader)
11-
(java.net URL)
12-
(java.util List UUID)
13-
(querqy.model Input Input$BooleanInput Input$SimpleInput)
14-
(querqy.parser QuerqyParser)
15-
(querqy.rewrite RewriterFactory)
16-
(querqy.rewrite.commonrules CommonRulesRewriter LineParser QuerqyParserFactory SimpleCommonRulesParser WhiteSpaceQuerqyParserFactory)
17-
(querqy.rewrite.commonrules.model BoostInstruction BoostInstruction$BoostDirection DeleteInstruction FilterInstruction Instructions SynonymInstruction TrieMapRulesCollectionBuilder)
18-
(querqy.rewrite.commonrules.select SelectionStrategyFactory)
19-
(querqy.rewrite.commonrules.select.booleaninput BooleanInputParser)
20-
(querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral)))
10+
(java.io Reader)
11+
(java.net URL)
12+
(java.util List UUID)
13+
(querqy.model Input Input$BooleanInput Input$SimpleInput)
14+
(querqy.parser QuerqyParser)
15+
(querqy.rewrite RewriterFactory)
16+
(querqy.rewrite.commonrules CommonRulesRewriter LineParser QuerqyParserFactory SimpleCommonRulesParser WhiteSpaceQuerqyParserFactory)
17+
(querqy.rewrite.commonrules.model BoostInstruction BoostInstruction$BoostDirection DeleteInstruction FilterInstruction Instructions SynonymInstruction TrieMapRulesCollectionBuilder)
18+
(querqy.rewrite.commonrules.select SelectionStrategyFactory)
19+
(querqy.rewrite.commonrules.select.booleaninput BooleanInputParser)
20+
(querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral)))
21+
22+
(set! *warn-on-reflection* true)
2123

2224
(defprotocol CommonRulesRewriterBuilder
2325
(common-rules-rewriter* [this]))
@@ -27,12 +29,6 @@
2729
(common-rules-rewriter* [_]
2830
(throw (IllegalArgumentException. "Must provide rules to rules-rewriter"))))
2931

30-
(defn- flatten-rules
31-
[fns]
32-
(reduce (fn [accm afn] (if (coll? afn) (into accm afn) (conj accm afn)))
33-
[]
34-
fns))
35-
3632
(defn rules-rewriter
3733
"Create a CommonRulesRewriter.
3834
@@ -41,15 +37,15 @@
4137
[& args]
4238
(if (and (= 1 (count args)) (instance? URL (first args)))
4339
(common-rules-rewriter* (first args))
44-
(common-rules-rewriter* (flatten-rules args))))
40+
(common-rules-rewriter* args)))
4541

4642
(defn rewriter-factory
4743
[rules]
4844
(proxy [RewriterFactory] [(str (UUID/randomUUID))]
4945
(createRewriter [_ _]
5046
(CommonRulesRewriter.
51-
rules
52-
SelectionStrategyFactory/DEFAULT_SELECTION_STRATEGY))
47+
rules
48+
SelectionStrategyFactory/DEFAULT_SELECTION_STRATEGY))
5349
(getCacheableGenerableTerms [] #{})))
5450

5551
;; ----------------------------------------------------------------------
@@ -63,10 +59,10 @@
6359
ignore-case true
6460
parser (WhiteSpaceQuerqyParserFactory.)}}]
6561
(let [rules-parser (SimpleCommonRulesParser.
66-
^Reader stream
67-
^boolean boolean-input
68-
^QuerqyParserFactory parser
69-
^boolean ignore-case)]
62+
^Reader stream
63+
^boolean boolean-input
64+
^QuerqyParserFactory parser
65+
^boolean ignore-case)]
7066
(.parse rules-parser))))
7167

7268
(extend-protocol CommonRulesRewriterBuilder
@@ -77,36 +73,51 @@
7773
;; ----------------------------------------------------------------------
7874
;; DSL
7975

80-
(extend-protocol CommonRulesRewriterBuilder
81-
List
82-
(common-rules-rewriter* [rules]
83-
(let [rules-builder (TrieMapRulesCollectionBuilder. true)]
84-
(doseq [rule-fn rules]
85-
(rule-fn rules-builder))
86-
(rewriter-factory (.build rules-builder)))))
87-
8876
(def ^:dynamic ^QuerqyParser *query-parser* parser/whitespace-parser)
8977

90-
(declare match*)
78+
(defrecord Rule [input instructions])
79+
80+
(defn match*
81+
"Create a Rule record from the given input (head) and the instructions that follow it (tail)."
82+
[head & tail]
83+
(->Rule head (vec tail)))
9184

9285
(defmacro match
93-
"Create a match rule."
94-
{:style/indent 1}
95-
;; TODO LEFT/ RIGHT boundaries
86+
"Create a rewriter rule from matching text input or boolean input followed by
87+
any number of query transformations.
88+
89+
```clojure
90+
;; Inject bar as a synonym to foo in any query.
91+
(match \"foo\"
92+
(synonym \"bar\"))
93+
```"
9694
[head & tail]
97-
`(match* '~head (vec (flatten (vector ~@tail)))))
95+
`(match* '~head ~@tail))
96+
97+
(defn- parse-string
98+
[string]
99+
(mapv #(LineParser/parseTerm %) (str/split string #"\s+")))
98100

99-
(defn- parse-string [string] (mapv #(LineParser/parseTerm %) (str/split string #"\s+")))
101+
(defn- parse-query
102+
[query]
103+
(cond
104+
(string? query)
105+
(.parse *query-parser* query)
100106

101-
(defn- parse-query [query]
102-
(cond (string? query) (.parse *query-parser* query)
103-
(map? query) (model/rawq {:query query})))
107+
(map? query)
108+
(model/rawq {:query query})))
104109

105-
(defn delete? [obj] (instance? DeleteInstruction obj))
110+
(defn delete?
111+
[obj]
112+
(instance? DeleteInstruction obj))
106113

107-
(defn delete [string] (DeleteInstruction. (parse-string string)))
114+
(defn delete
115+
[string]
116+
(DeleteInstruction. (parse-string string)))
108117

109-
(defn synonym? [obj] (instance? SynonymInstruction obj))
118+
(defn synonym?
119+
[obj]
120+
(instance? SynonymInstruction obj))
110121

111122
(defn synonym
112123
"Create a synonym instruction."
@@ -154,41 +165,43 @@
154165
not (wrap (cons NOT (parse-boolean-input terms)))
155166
(map parse-boolean-input input))))
156167

157-
;;
158-
159-
(def rule-count (atom 0))
160-
161-
(defn ^:no-doc match*
162-
;; implements match for use by match macro
163-
[input instructions]
164-
(let [ord (swap! rule-count inc)
165-
compiled (Instructions. ord ord instructions)]
166-
(fn [^TrieMapRulesCollectionBuilder rules-builder]
167-
(cond
168-
;; string rules
169-
(string? input)
170-
(let [simple-input (Input/parseSimpleInput input)]
171-
(.addRule rules-builder
172-
^Input$SimpleInput simple-input
173-
^Instructions compiled))
174-
175-
;; boolean rules
176-
(list? input)
177-
(do
178-
(when (some delete? instructions)
179-
(throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input")))
180-
(when (some synonym? instructions)
181-
(throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input")))
182-
(let [boolean-input-parser (BooleanInputParser.)
183-
bool-input (Input$BooleanInput. (parse-boolean-input input)
184-
boolean-input-parser
185-
(pr-str input))]
186-
187-
(.applyInstructions bool-input compiled rules-builder)
188-
(doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))]
189-
(let [input (LineParser/parseInput (str/join \space (.getTerms literal)))]
190-
(.addRule rules-builder
191-
^Input$SimpleInput input
192-
^BooleanInputLiteral literal)))))
193-
194-
:else (throw (IllegalArgumentException. "Can only parse a string or list as input"))))))
168+
(defn parse-simple-input
169+
^Input$SimpleInput
170+
[string]
171+
(Input/parseSimpleInput string))
172+
173+
(extend-protocol CommonRulesRewriterBuilder
174+
List
175+
(common-rules-rewriter* [rules]
176+
(let [rules (flatten rules)
177+
counter (atom 0)
178+
rules-builder (TrieMapRulesCollectionBuilder. true)
179+
boolean-input-parser (BooleanInputParser.)]
180+
(doseq [{:keys [input instructions]} rules]
181+
(let [ord (swap! counter inc)
182+
id (str input "#" ord)
183+
instructions (Instructions. ord id instructions)]
184+
(cond
185+
(string? input)
186+
(.addRule rules-builder (parse-simple-input input) instructions)
187+
188+
(list? input)
189+
(do
190+
(when (some delete? instructions)
191+
(throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input")))
192+
193+
(when (some synonym? instructions)
194+
(throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input")))
195+
196+
(let [bool-input (Input$BooleanInput. (parse-boolean-input input) boolean-input-parser (pr-str input))]
197+
;; inputPattern.applyInstructions(instructions, builder);
198+
(.applyInstructions bool-input instructions rules-builder))))))
199+
200+
;; Add boolean literals at the end
201+
(doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))]
202+
(let [string (str/join \space (.getTerms literal))
203+
input (parse-simple-input string)]
204+
(.addRule rules-builder input literal)))
205+
206+
;; Build the rules collection and wrap it in a rewriter factory
207+
(rewriter-factory (.build rules-builder)))))

src/com/nytimes/querqy/model.clj

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
(ns com.nytimes.querqy.model
22
"Builders for classes in the `querqy.model` package."
33
(:require
4-
[clojure.core.protocols :as cp]
5-
[clojure.datafy :refer [datafy]]
6-
[clojure.string :as str])
4+
[clojure.core.protocols :as cp]
5+
[clojure.datafy :refer [datafy]]
6+
[clojure.string :as str])
77
(:import
8-
(querqy.model BooleanParent BooleanQuery BoostQuery BoostedTerm Clause Clause$Occur DisjunctionMaxQuery ExpandedQuery Input$SimpleInput MatchAllQuery QuerqyQuery Query Term)))
8+
(querqy.model BooleanParent BooleanQuery BoostQuery BoostedTerm Clause Clause$Occur DisjunctionMaxQuery ExpandedQuery Input$SimpleInput MatchAllQuery QuerqyQuery Query Term)))
99

1010
(def should Clause$Occur/SHOULD)
1111
(def must Clause$Occur/MUST)
@@ -112,7 +112,7 @@
112112
BooleanQuery
113113
(datafy [^BooleanQuery q]
114114
{:type BooleanQuery
115-
:occur (.getOccur q)
115+
:occur (occur->kw (.getOccur q))
116116
:clauses (mapv datafy (.getClauses q))})
117117

118118
BoostQuery

test/com/nytimes/querqy/common-rules.txt

+6
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,9 @@ A8 =>
3333

3434
(A11 AND (NOT B11)) =>
3535
UP(2): C11
36+
37+
(best AND netflix AND show) =>
38+
UP(2): netflix
39+
40+
(best AND amazon AND show) =>
41+
UP(2): amazon

0 commit comments

Comments
 (0)