|
2 | 2 | "CommonRules based rewriter"
|
3 | 3 | (:refer-clojure :exclude [filter])
|
4 | 4 | (:require
|
5 |
| - [clojure.java.io :as io] |
6 |
| - [clojure.string :as str] |
7 |
| - [com.nytimes.querqy.model :as model] |
8 |
| - [com.nytimes.querqy.parser :as parser]) |
| 5 | + [clojure.java.io :as io] |
| 6 | + [clojure.string :as str] |
| 7 | + [com.nytimes.querqy.model :as model] |
| 8 | + [com.nytimes.querqy.parser :as parser]) |
9 | 9 | (:import
|
10 |
| - (java.io Reader) |
11 |
| - (java.net URL) |
12 |
| - (java.util List UUID) |
13 |
| - (querqy.model Input Input$BooleanInput Input$SimpleInput) |
14 |
| - (querqy.parser QuerqyParser) |
15 |
| - (querqy.rewrite RewriterFactory) |
16 |
| - (querqy.rewrite.commonrules CommonRulesRewriter LineParser QuerqyParserFactory SimpleCommonRulesParser WhiteSpaceQuerqyParserFactory) |
17 |
| - (querqy.rewrite.commonrules.model BoostInstruction BoostInstruction$BoostDirection DeleteInstruction FilterInstruction Instructions SynonymInstruction TrieMapRulesCollectionBuilder) |
18 |
| - (querqy.rewrite.commonrules.select SelectionStrategyFactory) |
19 |
| - (querqy.rewrite.commonrules.select.booleaninput BooleanInputParser) |
20 |
| - (querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral))) |
| 10 | + (java.io Reader) |
| 11 | + (java.net URL) |
| 12 | + (java.util List UUID) |
| 13 | + (querqy.model Input Input$BooleanInput Input$SimpleInput) |
| 14 | + (querqy.parser QuerqyParser) |
| 15 | + (querqy.rewrite RewriterFactory) |
| 16 | + (querqy.rewrite.commonrules CommonRulesRewriter LineParser QuerqyParserFactory SimpleCommonRulesParser WhiteSpaceQuerqyParserFactory) |
| 17 | + (querqy.rewrite.commonrules.model BoostInstruction BoostInstruction$BoostDirection DeleteInstruction FilterInstruction Instructions SynonymInstruction TrieMapRulesCollectionBuilder) |
| 18 | + (querqy.rewrite.commonrules.select SelectionStrategyFactory) |
| 19 | + (querqy.rewrite.commonrules.select.booleaninput BooleanInputParser) |
| 20 | + (querqy.rewrite.commonrules.select.booleaninput.model BooleanInputElement BooleanInputElement$Type BooleanInputLiteral))) |
| 21 | + |
| 22 | +(set! *warn-on-reflection* true) |
21 | 23 |
|
22 | 24 | (defprotocol CommonRulesRewriterBuilder
|
23 | 25 | (common-rules-rewriter* [this]))
|
|
27 | 29 | (common-rules-rewriter* [_]
|
28 | 30 | (throw (IllegalArgumentException. "Must provide rules to rules-rewriter"))))
|
29 | 31 |
|
30 |
| -(defn- flatten-rules |
31 |
| - [fns] |
32 |
| - (reduce (fn [accm afn] (if (coll? afn) (into accm afn) (conj accm afn))) |
33 |
| - [] |
34 |
| - fns)) |
35 |
| - |
36 | 32 | (defn rules-rewriter
|
37 | 33 | "Create a CommonRulesRewriter.
|
38 | 34 |
|
|
41 | 37 | [& args]
|
42 | 38 | (if (and (= 1 (count args)) (instance? URL (first args)))
|
43 | 39 | (common-rules-rewriter* (first args))
|
44 |
| - (common-rules-rewriter* (flatten-rules args)))) |
| 40 | + (common-rules-rewriter* args))) |
45 | 41 |
|
46 | 42 | (defn rewriter-factory
|
47 | 43 | [rules]
|
48 | 44 | (proxy [RewriterFactory] [(str (UUID/randomUUID))]
|
49 | 45 | (createRewriter [_ _]
|
50 | 46 | (CommonRulesRewriter.
|
51 |
| - rules |
52 |
| - SelectionStrategyFactory/DEFAULT_SELECTION_STRATEGY)) |
| 47 | + rules |
| 48 | + SelectionStrategyFactory/DEFAULT_SELECTION_STRATEGY)) |
53 | 49 | (getCacheableGenerableTerms [] #{})))
|
54 | 50 |
|
55 | 51 | ;; ----------------------------------------------------------------------
|
|
63 | 59 | ignore-case true
|
64 | 60 | parser (WhiteSpaceQuerqyParserFactory.)}}]
|
65 | 61 | (let [rules-parser (SimpleCommonRulesParser.
|
66 |
| - ^Reader stream |
67 |
| - ^boolean boolean-input |
68 |
| - ^QuerqyParserFactory parser |
69 |
| - ^boolean ignore-case)] |
| 62 | + ^Reader stream |
| 63 | + ^boolean boolean-input |
| 64 | + ^QuerqyParserFactory parser |
| 65 | + ^boolean ignore-case)] |
70 | 66 | (.parse rules-parser))))
|
71 | 67 |
|
72 | 68 | (extend-protocol CommonRulesRewriterBuilder
|
|
77 | 73 | ;; ----------------------------------------------------------------------
|
78 | 74 | ;; DSL
|
79 | 75 |
|
80 |
| -(extend-protocol CommonRulesRewriterBuilder |
81 |
| - List |
82 |
| - (common-rules-rewriter* [rules] |
83 |
| - (let [rules-builder (TrieMapRulesCollectionBuilder. true)] |
84 |
| - (doseq [rule-fn rules] |
85 |
| - (rule-fn rules-builder)) |
86 |
| - (rewriter-factory (.build rules-builder))))) |
87 |
| - |
88 | 76 | (def ^:dynamic ^QuerqyParser *query-parser* parser/whitespace-parser)
|
89 | 77 |
|
90 |
| -(declare match*) |
| 78 | +(defrecord Rule [input instructions]) |
| 79 | + |
| 80 | +(defn match* |
| 81 | + "Create a Rule record pairing an input (head) with a vector of its instructions (tail)." |
| 82 | + [head & tail] |
| 83 | + (->Rule head (vec tail))) |
91 | 84 |
|
92 | 85 | (defmacro match
|
93 |
| - "Create a match rule." |
94 |
| - {:style/indent 1} |
95 |
| - ;; TODO LEFT/ RIGHT boundaries |
| 86 | + "Create a rewriter rule from matching text input or boolean input followed by |
| 87 | + any number of query transformations. |
| 88 | +
|
| 89 | + ```clojure |
| 90 | + ;; Inject bar as a synonym to foo in any query. |
| 91 | + (match \"foo\" |
| 92 | + (synonym \"bar\")) |
| 93 | + ```" |
96 | 94 | [head & tail]
|
97 |
| - `(match* '~head (vec (flatten (vector ~@tail))))) |
| 95 | + `(match* '~head ~@tail)) |
| 96 | + |
| 97 | +(defn- parse-string |
| 98 | + [string] |
| 99 | + (mapv #(LineParser/parseTerm %) (str/split string #"\s+"))) |
98 | 100 |
|
99 |
| -(defn- parse-string [string] (mapv #(LineParser/parseTerm %) (str/split string #"\s+"))) |
| 101 | +(defn- parse-query |
| 102 | + [query] |
| 103 | + (cond |
| 104 | + (string? query) |
| 105 | + (.parse *query-parser* query) |
100 | 106 |
|
101 |
| -(defn- parse-query [query] |
102 |
| - (cond (string? query) (.parse *query-parser* query) |
103 |
| - (map? query) (model/rawq {:query query}))) |
| 107 | + (map? query) |
| 108 | + (model/rawq {:query query}))) |
104 | 109 |
|
105 |
| -(defn delete? [obj] (instance? DeleteInstruction obj)) |
| 110 | +(defn delete? |
| 111 | + [obj] |
| 112 | + (instance? DeleteInstruction obj)) |
106 | 113 |
|
107 |
| -(defn delete [string] (DeleteInstruction. (parse-string string))) |
| 114 | +(defn delete |
| 115 | + [string] |
| 116 | + (DeleteInstruction. (parse-string string))) |
108 | 117 |
|
109 |
| -(defn synonym? [obj] (instance? SynonymInstruction obj)) |
| 118 | +(defn synonym? |
| 119 | + [obj] |
| 120 | + (instance? SynonymInstruction obj)) |
110 | 121 |
|
111 | 122 | (defn synonym
|
112 | 123 | "Create a synonym instruction."
|
|
154 | 165 | not (wrap (cons NOT (parse-boolean-input terms)))
|
155 | 166 | (map parse-boolean-input input))))
|
156 | 167 |
|
157 |
| -;; |
158 |
| - |
159 |
| -(def rule-count (atom 0)) |
160 |
| - |
161 |
| -(defn ^:no-doc match* |
162 |
| - ;; implements match for use by match macro |
163 |
| - [input instructions] |
164 |
| - (let [ord (swap! rule-count inc) |
165 |
| - compiled (Instructions. ord ord instructions)] |
166 |
| - (fn [^TrieMapRulesCollectionBuilder rules-builder] |
167 |
| - (cond |
168 |
| - ;; string rules |
169 |
| - (string? input) |
170 |
| - (let [simple-input (Input/parseSimpleInput input)] |
171 |
| - (.addRule rules-builder |
172 |
| - ^Input$SimpleInput simple-input |
173 |
| - ^Instructions compiled)) |
174 |
| - |
175 |
| - ;; boolean rules |
176 |
| - (list? input) |
177 |
| - (do |
178 |
| - (when (some delete? instructions) |
179 |
| - (throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input"))) |
180 |
| - (when (some synonym? instructions) |
181 |
| - (throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input"))) |
182 |
| - (let [boolean-input-parser (BooleanInputParser.) |
183 |
| - bool-input (Input$BooleanInput. (parse-boolean-input input) |
184 |
| - boolean-input-parser |
185 |
| - (pr-str input))] |
186 |
| - |
187 |
| - (.applyInstructions bool-input compiled rules-builder) |
188 |
| - (doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))] |
189 |
| - (let [input (LineParser/parseInput (str/join \space (.getTerms literal)))] |
190 |
| - (.addRule rules-builder |
191 |
| - ^Input$SimpleInput input |
192 |
| - ^BooleanInputLiteral literal))))) |
193 |
| - |
194 |
| - :else (throw (IllegalArgumentException. "Can only parse a string or list as input")))))) |
| 168 | +(defn parse-simple-input |
| 169 | + ^Input$SimpleInput |
| 170 | + [string] |
| 171 | + (Input/parseSimpleInput string)) |
| 172 | + |
| 173 | +(extend-protocol CommonRulesRewriterBuilder |
| 174 | + List |
| 175 | + (common-rules-rewriter* [rules] |
| 176 | + (let [rules (flatten rules) |
| 177 | + counter (atom 0) |
| 178 | + rules-builder (TrieMapRulesCollectionBuilder. true) |
| 179 | + boolean-input-parser (BooleanInputParser.)] |
| 180 | + (doseq [{:keys [input instructions]} rules] |
| 181 | + (let [ord (swap! counter inc) |
| 182 | + id (str input "#" ord) |
| 183 | + instructions (Instructions. ord id instructions)] |
| 184 | + (cond |
| 185 | + (string? input) |
| 186 | + (.addRule rules-builder (parse-simple-input input) instructions) |
| 187 | + |
| 188 | + (list? input) |
| 189 | + (do |
| 190 | + (when (some delete? instructions) |
| 191 | + (throw (IllegalArgumentException. "Cannot use a delete instruction with boolean input"))) |
| 192 | + |
| 193 | + (when (some synonym? instructions) |
| 194 | + (throw (IllegalArgumentException. "Cannot use a synonym instruction with boolean input"))) |
| 195 | + |
| 196 | + (let [bool-input (Input$BooleanInput. (parse-boolean-input input) boolean-input-parser (pr-str input))] |
| 197 | + ;; inputPattern.applyInstructions(instructions, builder); |
| 198 | + (.applyInstructions bool-input instructions rules-builder)))))) |
| 199 | + |
| 200 | + ;; Add boolean literals at the end |
| 201 | + (doseq [^BooleanInputLiteral literal (.values (.getLiteralRegister boolean-input-parser))] |
| 202 | + (let [string (str/join \space (.getTerms literal)) |
| 203 | + input (parse-simple-input string)] |
| 204 | + (.addRule rules-builder input literal))) |
| 205 | + |
| 206 | + ;; Build the rules collection and wrap it in a RewriterFactory |
| 207 | + (rewriter-factory (.build rules-builder))))) |
0 commit comments