From 538cad8be0eec94371b11c54ed58b6ea1338f711 Mon Sep 17 00:00:00 2001 From: Jeremiah Via Date: Fri, 19 Jan 2024 12:52:56 -0800 Subject: [PATCH] Update querqy tests (#9) --- .github/workflows/pull-request.yml | 4 +- src/com/nytimes/querqy/elasticsearch.clj | 5 +- src/com/nytimes/querqy/model.clj | 307 ++++++++++++------- test/com/nytimes/querqy/commonrules_test.clj | 152 +++++++-- test/com/nytimes/querqy/replace_test.clj | 20 +- 5 files changed, 343 insertions(+), 145 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 8c47c3d..697bb82 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -5,8 +5,10 @@ on: branches: [ main ] jobs: - build: + test: runs-on: ubuntu-latest + strategy: + fail-fast: false steps: - uses: actions/checkout@v3 diff --git a/src/com/nytimes/querqy/elasticsearch.clj b/src/com/nytimes/querqy/elasticsearch.clj index 67cb63e..6ee54b1 100644 --- a/src/com/nytimes/querqy/elasticsearch.clj +++ b/src/com/nytimes/querqy/elasticsearch.clj @@ -163,9 +163,8 @@ boost-up (forv [query (.getBoostUpQueries query)] (emit* query opts)) boost-down (forv [^BoostQuery query (.getBoostDownQueries query)] - ;; down boost are converted to negative numbers here - (let [boosted (m/boostq {:boost (- (.getBoost query)) - :query (.getQuery query)})] + ;; down boost are converted to negative numbers here + (let [boosted (m/boost-query (- (.getBoost query)) (.getQuery query))] (emit* boosted opts))) functions (concat boost-up boost-down) default-query {:function_score {:query user-query, :functions []}}] diff --git a/src/com/nytimes/querqy/model.clj b/src/com/nytimes/querqy/model.clj index 019b602..9bdd4e9 100644 --- a/src/com/nytimes/querqy/model.clj +++ b/src/com/nytimes/querqy/model.clj @@ -2,10 +2,25 @@ "Builders for classes in the `querqy.model` package." (:require [clojure.core.protocols :as cp] - [clojure.datafy :refer [datafy]] - [clojure.string :as str]) + [clojure.datafy :refer [datafy]]) (:import - (querqy.model BooleanParent BooleanQuery BoostQuery BoostedTerm Clause Clause$Occur DisjunctionMaxQuery ExpandedQuery Input$SimpleInput MatchAllQuery QuerqyQuery Query Term))) + (querqy.model + BooleanParent + BooleanQuery + BoostQuery + BoostedTerm + Clause + Clause$Occur + DisjunctionMaxQuery + ExpandedQuery + Input$SimpleInput + MatchAllQuery + QuerqyQuery + Query + Term))) + +;; ---------------------------------------------------------------------- +;; Helpers (def should Clause$Occur/SHOULD) (def must Clause$Occur/MUST) @@ -14,67 +29,171 @@ (defn get-occur [^Clause clause] (.getOccur clause)) -(defn occur->kw [^Clause$Occur occur] - (keyword (str/lower-case (.name occur)))) +(def occur->kw + {should :should + must :must + must-not :must-not}) -(defn term? [obj] (instance? Term obj)) +(def kw->occur + {:should should + :must must + :must-not must-not}) + +;; ---------------------------------------------------------------------- +;; Term (defn term - [{:keys [parent field value generated] - :or {generated false}}] - {:pre [(string? value)]} - (Term. parent field value generated)) + "Create a `querqy.model.Term`." + ([value] + (term nil value nil)) + + ([field value] + (term field value nil)) + + ([field value {:keys [parent generated] :or {generated false}}] + (assert (string? value)) + (assert (boolean? generated)) + (Term. parent field value generated))) + +(defn term? + "Return true if object is a `querqy.model.Term`" + [obj] + (instance? Term obj)) + +(extend-protocol cp/Datafiable + Term + (datafy [^Term t] + (with-meta + (cond-> {:term (str (.getValue t))} + (.getField t) + (assoc :field (.getField t))) + {:type Term}))) -(defn boosted-term? [obj] (instance? BoostedTerm obj)) +;; ---------------------------------------------------------------------- +;; BoostedTerm (defn boosted-term - [{:keys [parent field value boost]}] - {:pre [(string? value) (number? boost)]} - (BoostedTerm. parent field value boost)) + "Create a `querqy.model.BoostedTerm`." + ([value boost] + (boosted-term nil value boost nil)) + + ([field value boost] + (boosted-term field value boost nil)) + + ([field value boost {:keys [parent]}] + (assert (string? value)) + (assert (number? boost)) + (BoostedTerm. parent field value boost))) + +(defn boosted-term? + [obj] + (instance? BoostedTerm obj)) + +(extend-protocol cp/Datafiable + BoostedTerm + (datafy [^BoostedTerm t] + (with-meta + (cond-> {:term (str (.getValue t)) + :boost (.getBoost t)} + (.getField t) + (assoc :field (.getField t))) + {:type BoostedTerm}))) + +;; ---------------------------------------------------------------------- +;; MatchAllQuery (defn match-all? [obj] (instance? MatchAllQuery obj)) (defn match-all [] (MatchAllQuery.)) -(defn dismaxq? [obj] (instance? DisjunctionMaxQuery obj)) +(extend-protocol cp/Datafiable + MatchAllQuery + (datafy [_] {:match_all {}})) -(defn dismaxq - [{:keys [parent occur generated clauses] - :or {occur should - generated false}}] - (let [query (DisjunctionMaxQuery. parent occur generated)] - (doseq [^Clause clause clauses] - (.addClause query (.clone clause query))) - query)) +;; ---------------------------------------------------------------------- +;; DisjunctionMaxQuery + +(defn dismax + ([clauses] + (dismax :should clauses nil)) + + ([occur clauses] + (dismax occur clauses nil)) + + ([occur clauses {:keys [parent generated] :or {generated false}}] + (let [occur (kw->occur occur) + query (DisjunctionMaxQuery. parent occur generated)] + (doseq [^Clause clause clauses] + (.addClause query (.clone clause query))) + query))) + +(defn dismax? + [obj] + (instance? DisjunctionMaxQuery obj)) -(defn boostq? [obj] (instance? BoostQuery obj)) +(extend-protocol cp/Datafiable + DisjunctionMaxQuery + (datafy [^DisjunctionMaxQuery q] + (set (mapv datafy (.getClauses q))))) + +;; ---------------------------------------------------------------------- +;; BoostQuery -(defn boostq - [{:keys [^QuerqyQuery query boost]}] +(defn boost-query + [boost ^QuerqyQuery query] {:pre [(number? boost)]} (BoostQuery. query boost)) -(defn boolq? [obj] (instance? BooleanQuery obj)) - -(defn boolq - [{:keys [^BooleanParent parent - ^Clause$Occur occur - generated - clauses] - :or {generated false - occur should - clauses []}}] - (let [bq (BooleanQuery. parent occur generated)] - (doseq [^Clause clause clauses] - (.addClause bq (.clone clause bq))) - bq)) +(defn boost-query? + [obj] + (instance? BoostQuery obj)) + +(extend-protocol cp/Datafiable + BoostQuery + (datafy [^BoostQuery q] + (with-meta + {:query (datafy (.getQuery q)) + :boost (.getBoost q)} + {:type BooleanQuery}))) + +;; ---------------------------------------------------------------------- +;; BooleanQuery + +(defn bool + ([clauses] + (bool :should clauses nil)) + + ([occur clauses] + (bool occur clauses nil)) + + ([occur clauses {:keys [^BooleanParent parent generated] :or {generated false}}] + (let [occur (kw->occur occur) + query (BooleanQuery. parent occur generated)] + (doseq [^Clause clause clauses] + (.addClause query (.clone clause query))) + query))) + +(defn bool? + [obj] + (instance? BooleanQuery obj)) + +(extend-protocol cp/Datafiable + BooleanQuery + (datafy [^BooleanQuery q] + (with-meta + (-> (group-by (comp occur->kw get-occur) (.getClauses q)) + (update-vals (partial mapv datafy))) + {:type BooleanQuery}))) + +;; ---------------------------------------------------------------------- +;; (defrecord RawQuery [parent occur query generated?] QuerqyQuery (clone [_ new-parent] (RawQuery. new-parent occur query generated?)) - (clone [_ new-parent generated?] - (RawQuery. new-parent occur query generated?))) + (clone [_ new-parent generated'?] + (RawQuery. new-parent occur query generated'?))) (defn rawq? [obj] (instance? RawQuery obj)) @@ -84,61 +203,58 @@ generated false}}] (RawQuery. parent occur query generated)) -(defn q? [obj] (instance? Query obj)) +;; ---------------------------------------------------------------------- +;; Query -(defn q - [{:keys [generated clauses] - :or {generated false}}] - (let [query (Query. generated)] - (doseq [^Clause clause clauses] - (.addClause query (.clone clause query))) - query)) +(defn query + ([clauses] + (query clauses nil)) -(defn querqyq? [obj] (instance? QuerqyQuery obj)) + ([clauses {:keys [generated] :or {generated false}}] + (let [query (Query. generated)] + (doseq [^Clause clause clauses] + (.addClause query (.clone clause query))) + query))) -(defn expandedq? [obj] (instance? ExpandedQuery obj)) +(defn query? + [obj] + (instance? Query obj)) -(defn expandedq - [{:keys [query boost-up boost-down filter] - :or {boost-up [] - boost-down [] - filter []}}] +;; ---------------------------------------------------------------------- +;; ExpandedQuery + +(defn expanded + [query & {:keys [boost-up boost-down filter] + :or {boost-up [] + boost-down [] + filter []}}] (ExpandedQuery. query filter boost-up boost-down)) +(defn expanded? + [obj] + (instance? ExpandedQuery obj)) + +(extend-protocol cp/Datafiable + ExpandedQuery + (datafy [^ExpandedQuery q] + (let [boosts (concat (.getBoostUpQueries q) (.getBoostDownQueries q))] + (with-meta + (cond-> {:query (datafy (.getUserQuery q))} + (.getFilterQueries q) + (assoc :filter (mapv datafy (.getFilterQueries q))) + + (seq boosts) + (assoc :boost (mapv datafy boosts))) + {:type ExpandedQuery})))) + ;; ---------------------------------------------------------------------- ;; datafy (extend-protocol cp/Datafiable - BooleanQuery - (datafy [^BooleanQuery q] - {:type BooleanQuery - :occur (occur->kw (.getOccur q)) - :clauses (mapv datafy (.getClauses q))}) - - BoostQuery - (datafy [^BoostQuery q] - {:type BoostQuery - :boost (.getBoost q) - :query (datafy (.getQuery q))}) RawQuery (datafy [^RawQuery q] - {:type RawQuery - :query (.-query q)}) - - DisjunctionMaxQuery - (datafy [^DisjunctionMaxQuery q] - {:type DisjunctionMaxQuery - :occur (occur->kw (.getOccur q)) - :clauses (mapv datafy (.getClauses q))}) - - ExpandedQuery - (datafy [^ExpandedQuery q] - {:type ExpandedQuery - :user-query (datafy (.getUserQuery q)) - :boost-up (mapv datafy (.getBoostUpQueries q)) - :boost-down (mapv datafy (.getBoostDownQueries q)) - :filter (mapv datafy (.getFilterQueries q))}) + {:raw/query (.-query q)}) Input$SimpleInput (datafy [^Input$SimpleInput i] @@ -147,26 +263,7 @@ :left-boundary? (.isRequiresLeftBoundary i) :right-boundary? (.isRequiresRightBoundary i)}) - MatchAllQuery - (datafy [^MatchAllQuery _q] - {:type MatchAllQuery - :match_all {}}) - Query (datafy [^Query q] - {:type Query - :occur (occur->kw (.getOccur q)) - :clauses (mapv datafy (.getClauses q))}) - - BoostedTerm - (datafy [^BoostedTerm t] - {:type BoostedTerm - :field (.getField t) - :boost (.getBoost t) - :value (str (.getValue t))}) - - Term - (datafy [^Term t] - {:type Term - :field (.getField t) - :value (str (.getValue t))})) + (-> (group-by (comp occur->kw get-occur) (.getClauses q)) + (update-vals (partial mapv datafy))))) diff --git a/test/com/nytimes/querqy/commonrules_test.clj b/test/com/nytimes/querqy/commonrules_test.clj index 8921c08..75c2c64 100644 --- a/test/com/nytimes/querqy/commonrules_test.clj +++ b/test/com/nytimes/querqy/commonrules_test.clj @@ -83,22 +83,127 @@ (datafy (querqy/rewrite rw string))) (deftest rewriter-test - (facts "DSL & Resource Rewriters have the same output" - (rewrite dsl-rewriter "A1") => (rewrite resource-rewriter "A1") - (rewrite dsl-rewriter "A2 B2") => (rewrite resource-rewriter "A2 B2") - (rewrite dsl-rewriter "A3") => (rewrite resource-rewriter "A3") - (rewrite dsl-rewriter "A4 B4") => (rewrite resource-rewriter "A4 B4") - (rewrite dsl-rewriter "A5") => (rewrite resource-rewriter "A5") - (rewrite dsl-rewriter "A6") => (rewrite resource-rewriter "A6") - (rewrite dsl-rewriter "A7 B7") => (rewrite resource-rewriter "A7 B7") - (rewrite dsl-rewriter "A8") => (rewrite resource-rewriter "A8") - (rewrite dsl-rewriter "A9") => (rewrite resource-rewriter "A9") - (rewrite dsl-rewriter "B9") => (rewrite resource-rewriter "B9") - (rewrite dsl-rewriter "A10 B10") => (rewrite resource-rewriter "A10 B10") - (rewrite dsl-rewriter "A11") => (rewrite resource-rewriter "A11") - (rewrite dsl-rewriter "A11 B11") => (rewrite resource-rewriter "A11 B11") - (rewrite dsl-rewriter "best netflix show") => (rewrite resource-rewriter "best netflix show") - (rewrite dsl-rewriter "best amazon show") => (rewrite resource-rewriter "best amazon show"))) + (facts "A1" + (rewrite resource-rewriter "A1") + => {:query {:should [#{{:term "A1"} {:term "B1"}}]}} + + (rewrite dsl-rewriter "A1") + => {:query {:should [#{{:term "A1"} {:term "B1"}}]}}) + + (facts "A2 B2" + (rewrite resource-rewriter "A2 B2") + => {:query + {:should + [#{{:term "A2"} {:term "C2"}} + #{{:term "B2"} {:term "C2"}}]}} + + (rewrite dsl-rewriter "A2 B2") + => {:query + {:should + [#{{:term "A2"} {:term "C2"}} + #{{:term "B2"} {:term "C2"}}]}}) + + (facts "A3" + (rewrite resource-rewriter "A3") + => {:query {:should [#{{:term "A3"} {:term "B3"} {:term "C3"}}]}} + + (rewrite dsl-rewriter "A3") + => {:query {:should [#{{:term "A3"} {:term "B3"} {:term "C3"}}]}}) + + (facts "A4 B4" + (rewrite resource-rewriter "A4 B4") + => {:query {:should [#{{:term "C4"} {:term "D4"} {:term "A4"}} + #{{:term "C4"} {:term "B4"} {:term "D4"}}]}} + + (rewrite dsl-rewriter "A4 B4") + => {:query {:should [#{{:term "A4"} {:term "C4"} {:term "D4"}} + #{{:term "B4"} {:term "C4"} {:term "D4"}}]}}) + + (facts "A5" + (rewrite resource-rewriter "A5") + => {:query {:should [#{{:term "A5"}}]}, + :boost [{:query {:must [#{{:term "B5"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "A5") + => {:query {:should [#{{:term "A5"}}]}, + :boost [{:query {:must [#{{:term "B5"}}]}, :boost 2.0}]}) + + (facts "A6" + (rewrite resource-rewriter "A6") + => {:query {:should [#{{:term "A6"}}]}, + :filter [{:must [#{{:term "B6"}}]}]} + + (rewrite dsl-rewriter "A6") + => {:query {:should [#{{:term "A6"}}]}, + :filter [{:must [#{{:term "B6"}}]}]}) + + (facts "Terms may be deleted: A7 B7" + (rewrite resource-rewriter "A7 B7") + => {:query {:should [#{{:term "A7"}}]}} + + (rewrite dsl-rewriter "A7 B7") + => {:query {:should [#{{:term "A7"}}]}}) + + (facts "A8" + (rewrite resource-rewriter "A8") + => {:query {:should [#{{:term "B8"} {:term "A8"}}]}, + :boost [{:query {:must [#{{:term "C8"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "A8") + => {:query {:should [#{{:term "B8"} {:term "A8"}}]}, + :boost [{:query {:must [#{{:term "C8"}}]}, :boost 2.0}]}) + + (facts "A9" + (rewrite resource-rewriter "A9") + => {:query {:should [#{{:term "A9"}}]}, + :boost [{:query {:must [#{{:term "C9"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "A9") + => {:query {:should [#{{:term "A9"}}]}, + :boost [{:query {:must [#{{:term "C9"}}]}, :boost 2.0}]}) + + (facts "A10 B10" + (rewrite resource-rewriter "A10 B10") + => {:query {:should [#{{:term "A10"}} #{{:term "B10"}}]}, + :boost [{:query {:must [#{{:term "C10"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "A10 B10") + => {:query {:should [#{{:term "A10"}} #{{:term "B10"}}]}, + :boost [{:query {:must [#{{:term "C10"}}]}, :boost 2.0}]}) + + (facts "A11" + (rewrite resource-rewriter "A11") + => {:query {:should [#{{:term "A11"}}]}, + :boost [{:query {:must [#{{:term "C11"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "A11") + => {:query {:should [#{{:term "A11"}}]}, + :boost [{:query {:must [#{{:term "C11"}}]}, :boost 2.0}]}) + + (facts "A11 B11" + (rewrite resource-rewriter "A11 B11") + => {:query {:should [#{{:term "A11"}} #{{:term "B11"}}]}} + + (rewrite dsl-rewriter "A11 B11") + => {:query {:should [#{{:term "A11"}} #{{:term "B11"}}]}}) + + (facts "best netflix show" + (rewrite resource-rewriter "best netflix show") + => {:query {:should [#{{:term "best"}} #{{:term "netflix"}} #{{:term "show"}}]}, + :boost [{:query {:must [#{{:term "netflix"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "best netflix show") + => {:query {:should [#{{:term "best"}} #{{:term "netflix"}} #{{:term "show"}}]}, + :boost [{:query {:must [#{{:term "netflix"}}]}, :boost 2.0}]}) + + (facts "best amazon show" + (rewrite resource-rewriter "best amazon show") + => {:query {:should [#{{:term "best"}} #{{:term "amazon"}} #{{:term "show"}}]}, + :boost [{:query {:must [#{{:term "amazon"}}]}, :boost 2.0}]} + + (rewrite dsl-rewriter "best amazon show") + => {:query {:should [#{{:term "best"}} #{{:term "amazon"}} #{{:term "show"}}]}, + :boost [{:query {:must [#{{:term "amazon"}}]}, :boost 2.0}]})) ;; Custom Functions @@ -116,13 +221,8 @@ (deftest custom-functions-test (facts "helper functions can return multiple match rules" (rewrite rules-with-custom-functions "chickpea salad") - =in=> - {:user-query - {:occur :should, - :clauses [{:occur :should, - :clauses [{:value "chickpea"} - {:occur :should, - :clauses - [{:occur :must, :clauses [{:value "garbanzo"}]} - {:occur :must, :clauses [{:value "bean"}]}]}]} - {:occur :should, :clauses [{:value "salad"}]}]}})) \ No newline at end of file + => {:query + {:should + [#{{:term "chickpea"} + {:must [#{{:term "garbanzo"}} #{{:term "bean"}}]}} + #{{:term "salad"}}]}})) diff --git a/test/com/nytimes/querqy/replace_test.clj b/test/com/nytimes/querqy/replace_test.clj index 583431e..b0f4134 100644 --- a/test/com/nytimes/querqy/replace_test.clj +++ b/test/com/nytimes/querqy/replace_test.clj @@ -13,10 +13,10 @@ "util to turn expanded query into a string. this only works for this particular set of tests." [q] (->> (datafy q) - :user-query - :clauses - (mapcat :clauses) - (map :value) + :query + :should + (mapcat identity) + (map :term) (str/join " ") (str/trim))) @@ -73,9 +73,9 @@ (deftest replace-test (are [input output] (= output (query->string (querqy/rewrite rewriter input))) "cheapest cheaper cheap" "cheap cheap cheap" - "samrtphone" "smartphone" - "computerscreen" "computer screen" - "iphones" "iphone" - "ihpone ihpone" "iphone iphone" - "galaxy+ phone" "galaxy plus phone" - "The Batman (2022)" "The Batman 2022")) + "samrtphone" "smartphone" + "computerscreen" "computer screen" + "iphones" "iphone" + "ihpone ihpone" "iphone iphone" + "galaxy+ phone" "galaxy plus phone" + "The Batman (2022)" "The Batman 2022"))