From 3900118e134ad1eba4c610ae75279f6cea7629ac Mon Sep 17 00:00:00 2001 From: Tobias Kaessmann Date: Wed, 26 May 2021 15:53:23 +0200 Subject: [PATCH 01/15] SOLR-15437: Add test to reproduce the problem --- .../apache/solr/ltr/TestLTROnSolrCloud.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 21b71c3e5ec..4dfcf3e8859 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -70,6 +70,29 @@ public void tearDown() throws Exception { super.tearDown(); } + @Test + public void testSimpleQueryCustomSort() throws Exception { + SolrQuery query = new SolrQuery("*:*"); + query.setRequestHandler("/query"); + query.setFields("*,score,[shard]"); + query.setParam("rows", "8"); + query.setParam("sort", "id asc"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = + solrCluster.getSolrClient().query(COLLECTION, query); + assertEquals(8, queryResponse.getResults().getNumFound()); + assertEquals("8", queryResponse.getResults().get(0).get("id").toString()); + assertEquals("7", queryResponse.getResults().get(1).get("id").toString()); + assertEquals("6", queryResponse.getResults().get(2).get("id").toString()); + assertEquals("5", queryResponse.getResults().get(3).get("id").toString()); + assertEquals("4", queryResponse.getResults().get(4).get("id").toString()); + assertEquals("3", queryResponse.getResults().get(5).get("id").toString()); + assertEquals("2", queryResponse.getResults().get(6).get("id").toString()); + assertEquals("1", queryResponse.getResults().get(7).get("id").toString()); + } + + @Test // commented 4-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018 // commented out on: 24-Dec-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018 From 9f9e99dbaf3d80acd41f5271df7e7a5ebd3a3a15 Mon Sep 17 00:00:00 2001 From: Tobias Kaessmann Date: Thu, 27 May 2021 15:49:47 +0200 Subject: [PATCH 02/15] SOLR-15437: Add support for reranking in cloud and custom sort The basic idea is to have two queues in the QueryComponent. First get the results of the rerankend docs and after them place the other docs --- .../apache/solr/ltr/TestLTROnSolrCloud.java | 36 ++++++++++++++ .../handler/component/QueryComponent.java | 34 +++++++++++-- .../component/ShardFieldSortedHitQueue.java | 23 ++++----- ...ortedHitQueueWithSameShardCompareSkip.java | 48 +++++++++++++++++++ .../solr/search/AbstractReRankQuery.java | 4 ++ 5 files changed, 127 insertions(+), 18 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 4dfcf3e8859..fe86fd7a786 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -26,6 +26,8 @@ import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.MiniSolrCloudCluster; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.ltr.feature.OriginalScoreFeature; @@ -92,6 +94,40 @@ public void testSimpleQueryCustomSort() throws Exception { assertEquals("1", queryResponse.getResults().get(7).get("id").toString()); } + @Test + public void testSimpleQueryCustomSortWithASubResults() throws Exception { + final int reRankDocs = 2; + SolrQuery query = new SolrQuery("*:*"); + query.setRequestHandler("/query"); + query.setFields("*,score,[shard]"); + query.setParam("rows", "8"); + query.setParam("sort", "id asc"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs="+reRankDocs+"}"); + + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); + SolrDocumentList results = queryResponse.getResults(); + assertEquals(8, results.getNumFound()); + + int docCounter = 0; + float lastScore = Float.MAX_VALUE; + double lastId = 0d; + for(SolrDocument d : results){ + float score = (float) d.getFieldValue("score"); + + double id = Double.parseDouble((String) d.getFirstValue("id")); + if(docCounter < reRankDocs){ + final float assertedCalculatedScore = (float) Math.pow(id, 2.0d) + 2.1f; + assertEquals(assertedCalculatedScore, score, 0.0); + assertTrue(lastScore > score); + } else if(docCounter > reRankDocs + 1) { + assertTrue(lastId < id); + } + lastScore = score; + lastId = id; + + docCounter++; + } + } @Test // commented 4-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018 diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 2f5f8cf6791..133cbeaa303 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -73,6 +73,7 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SortableTextField; +import org.apache.solr.search.AbstractReRankQuery; import org.apache.solr.search.CursorMark; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; @@ -865,7 +866,19 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) - final ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueueWithSameShardCompareSkip(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + + ShardFieldSortedHitQueue reRankQueue = null; + int reRankDocsSize = 0; + if(rb.getRankQuery() != null){ + final RankQuery rankQuery = rb.getRankQuery(); + if(rankQuery instanceof AbstractReRankQuery){ + reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); + reRankQueue = new ShardFieldSortedHitQueueWithSameShardCompareSkip(new SortField[]{SortField.FIELD_SCORE}, Math.min(reRankDocsSize, ss.getCount()), rb.req.getSearcher()); + + queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + } + } NamedList shardInfo = null; if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { @@ -962,6 +975,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { @SuppressWarnings({"rawtypes"}) NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema); + int docCounter = 0; // go through every doc in this response, construct a ShardDoc, and // put it in the priority queue so it can be ordered. for (int i=0; i queuesize int resultSize = queue.size() - ss.getOffset(); + if(reRankQueue != null) { + resultSize += reRankQueue.size(); + } resultSize = Math.max(0, resultSize); // there may not be any docs in range Map resultIds = new HashMap<>(); for (int i=resultSize-1; i>=0; i--) { ShardDoc shardDoc = queue.pop(); - shardDoc.positionInResponse = i; + if(shardDoc == null && reRankQueue != null) { + shardDoc = reRankQueue.pop(); + } + shardDoc.positionInResponse = i; // FIXME check if we can handle a possible NPE more elegantly // Need the toString() for correlation with other lists that must // be strings (like keys in highlighting, explain, etc) resultIds.put(shardDoc.id.toString(), shardDoc); diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java index 5d296ebb618..bfc412c7816 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java @@ -29,7 +29,14 @@ import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; -// used by distributed search to merge results. +/** + * Used by distributed search to merge results. + * + * To sort documents, their sort values are compared. + * + * Documents are also compared if they are on the same shard. This is importing for reRanking to work. + * Use the ShardFieldSortedHitQueueWithSameShardCompareSkip to use the orderInShard if docs are on the same shard. + */ public class ShardFieldSortedHitQueue extends PriorityQueue { /** Stores a comparator corresponding to each field being sorted by */ @@ -65,25 +72,11 @@ public ShardFieldSortedHitQueue(SortField[] fields, int size, IndexSearcher sear this.fields[i] = new SortField(fieldname, fields[i].getType(), fields[i].getReverse()); } - - //System.out.println("%%%%%%%%%%%%%%%%%% got "+fields[i].getType() +" for "+ fieldname +" fields[i].getReverse(): "+fields[i].getReverse()); } } @Override protected boolean lessThan(ShardDoc docA, ShardDoc docB) { - // If these docs are from the same shard, then the relative order - // is how they appeared in the response from that shard. - if (docA.shard == docB.shard) { - // if docA has a smaller position, it should be "larger" so it - // comes before docB. - // This will handle sorting by docid within the same shard - - // comment this out to test comparators. - return !(docA.orderInShard < docB.orderInShard); - } - - // run comparators final int n = comparators.length; int c = 0; diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java new file mode 100644 index 00000000000..68584498d22 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SortField; + +/** + * Used by distributed search to merge results. + * If two documents are on the same shard, their orderInShard is used for sorting instead of comparing their sort values. + * This shortcut does only work if we are not reRanking the results. + */ +public class ShardFieldSortedHitQueueWithSameShardCompareSkip extends ShardFieldSortedHitQueue { + + public ShardFieldSortedHitQueueWithSameShardCompareSkip(SortField[] fields, int size, IndexSearcher searcher) { + super(fields, size, searcher); + } + + @Override + protected boolean lessThan(ShardDoc docA, ShardDoc docB) { + // If these docs are from the same shard, then the relative order + // is how they appeared in the response from that shard. + if (docA.shard == docB.shard) { + // if docA has a smaller position, it should be "larger" so it + // comes before docB. + // This will handle sorting by docid within the same shard + + // comment this out to test comparators. + return !(docA.orderInShard < docB.orderInShard); + } + + return super.lessThan(docA, docB); + } +} \ No newline at end of file diff --git a/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java b/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java index f0e67025886..c92fc22a312 100644 --- a/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java +++ b/solr/core/src/java/org/apache/solr/search/AbstractReRankQuery.java @@ -89,4 +89,8 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo public void visit(QueryVisitor visitor) { visitor.visitLeaf(this); } + + public int getReRankDocs() { + return reRankDocs; + } } From ced7de3de11ac8de9baa9d5952f54a33abf75ca7 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Thu, 27 May 2021 16:31:15 +0200 Subject: [PATCH 03/15] [SOLR-15437] remove unnecessary docCounter --- .../java/org/apache/solr/handler/component/QueryComponent.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 133cbeaa303..0ca886658c7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -975,7 +975,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { @SuppressWarnings({"rawtypes"}) NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema); - int docCounter = 0; // go through every doc in this response, construct a ShardDoc, and // put it in the priority queue so it can be ordered. for (int i=0; i Date: Fri, 28 May 2021 09:12:12 +0200 Subject: [PATCH 04/15] [SOLR-15437] remove extra subclass of ShardFieldSortedHitQueue that was used to keep the shortcut logic. Instead, make it configurable via constructor param. --- .../handler/component/QueryComponent.java | 6 +-- .../component/ShardFieldSortedHitQueue.java | 30 +++++++++++- ...ortedHitQueueWithSameShardCompareSkip.java | 48 ------------------- 3 files changed, 31 insertions(+), 53 deletions(-) delete mode 100644 solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 0ca886658c7..e48aee20cdf 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -866,7 +866,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) - ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueueWithSameShardCompareSkip(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); ShardFieldSortedHitQueue reRankQueue = null; int reRankDocsSize = 0; @@ -874,9 +874,9 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { final RankQuery rankQuery = rb.getRankQuery(); if(rankQuery instanceof AbstractReRankQuery){ reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); - reRankQueue = new ShardFieldSortedHitQueueWithSameShardCompareSkip(new SortField[]{SortField.FIELD_SCORE}, Math.min(reRankDocsSize, ss.getCount()), rb.req.getSearcher()); + reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, Math.min(reRankDocsSize, ss.getCount()), rb.req.getSearcher()); - queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher(), false); } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java index bfc412c7816..94667d19d4d 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueue.java @@ -33,9 +33,11 @@ * Used by distributed search to merge results. * * To sort documents, their sort values are compared. + * Per default, this does not happen for documents that are on the same shard. Instead, their orderInShard is used. + * This behavior can be changed via the param useSameShardShortcut. * - * Documents are also compared if they are on the same shard. This is importing for reRanking to work. - * Use the ShardFieldSortedHitQueueWithSameShardCompareSkip to use the orderInShard if docs are on the same shard. + * The useSameShardShortcut option is necessary for reRanking to work in SolrCloud mod, because we have to disable the + * skip for the results that should not be reRanked in the full result set on the coordinator, but were reRanked on the shards. */ public class ShardFieldSortedHitQueue extends PriorityQueue { @@ -48,12 +50,25 @@ public class ShardFieldSortedHitQueue extends PriorityQueue { /** The order of these fieldNames should correspond to the order of sort field values retrieved from the shard */ protected List fieldNames = new ArrayList<>(); + /** + * Used to enable / disable a shortcut in the lessThen( )-method for documents that are on the same shard. + * If enabled, the orderInShard is used for sorting instead of comparing the sort values of the documents. + * This shortcut does only work in SolrCloud mode if we are not reRanking the results. + */ + private final boolean useSameShardShortcut; + @SuppressWarnings({"unchecked", "rawtypes"}) public ShardFieldSortedHitQueue(SortField[] fields, int size, IndexSearcher searcher) { + this(fields, size, searcher, true); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + public ShardFieldSortedHitQueue(SortField[] fields, int size, IndexSearcher searcher, boolean useSameShardShortcut) { super(size); final int n = fields.length; comparators = new Comparator[n]; this.fields = new SortField[n]; + this.useSameShardShortcut = useSameShardShortcut; for (int i = 0; i < n; ++i) { // keep track of the named fields @@ -77,6 +92,17 @@ public ShardFieldSortedHitQueue(SortField[] fields, int size, IndexSearcher sear @Override protected boolean lessThan(ShardDoc docA, ShardDoc docB) { + // If these docs are from the same shard, then the relative order + // is how they appeared in the response from that shard. + if (useSameShardShortcut && docA.shard == docB.shard) { + // if docA has a smaller position, it should be "larger" so it + // comes before docB. + // This will handle sorting by docid within the same shard + + // comment this out to test comparators. + return !(docA.orderInShard < docB.orderInShard); + } + // run comparators final int n = comparators.length; int c = 0; diff --git a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java b/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java deleted file mode 100644 index 68584498d22..00000000000 --- a/solr/core/src/java/org/apache/solr/handler/component/ShardFieldSortedHitQueueWithSameShardCompareSkip.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.handler.component; - -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.SortField; - -/** - * Used by distributed search to merge results. - * If two documents are on the same shard, their orderInShard is used for sorting instead of comparing their sort values. - * This shortcut does only work if we are not reRanking the results. - */ -public class ShardFieldSortedHitQueueWithSameShardCompareSkip extends ShardFieldSortedHitQueue { - - public ShardFieldSortedHitQueueWithSameShardCompareSkip(SortField[] fields, int size, IndexSearcher searcher) { - super(fields, size, searcher); - } - - @Override - protected boolean lessThan(ShardDoc docA, ShardDoc docB) { - // If these docs are from the same shard, then the relative order - // is how they appeared in the response from that shard. - if (docA.shard == docB.shard) { - // if docA has a smaller position, it should be "larger" so it - // comes before docB. - // This will handle sorting by docid within the same shard - - // comment this out to test comparators. - return !(docA.orderInShard < docB.orderInShard); - } - - return super.lessThan(docA, docB); - } -} \ No newline at end of file From 47075d2819918ee42fb266fd877bf6e5454cb6d6 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Fri, 28 May 2021 11:20:58 +0200 Subject: [PATCH 05/15] [SOLR-15437] extract logic of handling multiple ShardFieldSortedHitQueues to SortedHitQueueManager to remove complexity from mergeIds( ) --- .../handler/component/QueryComponent.java | 38 ++--------- .../component/SortedHitQueueManager.java | 65 +++++++++++++++++++ 2 files changed, 70 insertions(+), 33 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index e48aee20cdf..9b9c69bbfd7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -73,7 +73,6 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SortableTextField; -import org.apache.solr.search.AbstractReRankQuery; import org.apache.solr.search.CursorMark; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; @@ -866,19 +865,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) - ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); - - ShardFieldSortedHitQueue reRankQueue = null; - int reRankDocsSize = 0; - if(rb.getRankQuery() != null){ - final RankQuery rankQuery = rb.getRankQuery(); - if(rankQuery instanceof AbstractReRankQuery){ - reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); - reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, Math.min(reRankDocsSize, ss.getCount()), rb.req.getSearcher()); - - queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher(), false); - } - } + SortedHitQueueManager queueManager = new SortedHitQueueManager(sortFields, ss, rb); NamedList shardInfo = null; if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { @@ -1012,34 +999,19 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { shardDoc.sortFieldValues = unmarshalledSortFieldValues; - if(reRankQueue != null && i < reRankDocsSize) { - ShardDoc droppedShardDoc = reRankQueue.insertWithOverflow(shardDoc); - // FIXME: Only works if the original request does not sort by score - if(droppedShardDoc != null) { - queue.insertWithOverflow(droppedShardDoc); - } - } else { - queue.insertWithOverflow(shardDoc); - } + queueManager.addDocument(shardDoc, i); } // end for-each-doc-in-response } // end for-each-response // The queue now has 0 -> queuesize docs, where queuesize <= start + rows // So we want to pop the last documents off the queue to get // the docs offset -> queuesize - int resultSize = queue.size() - ss.getOffset(); - if(reRankQueue != null) { - resultSize += reRankQueue.size(); - } - resultSize = Math.max(0, resultSize); // there may not be any docs in range + final int resultSize = Math.max(0, queueManager.getResultSize(ss.getOffset())); // there may not be any docs in range Map resultIds = new HashMap<>(); for (int i=resultSize-1; i>=0; i--) { - ShardDoc shardDoc = queue.pop(); - if(shardDoc == null && reRankQueue != null) { - shardDoc = reRankQueue.pop(); - } - shardDoc.positionInResponse = i; // FIXME check if we can handle a possible NPE more elegantly + final ShardDoc shardDoc = queueManager.nextDocument(); + shardDoc.positionInResponse = i; // Need the toString() for correlation with other lists that must // be strings (like keys in highlighting, explain, etc) resultIds.put(shardDoc.id.toString(), shardDoc); diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java new file mode 100644 index 00000000000..88e58687bea --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -0,0 +1,65 @@ +package org.apache.solr.handler.component; + +import org.apache.lucene.search.SortField; +import org.apache.solr.search.AbstractReRankQuery; +import org.apache.solr.search.RankQuery; +import org.apache.solr.search.SortSpec; + +/** + * This class is used to managed the possible multiple SortedHitQueues that we need during mergeIds( ). + * Multiple queues are needed, if reRanking is used. + * + * If reRanking is disabled, only the queue is used. + * If reRanking is enabled, the top reRankDocsSize documents are added to the reRankQueue, all other documents are + * collected in the queue. + */ +public class SortedHitQueueManager { + private final ShardFieldSortedHitQueue queue; + private final ShardFieldSortedHitQueue reRankQueue; + private final int reRankDocsSize; + + public SortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { + final RankQuery rankQuery = rb.getRankQuery(); + + if(rb.getRankQuery() != null && rankQuery instanceof AbstractReRankQuery){ + // reRanking is enabled, create a queue that is only used for reRanked results + // disable shortcut in the queue to correctly sort documents that were reRanked on the shards back into the full result + reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); + reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, + Math.min(reRankDocsSize, ss.getCount()), rb.req.getSearcher()); + queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher(), false); + } else { + // reRanking is disabled, use one queue for all results + queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); + reRankQueue = null; + reRankDocsSize = 0; + } + } + + public void addDocument(ShardDoc shardDoc, int i) { + if(reRankQueue != null && i < reRankDocsSize) { + ShardDoc droppedShardDoc = reRankQueue.insertWithOverflow(shardDoc); + // FIXME: Only works if the original request does not sort by score + if(droppedShardDoc != null) { + queue.insertWithOverflow(droppedShardDoc); + } + } else { + queue.insertWithOverflow(shardDoc); + } + } + + public ShardDoc nextDocument() { + ShardDoc shardDoc = queue.pop(); + if(shardDoc == null && reRankQueue != null) { + shardDoc = reRankQueue.pop(); + } + return shardDoc; + } + + public int getResultSize(int offset) { + if(reRankQueue != null) { + return queue.size() - offset + reRankQueue.size(); + } + return queue.size() - offset; + } +} From f3ccac9513a89540263be827d2b25dd5435a40c9 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Mon, 31 May 2021 11:32:03 +0200 Subject: [PATCH 06/15] [SOLR-15437] always request scores when reRanking even if not set as fl, because we need it for sorting --- .../apache/solr/ltr/TestLTROnSolrCloud.java | 27 +++++++++++++++---- .../handler/component/QueryComponent.java | 3 +++ .../component/SortedHitQueueManager.java | 2 +- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index fe86fd7a786..7dd0b279998 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -16,7 +16,10 @@ package org.apache.solr.ltr; import java.io.File; +import java.util.ArrayList; +import java.util.List; import java.util.SortedMap; +import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.solr.client.solrj.SolrQuery; @@ -59,11 +62,8 @@ public void setUp() throws Exception { int numberOfNodes = numberOfShards * numberOfReplicas; setupSolrCluster(numberOfShards, numberOfReplicas, numberOfNodes); - - } - @Override public void tearDown() throws Exception { restTestHarness.close(); @@ -76,7 +76,7 @@ public void tearDown() throws Exception { public void testSimpleQueryCustomSort() throws Exception { SolrQuery query = new SolrQuery("*:*"); query.setRequestHandler("/query"); - query.setFields("*,score,[shard]"); + query.setFields("*,[shard]"); query.setParam("rows", "8"); query.setParam("sort", "id asc"); query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); @@ -95,7 +95,7 @@ public void testSimpleQueryCustomSort() throws Exception { } @Test - public void testSimpleQueryCustomSortWithASubResults() throws Exception { + public void testSimpleQueryCustomSortWithSubResultSet() throws Exception { final int reRankDocs = 2; SolrQuery query = new SolrQuery("*:*"); query.setRequestHandler("/query"); @@ -108,6 +108,9 @@ public void testSimpleQueryCustomSortWithASubResults() throws Exception { SolrDocumentList results = queryResponse.getResults(); assertEquals(8, results.getNumFound()); + // save order to use it later + List expectedDocIdOrder = new ArrayList<>(); + int docCounter = 0; float lastScore = Float.MAX_VALUE; double lastId = 0d; @@ -115,6 +118,7 @@ public void testSimpleQueryCustomSortWithASubResults() throws Exception { float score = (float) d.getFieldValue("score"); double id = Double.parseDouble((String) d.getFirstValue("id")); + expectedDocIdOrder.add((String) d.getFirstValue("id")); if(docCounter < reRankDocs){ final float assertedCalculatedScore = (float) Math.pow(id, 2.0d) + 2.1f; assertEquals(assertedCalculatedScore, score, 0.0); @@ -127,6 +131,19 @@ public void testSimpleQueryCustomSortWithASubResults() throws Exception { docCounter++; } + + query.setFields("*,[shard]"); + + queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); + results = queryResponse.getResults(); + assertEquals(8, results.getNumFound()); + + List docIdOrder = results.stream() + .map(document -> (String) document.getFirstValue("id")) + .collect(Collectors.toList()); + + // assert that sorting is correct when we do not return the score via fl param + assertEquals(expectedDocIdOrder, docIdOrder); } @Test diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 9b9c69bbfd7..19b8892560b 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -116,6 +116,7 @@ import org.slf4j.LoggerFactory; import static org.apache.solr.common.params.CommonParams.QUERY_UUID; +import static org.apache.solr.search.SolrIndexSearcher.GET_SCORES; /** @@ -185,6 +186,8 @@ public void prepare(ResponseBuilder rb) throws IOException if(rq instanceof RankQuery) { RankQuery rankQuery = (RankQuery)rq; rb.setRankQuery(rankQuery); + // we always need the score for reRanking + rb.setFieldFlags(rb.getFieldFlags() | GET_SCORES); MergeStrategy mergeStrategy = rankQuery.getMergeStrategy(); if(mergeStrategy != null) { rb.addMergeStrategy(mergeStrategy); diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index 88e58687bea..64d75c4fe43 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -6,7 +6,7 @@ import org.apache.solr.search.SortSpec; /** - * This class is used to managed the possible multiple SortedHitQueues that we need during mergeIds( ). + * This class is used to manage the possible multiple SortedHitQueues that we need during mergeIds( ). * Multiple queues are needed, if reRanking is used. * * If reRanking is disabled, only the queue is used. From 84c2b4aeafcd2b1ccf1816a6518c652dc6732c8e Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Mon, 31 May 2021 11:52:35 +0200 Subject: [PATCH 07/15] [SOLR-15437] add license --- .../handler/component/SortedHitQueueManager.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index 64d75c4fe43..8ccb8bf865f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.solr.handler.component; import org.apache.lucene.search.SortField; From 004ed786febf97d05f1bd5662c851e43dd9efeef Mon Sep 17 00:00:00 2001 From: Tobias Kaessmann Date: Tue, 1 Jun 2021 13:09:58 +0200 Subject: [PATCH 08/15] SOLR-15437: Add test and calculate ltr score manually --- .../apache/solr/ltr/TestLTROnSolrCloud.java | 61 ++++++++++++++++++- .../component/SortedHitQueueManager.java | 8 ++- 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 48e6bfc700f..95bc4e7fdbe 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -19,10 +19,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.SortedMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; +import com.google.common.base.Splitter; import org.apache.commons.io.FileUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.embedded.JettyConfig; @@ -54,6 +58,10 @@ public class TestLTROnSolrCloud extends TestRerankBase { String schema = "schema.xml"; SortedMap extraServlets = null; + + private final static String MODEL_WEIGHTS = "\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1," + + "\"dvIntFieldFeature\":0.1,\"dvLongFieldFeature\":0.1," + + "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1"; @Override public void setUp() throws Exception { @@ -151,6 +159,55 @@ public void testSimpleQueryCustomSortWithSubResultSet() throws Exception { assertEquals(expectedDocIdOrder, docIdOrder); } + @Test + public void testSimpleQueryCustomSortWithSubResultSetAndRowsLessThanExistingDocs() throws Exception { + final int reRankDocs = 2; + SolrQuery query = new SolrQuery("*:*"); + query.setRequestHandler("/query"); + query.setFields("*,score,[shard],[fv]"); + query.setParam("rows", "6"); + query.setParam("sort", "id asc"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs="+reRankDocs+"}"); + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); + SolrDocumentList results = queryResponse.getResults(); + assertEquals(6, results.size()); + int docCounter = 0; + float lastScore = Float.MAX_VALUE; + double lastId = 0d; + for(SolrDocument d : results){ + float score = (float) d.getFieldValue("score"); + double id = Double.parseDouble((String) d.getFirstValue("id")); + if(docCounter < reRankDocs){ + final float calculatedScore = calculateLTRScoreForDoc(d); + System.out.println("calculatedScore " + calculatedScore); + System.out.println("score " + score); + assertEquals(calculatedScore, score, 0.0); + assertTrue(lastScore > score); + } else if(docCounter > reRankDocs + 1) { + assertTrue(lastId < id); + } + lastScore = score; + lastId = id; + docCounter++; + } + } + private float calculateLTRScoreForDoc(SolrDocument d) { + Matcher matcher = Pattern.compile(",?(\\w+)=([0-9]+\\.[0-9]+)").matcher((String) d.getFieldValue("[fv]")); + Map weights = Splitter.on(",") + .splitToList(MODEL_WEIGHTS) + .stream() + .map(e -> e.split(":")) + .collect( + Collectors.toMap(e -> e[0].replaceAll("\"", ""), e -> Float.parseFloat(e[1]))); + + float score = 0.0f; + while(matcher.find()) { + score += Float.parseFloat(matcher.group(2)) * weights.get(matcher.group(1)); + } + + return score; + } + @Test // commented 4-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018 // commented out on: 24-Dec-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018 @@ -373,9 +430,7 @@ private void loadModelsAndFeatures() throws Exception { final String featureStore = "test"; final String[] featureNames = new String[]{"powpularityS", "c3", "original", "dvIntFieldFeature", "dvLongFieldFeature", "dvFloatFieldFeature", "dvDoubleFieldFeature", "dvStrNumFieldFeature", "dvStrBoolFieldFeature"}; - final String jsonModelParams = "{\"weights\":{\"powpularityS\":1.0,\"c3\":1.0,\"original\":0.1," + - "\"dvIntFieldFeature\":0.1,\"dvLongFieldFeature\":0.1," + - "\"dvFloatFieldFeature\":0.1,\"dvDoubleFieldFeature\":0.1,\"dvStrNumFieldFeature\":0.1,\"dvStrBoolFieldFeature\":0.1}}"; + final String jsonModelParams = "{\"weights\":{" + MODEL_WEIGHTS + "}}"; loadFeature( featureNames[0], diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index 8ccb8bf865f..a46e4d50de6 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -41,9 +41,11 @@ public SortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilde // reRanking is enabled, create a queue that is only used for reRanked results // disable shortcut in the queue to correctly sort documents that were reRanked on the shards back into the full result reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); - reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, - Math.min(reRankDocsSize, ss.getCount()), rb.req.getSearcher()); - queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher(), false); + int absoluteReRankDocs = Math.min(reRankDocsSize, ss.getCount()); + reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, + absoluteReRankDocs, rb.req.getSearcher()); + queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount() - absoluteReRankDocs, + rb.req.getSearcher(), false); } else { // reRanking is disabled, use one queue for all results queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); From db49b6d2e70afe440efbd9f07ce36dae60784d25 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Tue, 1 Jun 2021 13:24:23 +0200 Subject: [PATCH 09/15] [SOLR-15437] use helper method to calculate expected scores, improve readability --- .../org/apache/solr/ltr/TestLTROnSolrCloud.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 95bc4e7fdbe..c403493e7b4 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -112,7 +112,7 @@ public void testSimpleQueryCustomSortWithSubResultSet() throws Exception { final int reRankDocs = 2; SolrQuery query = new SolrQuery("*:*"); query.setRequestHandler("/query"); - query.setFields("*,score,[shard]"); + query.setFields("*,score,[shard],[fv]"); query.setParam("rows", "8"); query.setParam("sort", "id asc"); query.add("rq", "{!ltr model=powpularityS-model reRankDocs="+reRankDocs+"}"); @@ -133,7 +133,7 @@ public void testSimpleQueryCustomSortWithSubResultSet() throws Exception { double id = Double.parseDouble((String) d.getFirstValue("id")); expectedDocIdOrder.add((String) d.getFirstValue("id")); if(docCounter < reRankDocs){ - final float assertedCalculatedScore = (float) Math.pow(id, 2.0d) + 2.1f; + final float assertedCalculatedScore = calculateLTRScoreForDoc(d); assertEquals(assertedCalculatedScore, score, 0.0); assertTrue(lastScore > score); } else if(docCounter > reRankDocs + 1) { @@ -145,7 +145,7 @@ public void testSimpleQueryCustomSortWithSubResultSet() throws Exception { docCounter++; } - query.setFields("*,[shard]"); + query.setFields("*,[shard],[fv]"); queryResponse = solrCluster.getSolrClient().query(COLLECTION, query); results = queryResponse.getResults(); @@ -164,7 +164,7 @@ public void testSimpleQueryCustomSortWithSubResultSetAndRowsLessThanExistingDocs final int reRankDocs = 2; SolrQuery query = new SolrQuery("*:*"); query.setRequestHandler("/query"); - query.setFields("*,score,[shard],[fv]"); + query.setFields("*,score,[shard],[fv]"); // score as fl is needed here to be able to use it for assertions query.setParam("rows", "6"); query.setParam("sort", "id asc"); query.add("rq", "{!ltr model=powpularityS-model reRankDocs="+reRankDocs+"}"); @@ -179,8 +179,6 @@ public void testSimpleQueryCustomSortWithSubResultSetAndRowsLessThanExistingDocs double id = Double.parseDouble((String) d.getFirstValue("id")); if(docCounter < reRankDocs){ final float calculatedScore = calculateLTRScoreForDoc(d); - System.out.println("calculatedScore " + calculatedScore); - System.out.println("score " + score); assertEquals(calculatedScore, score, 0.0); assertTrue(lastScore > score); } else if(docCounter > reRankDocs + 1) { @@ -196,9 +194,9 @@ private float calculateLTRScoreForDoc(SolrDocument d) { Map weights = Splitter.on(",") .splitToList(MODEL_WEIGHTS) .stream() - .map(e -> e.split(":")) - .collect( - Collectors.toMap(e -> e[0].replaceAll("\"", ""), e -> Float.parseFloat(e[1]))); + .map(fieldWithValue -> fieldWithValue.split(":")) + .collect(Collectors.toMap(fieldAndValue -> fieldAndValue[0].replaceAll("\"", ""), + fieldAndValue -> Float.parseFloat(fieldAndValue[1]))); float score = 0.0f; while(matcher.find()) { From c1db0cf59976eec5f8cc6db2a13f7050d281bccc Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Tue, 1 Jun 2021 13:55:51 +0200 Subject: [PATCH 10/15] [SOLR-15437] correctly parse negative values when calculating the expected score --- .../src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index c403493e7b4..6460429eec8 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -133,8 +133,8 @@ public void testSimpleQueryCustomSortWithSubResultSet() throws Exception { double id = Double.parseDouble((String) d.getFirstValue("id")); expectedDocIdOrder.add((String) d.getFirstValue("id")); if(docCounter < reRankDocs){ - final float assertedCalculatedScore = calculateLTRScoreForDoc(d); - assertEquals(assertedCalculatedScore, score, 0.0); + final float calculatedScore = calculateLTRScoreForDoc(d); + assertEquals(calculatedScore, score, 0.0); assertTrue(lastScore > score); } else if(docCounter > reRankDocs + 1) { assertTrue(lastId < id); @@ -190,11 +190,11 @@ public void testSimpleQueryCustomSortWithSubResultSetAndRowsLessThanExistingDocs } } private float calculateLTRScoreForDoc(SolrDocument d) { - Matcher matcher = Pattern.compile(",?(\\w+)=([0-9]+\\.[0-9]+)").matcher((String) d.getFieldValue("[fv]")); + Matcher matcher = Pattern.compile(",?(\\w+)=(-?[0-9]+\\.[0-9]+)").matcher((String) d.getFieldValue("[fv]")); Map weights = Splitter.on(",") .splitToList(MODEL_WEIGHTS) .stream() - .map(fieldWithValue -> fieldWithValue.split(":")) + .map(fieldWithWeight -> fieldWithWeight.split(":")) .collect(Collectors.toMap(fieldAndValue -> fieldAndValue[0].replaceAll("\"", ""), fieldAndValue -> Float.parseFloat(fieldAndValue[1]))); From df6f30ac0e2d3145d2833559bdecfc30c4ce1253 Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Sun, 20 Jun 2021 11:49:53 +0200 Subject: [PATCH 11/15] [SOLR-15437] maintain existing behavior for sort by score --- .../apache/solr/ltr/TestLTROnSolrCloud.java | 1 + .../component/SortedHitQueueManager.java | 26 +++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java index 6460429eec8..67a0c2f0438 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTROnSolrCloud.java @@ -189,6 +189,7 @@ public void testSimpleQueryCustomSortWithSubResultSetAndRowsLessThanExistingDocs docCounter++; } } + private float calculateLTRScoreForDoc(SolrDocument d) { Matcher matcher = Pattern.compile(",?(\\w+)=(-?[0-9]+\\.[0-9]+)").matcher((String) d.getFieldValue("[fv]")); Map weights = Splitter.on(",") diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index a46e4d50de6..e6564b83e7e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -37,7 +37,7 @@ public class SortedHitQueueManager { public SortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { final RankQuery rankQuery = rb.getRankQuery(); - if(rb.getRankQuery() != null && rankQuery instanceof AbstractReRankQuery){ + if(useFixForReRankOnSolrCloud(sortFields, rb, rankQuery)){ // reRanking is enabled, create a queue that is only used for reRanked results // disable shortcut in the queue to correctly sort documents that were reRanked on the shards back into the full result reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); @@ -54,10 +54,32 @@ public SortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilde } } + private boolean useFixForReRankOnSolrCloud(SortField[] sortFields, ResponseBuilder rb, RankQuery rankQuery) { + // we only want to use two queues if we reRank + return rb.getRankQuery() != null && rankQuery instanceof AbstractReRankQuery && + // using two queues makes reRanking on Solr Cloud possible (@see https://github.com/apache/solr/pull/151 ) + // however, the fix does not work if the non-reRanked docs should be sorted by score ( @see https://github.com/apache/solr/pull/151#discussion_r640664451 ) + // to maintain the existing behavior for these cases, we disable the broken use of two queues and use the (also broken) status quo instead + !hasScoreSortField(sortFields); + } + + private boolean hasScoreSortField(SortField[] sortFields) { + boolean hasScoreSortField = false; + for (SortField sortField : sortFields) { + // do not check equality with SortField.FIELD_SCORE because that would only cover score desc, not score asc + if (sortField.getType().equals(SortField.Type.SCORE) && sortField.getField() == null) { + hasScoreSortField = true; + break; + } + } + return hasScoreSortField; + } + public void addDocument(ShardDoc shardDoc, int i) { if(reRankQueue != null && i < reRankDocsSize) { ShardDoc droppedShardDoc = reRankQueue.insertWithOverflow(shardDoc); - // FIXME: Only works if the original request does not sort by score + // Only works if the original request does not sort by score + // @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-15437") if(droppedShardDoc != null) { queue.insertWithOverflow(droppedShardDoc); } From 37e9da88277d3fd107bc51e2fece0ad7c2094ea4 Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Sun, 20 Jun 2021 16:52:04 +0100 Subject: [PATCH 12/15] explore splitting SortedHitQueueManager into three (interface + two implementations) --- .../component/DualSortedHitQueueManager.java | 87 ++++++++++++++++++ .../handler/component/QueryComponent.java | 20 ++++- .../SingleSortedHitQueueManager.java | 45 ++++++++++ .../component/SortedHitQueueManager.java | 88 ++----------------- 4 files changed, 153 insertions(+), 87 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java create mode 100644 solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java diff --git a/solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java new file mode 100644 index 00000000000..285926fc97b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SortField; + +/** + * This class is used to manage the multiple SortedHitQueues that we need during mergeIds( ). + * Multiple queues are needed when reRanking is used. + * + * The top reRankDocsSize documents are added to the reRankQueue, all other documents are + * collected in the queue. + */ +public class DualSortedHitQueueManager implements SortedHitQueueManager { + + private final ShardFieldSortedHitQueue queue; + private final ShardFieldSortedHitQueue reRankQueue; + private final int reRankDocsSize; + + public DualSortedHitQueueManager(SortField[] sortFields, int count, int offset, IndexSearcher searcher, int reRankDocsSize) { + this.reRankDocsSize = reRankDocsSize; + int absoluteReRankDocs = Math.min(reRankDocsSize, count); + reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, + absoluteReRankDocs, searcher); + queue = new ShardFieldSortedHitQueue(sortFields, offset + count - absoluteReRankDocs, + searcher, false /* useSameShardShortcut */); + } + + /** + * TODO + * @deprecated TODO + */ + @Deprecated // will be removed when all sort fields become supported + public static boolean supports(SortField[] sortFields) { + for (SortField sortField : sortFields) { + // do not check equality with SortField.FIELD_SCORE because that would only cover score desc, not score asc + if (sortField.getType().equals(SortField.Type.SCORE) && sortField.getField() == null) { + // using two queues makes reRanking on Solr Cloud possible (@see https://github.com/apache/solr/pull/151 ) + // however, the fix does not work if the non-reRanked docs should be sorted by score ( @see https://github.com/apache/solr/pull/151#discussion_r640664451 ) + // to maintain the existing behavior for these cases, we disable the broken use of two queues and use the (also broken) status quo instead + return false; + } + } + return true; + } + + public void addDocument(ShardDoc shardDoc) { + if(shardDoc.orderInShard < reRankDocsSize) { + ShardDoc droppedShardDoc = reRankQueue.insertWithOverflow(shardDoc); + // Only works if the original request does not sort by score + // @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-15437") + if(droppedShardDoc != null) { + queue.insertWithOverflow(droppedShardDoc); + } + } else { + queue.insertWithOverflow(shardDoc); + } + } + + public ShardDoc popDocument() { + ShardDoc shardDoc = queue.pop(); + if(shardDoc == null) { + shardDoc = reRankQueue.pop(); + } + return shardDoc; + } + + public int size() { + return queue.size() + reRankQueue.size(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 19b8892560b..fbb42cdb58e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -73,6 +73,7 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SortableTextField; +import org.apache.solr.search.AbstractReRankQuery; import org.apache.solr.search.CursorMark; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; @@ -832,6 +833,16 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded) return true; } + private static SortedHitQueueManager newSortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { + final RankQuery rankQuery = rb.getRankQuery(); + if (rankQuery instanceof AbstractReRankQuery && DualSortedHitQueueManager.supports(sortFields)) { + return new DualSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher(), + ((AbstractReRankQuery)rankQuery).getReRankDocs()); + } else { + return new SingleSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher()); + } + } + @SuppressWarnings({"unchecked"}) protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { List mergeStrategies = rb.getMergeStrategies(); @@ -868,7 +879,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) - SortedHitQueueManager queueManager = new SortedHitQueueManager(sortFields, ss, rb); + final SortedHitQueueManager queueManager = newSortedHitQueueManager(sortFields, ss, rb); NamedList shardInfo = null; if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { @@ -1002,18 +1013,19 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { shardDoc.sortFieldValues = unmarshalledSortFieldValues; - queueManager.addDocument(shardDoc, i); + queueManager.addDocument(shardDoc); } // end for-each-doc-in-response } // end for-each-response // The queue now has 0 -> queuesize docs, where queuesize <= start + rows // So we want to pop the last documents off the queue to get // the docs offset -> queuesize - final int resultSize = Math.max(0, queueManager.getResultSize(ss.getOffset())); // there may not be any docs in range + int resultSize = queueManager.size() - ss.getOffset(); + resultSize = Math.max(0, resultSize); // there may not be any docs in range Map resultIds = new HashMap<>(); for (int i=resultSize-1; i>=0; i--) { - final ShardDoc shardDoc = queueManager.nextDocument(); + ShardDoc shardDoc = queueManager.popDocument(); shardDoc.positionInResponse = i; // Need the toString() for correlation with other lists that must // be strings (like keys in highlighting, explain, etc) diff --git a/solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java new file mode 100644 index 00000000000..3575dc49d5a --- /dev/null +++ b/solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SortField; + +public class SingleSortedHitQueueManager implements SortedHitQueueManager { + + private final ShardFieldSortedHitQueue queue; + + public SingleSortedHitQueueManager(SortField[] sortFields, int count, int offset, IndexSearcher searcher) { + queue = new ShardFieldSortedHitQueue(sortFields, count + offset, searcher); + } + + @Override + public void addDocument(ShardDoc shardDoc) { + queue.insertWithOverflow(shardDoc); + } + + @Override + public ShardDoc popDocument() { + return queue.pop(); + } + + @Override + public int size() { + return queue.size(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index e6564b83e7e..cd100d512d7 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -16,90 +16,12 @@ */ package org.apache.solr.handler.component; -import org.apache.lucene.search.SortField; -import org.apache.solr.search.AbstractReRankQuery; -import org.apache.solr.search.RankQuery; -import org.apache.solr.search.SortSpec; +public interface SortedHitQueueManager { -/** - * This class is used to manage the possible multiple SortedHitQueues that we need during mergeIds( ). - * Multiple queues are needed, if reRanking is used. - * - * If reRanking is disabled, only the queue is used. - * If reRanking is enabled, the top reRankDocsSize documents are added to the reRankQueue, all other documents are - * collected in the queue. - */ -public class SortedHitQueueManager { - private final ShardFieldSortedHitQueue queue; - private final ShardFieldSortedHitQueue reRankQueue; - private final int reRankDocsSize; - - public SortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { - final RankQuery rankQuery = rb.getRankQuery(); - - if(useFixForReRankOnSolrCloud(sortFields, rb, rankQuery)){ - // reRanking is enabled, create a queue that is only used for reRanked results - // disable shortcut in the queue to correctly sort documents that were reRanked on the shards back into the full result - reRankDocsSize = ((AbstractReRankQuery) rankQuery).getReRankDocs(); - int absoluteReRankDocs = Math.min(reRankDocsSize, ss.getCount()); - reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, - absoluteReRankDocs, rb.req.getSearcher()); - queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount() - absoluteReRankDocs, - rb.req.getSearcher(), false); - } else { - // reRanking is disabled, use one queue for all results - queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher()); - reRankQueue = null; - reRankDocsSize = 0; - } - } - - private boolean useFixForReRankOnSolrCloud(SortField[] sortFields, ResponseBuilder rb, RankQuery rankQuery) { - // we only want to use two queues if we reRank - return rb.getRankQuery() != null && rankQuery instanceof AbstractReRankQuery && - // using two queues makes reRanking on Solr Cloud possible (@see https://github.com/apache/solr/pull/151 ) - // however, the fix does not work if the non-reRanked docs should be sorted by score ( @see https://github.com/apache/solr/pull/151#discussion_r640664451 ) - // to maintain the existing behavior for these cases, we disable the broken use of two queues and use the (also broken) status quo instead - !hasScoreSortField(sortFields); - } - - private boolean hasScoreSortField(SortField[] sortFields) { - boolean hasScoreSortField = false; - for (SortField sortField : sortFields) { - // do not check equality with SortField.FIELD_SCORE because that would only cover score desc, not score asc - if (sortField.getType().equals(SortField.Type.SCORE) && sortField.getField() == null) { - hasScoreSortField = true; - break; - } - } - return hasScoreSortField; - } + void addDocument(ShardDoc shardDoc); - public void addDocument(ShardDoc shardDoc, int i) { - if(reRankQueue != null && i < reRankDocsSize) { - ShardDoc droppedShardDoc = reRankQueue.insertWithOverflow(shardDoc); - // Only works if the original request does not sort by score - // @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-15437") - if(droppedShardDoc != null) { - queue.insertWithOverflow(droppedShardDoc); - } - } else { - queue.insertWithOverflow(shardDoc); - } - } + ShardDoc popDocument(); - public ShardDoc nextDocument() { - ShardDoc shardDoc = queue.pop(); - if(shardDoc == null && reRankQueue != null) { - shardDoc = reRankQueue.pop(); - } - return shardDoc; - } + int size(); - public int getResultSize(int offset) { - if(reRankQueue != null) { - return queue.size() - offset + reRankQueue.size(); - } - return queue.size() - offset; - } -} +} \ No newline at end of file From 909a25d59b826839dd34f4b1495b966913c95d3c Mon Sep 17 00:00:00 2001 From: Christine Poerschke Date: Sun, 20 Jun 2021 18:19:09 +0100 Subject: [PATCH 13/15] continue to have only one setFieldFlags call in QueryComponent.prepare --- .../solr/handler/component/QueryComponent.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index fbb42cdb58e..429099d3675 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -117,7 +117,6 @@ import org.slf4j.LoggerFactory; import static org.apache.solr.common.params.CommonParams.QUERY_UUID; -import static org.apache.solr.search.SolrIndexSearcher.GET_SCORES; /** @@ -150,14 +149,8 @@ public void prepare(ResponseBuilder rb) throws IOException } } - // Set field flags ReturnFields returnFields = new SolrReturnFields( req ); rsp.setReturnFields( returnFields ); - int flags = 0; - if (returnFields.wantsScore()) { - flags |= SolrIndexSearcher.GET_SCORES; - } - rb.setFieldFlags( flags ); String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE); @@ -187,8 +180,6 @@ public void prepare(ResponseBuilder rb) throws IOException if(rq instanceof RankQuery) { RankQuery rankQuery = (RankQuery)rq; rb.setRankQuery(rankQuery); - // we always need the score for reRanking - rb.setFieldFlags(rb.getFieldFlags() | GET_SCORES); MergeStrategy mergeStrategy = rankQuery.getMergeStrategy(); if(mergeStrategy != null) { rb.addMergeStrategy(mergeStrategy); @@ -227,6 +218,15 @@ public void prepare(ResponseBuilder rb) throws IOException throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } + // set field flags + { + int flags = 0; + if (returnFields.wantsScore() || (rb.getRankQuery() instanceof AbstractReRankQuery)) { + flags |= SolrIndexSearcher.GET_SCORES; + } + rb.setFieldFlags( flags ); + } + if (params.getBool(GroupParams.GROUP, false)) { prepareGrouping(rb); } else { From bbf887f3bc4f24c1b5ff4ff213fb658f05c7d25f Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Mon, 21 Jun 2021 23:28:32 +0200 Subject: [PATCH 14/15] [SOLR-15437] move method to create new SortedHitQueueManager into manager and change interface to abstract class --- ...java => DefaultSortedHitQueueManager.java} | 10 ++++-- .../handler/component/QueryComponent.java | 11 +------ ....java => ReRankSortedHitQueueManager.java} | 9 +++--- .../component/SortedHitQueueManager.java | 32 ++++++++++++++++--- 4 files changed, 42 insertions(+), 20 deletions(-) rename solr/core/src/java/org/apache/solr/handler/component/{SingleSortedHitQueueManager.java => DefaultSortedHitQueueManager.java} (79%) rename solr/core/src/java/org/apache/solr/handler/component/{DualSortedHitQueueManager.java => ReRankSortedHitQueueManager.java} (88%) diff --git a/solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/DefaultSortedHitQueueManager.java similarity index 79% rename from solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java rename to solr/core/src/java/org/apache/solr/handler/component/DefaultSortedHitQueueManager.java index 3575dc49d5a..ed306c3f803 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SingleSortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/DefaultSortedHitQueueManager.java @@ -19,11 +19,17 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.SortField; -public class SingleSortedHitQueueManager implements SortedHitQueueManager { +/** + * This class is just a wrapper for the ShardFieldSortedHitQueue. + * Is is used when we do not use reRanking. + * + * All documents are added to the queue. + */ +public class DefaultSortedHitQueueManager extends SortedHitQueueManager { private final ShardFieldSortedHitQueue queue; - public SingleSortedHitQueueManager(SortField[] sortFields, int count, int offset, IndexSearcher searcher) { + public DefaultSortedHitQueueManager(SortField[] sortFields, int count, int offset, IndexSearcher searcher) { queue = new ShardFieldSortedHitQueue(sortFields, count + offset, searcher); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index 429099d3675..b0a163185f1 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -117,6 +117,7 @@ import org.slf4j.LoggerFactory; import static org.apache.solr.common.params.CommonParams.QUERY_UUID; +import static org.apache.solr.handler.component.SortedHitQueueManager.newSortedHitQueueManager; /** @@ -833,16 +834,6 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded) return true; } - private static SortedHitQueueManager newSortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { - final RankQuery rankQuery = rb.getRankQuery(); - if (rankQuery instanceof AbstractReRankQuery && DualSortedHitQueueManager.supports(sortFields)) { - return new DualSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher(), - ((AbstractReRankQuery)rankQuery).getReRankDocs()); - } else { - return new SingleSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher()); - } - } - @SuppressWarnings({"unchecked"}) protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { List mergeStrategies = rb.getMergeStrategies(); diff --git a/solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java similarity index 88% rename from solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java rename to solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java index 285926fc97b..fde2fccf5d8 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/DualSortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java @@ -26,13 +26,13 @@ * The top reRankDocsSize documents are added to the reRankQueue, all other documents are * collected in the queue. */ -public class DualSortedHitQueueManager implements SortedHitQueueManager { +public class ReRankSortedHitQueueManager extends SortedHitQueueManager { private final ShardFieldSortedHitQueue queue; private final ShardFieldSortedHitQueue reRankQueue; private final int reRankDocsSize; - public DualSortedHitQueueManager(SortField[] sortFields, int count, int offset, IndexSearcher searcher, int reRankDocsSize) { + public ReRankSortedHitQueueManager(SortField[] sortFields, int count, int offset, IndexSearcher searcher, int reRankDocsSize) { this.reRankDocsSize = reRankDocsSize; int absoluteReRankDocs = Math.min(reRankDocsSize, count); reRankQueue = new ShardFieldSortedHitQueue(new SortField[]{SortField.FIELD_SCORE}, @@ -42,8 +42,9 @@ public DualSortedHitQueueManager(SortField[] sortFields, int count, int offset, } /** - * TODO - * @deprecated TODO + * Check whether the sortFields contain score. + * If that's true, we do not support this query. (see comments below) + * @deprecated will be removed as soon as https://github.com/apache/solr/pull/171 is done */ @Deprecated // will be removed when all sort fields become supported public static boolean supports(SortField[] sortFields) { diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index cd100d512d7..74513f6b442 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -16,12 +16,36 @@ */ package org.apache.solr.handler.component; -public interface SortedHitQueueManager { +import org.apache.lucene.search.SortField; +import org.apache.solr.search.AbstractReRankQuery; +import org.apache.solr.search.RankQuery; +import org.apache.solr.search.SortSpec; - void addDocument(ShardDoc shardDoc); +public abstract class SortedHitQueueManager { - ShardDoc popDocument(); + abstract void addDocument(ShardDoc shardDoc); - int size(); + abstract ShardDoc popDocument(); + abstract int size(); + + /** + * Return the implementation of SortedHitQueueManager that should be used for the given sort and ranking. + * We use multiple queues if reRanking is enabled and we do not sort by score. + * In all other cases only one queue is used. + * + * @param sortFields the fields by which the results should be sorted + * @param ss SortSpec of the responseBuilder + * @param rb responseBuilder for a query + * @return either a SortedHitQueueManager that handles reRanking or the default implementation + */ + public static SortedHitQueueManager newSortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { + final RankQuery rankQuery = rb.getRankQuery(); + if (rankQuery instanceof AbstractReRankQuery && ReRankSortedHitQueueManager.supports(sortFields)) { + return new ReRankSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher(), + ((AbstractReRankQuery)rankQuery).getReRankDocs()); + } else { + return new DefaultSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher()); + } + } } \ No newline at end of file From dc2b84f12b2d4393cf0cf12dea93d08fc11cd86b Mon Sep 17 00:00:00 2001 From: tomglk <> Date: Tue, 22 Jun 2021 21:14:00 +0200 Subject: [PATCH 15/15] [SOLR-15437] remove sortSpec as parameter, just get it from rb instead; improve comments --- .../solr/handler/component/QueryComponent.java | 15 +++++++-------- .../component/ReRankSortedHitQueueManager.java | 7 ++++--- .../handler/component/SortedHitQueueManager.java | 8 ++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java index b0a163185f1..6da1e67380d 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -219,14 +219,13 @@ public void prepare(ResponseBuilder rb) throws IOException throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } - // set field flags - { - int flags = 0; - if (returnFields.wantsScore() || (rb.getRankQuery() instanceof AbstractReRankQuery)) { - flags |= SolrIndexSearcher.GET_SCORES; - } - rb.setFieldFlags( flags ); + // Set field flags. We presume to be first to set this flag. + // For this reason, we override any flags other components' prepare methods may have already set. + int flags = 0; + if (returnFields.wantsScore() || (rb.getRankQuery() instanceof AbstractReRankQuery)) { + flags |= SolrIndexSearcher.GET_SCORES; } + rb.setFieldFlags( flags ); if (params.getBool(GroupParams.GROUP, false)) { prepareGrouping(rb); @@ -870,7 +869,7 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) { // Merge the docs via a priority queue so we don't have to sort *all* of the // documents... we only need to order the top (rows+start) - final SortedHitQueueManager queueManager = newSortedHitQueueManager(sortFields, ss, rb); + final SortedHitQueueManager queueManager = newSortedHitQueueManager(sortFields, rb); NamedList shardInfo = null; if(rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) { diff --git a/solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java index fde2fccf5d8..b844dffff8e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ReRankSortedHitQueueManager.java @@ -20,7 +20,7 @@ import org.apache.lucene.search.SortField; /** - * This class is used to manage the multiple SortedHitQueues that we need during mergeIds( ). + * This class is used to manage the multiple SortedHitQueues that we need during {@link QueryComponent#mergeIds(ResponseBuilder, ShardRequest)}. * Multiple queues are needed when reRanking is used. * * The top reRankDocsSize documents are added to the reRankQueue, all other documents are @@ -53,7 +53,8 @@ public static boolean supports(SortField[] sortFields) { if (sortField.getType().equals(SortField.Type.SCORE) && sortField.getField() == null) { // using two queues makes reRanking on Solr Cloud possible (@see https://github.com/apache/solr/pull/151 ) // however, the fix does not work if the non-reRanked docs should be sorted by score ( @see https://github.com/apache/solr/pull/151#discussion_r640664451 ) - // to maintain the existing behavior for these cases, we disable the broken use of two queues and use the (also broken) status quo instead + // to maintain the existing behavior for these cases, we do not support the broken use of two queues and + // continue to use the (also broken) status quo instead return false; } } @@ -64,7 +65,7 @@ public void addDocument(ShardDoc shardDoc) { if(shardDoc.orderInShard < reRankDocsSize) { ShardDoc droppedShardDoc = reRankQueue.insertWithOverflow(shardDoc); // Only works if the original request does not sort by score - // @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-15437") + // see https://issues.apache.org/jira/browse/SOLR-15437 if(droppedShardDoc != null) { queue.insertWithOverflow(droppedShardDoc); } diff --git a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java index 74513f6b442..44d78b1066e 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SortedHitQueueManager.java @@ -35,17 +35,17 @@ public abstract class SortedHitQueueManager { * In all other cases only one queue is used. * * @param sortFields the fields by which the results should be sorted - * @param ss SortSpec of the responseBuilder * @param rb responseBuilder for a query * @return either a SortedHitQueueManager that handles reRanking or the default implementation */ - public static SortedHitQueueManager newSortedHitQueueManager(SortField[] sortFields, SortSpec ss, ResponseBuilder rb) { + public static SortedHitQueueManager newSortedHitQueueManager(SortField[] sortFields, ResponseBuilder rb) { final RankQuery rankQuery = rb.getRankQuery(); + final SortSpec sortSpec = rb.getSortSpec(); if (rankQuery instanceof AbstractReRankQuery && ReRankSortedHitQueueManager.supports(sortFields)) { - return new ReRankSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher(), + return new ReRankSortedHitQueueManager(sortFields, sortSpec.getCount(), sortSpec.getOffset(), rb.req.getSearcher(), ((AbstractReRankQuery)rankQuery).getReRankDocs()); } else { - return new DefaultSortedHitQueueManager(sortFields, ss.getCount(), ss.getOffset(), rb.req.getSearcher()); + return new DefaultSortedHitQueueManager(sortFields, sortSpec.getCount(), sortSpec.getOffset(), rb.req.getSearcher()); } } } \ No newline at end of file