From 4bdb7bbd0bf1f5d6d8411ce0ea33a6ada71f05b2 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Sat, 26 Mar 2016 12:22:34 -0700 Subject: [PATCH 01/13] Resolved Ivy dependency issue for Solr 5.x --- solr5.x/ivy/ivy-settings.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/solr5.x/ivy/ivy-settings.xml b/solr5.x/ivy/ivy-settings.xml index 19d4394..5680754 100644 --- a/solr5.x/ivy/ivy-settings.xml +++ b/solr5.x/ivy/ivy-settings.xml @@ -1,7 +1,11 @@ - - + + + + + + From 562dd15322e3e7ae2ac2ebf9be4757593b785075 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Sat, 26 Mar 2016 12:24:37 -0700 Subject: [PATCH 02/13] Added initial .gitignore to exclude build artifacts --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eab767a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +build-lib +build +dist From 6f8b8d9d5ea074eab7824776c3b430aabfcdc371 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Sun, 27 Mar 2016 08:38:53 -0700 Subject: [PATCH 03/13] Issue 3: Upgraded to Solr 5.3.1 and refectored to ShardHandler.checkDistributed to use ShardHandler.prepDistributed --- solr5.x/build.xml | 4 ++-- .../handler/component/QueryAutoFilteringComponent.java | 10 ++++++---- .../conf/solrconfig.snippet.randomindexconfig.xml | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/solr5.x/build.xml b/solr5.x/build.xml index 6e74893..30b91f9 100644 --- a/solr5.x/build.xml +++ b/solr5.x/build.xml @@ -20,8 +20,8 @@ - - + + diff --git a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index eaf13a1..e7e02cc 100644 --- a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -804,11 
+804,13 @@ private void addDistributedTerms( ResponseBuilder rb, SynonymMap.Builder fieldBu ShardHandlerFactory shardHandlerFactory = container.getShardHandlerFactory( ); ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); - shardHandler.checkDistributed( rb ); + + final SolrParams distribParams = rb.req.getParams(); + final boolean isDistrib = distribParams.get(ShardParams.SHARDS) != null; + Log.debug( "Is Distributed = " + isDistrib ); - Log.debug( "Is Distributed = " + rb.isDistrib ); - - if( rb.isDistrib ) { + if( isDistrib ) { + shardHandler.prepDistributed( rb ); // create a ShardRequest that contains a Terms Request. // don't send to this shard??? ShardRequest sreq = new ShardRequest(); diff --git a/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml index 7514aa4..13f8214 100644 --- a/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml +++ b/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml @@ -31,7 +31,7 @@ A solrconfig.xml snippet containing indexConfig settings for randomized testing. 
${useCompoundFile:false} ${solr.tests.maxBufferedDocs} - ${solr.tests.maxIndexingThreads} + ${solr.tests.maxIndexingThreads:8} ${solr.tests.ramBufferSizeMB} From b99d24c936eab2f7b07497fc41bf429204ebc29d Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Sun, 27 Mar 2016 14:37:47 -0700 Subject: [PATCH 04/13] Added support for a whitelist field list --- README.md | 20 +++++++++ .../QueryAutoFilteringComponent.java | 44 ++++++++++++++----- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ffc75df..ecc7082 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,23 @@ +# Notes about this fork + +This project is a fork of https://github.com/lucidworks/query-autofiltering-component, but includes the following changes to the 5.x code base: + +* Resolved issues with Ivy dependencies +* Upgraded component to work with SOlr 5.3.1 and above. +* Added support for using a field whitelist. + +The whilelist field definition feature was implemented to solve a gap regarding dynamic fields. Although asking the Luke admin handler would have been another implementation option, it seems that a generic whitelist is more powerful; albeit, a bit more verbose in the configuration. Simply define the following: + +
+    <searchComponent name="autofilter" class="org.apache.solr.handler.component.QueryAutoFilteringComponent" >
+      <arr name="whitelistFields">
+        <str>field1</str>
+        <str>field2</str>
+        <str>fieldN</str>
+      </arr>
+    </searchComponent>
+
+ # query-autofiltering-component A Query Autofiltering SearchComponent for Solr that can translate free-text queries into structured queries using index metadata. diff --git a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index e7e02cc..f363110 100644 --- a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -98,6 +98,7 @@ public class QueryAutoFilteringComponent extends QueryComponent implements SolrC private String termsHandler = "/terms"; + private HashSet whitelistFields;; private HashSet excludeFields; private HashSet stopwords; @@ -115,6 +116,15 @@ public class QueryAutoFilteringComponent extends QueryComponent implements SolrC @Override public void init( NamedList initArgs ) { + + List whitelistFields = (List) initArgs.get("whitelistFields"); + if (whitelistFields != null) { + this.whitelistFields = new HashSet( ); + for (String field : whitelistFields ) { + this.whitelistFields.add( field ); + } + } + List excludeFields = (List) initArgs.get("excludeFields"); if (excludeFields != null) { this.excludeFields = new HashSet( ); @@ -729,23 +739,35 @@ private void buildFieldMap( ResponseBuilder rb ) throws IOException { // TODO: Filter this by the configuration fields ... 
private ArrayList getStringFields( SolrIndexSearcher searcher ) { - IndexSchema schema = searcher.getSchema(); + ArrayList strFields = new ArrayList( ); - - Collection fieldNames = searcher.getFieldNames(); - Iterator fnIt = fieldNames.iterator(); - while ( fnIt.hasNext() ) { - String fieldName = fnIt.next( ); - if (excludeFields == null || !excludeFields.contains( fieldName )) { - SchemaField field = schema.getField(fieldName); - if (field.stored() && field.getType() instanceof StrField ) { - strFields.add( fieldName ); + + if ( hasWhitelist() ) { + Log.info("Using whitelist fields instead of schema."); + for ( String fieldName: whitelistFields ) { + strFields.add( fieldName ); + } + } else { + IndexSchema schema = searcher.getSchema(); + Collection fieldNames = searcher.getFieldNames(); + Iterator fnIt = fieldNames.iterator(); + while ( fnIt.hasNext() ) { + String fieldName = fnIt.next( ); + if (excludeFields == null || !excludeFields.contains( fieldName )) { + SchemaField field = schema.getField(fieldName); + if (field.stored() && field.getType() instanceof StrField ) { + strFields.add( fieldName ); + } } } } - + return strFields; } + + private boolean hasWhitelist() { + return this.whitelistFields != null && this.whitelistFields.size() > 0; + } private void addTerm( CharsRef fieldChars, String fieldValue, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder ) throws IOException { From a878a76c5952f86558885fd70679165c02dd0510 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Sun, 27 Mar 2016 14:39:41 -0700 Subject: [PATCH 05/13] Added support for a whitelist field list --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ecc7082..9ca7361 100644 --- a/README.md +++ b/README.md @@ -2,20 +2,20 @@ This project is a fork of https://github.com/lucidworks/query-autofiltering-component, but includes the following changes to the 5.x code base: -* Resolved issues with Ivy dependencies 
-* Upgraded component to work with SOlr 5.3.1 and above. +* Resolved issues with Ivy dependencies. +* Upgraded component to work with Solr 5.3.1 and above. * Added support for using a field whitelist. The whitelist field definition feature was implemented to solve a gap regarding dynamic fields. Although asking the Luke admin handler would have been another implementation option, it seems that a generic whitelist is more powerful; albeit, a bit more verbose in the configuration. Simply define the following:
-    <searchComponent name="autofilter" class="org.apache.solr.handler.component.QueryAutoFilteringComponent" >
-      <arr name="whitelistFields">
-        <str>field1</str>
-        <str>field2</str>
-        <str>fieldN</str>
-      </arr>
-    </searchComponent>
+ <searchComponent name="autofilter" class="org.apache.solr.handler.component.QueryAutoFilteringComponent" >
+   <arr name="whitelistFields">
+   <str>field1</str>
+   <str>field2</str>
+   <str>fieldN</str>
+   </arr>
+ </searchComponent>
 
# query-autofiltering-component From 36d5b86dc37b964098caf01750c0cc0352da1db5 Mon Sep 17 00:00:00 2001 From: Ben Kahn Date: Thu, 15 Jun 2017 13:02:06 -0700 Subject: [PATCH 06/13] Changed the autofilter to use the bq field instead of q so it plays nicely with the elevator. --- .../handler/component/QueryAutoFilteringComponent.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index f363110..9fbc43a 100644 --- a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -479,16 +479,15 @@ else if (qbuilder.length() > 0 && fieldMap.size() > 0) { } } else { // boostFactor is NOT null - // use the original query add fielded boost clauses + // use the bq field to add fielded boost clauses StringBuilder bbuilder = new StringBuilder( ); String boostSuffix = "^" + boostFactor.toString( ); - bbuilder.append( getPhrase( queryTokens, 0, queryTokens.size() - 1, " " ) ); for (String fieldName : fieldMap.keySet( ) ) { bbuilder.append( " " ); bbuilder.append( getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, boostSuffix ) ); } - Log.info( "setting q = '" + bbuilder.toString() + "'" ); - modParams.set( "q", bbuilder.toString( ) ); + Log.info( "adding bq = '" + bbuilder.toString() + "'" ); + modParams.add( "bq", bbuilder.toString( ).trim() ); } return true; } From 49dd0f866836cd65cf3fefff79ca43e475f2dee0 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Thu, 24 Aug 2017 12:26:25 -0700 Subject: [PATCH 07/13] Initial copy of 5.x for 6.x baseline. 
--- solr6.x/build.xml | 127 ++ solr6.x/ivy.xml | 21 + solr6.x/ivy/ivy-settings.xml | 11 + .../QueryAutoFilteringComponent.java | 1424 +++++++++++++++++ .../DistributedQueryAutoFilteringTest.java | 75 + .../QueryAutoFilteringComponentTest.java | 486 ++++++ .../solr/collection1/conf/currency.xml | 37 + .../collection1/conf/schema-autofilter.xml | 273 ++++ .../conf/solrconfig-autofilter.xml | 242 +++ .../solrconfig.snippet.randomindexconfig.xml | 47 + .../solr/collection1/conf/stopwords.txt | 58 + .../collection1/conf/synonyms-autofilter.txt | 17 + .../solr/collection1/conf/synonyms.txt | 31 + 13 files changed, 2849 insertions(+) create mode 100644 solr6.x/build.xml create mode 100644 solr6.x/ivy.xml create mode 100644 solr6.x/ivy/ivy-settings.xml create mode 100644 solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java create mode 100644 solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java create mode 100644 solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/currency.xml create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt diff --git a/solr6.x/build.xml b/solr6.x/build.xml new file mode 100644 index 0000000..30b91f9 --- /dev/null +++ b/solr6.x/build.xml @@ -0,0 +1,127 @@ + + Builds Query Autofiltering Component + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr6.x/ivy.xml b/solr6.x/ivy.xml new file mode 100644 index 0000000..f0e8ae2 --- /dev/null +++ b/solr6.x/ivy.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/solr6.x/ivy/ivy-settings.xml b/solr6.x/ivy/ivy-settings.xml new file mode 100644 index 0000000..5680754 --- /dev/null +++ b/solr6.x/ivy/ivy-settings.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java new file mode 100644 index 0000000..9fbc43a --- /dev/null +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -0,0 +1,1424 @@ +package org.apache.solr.handler.component; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.TermsParams; +import org.apache.solr.common.params.ShardParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; + +import org.apache.solr.util.plugin.SolrCoreAware; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.core.SolrEventListener; + +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.StrField; +import org.apache.solr.search.SolrIndexSearcher; + +import org.apache.solr.client.solrj.response.TermsResponse; + +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.CharsRefBuilder; +import 
org.apache.lucene.index.LeafReader; +import org.apache.lucene.uninverting.UninvertingReader; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Term; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.synonym.SynonymMap; +import org.apache.lucene.analysis.synonym.SynonymMap.Builder; +import org.apache.lucene.analysis.synonym.SolrSynonymParser; +import org.apache.lucene.analysis.util.TokenFilterFactory; +import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.analysis.standard.StandardTokenizer; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.fst.FST; + +import java.util.ArrayList; +import java.util.List; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Collection; +import java.util.Iterator; +import java.util.StringTokenizer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.BufferedReader; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; + +/** + * Creates filter or boost queries from freetext queries based on pattern matches with terms in stored String fields. Uses + * the FieldCache (UninvertingIndex) to build a map of term to search field. This map is then used to parse the + * query to detect phrases that map to specific field values. These field/value pairs can then be used to generate + * a filter query or a boost query if recall needs to be preserved. 
+ * + * For SolrCloud, this component requires that the TermsComponent be defined in solrconfig.xml. This is used + * to get distributed term maps. + * + * Compiles with Solr 5.x + */ + +public class QueryAutoFilteringComponent extends QueryComponent implements SolrCoreAware, SolrEventListener { + + private static final Logger Log = LoggerFactory.getLogger( QueryAutoFilteringComponent.class ); + + public static final String MINIMUM_TOKENS = "mt"; + public static final String BOOST_PARAM = "afb"; + + private SynonymMap fieldMap; // Map of search terms to fieldName + private SynonymMap synonyms; // synonyms from synonyms.txt + private SynonymMap termMap; // Map of search term to indexed term + + private String synonymsFile; + + private NamedList initParams; + + private boolean initFieldMap = false; + + private String termsHandler = "/terms"; + + private HashSet whitelistFields;; + private HashSet excludeFields; + private HashSet stopwords; + + private Integer boostFactor; // if null, use Filter Query + + // For multiple terms in the same field, if field is multi-valued = use AND for filter query + private boolean useAndForMultiValuedFields = true; + + private String fieldDelim = "|"; + + private String fieldSplitExpr = "\\|"; + + // map of a "verb" phrase to a metadata field + private ArrayList verbModifierList; + + @Override + public void init( NamedList initArgs ) { + + List whitelistFields = (List) initArgs.get("whitelistFields"); + if (whitelistFields != null) { + this.whitelistFields = new HashSet( ); + for (String field : whitelistFields ) { + this.whitelistFields.add( field ); + } + } + + List excludeFields = (List) initArgs.get("excludeFields"); + if (excludeFields != null) { + this.excludeFields = new HashSet( ); + for (String field : excludeFields ) { + this.excludeFields.add( field ); + } + } + + List verbModifiers = (List)initArgs.get( "verbModifiers" ); + if (verbModifiers != null) { + this.verbModifierList = new ArrayList( ); + for (String modifier : 
verbModifiers) { + String modifierPhrase = new String( modifier.substring( 0, modifier.indexOf( ":" ))); + String modifierFields = new String( modifier.substring( modifier.indexOf( ":" ) + 1 )); + + if (modifierPhrase.indexOf( "," ) > 0) { + String[] phrases = modifierPhrase.split( "," ); + for (int i = 0; i < phrases.length; i++) { + addModifier( phrases[i], modifierFields ); + } + } + else { + addModifier( modifierPhrase, modifierFields ); + } + } + } + + Integer boostFactor = (Integer)initArgs.get( "boostFactor" ); + if (boostFactor != null) { + this.boostFactor = boostFactor; + } + + String useAndForMV = (String)initArgs.get( "useAndForMultiValuedFields" ); + if (useAndForMV != null) { + this.useAndForMultiValuedFields = useAndForMV.equalsIgnoreCase( "true" ); + } + + String useFieldDelim = (String)initArgs.get( "fieldDelimiter" ); + if (useFieldDelim != null) { + this.fieldDelim = useFieldDelim; + this.fieldSplitExpr = useFieldDelim; + } + + initParams = initArgs; + } + + private void addModifier( String modifierPhrase, String modifierFields ) { + Log.info( "addModifier: " + modifierPhrase + ": " + modifierFields ); + ModifierDefinition modDef = new ModifierDefinition( ); + modDef.modifierPhrase = modifierPhrase.toLowerCase( ); + + if (modifierFields.indexOf( fieldDelim ) > 0) { + modDef.filterFields = new HashMap( ); + String fieldPairs = new String( modifierFields.substring( modifierFields.indexOf( fieldDelim ) + 1 )); + modifierFields = new String( modifierFields.substring( 0, modifierFields.indexOf( fieldDelim ))); + Log.info( "fieldPairs = " + fieldPairs ); + + String modifierTemplate = null; + if (fieldPairs.indexOf( fieldDelim ) > 0) { + modifierTemplate = new String( fieldPairs.substring( fieldPairs.indexOf( fieldDelim ) + 1 )); + fieldPairs = new String( fieldPairs.substring( 0, fieldPairs.indexOf( fieldDelim ))); + } + + if (fieldPairs.indexOf( "," ) > 0) { + String[] fieldPairList = fieldPairs.split( "," ); + for (int i = 0; i < 
fieldPairList.length; i++) { + String field = new String( fieldPairList[i].substring( 0, fieldPairList[i].indexOf( ":" ))); + String value = new String(fieldPairList[i].substring( fieldPairList[i].indexOf( ":" ) + 1 )); + modDef.filterFields.put( field, value ); + } + } + else { + String field = new String(fieldPairs.substring( 0, fieldPairs.indexOf( ":" ))); + String value = new String( fieldPairs.substring( fieldPairs.indexOf( ":" ) + 1 )); + modDef.filterFields.put( field, value ); + } + + if (modifierTemplate != null) { + modDef.templateRule = new ModifierTemplateRule( modifierTemplate ); + } + } + modDef.modifierFields = new ArrayList( ); + if (modifierFields.indexOf( "," ) > 0) { + String[] fields = modifierFields.split( "," ); + for (int i = 0; i < fields.length; i++) { + modDef.modifierFields.add( fields[i] ); + } + } + else { + modDef.modifierFields.add( modifierFields ); + } + + modDef.modTokens = modDef.modifierPhrase.split( " " ); + verbModifierList.add( modDef ); + } + + + @Override + public void inform( SolrCore core ) { + if (initParams != null) { + SolrResourceLoader resourceLoader = core.getResourceLoader( ); + + synonymsFile = (String)initParams.get( "synonyms" ); + if (synonymsFile != null) { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new TokenStreamComponents(tokenizer, tokenizer ); + } + }; + + try { + SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + + parser.parse(new InputStreamReader( resourceLoader.openResource(synonymsFile), decoder)); + this.synonyms = parser.build( ); + } + catch ( Exception e ) { + // ??? 
+ Log.warn( "Parsing Synonyms Got Exception " + e ); + } + } + + String stopwordsFile = (String)initParams.get( "stopwords" ); + if (stopwordsFile != null) { + this.stopwords = new HashSet( ); + try { + BufferedReader br = new BufferedReader( new InputStreamReader( resourceLoader.openResource( stopwordsFile ))); + String line = null; + while ((line = br.readLine( )) != null) { + stopwords.add( line.toLowerCase( ) ); + } + br.close( ); + } + catch ( IOException ioe ) { + Log.warn( "Adding Stopwords Got Exception " + ioe ); + } + } + } + + core.registerFirstSearcherListener( this ); + core.registerNewSearcherListener( this ); + } + + @Override + public void postCommit() { } + + @Override + public void postSoftCommit() { } + + public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) { + synchronized( this ) { + initFieldMap = true; + } + } + + @Override + public void prepare( ResponseBuilder rb ) throws IOException + { + SolrQueryRequest req = rb.req; + SolrParams params = req.getParams( ); + + // Only build the field map and do the processing if we are the main event + String isShard = params.get( "isShard" ); + if (isShard != null && isShard.equals( "true" )) { + Log.debug( "A shard query: don't process!" ); + return; + } + + Log.info( "prepare ..." ); + if (initFieldMap) { + synchronized( this ) { + buildFieldMap( rb ); + initFieldMap = false; + } + } + + int mintok = 1; + String mt = params.get( MINIMUM_TOKENS ); + if ( mt != null ) { + try { + mintok = Integer.parseInt( mt ); + } + catch ( NumberFormatException nfe ) { + // ??? 
+ mintok = 1; + } + } + + String qStr = params.get( CommonParams.Q ); + Log.debug( "query is: " + qStr ); + if (qStr.equals( "*" ) || qStr.indexOf( ":" ) > 0) { + Log.debug( "Complex query - do not process" ); + return; + } + + // tokenize the query string, if any part of it matches, remove the token from the list and + // add a filter query with :value: + ArrayList queryTokens = tokenize( qStr ); + + if (queryTokens.size( ) >= mintok) { + ModifiableSolrParams modParams = new ModifiableSolrParams( params ); + if (findPattern( queryTokens, rb, modParams )) { + req.setParams( modParams ); + } + } + } + + private boolean findPattern( ArrayList queryTokens, ResponseBuilder rb, ModifiableSolrParams modParams ) throws IOException { + Log.debug( "findPattern " ); + + HashSet usedTokens = new HashSet( ); + HashMap> fieldMap = new HashMap>( ); + HashMap fieldPositionMap = new HashMap( ); + HashMap entityPositionMap = (verbModifierList != null) ? new HashMap() : null; + + String longestPhraseField = null; + int startToken = 0; + int lastEndToken = 0; + while ( startToken < queryTokens.size() ) { + Log.debug( "startToken = " + startToken ); + int endToken = startToken; + + while ( endToken < queryTokens.size( ) ) { + // FieldName can be comma separated if there are more than one field name for a set of tokens + String fieldName = getFieldNameFor( queryTokens, startToken, endToken ); + if ( fieldName != null ) { + longestPhraseField = fieldName; + lastEndToken = endToken; + } + else if ( longestPhraseField != null ) { + break; + } + ++endToken; + } + + if (longestPhraseField != null) { + // create matching phrase from startToken -> endToken + String phrase = getPhrase( queryTokens, startToken, lastEndToken ); + Log.debug( "get Indexed Term for " + phrase ); + String indexedTerm = getMappedFieldName( termMap, phrase.toLowerCase( )); + if (indexedTerm == null) { + indexedTerm = getMappedFieldName( termMap, getStemmed( phrase )); + } + if (indexedTerm != null) { + indexedTerm = 
indexedTerm.replace( '_', ' ' ); + if (indexedTerm.indexOf( " " ) > 0 ) { + indexedTerm = "\"" + indexedTerm + "\""; + } + ArrayListvalList = fieldMap.get( longestPhraseField ); + if (valList == null) { + valList = new ArrayList( ); + fieldMap.put( longestPhraseField, valList ); + } + + Log.info( "indexedTerm: " + indexedTerm ); + int[] entityPosition = null; + if (entityPositionMap != null) { + entityPosition = new int[2]; + entityPosition[0] = startToken; + entityPosition[1] = endToken-1; + } + + Log.debug( "indexedTerm: " + indexedTerm ); + if (indexedTerm.indexOf( fieldDelim ) > 0) + { + String[] indexedTerms = indexedTerm.split( fieldSplitExpr ); + for (int t = 0; t < indexedTerms.length; t++) { + valList.add( indexedTerms[t] ); + if (entityPositionMap != null) entityPositionMap.put( indexedTerms[t], entityPosition ); + } + } + else { + valList.add( indexedTerm ); + if (entityPositionMap != null) entityPositionMap.put( indexedTerm, entityPosition ); + } + + // save startToken and lastEndToken so can use for boolean operator context + // for multi-value fields -save the min and max of all tokens positions for the field + int[] posArray = fieldPositionMap.get( longestPhraseField ); + if (posArray == null) + { + posArray = new int[2]; + posArray[0] = startToken; + posArray[1] = lastEndToken; + fieldPositionMap.put( longestPhraseField, posArray ); + } + else + { + posArray[1] = lastEndToken; + } + + longestPhraseField = null; + for (int i = startToken; i <= lastEndToken; i++) { + Log.debug( "adding used token at " + i ); + usedTokens.add( new Integer( i ) ); + } + startToken = lastEndToken + 1; + } + } + else { + ++startToken; + } + } + + if (usedTokens.size( ) > 0) { + + // filter field maps based on verbs here: + if (entityPositionMap != null) { + filterFieldMap( queryTokens, fieldMap, entityPositionMap, fieldPositionMap ); + } + + String useBoost = modParams.get( BOOST_PARAM ); + Integer boostFactor = (useBoost != null) ? 
new Integer( useBoost ) : this.boostFactor; + if (boostFactor == null) { + StringBuilder qbuilder = new StringBuilder( ); + if (usedTokens.size( ) < queryTokens.size( ) ) { + for (int i = 0; i < queryTokens.size(); i++) { + if (boostFactor != null || usedTokens.contains( new Integer( i ) ) == false ) { + char[] token = queryTokens.get( i ); + if (qbuilder.length() > 0) qbuilder.append( " " ); + qbuilder.append( token ); + } + } + } + + Log.debug( "got qbuilder string = '" + qbuilder.toString() + "'" ); + if (qbuilder.length() == 0 && fieldMap.size() > 0) { + // build a filter query - + // EH: can't do this if dismax + Log.debug( "setting q = *:*" ); + modParams.set( "q", "*:*" ); + for (String fieldName : fieldMap.keySet() ) { + String fq = getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, "" ); + Log.info( "adding filter query: " + fq ); + modParams.add( "fq", fq ); + } + } + else if (qbuilder.length() > 0 && fieldMap.size() > 0) { + // build a boolean query for the fielded data, OR with remainder of query + StringBuilder boolQ = new StringBuilder( ); + for (String fieldName : fieldMap.keySet() ) { + if (boolQ.length() > 0) boolQ.append( " AND " ); + boolQ.append( getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, "" ) ); + } + String q = qbuilder.toString( ) + " (" + boolQ.toString() + ")"; + Log.info( "setting q = '" + q + "'" ); + modParams.set( "q", q ); + } + } + else { // boostFactor is NOT null + // use the bq field to add fielded boost clauses + StringBuilder bbuilder = new StringBuilder( ); + String boostSuffix = "^" + boostFactor.toString( ); + for (String fieldName : fieldMap.keySet( ) ) { + bbuilder.append( " " ); + bbuilder.append( getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, boostSuffix ) ); + } + Log.info( "adding bq = '" + bbuilder.toString() + "'" ); + modParams.add( "bq", 
bbuilder.toString( ).trim() ); + } + return true; + } + + return false; + } + + private String getPhrase( ArrayList tokens, int startToken, int endToken ) { + return getPhrase( tokens, startToken, endToken, "_" ); + } + + private String getPhrase( ArrayList tokens, int startToken, int endToken, String tokenSep ) { + StringBuilder strb = new StringBuilder( ); + for (int i = startToken; i <= endToken; i++) { + if (i > startToken) strb.append( tokenSep ); + + strb.append( tokens.get( i ) ); + } + Log.debug( "getPhrase returns " + strb.toString( ) ); + return strb.toString( ); + } + + private String getFilterQuery( ResponseBuilder rb, String fieldName, ArrayList valList, + int[] termPosRange, ArrayList queryTokens, String suffix) { + if (fieldName.indexOf( fieldDelim ) > 0) { + return getFilterQuery( rb, fieldName.split( fieldSplitExpr ), valList, termPosRange, queryTokens, suffix ); + } + if (valList.size() == 1) { + // check if valList[0] is multi-term - if so, check if there is a single term equivalent + // if this returns non-null, create an OR query with single term version + // example "white linen perfume" vs "white linen shirt" where "White Linen" is a brand + String term = valList.get( 0 ); + + if (term.indexOf( " " ) > 0) { + String singleTermQuery = getSingleTermQuery( term ); + if (singleTermQuery != null) { + StringBuilder strb = new StringBuilder( ); + // EH: possible meta-escaping problem if value includes {!field f=}value + strb.append( "(" ).append( fieldName ).append( ":" ) + .append( term ).append( " OR (" ).append( singleTermQuery ).append( "))" ).append( suffix ); + Log.debug( "returning composite query: " + strb.toString( ) ); + return strb.toString( ); + } + } + + String query = fieldName + ":" + term + suffix; + Log.debug( "returning single query: " + query ); + return query; + } + else { + SolrIndexSearcher searcher = rb.req.getSearcher(); + IndexSchema schema = searcher.getSchema(); + SchemaField field = schema.getField(fieldName); + boolean 
useAnd = field.multiValued() && useAndForMultiValuedFields; + // if query has 'or' in it and or is at a position 'within' the values for this field ... + if (useAnd) { + for (int i = termPosRange[0] + 1; i < termPosRange[1]; i++ ) { + char[] qToken = queryTokens.get( i ); + // is the token 'or'? + if (qToken.length == 2 && qToken[0] == 'o' && qToken[1] == 'r' ) { + useAnd = false; + break; + } + } + } + + StringBuilder orQ = new StringBuilder( ); + for (String val : valList ) { + if (orQ.length() > 0) orQ.append( (useAnd ? " AND " : " OR ") ); + orQ.append( val ); + } + return fieldName + ":(" + orQ.toString() + ")" + suffix; + } + } + + private String getFilterQuery( ResponseBuilder rb, String[] fieldNames, ArrayList valList, + int[] termPosRange, ArrayList queryTokens, String suffix) { + StringBuilder filterQBuilder = new StringBuilder( ); + for (int i = 0; i < fieldNames.length; i++) { + if (i > 0) filterQBuilder.append( " OR " ); + filterQBuilder.append( getFilterQuery( rb, fieldNames[i], valList, termPosRange, queryTokens, suffix ) ); + } + return "(" + filterQBuilder.toString() + ")"; + } + + private String getFieldNameFor( ArrayList queryTokens, int startToken, int endToken ) throws IOException { + String phrase = getPhrase( queryTokens, startToken, endToken ); + String fieldName = getFieldNameFor( phrase.toLowerCase( ) ); + if (fieldName != null) return fieldName; + + String stemmed = getStemmed( phrase ); + Log.debug( "checking stemmed " + stemmed ); + return (stemmed.equals( phrase )) ? 
null : getFieldNameFor( stemmed ); + } + + private String getSingleTermQuery( String multiTermValue ) { + + String multiTerm = multiTermValue; + if (multiTermValue.startsWith( "\"" )) { + multiTerm = new String( multiTermValue.substring( 1, multiTermValue.lastIndexOf( "\"" ))); + } + Log.debug( "getSingleTermQuery " + multiTerm + "" ); + + try { + StringBuilder strb = new StringBuilder( ); + + String[] terms = multiTerm.split( " " ); + for (int i = 0; i < terms.length; i++) { + if (i > 0) strb.append( " AND " ); + + String fieldName = getFieldNameFor( terms[i].toLowerCase( ) ); + Log.debug( "fieldName for " + terms[i].toLowerCase( ) + " is " + fieldName ); + if (fieldName == null) return null; + + if (fieldName.indexOf( fieldDelim ) > 0) { + String[] fields = fieldName.split( fieldSplitExpr ); + strb.append( "(" ); + for (int f = 0; f < fields.length; f++) { + if (f > 0) strb.append( " OR " ); + strb.append( fields[f] ).append( ":" ).append( getMappedFieldName( termMap, terms[i].toLowerCase( ) ) ); + } + strb.append( ")" ); + } + else { + strb.append( fieldName ).append( ":" ).append( getMappedFieldName( termMap, terms[i].toLowerCase( ) ) ); + } + } + + Log.debug( "getSingleTermQuery returns: '" + strb.toString( ) + "'" ); + return strb.toString( ); + } + catch (IOException ioe ) { + return null; + } + } + + private String getFieldNameFor( String phrase ) throws IOException { + Log.debug( "getFieldNameFor '" + phrase + "'" ); + return ("*".equals( phrase) || "* *".equals( phrase )) ? 
null : getMappedFieldName( fieldMap, phrase.toLowerCase( ) ); + } + + + // TODO: Return comma separated string if more than one + private String getMappedFieldName( SynonymMap termMap, String phrase ) throws IOException { + Log.debug( "getMappedFieldName: '" + phrase + "'" ); + FST fst = termMap.fst; + FST.BytesReader fstReader = fst.getBytesReader(); + FST.Arc scratchArc = new FST.Arc<>( ); + BytesRef scratchBytes = new BytesRef(); + CharsRefBuilder scratchChars = new CharsRefBuilder(); + ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + + BytesRef pendingOutput = fst.outputs.getNoOutput(); + fst.getFirstArc( scratchArc ); + BytesRef matchOutput = null; + + String noSpPhrase = phrase.replace( ' ', '_' ); + int charPos = 0; + while(charPos < noSpPhrase.length()) { + final int codePoint = noSpPhrase.codePointAt( charPos ); + if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) { + Log.debug( "No FieldName for " + phrase ); + return null; + } + + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + charPos += Character.charCount(codePoint); + } + + if (scratchArc.isFinal()) { + Log.debug( "creating matchOutput" ); + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + ArrayList mappedFields = new ArrayList( ); + bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length ); + + final int code = bytesReader.readVInt(); + final int count = code >>> 1; + for( int outputIDX = 0; outputIDX < count; outputIDX++ ) { + termMap.words.get( bytesReader.readVInt(), scratchBytes ); + scratchChars.copyUTF8Bytes(scratchBytes); + int lastStart = 0; + final int chEnd = lastStart + scratchChars.length(); + for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) { + if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) { + int outputLen = chIDX - lastStart; + assert outputLen > 0: "output contains empty string: " + scratchChars; + mappedFields.add( new String( 
scratchChars.chars(), lastStart, outputLen ) ); + lastStart = chIDX + 1; + } + } + } + + if (mappedFields.size() == 1) { + Log.debug( "returning mapped fieldName " + mappedFields.get( 0 ) ); + return mappedFields.get( 0 ); + } + else { + StringBuilder fieldBuilder = new StringBuilder( ); + for (String fieldName : mappedFields ) { + if (fieldBuilder.length() > 0) fieldBuilder.append( fieldDelim ); + fieldBuilder.append( fieldName ); + } + Log.debug( "returning mapped fieldName " + fieldBuilder.toString( ) ); + return fieldBuilder.toString( ); + } + } + + Log.warn( "matchOutput but no FieldName for " + phrase ); + return null; + } + + + private void buildFieldMap( ResponseBuilder rb ) throws IOException { + Log.debug( "buildFieldMap" ); + SolrIndexSearcher searcher = rb.req.getSearcher(); + // build a synonym map from the SortedDocValues - + // for each field value: lower case, stemmed, lookup synonyms from synonyms.txt - map to fieldValue + SynonymMap.Builder fieldBuilder = new SynonymMap.Builder( true ); + SynonymMap.Builder termBuilder = new SynonymMap.Builder( true ); + + HashMap fieldTypeMap = new HashMap( ); + + ArrayList searchFields = getStringFields( searcher ); + for (String searchField : searchFields ) { + fieldTypeMap.put( searchField, UninvertingReader.Type.SORTED_SET_BINARY); + } + UninvertingReader unvRead = new UninvertingReader( searcher.getLeafReader( ), fieldTypeMap ); + + for (String searchField : searchFields ) { + Log.debug( "adding searchField " + searchField ); + CharsRef fieldChars = new CharsRef( searchField ); + SortedSetDocValues sdv = unvRead.getSortedSetDocValues( searchField ); + if (sdv == null) continue; + Log.debug( "got SortedSetDocValues for " + searchField ); + TermsEnum te = sdv.termsEnum(); + while (te.next() != null) { + BytesRef term = te.term(); + String fieldValue = term.utf8ToString( ); + addTerm ( fieldChars, fieldValue, fieldBuilder, termBuilder ); + } + } + + addDistributedTerms( rb, fieldBuilder, termBuilder, 
searchFields ); + + fieldMap = fieldBuilder.build( ); + termMap = termBuilder.build( ); + } + + // TODO: Filter this by the configuration fields ... + private ArrayList getStringFields( SolrIndexSearcher searcher ) { + + ArrayList strFields = new ArrayList( ); + + if ( hasWhitelist() ) { + Log.info("Using whitelist fields instead of schema."); + for ( String fieldName: whitelistFields ) { + strFields.add( fieldName ); + } + } else { + IndexSchema schema = searcher.getSchema(); + Collection fieldNames = searcher.getFieldNames(); + Iterator fnIt = fieldNames.iterator(); + while ( fnIt.hasNext() ) { + String fieldName = fnIt.next( ); + if (excludeFields == null || !excludeFields.contains( fieldName )) { + SchemaField field = schema.getField(fieldName); + if (field.stored() && field.getType() instanceof StrField ) { + strFields.add( fieldName ); + } + } + } + } + + return strFields; + } + + private boolean hasWhitelist() { + return this.whitelistFields != null && this.whitelistFields.size() > 0; + } + + private void addTerm( CharsRef fieldChars, String fieldValue, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder ) throws IOException { + + Log.debug( "got fieldValue: '" + fieldValue + "'" ); + String nospVal = fieldValue.replace( ' ', '_' ); + Log.debug( "got nspace: '" + nospVal + "'" ); + CharsRef nospChars = new CharsRef( nospVal ); + CharsRef valueChars = new CharsRef( fieldValue ); + + fieldBuilder.add( nospChars, fieldChars, false ); + termBuilder.add( nospChars, valueChars, false ); + + // lower case term, + String lowercase = nospVal.toLowerCase( ); + CharsRef lcChars = new CharsRef( lowercase ); + fieldBuilder.add( lcChars, fieldChars, false ); + termBuilder.add( lcChars, valueChars, false ); + + // stem it + String stemmed = getStemmed( nospVal ); + if (stemmed.equals( fieldValue ) == false) { + Log.debug( "adding stemmed: " + stemmed ); + CharsRef stChars = new CharsRef( stemmed ); + fieldBuilder.add( stChars, fieldChars, false ); + 
termBuilder.add( stChars, valueChars, false ); + } + + if (this.synonyms != null) { + // get synonyms from synonyms.txt + ArrayList synonymLst = getSynonymsFor( this.synonyms, fieldValue ); + if ( synonymLst != null ) { + for (String synonym : synonymLst ) { + String nospSyn = synonym.replace( ' ', '_' ); + Log.debug( "adding: " + synonym + " -> " + fieldValue ); + CharsRef synChars = new CharsRef( nospSyn ); + fieldBuilder.add( synChars, fieldChars, false ); + termBuilder.add( synChars, valueChars, false ); + } + } + synonymLst = getSynonymsFor( this.synonyms, fieldValue.toLowerCase() ); + if ( synonymLst != null ) { + for (String synonym : synonymLst ) { + String nospSyn = synonym.replace( ' ', '_' ); + Log.debug( "adding: " + synonym + " -> " + fieldValue ); + CharsRef synChars = new CharsRef( nospSyn ); + fieldBuilder.add( synChars, fieldChars, false ); + termBuilder.add( synChars, valueChars, false ); + } + } + } + } + + private void addDistributedTerms( ResponseBuilder rb, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList searchFields ) throws IOException { + SolrIndexSearcher searcher = rb.req.getSearcher(); + CoreContainer container = searcher.getCore().getCoreDescriptor().getCoreContainer(); + + ShardHandlerFactory shardHandlerFactory = container.getShardHandlerFactory( ); + ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); + + final SolrParams distribParams = rb.req.getParams(); + final boolean isDistrib = distribParams.get(ShardParams.SHARDS) != null; + Log.debug( "Is Distributed = " + isDistrib ); + + if( isDistrib ) { + shardHandler.prepDistributed( rb ); + // create a ShardRequest that contains a Terms Request. + // don't send to this shard??? 
+ ShardRequest sreq = new ShardRequest(); + sreq.purpose = ShardRequest.PURPOSE_GET_TERMS; + sreq.actualShards = rb.shards; + ModifiableSolrParams params = new ModifiableSolrParams( ); + + params.set( TermsParams.TERMS_LIMIT, -1); + params.set( TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX); + String[] fields = searchFields.toArray( new String[ searchFields.size( )] ); + params.set( TermsParams.TERMS_FIELD, fields ); + + params.set( CommonParams.DISTRIB, "false" ); + params.set( ShardParams.IS_SHARD, true ); + params.set( ShardParams.SHARDS_PURPOSE, sreq.purpose ); + params.set( CommonParams.QT, termsHandler ); + params.set( TermsParams.TERMS, "true" ); + + if (rb.requestInfo != null) { + params.set("NOW", Long.toString(rb.requestInfo.getNOW().getTime())); + } + sreq.params = params; + + for (String shard : rb.shards ) { + Log.debug( "sending request to shard " + shard ); + params.set(ShardParams.SHARD_URL, shard ); + shardHandler.submit( sreq, shard, params ); + } + + ShardResponse rsp = shardHandler.takeCompletedIncludingErrors( ); + if (rsp != null) { + Log.debug( "got " + rsp.getShardRequest().responses.size( ) + " responses" ); + for ( ShardResponse srsp : rsp.getShardRequest().responses ) { + Log.debug( "Got terms response from " + srsp.getShard( )); + + if (srsp.getException() != null) { + Log.debug( "ShardResponse Exception!! " + srsp.getException( ) ); + } + + @SuppressWarnings("unchecked") + NamedList> terms = (NamedList>) srsp.getSolrResponse().getResponse().get("terms"); + if (terms != null) { + addTerms( terms, fieldBuilder, termBuilder, searchFields ); + } + else { + Log.warn( "terms was NULL! 
- make sure that /terms request handler is defined in solrconfig.xml" ); + } + } + } + } + } + + private void addTerms( NamedList> terms, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList searchFields ) throws IOException { + TermsResponse termsResponse = new TermsResponse( terms ); + for (String fieldName : searchFields ) { + CharsRef fieldChars = new CharsRef( fieldName ); + List termList = termsResponse.getTerms( fieldName ); + if (termList != null) { + for (TermsResponse.Term tc : termList) { + String term = tc.getTerm(); + Log.debug( "Add distributed term: " + fieldName + " = " + term ); + addTerm( fieldChars, term, fieldBuilder, termBuilder ); + } + } + } + } + + + private ArrayList getSynonymsFor( SynonymMap synMap, String term ) throws IOException { + Log.debug( "getSynonymsFor '" + term + "'" ); + + FST fst = synMap.fst; + FST.BytesReader fstReader = fst.getBytesReader(); + FST.Arc scratchArc = new FST.Arc<>( ); + BytesRef scratchBytes = new BytesRef(); + CharsRefBuilder scratchChars = new CharsRefBuilder(); + ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + + BytesRef pendingOutput = fst.outputs.getNoOutput(); + fst.getFirstArc( scratchArc ); + BytesRef matchOutput = null; + + String[] tokens = term.split( " " ); + for (int i = 0; i < tokens.length; i++) { + + int charPos = 0; + while( charPos < tokens[i].length() ) { + final int codePoint = tokens[i].codePointAt( charPos ); + if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) { + Log.debug( "No Synonym for " + term ); + return null; + } + + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + charPos += Character.charCount(codePoint); + } + + if (scratchArc.isFinal()) { + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + } + + if (i < tokens.length-1 && fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) != null) { + pendingOutput = fst.outputs.add(pendingOutput, 
scratchArc.nextFinalOutput); + } + } + + if (matchOutput != null) { + ArrayList synonymLst = new ArrayList( ); + bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length ); + + final int code = bytesReader.readVInt(); + final int count = code >>> 1; + for( int outputIDX = 0; outputIDX < count; outputIDX++ ) { + synMap.words.get( bytesReader.readVInt(), scratchBytes); + scratchChars.copyUTF8Bytes(scratchBytes); + int lastStart = 0; + final int chEnd = lastStart + scratchChars.length(); + for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) { + if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) { + int outputLen = chIDX - lastStart; + assert outputLen > 0: "output contains empty string: " + scratchChars; + String synonym = new String( scratchChars.chars(), lastStart, outputLen ); + Log.debug( "got synonym '" + synonym + "'" ); + synonymLst.add( synonym ); + lastStart = chIDX + 1; + } + } + } + + return synonymLst; + } + + return null; + } + + + // assume English for now ... 
+ private String getStemmed( String input ) { + char[] inputChars = input.toCharArray( ); + + int lastCh = stem( inputChars, inputChars.length ); + if (lastCh < inputChars.length) { + return new String( inputChars, 0, lastCh ); + } + + return input; + } + + // similar to EnglishMinimalStemmer - fixes "...hes" as in batches couches + public int stem(char s[], int len) { + if (len < 3 || s[len-1] != 's') + return len; + + switch(s[len-2]) { + case 'u': + case 's': return len; + case 'e': + if (len > 3 && s[len-3] == 'i' && s[len-4] != 'a' && s[len-4] != 'e') { + s[len - 3] = 'y'; + return len - 2; + } + if (len > 3 && s[len-3] == 'h') { + return len-2; + } + if (s[len-3] == 'i' || s[len-3] == 'a' || s[len-3] == 'o' || s[len-3] == 'e') + return len; /* intentional fallthrough */ + default: return len - 1; + } + } + + private ArrayList tokenize( String input ) throws IOException { + + Log.debug( "tokenize '" + input + "'" ); + ArrayList tokens = new ArrayList( ); + Tokenizer tk = getTokenizerImpl( input ); + + CharTermAttribute term = tk.addAttribute( CharTermAttribute.class ); + tk.reset( ); + while (tk.incrementToken( ) ) { + int bufLen = term.length(); + char[] copy = new char[ bufLen ]; + System.arraycopy(term.buffer( ), 0, copy, 0, bufLen ); + tokens.add( copy ); + } + + return tokens; + } + + private Tokenizer getTokenizerImpl( String input ) throws IOException { + StandardTokenizer sttk = new StandardTokenizer( ); + sttk.setReader( new StringReader( input ) ); + return sttk; + } + + @Override + public void process(ResponseBuilder rb) throws IOException + { + // do nothing - needed so we don't execute the query here. 
+ } + + // =========================================================================== + // Verb Modifier Code + // Using the verb modifier map if a verb modifier is adjacent to a field mapped phrase (can have noise words between) + // restrict the field names in the list to the one that is linked to the verb modifier + // TODO - how to deal with 'and' and 'or' Between modifiers + // =========================================================================== + private void filterFieldMap( ArrayList queryTokens, HashMap> fieldMap, + HashMap entityPositionMap, HashMap fieldPositionMap ) { + + Log.info( "filterFieldMap" ); + // need to find the modifiers that are in THIS set of tokens by position, in the order used ... + ArrayList usedModifiers = getOrderedModifierPositions( queryTokens ); + if (usedModifiers == null || usedModifiers.size() == 0) { + return; // nothing to do ... + } + + // find the verb modifiers in the query tokens list + // need to keep track of 'next entity' and 'last entity' as we iterate + boolean remapped = false; + for (ModifierInstance modInstance : usedModifiers) { + if (modInstance.templateRule != null) applyModifierTemplateRule( entityPositionMap, fieldMap, modInstance.templateRule ); + + HashMap fieldNameKeys = getFieldKeysForFieldName( modInstance.modifierFields, fieldMap ); + if (fieldNameKeys != null) { + // find the entity just before (maximum pos before) or after (minimum pos after) the modifier phrase from entityPositionMap + // assumming here that the modifiers can work bi-directionally + // as in 'songs Paul McCartney composed' or 'songs Paul McCartney has written' vs. 'songs composed by Paul McCartney' + // or 'Bands Paul McCartney was in' vs. 
'who was in the Who' + for (String fieldNameKey : fieldNameKeys.keySet() ) { + String modifierField = fieldNameKeys.get( fieldNameKey ); + + HashSet entityPhrases = findLastEntitiesBefore( entityPositionMap, modInstance, usedModifiers, fieldMap.get( fieldNameKey ) ); + if ( entityPhrases != null ) { + remapEntity( fieldNameKey, entityPhrases, modifierField, fieldMap, fieldPositionMap, entityPositionMap ); + remapped = true; + } + else { + entityPhrases = findFirstEntitiesAfter( entityPositionMap, modInstance, usedModifiers, fieldMap.get( fieldNameKey ) ); + if (entityPhrases != null) { + remapEntity( fieldNameKey, entityPhrases, modifierField, fieldMap, fieldPositionMap, entityPositionMap ); + remapped = true; + } + } + } + } + + // add any filter fields for the verbs: + if (remapped && modInstance.filterFields != null) { + Log.info( "checking verb modifiers for " + modInstance.modifierFields ); + for (String filtField : modInstance.filterFields.keySet( ) ) { + ArrayList valList = new ArrayList( ); + valList.add( modInstance.filterFields.get( filtField ) ); + Log.info( "setting verb filter: " + filtField + ":" + modInstance.filterFields.get( filtField ) ); + fieldMap.put( filtField, valList ); + fieldPositionMap.put( filtField, modInstance.modifierPos ); + } + } + } + } + + private ArrayList getOrderedModifierPositions( ArrayList queryTokens ) { + ArrayList modifiers = null; + int i = 0; + while (i < queryTokens.size( ) ) { + char[] token = queryTokens.get( i ); + ModifierDefinition modifier = findModifier( token ); + if (modifier != null && matchesModifier( modifier.modTokens, queryTokens, i )) { + Log.info( "Adding Modifier Instance '" + modifier.modifierPhrase + "'" ); + ModifierInstance modInst = new ModifierInstance( ); + modInst.modifierPhrase = modifier.modifierPhrase; + modInst.modifierFields = modifier.modifierFields; + Log.info( "fields: " ); + for (String modField : modifier.modifierFields ) { Log.info( " " + modField ); } + modInst.modifierPos = new 
int[2]; + modInst.modifierPos[0] = i; + modInst.modifierPos[1] = i + modifier.modTokens.length - 1; + + modInst.filterFields = modifier.filterFields; + modInst.templateRule = modifier.templateRule; + if (modifiers == null) modifiers = new ArrayList( ); + modifiers.add( modInst ); + i += modifier.modTokens.length; + } + else { + ++i; + } + } + + return modifiers; + } + + private ModifierDefinition findModifier( char[] queryToken ) { + for (ModifierDefinition modifier : verbModifierList ) { + if (modifier.modifierPhrase.startsWith( new String( queryToken ) )) { + return modifier; + } + } + return null; + } + + private boolean matchesModifier( String[] modTokens, ArrayList queryTokens, int start ) { + int i = 0; + while ( (start + i) < queryTokens.size( ) && i < modTokens.length ) { + String token = new String( queryTokens.get( start + i ) ); + if (!token.toLowerCase( ).equals( modTokens[i].toLowerCase( ))) return false; + if (++i == modTokens.length) return true; + } + return false; + } + + + private HashMap getFieldKeysForFieldName( ArrayList modifierFields, HashMap> fieldMap ) { + Log.info( "getFieldKeysForFieldName" ); + HashMap fieldKeys = null; + for (String modifierField : modifierFields ) { + Log.info( "testing modifierField: " + modifierField ); + for (String fieldNameList : fieldMap.keySet() ) { + Log.info( "testing fieldNameList: " + fieldNameList ); + String[] fields = fieldNameList.split( fieldSplitExpr ); + for (int i = 0; i < fields.length; i++) { + if ( fields[i].equals( modifierField )) { + if (fieldKeys == null) fieldKeys = new HashMap( ); + Log.info( "adding field Key " + fieldNameList + ": " + modifierField ); + fieldKeys.put( fieldNameList, modifierField ); + } + } + } + } + return fieldKeys; + } + + + + // find entities before the current mod pos but after the last one (if modPos is not first in the list of modifier positions) + // we also need to keep track of the operator (???) 
+ private HashSet findLastEntitiesBefore( HashMap entityPositionMap, ModifierInstance modifier, + ArrayList usedModifiers, ArrayList fieldVals ) { + Log.info( "findLastEntitiesBefore" ); + HashSet entitySet = null; + int previousModifierPosition = -1; + int thisModPos = modifier.modifierPos[0]; + + for ( ModifierInstance mod : usedModifiers ) { + if (mod.modifierPos[1] < thisModPos ) { + previousModifierPosition = mod.modifierPos[1]; + break; + } + } + + for (String entityPhrase : entityPositionMap.keySet( ) ) { + Log.info( " testing " + entityPhrase ); + if (fieldVals.contains( entityPhrase)) { + int[] entityPos = entityPositionMap.get( entityPhrase ); + Log.info( "entity is at " + entityPos[0] + "," + entityPos[1] ); + Log.info( "mod is at " + thisModPos + " previous mod was " + previousModifierPosition ); + if (entityPos[1] < thisModPos && entityPos[0] > previousModifierPosition ) { + if (entitySet == null) entitySet = new HashSet( ); + Log.info( "adding " + entityPhrase ); + entitySet.add( entityPhrase ); + } + } + } + + return entitySet; + } + + // find entities after the current mod pos but before the next modifier + private HashSet findFirstEntitiesAfter( HashMap entityPositionMap, ModifierInstance modifier, + ArrayList usedModifiers, ArrayList fieldVals ) { + Log.info( "findFirstEntitiesAfter" ); + HashSet entitySet = null; + int nextModifierPosition = Integer.MAX_VALUE; + int thisModPos = modifier.modifierPos[1]; + + for (ModifierInstance mod : usedModifiers ) { + if (mod.modifierPos[0] > thisModPos ) { + nextModifierPosition = mod.modifierPos[0]; + break; + } + } + + for (String entityPhrase : entityPositionMap.keySet( ) ) { + Log.info( " testing " + entityPhrase ); + if (fieldVals.contains( entityPhrase)) { + int[] entityPos = entityPositionMap.get( entityPhrase ); + Log.info( "entity is at " + entityPos[0] + "," + entityPos[1] ); + Log.info( "mod is at " + thisModPos + " next mod is " + nextModifierPosition ); + if (entityPos[0] > thisModPos && 
entityPos[1] < nextModifierPosition ) { + if (entitySet == null) entitySet = new HashSet( ); + Log.info( "adding " + entityPhrase ); + entitySet.add( entityPhrase ); + } + } + } + + return entitySet; + } + + + private void remapEntity( String fieldNameKey, HashSet entityValues, String modifierField, + HashMap> fieldMap, HashMap fieldPositionMap, HashMap entityPositionMap ) { + // find the fieldMap key that contains the fieldName + ArrayList fieldVals = fieldMap.get( fieldNameKey ); + + boolean allMatch = true; + for (String fieldVal : fieldVals ) { + if (!entityValues.contains( fieldVal )) { + allMatch = false; + break; + } + } + + // if the field values in the fieldMap match the set of entity values -- remove the fieldNameKey and replace it with the modifierField in the map + if ( allMatch ) { + if (fieldNameKey.equals( modifierField )) return; + + fieldMap.remove( fieldNameKey ); + Log.info( "remapping: " + modifierField ); + for( String val : fieldVals ) { Log.info( " " + val ); } + fieldMap.put( modifierField, fieldVals ); + } + else { + // for a partial map - remove the field values in the fieldMap that are in the entityValues set, and create a new entry with modifierField => entityValues + ArrayList remaining = new ArrayList( ); + ArrayList modList = new ArrayList( ); + for (String fieldVal : fieldVals ) { + if (entityValues.contains( fieldVal )) { + modList.add( fieldVal ); + } + else { + remaining.add( fieldVal ); + } + } + + fieldMap.put( modifierField, modList ); + fieldPositionMap.put( modifierField, getPosArrayFor( modList, entityPositionMap ) ); + + fieldMap.put( fieldNameKey, remaining ); + fieldPositionMap.put( fieldNameKey, getPosArrayFor( remaining, entityPositionMap ) ); + } + } + + private void applyModifierTemplateRule( HashMap entityPositionMap, HashMap> fieldMap, ModifierTemplateRule modifierRule ) { + Log.info( "applyModifierTemplateRule" ); + // find entity_1_field - from field map - find entityPosition from values + ArrayList 
firstEntityList = findEntityList( fieldMap, modifierRule.entity_1_field ); + if (firstEntityList == null) return; + String firstFieldList = null; + String entityValue = null; + + for (String firstEntity : firstEntityList ) { + Log.info( "checking entity: " + firstEntity ); + int[] firstPos = entityPositionMap.get( firstEntity ); + int[] secondPos = entityPositionMap.get( modifierRule.entity_2_value ); + if (secondPos != null && (secondPos[0] == firstPos[1] + 1) && findEntityList( fieldMap, modifierRule.entity_2_field ) != null ) { + if (modifierRule.entity_1_value.equals( "_ENTITY_" )) { + Log.info( "'" + firstEntity + "' matches pattern" ); + entityValue = firstEntity; + ArrayList outputList = new ArrayList( ); + outputList.add( firstEntity ); + firstFieldList = findFieldList( fieldMap, modifierRule.entity_1_field ); + fieldMap.put( modifierRule.output_field, outputList ); + break; + } + } + } + + if ( firstFieldList != null ) { + // remove remapped entity field from field list + Log.info( "removing " + modifierRule.entity_1_field + " from " + firstFieldList ); + String[] fields = firstFieldList.split( "\\|" ); + StringBuilder stb = new StringBuilder( ); + for (int i = 0; i < fields.length; i++) { + if (fields[i].equals( modifierRule.entity_1_field) == false ) { + if (stb.length() > 0) stb.append( "," ); + stb.append( fields[i] ); + } + } + + // remove entityValue from fieldMap arrayList + if (stb.length() > 0) { + Log.info( "new field list: " + stb.toString( ) ); + ArrayList remainder = new ArrayList( ); + for (String firstEntity : firstEntityList ) { + if (firstEntity.equals( entityValue ) == false ) { + Log.info( "adding remaining value " + firstEntity ); + remainder.add( firstEntity ); + } + } + if (remainder.size( ) > 0) { + Log.info( "remainder fields: " + stb.toString( ) ); + fieldMap.put( stb.toString( ), remainder ); + } + + Log.info( "removing field: " + firstFieldList ); + fieldMap.remove( firstFieldList ); + } + } + } + + private ArrayList 
findEntityList( HashMap> fieldMap, String entityField ) { + for (String fieldList : fieldMap.keySet() ) { + if (fieldList.contains( entityField )) { + return fieldMap.get( fieldList ); + } + } + return null; + } + + private String findFieldList( HashMap> fieldMap, String entityField ) { + for (String fieldList : fieldMap.keySet() ) { + if (fieldList.contains( entityField )) { + return fieldList; + } + } + return null; + } + + private int[] getPosArrayFor( ArrayList entities, HashMap entityPositionMap ) { + int[] newPosArray = null; + for ( String entity : entities ) { + int[] entityPos = entityPositionMap.get( entity ); + if (entityPos != null) { + if (newPosArray == null) newPosArray = entityPos; + else { + if (entityPos[1] < newPosArray[0] ) { + newPosArray[0] = entityPos[0]; + } + if (entityPos[0] > newPosArray[1] ) { + newPosArray[1] = entityPos[1]; + } + } + } + } + + return newPosArray; + } + + private class ModifierDefinition + { + String modifierPhrase; // the phrase that will modify like 'was in' + ArrayList modifierFields; // the field(s) that will be used like 'memberOfGroup_ss,groupMembers_ss' + String[] modTokens; + HashMap filterFields; + ModifierTemplateRule templateRule; + } + + private class ModifierInstance + { + String modifierPhrase; + ArrayList modifierFields; + int[] modifierPos; + HashMap filterFields; + ModifierTemplateRule templateRule; + } + + // original_performer_s:_ENTITY_,recording_type_ss:Song=>original_performer_s:_ENTITY_ + private class ModifierTemplateRule + { + String entity_1_field; + String entity_1_value; + + String entity_2_field; + String entity_2_value; + + String output_field; + String output_value; + + ModifierTemplateRule( String templatePattern ) { + String leftSide = new String(templatePattern.substring( 0, templatePattern.indexOf( "=>" ))); + String rightSide = new String(templatePattern.substring( templatePattern.indexOf( "=>" ) + 2 )); + + String entity_1 = new String( leftSide.substring( 0, leftSide.indexOf( "," 
))); + String entity_2 = new String( leftSide.substring( leftSide.indexOf( "," ) + 1 )); + + entity_1_field = new String( entity_1.substring( 0, entity_1.indexOf( ":" ))); + entity_1_value = new String( entity_1.substring( entity_1.indexOf( ":" ) + 1 )); + entity_2_field = new String( entity_2.substring( 0, entity_2.indexOf( ":" ))); + entity_2_value = new String( entity_2.substring( entity_2.indexOf( ":" ) + 1 )); + + output_field = new String( rightSide.substring( 0, rightSide.indexOf( ":" ))); + output_value = new String( rightSide.substring( rightSide.indexOf( ":" ) + 1 )); + + Log.info( "entity_1_field: " + entity_1_field + " entity_1_value: " + entity_1_value ); + Log.info( "entity_2_field: " + entity_2_field + " entity_2_value: " + entity_2_value ); + Log.info( "output_field: " + output_field + " output_value: " + output_value ); + } + } + +} diff --git a/solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java b/solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java new file mode 100644 index 0000000..1cbba6a --- /dev/null +++ b/solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java @@ -0,0 +1,75 @@ +package org.apache.solr.handler.component; + +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class DistributedQueryAutoFilteringTest extends BaseDistributedSearchTestCase { + + public DistributedQueryAutoFilteringTest() { + stress = 0; + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + initCore( "solrconfig-autofilter.xml", "schema-autofilter.xml" ); + } + + @Test + @ShardsFixed(num = 3) + public void test() throws Exception { + del("*:*"); + + index( id, "1", "color", "red", 
"product", "shoes" ); + index( id, "2", "color", "red", "product", "socks" ); + index( id, "3", "color", "brown", "product", "socks" ); + index( id, "4", "color", "green", "brand", "red lion", "product", "socks" ); + index( id, "5", "color", "blue", "brand", "red lion", "product", "socks" ); + index( id, "6", "color", "blue", "brand", "red dragon", "product", "socks" ); + index( id, "7", "brand", "red baron", "product", "pizza" ); + index( id, "8", "brand", "red label", "product", "whiskey" ); + index( id, "9", "brand", "red light", "product", "smoke detector" ); + index( id, "10", "brand", "red star", "product", "yeast" ); + index( id, "11", "brand", "gallo", "product", "red wine" ); + index( id, "12", "brand", "heinz", "product", "red wine vinegar" ); + index( id, "13", "brand", "dole", "product", "red grapes" ); + index( id, "14", "brand", "acme", "product", "red brick" ); + commit(); + + handle.put("distrib", SKIP); + handle.put("shards", SKIP); + + QueryResponse rsp; + rsp = query( CommonParams.Q, "red lion socks", "fl", "id", "rows", 20, "qt", "/select", "sort", "id asc" ); + assertFieldValues(rsp.getResults(), id, "1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9" ); + + rsp = query( CommonParams.Q, "red lion socks", "fl", "id", "qt", "/autofilter", "sort", "id asc" ); + assertFieldValues(rsp.getResults(), id, "4", "5" ); + + rsp = query( CommonParams.Q, "blue red lion socks", "fl", "id", "qt", "/autofilter" ); + assertFieldValues(rsp.getResults(), id, "5" ); + + rsp = query( CommonParams.Q, "red wine", "fl", "id", "qt", "/autofilter" ); + assertFieldValues(rsp.getResults(), id, "11" ); + + rsp = query( CommonParams.Q, "red wine vinegar", "fl", "id", "qt", "/autofilter" ); + assertFieldValues(rsp.getResults(), id, "12" ); + } + + @Override + protected QueryResponse query(Object... 
q) throws Exception { + + final ModifiableSolrParams params = new ModifiableSolrParams(); + + for (int i = 0; i < q.length; i += 2) { + params.add(q[i].toString(), q[i + 1].toString()); + } + params.set("shards", getShardsString()); + + return queryServer(params); + } +} diff --git a/solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java b/solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java new file mode 100644 index 0000000..e5d3db4 --- /dev/null +++ b/solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java @@ -0,0 +1,486 @@ +package org.apache.solr.handler.component; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.CommonParams; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class QueryAutoFilteringComponentTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-autofilter.xml","schema-autofilter.xml"); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + } + + @Test + public void testColors( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "Red", "product", "socks" )); + assertU(adoc("id", "3", "color", "brown", "product", "socks" )); + assertU(adoc("id", "4", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "5", "color", "blue", "brand", "green dragon", "product", "socks" )); + assertU(adoc("id", "6", "color", "black", "brand", "buster brown", "product", "shoes" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/select" ) + , "//*[@numFound='5']" + , "//doc[./str[@name='id']='4']" + , "//doc[./str[@name='id']='2']" + , 
"//doc[./str[@name='id']='1']" + , "//doc[./str[@name='id']='3']" + , "//doc[./str[@name='id']='5']"); + + assertQ("", req(CommonParams.Q, "red socks", CommonParams.QT, "/select" ) + , "//*[@numFound='5']" + , "//doc[./str[@name='id']='2']" + , "//doc[./str[@name='id']='4']" + , "//doc[./str[@name='id']='1']" + , "//doc[./str[@name='id']='3']" + , "//doc[./str[@name='id']='5']"); + + assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='4']" ); + + assertQ("", req(CommonParams.Q, "red socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='2']" ); + + assertQ("", req(CommonParams.Q, "brown shoes", CommonParams.QT, "/select" ) + , "//*[@numFound='3']" + , "//doc[./str[@name='id']='1']" + , "//doc[./str[@name='id']='3']" + , "//doc[./str[@name='id']='6']"); + + assertQ("", req(CommonParams.Q, "brown shoes", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='0']" ); + + } + + @Test + public void testSynonyms( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "chaise lounge" )); + assertU(adoc("id", "2", "color", "red", "product", "sofa" )); + assertU(adoc("id", "3", "color", "red", "product", "chair" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red couch", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='2']" ); + + assertQ("", req(CommonParams.Q, "rouge sofa", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='2']" ); + + assertQ("", req(CommonParams.Q, "red lounge chair", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + assertQ("", req(CommonParams.Q, "rouge lounge chair", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + assertQ("", req(CommonParams.Q, "crimson day bed", CommonParams.QT, "/autofilter" ) + , 
"//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + } + + @Test + public void testCaseInsensitive( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "brown", "product", "socks" )); + assertU(adoc("id", "4", "color", "green", "brand", "Red Lion", "product", "socks")); + assertU(adoc("id", "5", "color", "blue", "brand", "Green Dragon", "product", "socks" )); + assertU(adoc("id", "6", "color", "black", "brand", "Buster Brown", "product", "shoes" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='4']" ); + + assertQ("", req(CommonParams.Q, "Red Lion socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='4']" ); + } + + @Test + public void testSynonymsCaseInsensitive( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "Chaise Lounge" )); + assertU(adoc("id", "2", "color", "red", "product", "sofa" )); + assertU(adoc("id", "3", "color", "red", "product", "chair" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red lounge chair", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + assertQ("", req(CommonParams.Q, "scarlet Lounge Chair", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + assertQ("", req(CommonParams.Q, "Crimson Couch", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='2']" ); + + } + + + @Test + public void testStemming( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shirt" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "red", "product", "pants" )); + 
assertU(adoc("id", "4", "color", "red", "product", "sofa" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red shirts", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + assertQ("", req(CommonParams.Q, "red shirt", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + assertQ("", req(CommonParams.Q, "red couches", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='4']" ); + } + + @Test + public void testMinTokens( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "4", "brand", "red label", "product", "whiskey")); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='2']" + , "//doc[./str[@name='id']='1']" + , "//doc[./str[@name='id']='2']" ); + + assertQ("", req(CommonParams.Q, "red", CommonParams.QT, "/autofilter", "mt", "2" ) + , "//*[@numFound='4']" ); + + assertQ("", req(CommonParams.Q, "red shoes", CommonParams.QT, "/autofilter", "mt", "2" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + } + + @Test + public void testBoostFilter( ) { + // use autofilter handler configured with boostFactor + clearIndex(); + assertU(commit()); + assertU(adoc( "id", "1", "color", "red", "product", "shoes" )); + assertU(adoc( "id", "2", "color", "red", "product", "socks" )); + assertU(adoc( "id", "3", "color", "brown", "product", "socks" )); + assertU(adoc( "id", "4", "color", "green", "brand", "red lion", "product", "socks" )); + assertU(adoc( "id", "5", "color", "blue", "brand", "red lion", "product", "socks" )); + assertU(adoc( "id", "6", "color", "blue", "brand", "red dragon", "product", "socks" )); + assertU(adoc( "id", "7", 
"brand", "red baron", "product", "pizza" )); + assertU(adoc( "id", "8", "brand", "red label", "product", "whiskey" )); + assertU(adoc( "id", "9", "brand", "red light", "product", "smoke detector" )); + assertU(adoc( "id", "10", "brand", "red star", "product", "yeast" )); + assertU(adoc( "id", "11", "brand", "gallo", "product", "red wine" )); + assertU(adoc( "id", "12", "brand", "heinz", "product", "red wine vinegar" )); + assertU(adoc( "id", "13", "brand", "dole", "product", "red grapes" )); + assertU(adoc( "id", "14", "brand", "acme", "product", "red brick" )); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "blue red dragon socks", CommonParams.QT, "/autofilterBQ", "rows", "20" ) + , "//*[@numFound='14']" + , "//doc[./str[@name='id']='6']" + , "//doc[./str[@name='id']='5']" + , "//doc[./str[@name='id']='2']" + , "//doc[./str[@name='id']='4']" + , "//doc[./str[@name='id']='3']" + , "//doc[./str[@name='id']='1']" + , "//doc[./str[@name='id']='7']" + , "//doc[./str[@name='id']='8']" + , "//doc[./str[@name='id']='9']" + , "//doc[./str[@name='id']='10']" + , "//doc[./str[@name='id']='11']" + , "//doc[./str[@name='id']='12']" + , "//doc[./str[@name='id']='13']" + , "//doc[./str[@name='id']='14']" ); + } + + @Test + public void testExcludeFields( ) { + // use autofilter handler configured with excludeFields + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "4", "brand", "red label", "product", "whiskey")); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "1", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='1']" ); + + // removes 'id' as an autofilter field + assertQ("", req(CommonParams.Q, "1", CommonParams.QT, "/autofilterEX" ) + , "//*[@numFound='0']" ); + + } + + @Test + public void 
testStopWords( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "4", "color", "red", "brand", "calvin klein", "product", "underwear")); + assertU(adoc("id", "5", "color", "red", "brand", "fruit of the loom", "product", "underwear")); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red calvin klein underwear", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='4']" ); + + // stop words should be removed: 'by' is not part of a brand name phrase + assertQ("", req(CommonParams.Q, "red underwear by calvin klein", CommonParams.QT, "/autofilterSW" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='4']"); + + // stop words should not be removed from within a matching phrase + assertQ("", req(CommonParams.Q, "red fruit of the loom underwear", CommonParams.QT, "/autofilterSW" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='5']"); + } + + @Test + public void testRandomOrder( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "4", "brand", "red label", "product", "whiskey")); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='3']" ); + + assertQ("", req(CommonParams.Q, "socks red lion", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='3']" ); + } + + @Test + public void testBadQueries( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", 
"color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "4", "brand", "red label", "product", "whiskey")); + assertU(adoc("id", "5", "color", "blue", "brand", "green dragon", "product", "socks")); + assertU(commit()); + + // green red tiger socks -> tiger (color:(green OR red) AND product:socks) + assertQ("", req(CommonParams.Q, "green red tiger socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='2']" + , "//doc[./str[@name='id']='2']" + , "//doc[./str[@name='id']='3']"); + + // green red lion socks blahblah -> blahblah (color:green AND brand:"red lion" AND product:socks) + assertQ("", req(CommonParams.Q, "green red lion socks blahblah", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='3']" ); + } + + @Test + public void testMultipleFieldValues( ) { + clearIndex(); + assertU(commit()); + assertU(adoc("id", "1", "color", "red", "product", "shoes" )); + assertU(adoc("id", "2", "color", "red", "product", "socks" )); + assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks")); + assertU(adoc("id", "4", "brand", "red label", "product", "whiskey")); + assertU(adoc("id", "5", "color", "blue", "brand", "green dragon", "product", "socks")); + assertU(commit()); + + // should create filter query: color:(red OR green) product:socks + assertQ("", req(CommonParams.Q, "red green socks", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='2']" + , "//doc[./str[@name='id']='2']" + , "//doc[./str[@name='id']='3']"); + } + + @Test + public void testMultipleFieldNames( ) { + clearIndex(); + assertU(commit()); + //assertU(adoc("id", "1", "first_name", "Tucker", "last_name", "Thomas", "full_name", "Tucker Thomas")); + //assertU(adoc("id", "2", "first_name", "Thomas", "last_name", "Tucker", "full_name", "Thomas Tucker")); + assertU(adoc("id", "1", "full_name", "Tucker Thomas", "text", "Tucker Thomas")); + assertU(adoc("id", 
"2", "full_name", "Thomas Tucker", "text", "Thomas Tucker")); + assertU(commit()); + + // should create filter query (first_name:thomas OR last_name:thomas) + assertQ("", req(CommonParams.Q, "Thomas", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='2']" ); + + // uses longer contiguous phrase for full_name - creates fq=full_name:"thomas tucker" + // this breaks now because of "fix" for testAmbiguousFields + assertQ("", req(CommonParams.Q, "Thomas Tucker", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" + , "//doc[./str[@name='id']='2']"); + } + + @Test + public void testMultiValuedField( ) { + clearIndex(); + assertU(commit()); + assertU( multiValueDocs ); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "fast stylish", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" ); + + assertQ("", req(CommonParams.Q, "fast and stylish", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" ); + + assertQ("", req(CommonParams.Q, "fast or stylish", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='3']" ); + } + + @Test + public void testAmbiguousFields( ) { + clearIndex(); + assertU(commit()); + assertU( whiteAmbiguousDocs ); + assertU(commit()); + + // should create (brand_s:"white linen" OR (color:white AND material_s:linen)) + assertQ("", req(CommonParams.Q, "white linen", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='3']" ); + + assertQ("", req(CommonParams.Q, "white linen perfume", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" ); + + assertQ("", req(CommonParams.Q, "white linen shirt", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='2']" ); + + assertQ("", req(CommonParams.Q, "mens white linen shirt", CommonParams.QT, "/autofilter" ) + , "//*[@numFound='1']" ); + + } + + + @Test + public void testVerbMappings( ) { + clearIndex(); + assertU(commit()); + assertU( musicDocs ); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "Bob Dylan Songs", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='3']" ); 
+ + assertQ("", req(CommonParams.Q, "Songs Bob Dylan wrote", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='2']" ); + + assertQ("", req(CommonParams.Q, "Songs Bob Dylan performed", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='2']" ); + + assertQ("", req(CommonParams.Q, "Songs Bob Dylan covered", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='1']" ); + + } + + @Test + public void testNounPhraseMappings( ) { + clearIndex(); + assertU(commit()); + assertU( beatlesDocs ); + assertU(commit()); + + assertQ("", req(CommonParams.Q, "Beatles Songs", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='3']" ); + + assertQ("", req(CommonParams.Q, "Beatles Songs covered", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='2']" ); + + assertQ("", req(CommonParams.Q, "Beatles Songs covered by Joan Baez", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='1']" ); + + assertQ("", req(CommonParams.Q, "Songs Beatles covered", CommonParams.QT, "/autofilterVRB" ) + , "//*[@numFound='1']" ); + } + + + private static String multiValueDocs = "1fast" + + "stylish" + + "2fast" + + "powerful" + + "3stylish"; + + private static String whiteAmbiguousDocs = "1perfume" + + "fragrencesWhite Linen" + + "womens" + + "2dress shirt" + + "shirtWhite" + + "Linenwomens" + + "3dress shirt" + + "shirtWhite" + + "Linenmens"; + + private static String musicDocs = "1All Along the Watchtower" + + "Bob DylanJimi Hendrix" + + "SongCover" + + "2The Mighty Quinn" + + "Bob DylanBob Dylan" + + "SongOriginal" + + "3This Land is Your Land" + + "Woody GuthrieBob Dylan" + + "SongCover"; + + private static String beatlesDocs = "1Let It Be" + + "Beatles" + + "Joan Baez" + + "Cover" + + "Song" + + "2Something" + + "Beatles" + + "Frank Sinatra" + + "Cover" + + "Song" + + "3Honey Don't" + + "Carl Perkins" + + "Beatles" + + "Cover" + + "Song"; + +} \ No newline at end of file diff --git a/solr6.x/src/test/resources/solr/collection1/conf/currency.xml 
b/solr6.x/src/test/resources/solr/collection1/conf/currency.xml new file mode 100644 index 0000000..6a12b32 --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/currency.xml @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + diff --git a/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml b/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml new file mode 100644 index 0000000..7065dd4 --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml @@ -0,0 +1,273 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml new file mode 100644 index 0000000..785a177 --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml @@ -0,0 +1,242 @@ + + + + + + ${tests.luceneMatchVersion:LATEST} + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + true + + 10 + + + + + + + + + + + + + + explicit + 10 + text + + + + + + + explicit + text + + + autofilter + + + + + synonyms-autofilter.txt + + + + + explicit + text + + + autofilterSW + + + + + stopwords.txt + + + + + + explicit + text + + + autofilterBQ + + + + + 100 + + + + + + explicit + text + 2 + + + autofilter + + + + + + + explicit + text + + + autofilterEX + + + + + + id + + + + + + explicit + text + + + autofilterVRB + + + + + + written,wrote,composed:composer_s + performed,played,sang,recorded:performer_s + 
covered,covers:performer_s|version_s:Cover|original_performer_s:_ENTITY_,recording_type_s:Song=>original_performer_s:_ENTITY_ + + + + + + + + + termsComp + + + + + + + + max-age=30, public + + + + + solr + solrconfig.xml schema.xml admin-extra.html + + + + prefix-${solr.test.sys.prop2}-suffix + + + diff --git a/solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml new file mode 100644 index 0000000..13f8214 --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml @@ -0,0 +1,47 @@ + + + + + + + + + ${useCompoundFile:false} + + ${solr.tests.maxBufferedDocs} + ${solr.tests.maxIndexingThreads:8} + ${solr.tests.ramBufferSizeMB} + + + + 1000 + 10000 + + + ${solr.tests.lockType:single} + diff --git a/solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt b/solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt new file mode 100644 index 0000000..b5824da --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +#Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +s +such +t +that +the +their +then +there +these +they +this +to +was +will +with + diff --git a/solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt b/solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt new file mode 100644 index 0000000..367e7a6 --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt @@ -0,0 +1,17 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +chaise lounge,lounge chair,daybed,day bed +red,rouge,crimson,scarlet +couch,sofa + diff --git a/solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt b/solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt new file mode 100644 index 0000000..b0e31cb --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt @@ -0,0 +1,31 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaa => aaaa +bbb => bbbb1 bbbb2 +ccc => cccc1,cccc2 +a\=>a => b\=>b +a\,a => b\,b +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + From 085032d6404a79da96d1c998e7488047a2c10838 Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Thu, 24 Aug 2017 13:40:13 -0700 Subject: [PATCH 08/13] Initial updates to build the plugin for Solr 6.1.0. 
--- solr6.x/build.xml | 4 +- .../QueryAutoFilteringComponent.java | 3 +- .../solr/collection1/conf/managed-schema | 126 ++++++++++++++++++ ...tofilter.xml => schema-autofilter.xml.bak} | 4 +- 4 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 solr6.x/src/test/resources/solr/collection1/conf/managed-schema rename solr6.x/src/test/resources/solr/collection1/conf/{schema-autofilter.xml => schema-autofilter.xml.bak} (98%) diff --git a/solr6.x/build.xml b/solr6.x/build.xml index 30b91f9..c17f314 100644 --- a/solr6.x/build.xml +++ b/solr6.x/build.xml @@ -20,8 +20,8 @@ - - + + diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index 9fbc43a..26de589 100644 --- a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -748,8 +748,9 @@ private ArrayList getStringFields( SolrIndexSearcher searcher ) { } } else { IndexSchema schema = searcher.getSchema(); - Collection fieldNames = searcher.getFieldNames(); + Iterable fieldNames = searcher.getFieldNames(); Iterator fnIt = fieldNames.iterator(); + while ( fnIt.hasNext() ) { String fieldName = fnIt.next( ); if (excludeFields == null || !excludeFields.contains( fieldName )) { diff --git a/solr6.x/src/test/resources/solr/collection1/conf/managed-schema b/solr6.x/src/test/resources/solr/collection1/conf/managed-schema new file mode 100644 index 0000000..f16a44b --- /dev/null +++ b/solr6.x/src/test/resources/solr/collection1/conf/managed-schema @@ -0,0 +1,126 @@ + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff 
--git a/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml b/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml.bak similarity index 98% rename from solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml rename to solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml.bak index 7065dd4..51269dc 100644 --- a/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml +++ b/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml.bak @@ -258,13 +258,13 @@ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4 --> + geo="true" distErrPct="0.025" maxDistErr="0.000009" distanceUnits="degrees" /> + geo="true" distanceUnits="degrees" numberType="_bbox_coord" /> From e1491c12c25ce63bbfd3f9a04d68fc214f2e4ccc Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Fri, 1 Sep 2017 13:15:32 -0700 Subject: [PATCH 09/13] Clean-up imports. --- .../QueryAutoFilteringComponent.java | 71 ++++++------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index 26de589..5cf6582 100644 --- a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -1,71 +1,44 @@ package org.apache.solr.handler.component; -import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.TermsParams; -import org.apache.solr.common.params.ShardParams; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.standard.StandardTokenizer; 
+import org.apache.lucene.analysis.synonym.SolrSynonymParser; +import org.apache.lucene.analysis.synonym.SynonymMap; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.uninverting.UninvertingReader; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.fst.FST; +import org.apache.solr.client.solrj.response.TermsResponse; +import org.apache.solr.common.params.*; import org.apache.solr.common.util.NamedList; -import org.apache.solr.request.SolrQueryRequest; - -import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.SolrCore; -import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrEventListener; - +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; import org.apache.solr.search.SolrIndexSearcher; - -import org.apache.solr.client.solrj.response.TermsResponse; - -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.uninverting.UninvertingReader; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.Term; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.KeywordTokenizer; -import org.apache.lucene.analysis.core.LowerCaseFilter; -import 
org.apache.lucene.analysis.synonym.SynonymMap; -import org.apache.lucene.analysis.synonym.SynonymMap.Builder; -import org.apache.lucene.analysis.synonym.SolrSynonymParser; -import org.apache.lucene.analysis.util.TokenFilterFactory; -import org.apache.lucene.analysis.util.TokenizerFactory; -import org.apache.lucene.analysis.standard.StandardTokenizer; - -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.util.fst.FST; - -import java.util.ArrayList; -import java.util.List; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Collection; -import java.util.Iterator; -import java.util.StringTokenizer; - +import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; -import java.io.BufferedReader; import java.io.StringReader; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; +import java.util.*; /** * Creates filter or boost queries from freetext queries based on pattern matches with terms in stored String fields. Uses From 76bb31104e917a3c121e26435204b01b883b5330 Mon Sep 17 00:00:00 2001 From: Anddreas Schaefer Date: Mon, 20 Nov 2017 16:15:20 -0800 Subject: [PATCH 10/13] WG-1184 #comment Overrode distributedProcess() to prevent Solr from creating another set of Requests for this component. There is no need to create an additional set of requests for the Query Auto Filtering Component as it slows down performance and duplicates the facet counts.
--- .../QueryAutoFilteringComponent.java | 67 ++++++++++++------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index 5cf6582..bb1eca1 100644 --- a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -89,7 +89,6 @@ public class QueryAutoFilteringComponent extends QueryComponent implements SolrC @Override public void init( NamedList initArgs ) { - List whitelistFields = (List) initArgs.get("whitelistFields"); if (whitelistFields != null) { this.whitelistFields = new HashSet( ); @@ -105,7 +104,7 @@ public void init( NamedList initArgs ) { this.excludeFields.add( field ); } } - + List verbModifiers = (List)initArgs.get( "verbModifiers" ); if (verbModifiers != null) { this.verbModifierList = new ArrayList( ); @@ -124,7 +123,7 @@ public void init( NamedList initArgs ) { } } } - + Integer boostFactor = (Integer)initArgs.get( "boostFactor" ); if (boostFactor != null) { this.boostFactor = boostFactor; @@ -199,7 +198,7 @@ private void addModifier( String modifierPhrase, String modifierFields ) { public void inform( SolrCore core ) { if (initParams != null) { SolrResourceLoader resourceLoader = core.getResourceLoader( ); - + synonymsFile = (String)initParams.get( "synonyms" ); if (synonymsFile != null) { Analyzer analyzer = new Analyzer() { @@ -209,12 +208,12 @@ protected TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(tokenizer, tokenizer ); } }; - + try { SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); - + parser.parse(new 
InputStreamReader( resourceLoader.openResource(synonymsFile), decoder)); this.synonyms = parser.build( ); } @@ -223,7 +222,7 @@ protected TokenStreamComponents createComponents(String fieldName) { Log.warn( "Parsing Synonyms Got Exception " + e ); } } - + String stopwordsFile = (String)initParams.get( "stopwords" ); if (stopwordsFile != null) { this.stopwords = new HashSet( ); @@ -240,7 +239,7 @@ protected TokenStreamComponents createComponents(String fieldName) { } } } - + core.registerFirstSearcherListener( this ); core.registerNewSearcherListener( this ); } @@ -262,7 +261,7 @@ public void prepare( ResponseBuilder rb ) throws IOException { SolrQueryRequest req = rb.req; SolrParams params = req.getParams( ); - + // Only build the field map and do the processing if we are the main event String isShard = params.get( "isShard" ); if (isShard != null && isShard.equals( "true" )) { @@ -308,7 +307,23 @@ public void prepare( ResponseBuilder rb ) throws IOException } } } - + + /** + * If this method is not overridden then this will cause a request against + * the Shards causing performance degradation and duplicate values in the + * facet counts. + * Here we just return that this is done leaving it up to the Query to drive + * the requests. + * + * @param rb Ignored + * @return ResponseBuilder.STAGE_DONE + * @throws IOException never thrown + */ + @Override + public int distributedProcess(ResponseBuilder rb) throws IOException { + return ResponseBuilder.STAGE_DONE; + } + private boolean findPattern( ArrayList queryTokens, ResponseBuilder rb, ModifiableSolrParams modParams ) throws IOException { Log.debug( "findPattern " ); @@ -316,14 +331,14 @@ private boolean findPattern( ArrayList queryTokens, ResponseBuilder rb, HashMap> fieldMap = new HashMap>( ); HashMap fieldPositionMap = new HashMap( ); HashMap entityPositionMap = (verbModifierList != null) ?
new HashMap() : null; - + String longestPhraseField = null; int startToken = 0; int lastEndToken = 0; while ( startToken < queryTokens.size() ) { Log.debug( "startToken = " + startToken ); int endToken = startToken; - + while ( endToken < queryTokens.size( ) ) { // FieldName can be comma separated if there are more than one field name for a set of tokens String fieldName = getFieldNameFor( queryTokens, startToken, endToken ); @@ -336,7 +351,7 @@ else if ( longestPhraseField != null ) { } ++endToken; } - + if (longestPhraseField != null) { // create matching phrase from startToken -> endToken String phrase = getPhrase( queryTokens, startToken, lastEndToken ); @@ -355,7 +370,7 @@ else if ( longestPhraseField != null ) { valList = new ArrayList( ); fieldMap.put( longestPhraseField, valList ); } - + Log.info( "indexedTerm: " + indexedTerm ); int[] entityPosition = null; if (entityPositionMap != null) { @@ -363,7 +378,7 @@ else if ( longestPhraseField != null ) { entityPosition[0] = startToken; entityPosition[1] = endToken-1; } - + Log.debug( "indexedTerm: " + indexedTerm ); if (indexedTerm.indexOf( fieldDelim ) > 0) { @@ -377,7 +392,7 @@ else if ( longestPhraseField != null ) { valList.add( indexedTerm ); if (entityPositionMap != null) entityPositionMap.put( indexedTerm, entityPosition ); } - + // save startToken and lastEndToken so can use for boolean operator context // for multi-value fields -save the min and max of all tokens positions for the field int[] posArray = fieldPositionMap.get( longestPhraseField ); @@ -392,7 +407,7 @@ else if ( longestPhraseField != null ) { { posArray[1] = lastEndToken; } - + longestPhraseField = null; for (int i = startToken; i <= lastEndToken; i++) { Log.debug( "adding used token at " + i ); @@ -405,14 +420,14 @@ else if ( longestPhraseField != null ) { ++startToken; } } - + if (usedTokens.size( ) > 0) { - + // filter field maps based on verbs here: if (entityPositionMap != null) { filterFieldMap( queryTokens, fieldMap, 
entityPositionMap, fieldPositionMap ); } - + String useBoost = modParams.get( BOOST_PARAM ); Integer boostFactor = (useBoost != null) ? new Integer( useBoost ) : this.boostFactor; if (boostFactor == null) { @@ -426,7 +441,7 @@ else if ( longestPhraseField != null ) { } } } - + Log.debug( "got qbuilder string = '" + qbuilder.toString() + "'" ); if (qbuilder.length() == 0 && fieldMap.size() > 0) { // build a filter query - @@ -464,7 +479,7 @@ else if (qbuilder.length() > 0 && fieldMap.size() > 0) { } return true; } - + return false; } @@ -475,7 +490,7 @@ private String getPhrase( ArrayList tokens, int startToken, int endToken private String getPhrase( ArrayList tokens, int startToken, int endToken, String tokenSep ) { StringBuilder strb = new StringBuilder( ); for (int i = startToken; i <= endToken; i++) { - if (i > startToken) strb.append( tokenSep ); + if (i > startToken) { strb.append( tokenSep ); } strb.append( tokens.get( i ) ); } @@ -557,7 +572,7 @@ private String getFieldNameFor( ArrayList queryTokens, int startToken, i } private String getSingleTermQuery( String multiTermValue ) { - + String multiTerm = multiTermValue; if (multiTermValue.startsWith( "\"" )) { multiTerm = new String( multiTermValue.substring( 1, multiTermValue.lastIndexOf( "\"" ))); @@ -958,7 +973,7 @@ private String getStemmed( String input ) { public int stem(char s[], int len) { if (len < 3 || s[len-1] != 's') return len; - + switch(s[len-2]) { case 'u': case 's': return len; @@ -1014,7 +1029,7 @@ public void process(ResponseBuilder rb) throws IOException // =========================================================================== private void filterFieldMap( ArrayList queryTokens, HashMap> fieldMap, HashMap entityPositionMap, HashMap fieldPositionMap ) { - + Log.info( "filterFieldMap" ); // need to find the modifiers that are in THIS set of tokens by position, in the order used ... 
ArrayList usedModifiers = getOrderedModifierPositions( queryTokens ); From e1b38551bf9faedb9c57848290ce93075575421d Mon Sep 17 00:00:00 2001 From: Anddreas Schaefer Date: Tue, 21 Nov 2017 12:28:36 -0800 Subject: [PATCH 11/13] WG-1335 #comment If Synonym Map was built without Finite State Machine (FST) input then this FST is null. In that case the Mapped Field Name ignores it and returns null instead --- .../QueryAutoFilteringComponent.java | 107 +++++++++--------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index bb1eca1..fede188 100644 --- a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -622,65 +622,68 @@ private String getFieldNameFor( String phrase ) throws IOException { private String getMappedFieldName( SynonymMap termMap, String phrase ) throws IOException { Log.debug( "getMappedFieldName: '" + phrase + "'" ); FST fst = termMap.fst; - FST.BytesReader fstReader = fst.getBytesReader(); - FST.Arc scratchArc = new FST.Arc<>( ); - BytesRef scratchBytes = new BytesRef(); - CharsRefBuilder scratchChars = new CharsRefBuilder(); - ByteArrayDataInput bytesReader = new ByteArrayDataInput(); - - BytesRef pendingOutput = fst.outputs.getNoOutput(); - fst.getFirstArc( scratchArc ); - BytesRef matchOutput = null; - - String noSpPhrase = phrase.replace( ' ', '_' ); - int charPos = 0; - while(charPos < noSpPhrase.length()) { - final int codePoint = noSpPhrase.codePointAt( charPos ); - if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) { - Log.debug( "No FieldName for " + phrase ); - return null; + if(fst != null) { + FST.BytesReader fstReader = fst.getBytesReader(); + FST.Arc scratchArc = new FST.Arc<>(); + BytesRef 
scratchBytes = new BytesRef(); + CharsRefBuilder scratchChars = new CharsRefBuilder(); + ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + + BytesRef pendingOutput = fst.outputs.getNoOutput(); + fst.getFirstArc(scratchArc); + BytesRef matchOutput = null; + + String noSpPhrase = phrase.replace(' ', '_'); + int charPos = 0; + while (charPos < noSpPhrase.length()) { + final int codePoint = noSpPhrase.codePointAt(charPos); + if (fst.findTargetArc(codePoint, scratchArc, scratchArc, fstReader) == null) { + Log.debug("No FieldName for " + phrase); + return null; + } + + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + charPos += Character.charCount(codePoint); } - - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); - charPos += Character.charCount(codePoint); - } - if (scratchArc.isFinal()) { - Log.debug( "creating matchOutput" ); - matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); - ArrayList mappedFields = new ArrayList( ); - bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length ); - - final int code = bytesReader.readVInt(); - final int count = code >>> 1; - for( int outputIDX = 0; outputIDX < count; outputIDX++ ) { - termMap.words.get( bytesReader.readVInt(), scratchBytes ); - scratchChars.copyUTF8Bytes(scratchBytes); - int lastStart = 0; - final int chEnd = lastStart + scratchChars.length(); - for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) { - if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) { - int outputLen = chIDX - lastStart; - assert outputLen > 0: "output contains empty string: " + scratchChars; - mappedFields.add( new String( scratchChars.chars(), lastStart, outputLen ) ); - lastStart = chIDX + 1; + if (scratchArc.isFinal()) { + Log.debug("creating matchOutput"); + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + ArrayList mappedFields = new ArrayList(); + bytesReader.reset(matchOutput.bytes, 
matchOutput.offset, matchOutput.length); + + final int code = bytesReader.readVInt(); + final int count = code >>> 1; + for (int outputIDX = 0; outputIDX < count; outputIDX++) { + termMap.words.get(bytesReader.readVInt(), scratchBytes); + scratchChars.copyUTF8Bytes(scratchBytes); + int lastStart = 0; + final int chEnd = lastStart + scratchChars.length(); + for (int chIDX = lastStart; chIDX <= chEnd; chIDX++) { + if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) { + int outputLen = chIDX - lastStart; + assert outputLen > 0 : "output contains empty string: " + scratchChars; + mappedFields.add(new String(scratchChars.chars(), lastStart, outputLen)); + lastStart = chIDX + 1; + } } } - } - if (mappedFields.size() == 1) { - Log.debug( "returning mapped fieldName " + mappedFields.get( 0 ) ); - return mappedFields.get( 0 ); - } - else { - StringBuilder fieldBuilder = new StringBuilder( ); - for (String fieldName : mappedFields ) { - if (fieldBuilder.length() > 0) fieldBuilder.append( fieldDelim ); - fieldBuilder.append( fieldName ); + if (mappedFields.size() == 1) { + Log.debug("returning mapped fieldName " + mappedFields.get(0)); + return mappedFields.get(0); + } else { + StringBuilder fieldBuilder = new StringBuilder(); + for (String fieldName : mappedFields) { + if (fieldBuilder.length() > 0) fieldBuilder.append(fieldDelim); + fieldBuilder.append(fieldName); + } + Log.debug("returning mapped fieldName " + fieldBuilder.toString()); + return fieldBuilder.toString(); } - Log.debug( "returning mapped fieldName " + fieldBuilder.toString( ) ); - return fieldBuilder.toString( ); } + } else { + Log.debug("Finite State Machine is null on Synonym Map -> ignored"); } Log.warn( "matchOutput but no FieldName for " + phrase ); From 695034009f4b83fed334f101ce29e8e474c3a2ac Mon Sep 17 00:00:00 2001 From: Gaston Gonzalez Date: Thu, 21 Dec 2017 10:18:07 -0700 Subject: [PATCH 12/13] Changed log level for a chatty message that should not have been marked as 
warn. --- .../solr/handler/component/QueryAutoFilteringComponent.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index fede188..3de366f 100644 --- a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -685,8 +685,9 @@ private String getMappedFieldName( SynonymMap termMap, String phrase ) throws IO } else { Log.debug("Finite State Machine is null on Synonym Map -> ignored"); } - - Log.warn( "matchOutput but no FieldName for " + phrase ); + + // Suppressing this message since it is very chatty in production. + Log.debug( "matchOutput but no FieldName for " + phrase ); return null; } From df9f2cd4fc5208797ad4b411ed43f85048f10b03 Mon Sep 17 00:00:00 2001 From: Xan Nick Date: Mon, 15 Oct 2018 09:20:55 -0500 Subject: [PATCH 13/13] WG-1967 #comment Fix SynonymMap Builder error by preventing empty field values from being added.
--- .../solr/handler/component/QueryAutoFilteringComponent.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java index 3de366f..f98c6ca 100644 --- a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java +++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java @@ -1,5 +1,6 @@ package org.apache.solr.handler.component; +import org.apache.commons.lang.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; @@ -718,7 +719,9 @@ private void buildFieldMap( ResponseBuilder rb ) throws IOException { while (te.next() != null) { BytesRef term = te.term(); String fieldValue = term.utf8ToString( ); - addTerm ( fieldChars, fieldValue, fieldBuilder, termBuilder ); + if (StringUtils.isNotEmpty(fieldValue)) { + addTerm ( fieldChars, fieldValue, fieldBuilder, termBuilder ); + } } }