diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eab767a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build-lib
+build
+dist
diff --git a/README.md b/README.md
index ffc75df..9ca7361 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,23 @@
+# Notes about this fork
+
+This project is a fork of https://github.com/lucidworks/query-autofiltering-component, but includes the following changes to the 5.x code base:
+
+* Resolved issues with Ivy dependencies.
+* Upgraded component to work with Solr 5.3.1 and above.
+* Added support for using a field whitelist.
+
+The whitelist field definition feature was implemented to close a gap regarding dynamic fields. Although querying the Luke admin handler would have been another implementation option, a generic whitelist is more powerful, albeit a bit more verbose to configure. Simply define the following:
+
+
+ <searchComponent name="autofilter" class="org.apache.solr.handler.component.QueryAutoFilteringComponent" >
+ <arr name="whitelistFields">
+ <str>field1</str>
+ <str>field2</str>
+ <str>fieldN</str>
+ </arr>
+ </searchComponent>
+
+
# query-autofiltering-component
A Query Autofiltering SearchComponent for Solr that can translate free-text queries into structured queries using index metadata.
diff --git a/solr5.x/build.xml b/solr5.x/build.xml
index 6e74893..30b91f9 100644
--- a/solr5.x/build.xml
+++ b/solr5.x/build.xml
@@ -20,8 +20,8 @@
-
-
+
+
diff --git a/solr5.x/ivy/ivy-settings.xml b/solr5.x/ivy/ivy-settings.xml
index 19d4394..5680754 100644
--- a/solr5.x/ivy/ivy-settings.xml
+++ b/solr5.x/ivy/ivy-settings.xml
@@ -1,7 +1,11 @@
-
-
+
+
+
+
+
+
diff --git a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java
index eaf13a1..9fbc43a 100644
--- a/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java
+++ b/solr5.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java
@@ -98,6 +98,7 @@ public class QueryAutoFilteringComponent extends QueryComponent implements SolrC
private String termsHandler = "/terms";
+ private HashSet<String> whitelistFields; // optional field whitelist read from the "whitelistFields" init arg; stays null when not configured
private HashSet excludeFields;
private HashSet stopwords;
@@ -115,6 +116,15 @@ public class QueryAutoFilteringComponent extends QueryComponent implements SolrC
@Override
public void init( NamedList initArgs ) {
+
+ List whitelistFields = (List) initArgs.get("whitelistFields");
+ if (whitelistFields != null) {
+ this.whitelistFields = new HashSet( );
+ for (String field : whitelistFields ) {
+ this.whitelistFields.add( field );
+ }
+ }
+
List excludeFields = (List) initArgs.get("excludeFields");
if (excludeFields != null) {
this.excludeFields = new HashSet( );
@@ -469,16 +479,15 @@ else if (qbuilder.length() > 0 && fieldMap.size() > 0) {
}
}
else { // boostFactor is NOT null
- // use the original query add fielded boost clauses
+ // use the bq field to add fielded boost clauses
StringBuilder bbuilder = new StringBuilder( );
String boostSuffix = "^" + boostFactor.toString( );
- bbuilder.append( getPhrase( queryTokens, 0, queryTokens.size() - 1, " " ) );
for (String fieldName : fieldMap.keySet( ) ) {
bbuilder.append( " " );
bbuilder.append( getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, boostSuffix ) );
}
- Log.info( "setting q = '" + bbuilder.toString() + "'" );
- modParams.set( "q", bbuilder.toString( ) );
+ Log.info( "adding bq = '" + bbuilder.toString() + "'" );
+ modParams.add( "bq", bbuilder.toString( ).trim() );
}
return true;
}
@@ -729,23 +738,35 @@ private void buildFieldMap( ResponseBuilder rb ) throws IOException {
// TODO: Filter this by the configuration fields ...
private ArrayList getStringFields( SolrIndexSearcher searcher ) {
- IndexSchema schema = searcher.getSchema();
+
ArrayList strFields = new ArrayList( );
-
- Collection fieldNames = searcher.getFieldNames();
- Iterator fnIt = fieldNames.iterator();
- while ( fnIt.hasNext() ) {
- String fieldName = fnIt.next( );
- if (excludeFields == null || !excludeFields.contains( fieldName )) {
- SchemaField field = schema.getField(fieldName);
- if (field.stored() && field.getType() instanceof StrField ) {
- strFields.add( fieldName );
+
+ if ( hasWhitelist() ) {
+ Log.info("Using whitelist fields instead of schema.");
+ for ( String fieldName: whitelistFields ) {
+ strFields.add( fieldName );
+ }
+ } else {
+ IndexSchema schema = searcher.getSchema();
+ Collection fieldNames = searcher.getFieldNames();
+ Iterator fnIt = fieldNames.iterator();
+ while ( fnIt.hasNext() ) {
+ String fieldName = fnIt.next( );
+ if (excludeFields == null || !excludeFields.contains( fieldName )) {
+ SchemaField field = schema.getField(fieldName);
+ if (field.stored() && field.getType() instanceof StrField ) {
+ strFields.add( fieldName );
+ }
}
}
}
-
+
return strFields;
}
+
+  /** Returns true when a non-empty field whitelist was configured. */
+  private boolean hasWhitelist() {
+    if (this.whitelistFields == null) {
+      return false;
+    }
+    return !this.whitelistFields.isEmpty();
+  }
private void addTerm( CharsRef fieldChars, String fieldValue, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder ) throws IOException {
@@ -804,11 +825,13 @@ private void addDistributedTerms( ResponseBuilder rb, SynonymMap.Builder fieldBu
ShardHandlerFactory shardHandlerFactory = container.getShardHandlerFactory( );
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
- shardHandler.checkDistributed( rb );
-
- Log.debug( "Is Distributed = " + rb.isDistrib );
+
+ final SolrParams distribParams = rb.req.getParams();
+ final boolean isDistrib = distribParams.get(ShardParams.SHARDS) != null;
+ Log.debug( "Is Distributed = " + isDistrib );
- if( rb.isDistrib ) {
+ if( isDistrib ) {
+ shardHandler.prepDistributed( rb );
// create a ShardRequest that contains a Terms Request.
// don't send to this shard???
ShardRequest sreq = new ShardRequest();
diff --git a/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
index 7514aa4..13f8214 100644
--- a/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
+++ b/solr5.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -31,7 +31,7 @@ A solrconfig.xml snippet containing indexConfig settings for randomized testing.
${useCompoundFile:false}
${solr.tests.maxBufferedDocs}
- ${solr.tests.maxIndexingThreads}
+ ${solr.tests.maxIndexingThreads:8}
${solr.tests.ramBufferSizeMB}
diff --git a/solr6.x/build.xml b/solr6.x/build.xml
new file mode 100644
index 0000000..c17f314
--- /dev/null
+++ b/solr6.x/build.xml
@@ -0,0 +1,127 @@
+
+ Builds Query Autofiltering Component
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr6.x/ivy.xml b/solr6.x/ivy.xml
new file mode 100644
index 0000000..f0e8ae2
--- /dev/null
+++ b/solr6.x/ivy.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr6.x/ivy/ivy-settings.xml b/solr6.x/ivy/ivy-settings.xml
new file mode 100644
index 0000000..5680754
--- /dev/null
+++ b/solr6.x/ivy/ivy-settings.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java
new file mode 100644
index 0000000..f98c6ca
--- /dev/null
+++ b/solr6.x/src/main/java/org/apache/solr/handler/component/QueryAutoFilteringComponent.java
@@ -0,0 +1,1420 @@
+package org.apache.solr.handler.component;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.synonym.SolrSynonymParser;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.uninverting.UninvertingReader;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.solr.client.solrj.response.TermsResponse;
+import org.apache.solr.common.params.*;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrEventListener;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.StrField;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.plugin.SolrCoreAware;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.StringReader;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Creates filter or boost queries from freetext queries based on pattern matches with terms in stored String fields. Uses
+ * the FieldCache (UninvertingIndex) to build a map of term to search field. This map is then used to parse the
+ * query to detect phrases that map to specific field values. These field/value pairs can then be used to generate
+ * a filter query or a boost query if recall needs to be preserved.
+ *
+ * For SolrCloud, this component requires that the TermsComponent be defined in solrconfig.xml. This is used
+ * to get distributed term maps.
+ *
+ * Targets Solr 6.x (this copy lives under the solr6.x directory).
+ */
+
+public class QueryAutoFilteringComponent extends QueryComponent implements SolrCoreAware, SolrEventListener {
+
+  private static final Logger Log = LoggerFactory.getLogger( QueryAutoFilteringComponent.class );
+
+  /** Request parameter: minimum number of query tokens required before autofiltering is attempted. */
+  public static final String MINIMUM_TOKENS = "mt";
+  /** Request parameter: per-request boost factor; when present it overrides the configured boostFactor. */
+  public static final String BOOST_PARAM = "afb";
+
+  private SynonymMap fieldMap;  // Map of search terms to fieldName
+  private SynonymMap synonyms;  // synonyms parsed from the file named by the "synonyms" init arg
+  private SynonymMap termMap;   // Map of search term to indexed term
+
+  private String synonymsFile;
+
+  // init args are kept so inform() can load resources once the core is available
+  private NamedList initParams;
+
+  // Raised by newSearcher(), cleared by prepare() after the maps are rebuilt.
+  // volatile because prepare() tests it before taking the lock.
+  private volatile boolean initFieldMap = false;
+
+  private String termsHandler = "/terms";
+
+  private HashSet<String> whitelistFields;  // when non-empty, these fields are used instead of scanning the schema
+  private HashSet<String> excludeFields;    // fields skipped when scanning the schema
+  private HashSet<String> stopwords;        // lower-cased lines of the "stopwords" resource
+
+  private Integer boostFactor;  // if null, use Filter Query
+
+  // For multiple terms in the same field, if field is multi-valued = use AND for filter query
+  private boolean useAndForMultiValuedFields = true;
+
+  // literal delimiter used when several field names / values are packed into one string
+  private String fieldDelim = "|";
+
+  // regular expression handed to String.split to unpack fieldDelim-joined strings
+  private String fieldSplitExpr = "\\|";
+
+  // map of a "verb" phrase to a metadata field
+  private ArrayList<ModifierDefinition> verbModifierList;
+
+  /**
+   * Reads the component configuration.
+   *
+   * Recognised init args: "whitelistFields" and "excludeFields" (lists of field names),
+   * "verbModifiers" (list of "phrase[,phrase...]:fields" definitions), "boostFactor" (Integer),
+   * "useAndForMultiValuedFields" ("true"/"false") and "fieldDelimiter". The args are also kept
+   * in initParams so that inform() can load the synonym and stopword resources later.
+   *
+   * @param initArgs configuration of this search component
+   */
+  @Override
+  public void init( NamedList initArgs ) {
+    List<String> whitelistFields = (List<String>) initArgs.get( "whitelistFields" );
+    if (whitelistFields != null) {
+      this.whitelistFields = new HashSet<>( whitelistFields );
+    }
+
+    List<String> excludeFields = (List<String>) initArgs.get( "excludeFields" );
+    if (excludeFields != null) {
+      this.excludeFields = new HashSet<>( excludeFields );
+    }
+
+    // NOTE(review): modifiers are parsed with the default fieldDelim because "fieldDelimiter"
+    // is only read further down - confirm this ordering is intended.
+    List<String> verbModifiers = (List<String>) initArgs.get( "verbModifiers" );
+    if (verbModifiers != null) {
+      this.verbModifierList = new ArrayList<>( );
+      for (String modifier : verbModifiers) {
+        // "<phrase[,phrase...]>:<field spec>" - split at the first colon
+        int colon = modifier.indexOf( ":" );
+        String modifierPhrase = modifier.substring( 0, colon );
+        String modifierFields = modifier.substring( colon + 1 );
+
+        if (modifierPhrase.indexOf( "," ) > 0) {
+          for (String phrase : modifierPhrase.split( "," )) {
+            addModifier( phrase, modifierFields );
+          }
+        }
+        else {
+          addModifier( modifierPhrase, modifierFields );
+        }
+      }
+    }
+
+    Integer boostFactor = (Integer) initArgs.get( "boostFactor" );
+    if (boostFactor != null) {
+      this.boostFactor = boostFactor;
+    }
+
+    String useAndForMV = (String) initArgs.get( "useAndForMultiValuedFields" );
+    if (useAndForMV != null) {
+      this.useAndForMultiValuedFields = useAndForMV.equalsIgnoreCase( "true" );
+    }
+
+    String useFieldDelim = (String) initArgs.get( "fieldDelimiter" );
+    if (useFieldDelim != null) {
+      this.fieldDelim = useFieldDelim;
+      // fieldSplitExpr is passed to String.split, which takes a regular expression: quote the
+      // delimiter so that characters such as "|" or "." are matched literally.
+      this.fieldSplitExpr = java.util.regex.Pattern.quote( useFieldDelim );
+    }
+
+    initParams = initArgs;
+  }
+
+  /**
+   * Parses one verb-modifier definition and appends it to verbModifierList.
+   *
+   * The field spec has up to three fieldDelim-separated sections:
+   * target field(s), optional "field:value" filter pairs, optional template rule.
+   *
+   * @param modifierPhrase the phrase that triggers the modifier (stored lower-cased)
+   * @param modifierFields the field spec described above
+   */
+  private void addModifier( String modifierPhrase, String modifierFields ) {
+    Log.info( "addModifier: " + modifierPhrase + ": " + modifierFields );
+    ModifierDefinition definition = new ModifierDefinition( );
+    definition.modifierPhrase = modifierPhrase.toLowerCase( );
+
+    String targetFields = modifierFields;
+    int firstDelim = modifierFields.indexOf( fieldDelim );
+    if (firstDelim > 0) {
+      definition.filterFields = new HashMap<>( );
+      String fieldPairs = modifierFields.substring( firstDelim + 1 );
+      targetFields = modifierFields.substring( 0, firstDelim );
+      Log.info( "fieldPairs = " + fieldPairs );
+
+      // an optional third section carries the template rule
+      String modifierTemplate = null;
+      int secondDelim = fieldPairs.indexOf( fieldDelim );
+      if (secondDelim > 0) {
+        modifierTemplate = fieldPairs.substring( secondDelim + 1 );
+        fieldPairs = fieldPairs.substring( 0, secondDelim );
+      }
+
+      String[] pairs = (fieldPairs.indexOf( "," ) > 0) ? fieldPairs.split( "," ) : new String[] { fieldPairs };
+      for (String pair : pairs) {
+        int colon = pair.indexOf( ":" );
+        definition.filterFields.put( pair.substring( 0, colon ), pair.substring( colon + 1 ) );
+      }
+
+      if (modifierTemplate != null) {
+        definition.templateRule = new ModifierTemplateRule( modifierTemplate );
+      }
+    }
+
+    definition.modifierFields = new ArrayList<>( );
+    String[] targets = (targetFields.indexOf( "," ) > 0) ? targetFields.split( "," ) : new String[] { targetFields };
+    for (String target : targets) {
+      definition.modifierFields.add( target );
+    }
+
+    definition.modTokens = definition.modifierPhrase.split( " " );
+    verbModifierList.add( definition );
+  }
+
+
+  /**
+   * Loads the optional synonym and stopword resources named in the init args and registers
+   * this component as first-searcher and new-searcher listener on the core.
+   *
+   * Failures while loading either resource are logged and otherwise ignored.
+   *
+   * @param core the core this component belongs to
+   */
+  @Override
+  public void inform( SolrCore core ) {
+    if (initParams != null) {
+      SolrResourceLoader resourceLoader = core.getResourceLoader( );
+
+      synonymsFile = (String)initParams.get( "synonyms" );
+      if (synonymsFile != null) {
+        // each synonym entry is kept as one token
+        Analyzer analyzer = new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            Tokenizer tokenizer = new KeywordTokenizer();
+            return new TokenStreamComponents(tokenizer, tokenizer );
+          }
+        };
+
+        // report (rather than silently replace) bytes that are not valid UTF-8
+        CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
+                                                                    .onUnmappableCharacter(CodingErrorAction.REPORT);
+
+        // try-with-resources: the stream is closed even when parsing fails
+        try ( InputStreamReader reader = new InputStreamReader( resourceLoader.openResource( synonymsFile ), decoder ) ) {
+          SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
+          parser.parse( reader );
+          this.synonyms = parser.build( );
+        }
+        catch ( Exception e ) {
+          // best effort: the component keeps working without synonyms
+          Log.warn( "Parsing Synonyms Got Exception " + e, e );
+        }
+      }
+
+      String stopwordsFile = (String)initParams.get( "stopwords" );
+      if (stopwordsFile != null) {
+        this.stopwords = new HashSet<>( );
+        // explicit UTF-8 instead of the platform default charset; reader closed on every path
+        try ( BufferedReader br = new BufferedReader(
+                  new InputStreamReader( resourceLoader.openResource( stopwordsFile ), StandardCharsets.UTF_8 ) ) ) {
+          String line;
+          while ((line = br.readLine( )) != null) {
+            stopwords.add( line.toLowerCase( ) );
+          }
+        }
+        catch ( IOException ioe ) {
+          Log.warn( "Adding Stopwords Got Exception " + ioe, ioe );
+        }
+      }
+    }
+
+    core.registerFirstSearcherListener( this );
+    core.registerNewSearcherListener( this );
+  }
+
+ @Override
+ public void postCommit() { }
+
+ @Override
+ public void postSoftCommit() { }
+
+  /**
+   * Searcher callback: flags the term/field maps as stale so that the next call to
+   * prepare() rebuilds them.
+   *
+   * @param newSearcher     unused
+   * @param currentSearcher unused
+   */
+  @Override
+  public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
+    // same monitor as the rebuild in prepare(), so the flag cannot be raised mid-rebuild and then lost
+    synchronized( this ) {
+      initFieldMap = true;
+    }
+  }
+
+  /**
+   * Rewrites the request parameters when parts of the free-text query match known field values.
+   *
+   * Shard sub-requests (isShard=true), requests without a q parameter, the query "*" and
+   * queries that contain a ':' after their first character are left untouched. The field maps
+   * are rebuilt first if a new searcher was registered since the last build.
+   *
+   * @param rb the response builder of the current request
+   * @throws IOException if building the field maps or matching the query fails
+   */
+  @Override
+  public void prepare( ResponseBuilder rb ) throws IOException
+  {
+    SolrQueryRequest req = rb.req;
+    SolrParams params = req.getParams( );
+
+    // Only build the field map and do the processing if we are the main event
+    if ("true".equals( params.get( "isShard" ) )) {
+      Log.debug( "A shard query: don't process!" );
+      return;
+    }
+
+    Log.info( "prepare ..." );
+    if (initFieldMap) {
+      synchronized( this ) {
+        // re-check under the lock: requests that queued up behind a rebuild must not repeat it
+        if (initFieldMap) {
+          buildFieldMap( rb );
+          initFieldMap = false;
+        }
+      }
+    }
+
+    int mintok = 1;
+    String mt = params.get( MINIMUM_TOKENS );
+    if ( mt != null ) {
+      try {
+        mintok = Integer.parseInt( mt );
+      }
+      catch ( NumberFormatException nfe ) {
+        // malformed value: fall back to the default
+        mintok = 1;
+      }
+    }
+
+    String qStr = params.get( CommonParams.Q );
+    Log.debug( "query is: " + qStr );
+    if (qStr == null) {
+      // nothing to analyse; previously this case failed with a NullPointerException
+      Log.debug( "No query string - do not process" );
+      return;
+    }
+    if (qStr.equals( "*" ) || qStr.indexOf( ":" ) > 0) {
+      Log.debug( "Complex query - do not process" );
+      return;
+    }
+
+    // tokenize the query string, if any part of it matches, remove the token from the list and
+    // add a filter query with :value:
+    ArrayList<char[]> queryTokens = tokenize( qStr );
+
+    if (queryTokens.size( ) >= mintok) {
+      ModifiableSolrParams modParams = new ModifiableSolrParams( params );
+      if (findPattern( queryTokens, rb, modParams )) {
+        req.setParams( modParams );
+      }
+    }
+  }
+
+ /**
+ * Reports the distributed stage as finished without issuing any shard requests.
+ *
+ * If this method is not overridden then this will cause a request against
+ * the shards, causing performance degradation and duplicate values in the
+ * facet counts.
+ * Here we just return that this is done, leaving it up to the Query to drive
+ * the requests.
+ *
+ * @param rb Ignored
+ * @return ResponseBuilder.STAGE_DONE
+ * @throws IOException never thrown
+ */
+ @Override
+ public int distributedProcess(ResponseBuilder rb) throws IOException {
+ return ResponseBuilder.STAGE_DONE;
+ }
+
+ private boolean findPattern( ArrayList queryTokens, ResponseBuilder rb, ModifiableSolrParams modParams ) throws IOException { // Matches runs of query tokens against the field map; when tokens matched, rewrites q/fq (filter mode) or adds bq (boost mode) on modParams and returns true, otherwise returns false.
+ Log.debug( "findPattern " );
+
+ HashSet usedTokens = new HashSet( ); // positions of the query tokens consumed by a match
+ HashMap> fieldMap = new HashMap>( ); // local map (shadows the SynonymMap field): matched field name(s) -> matched values
+ HashMap fieldPositionMap = new HashMap( ); // field name(s) -> {first, last} token position of its matches
+ HashMap entityPositionMap = (verbModifierList != null) ? new HashMap() : null; // value -> token span; only tracked when verb modifiers are configured
+
+ String longestPhraseField = null;
+ int startToken = 0;
+ int lastEndToken = 0;
+ while ( startToken < queryTokens.size() ) {
+ Log.debug( "startToken = " + startToken );
+ int endToken = startToken;
+
+ while ( endToken < queryTokens.size( ) ) { // grow the candidate phrase one token at a time
+ // FieldName can hold several field names joined by fieldDelim if more than one field matches a set of tokens
+ String fieldName = getFieldNameFor( queryTokens, startToken, endToken );
+ if ( fieldName != null ) {
+ longestPhraseField = fieldName;
+ lastEndToken = endToken;
+ }
+ else if ( longestPhraseField != null ) { // first miss after a hit: the longest match ends at lastEndToken
+ break;
+ }
+ ++endToken;
+ }
+
+ if (longestPhraseField != null) {
+ // create matching phrase from startToken -> endToken
+ String phrase = getPhrase( queryTokens, startToken, lastEndToken );
+ Log.debug( "get Indexed Term for " + phrase );
+ String indexedTerm = getMappedFieldName( termMap, phrase.toLowerCase( ));
+ if (indexedTerm == null) { // retry with the stemmed form of the phrase
+ indexedTerm = getMappedFieldName( termMap, getStemmed( phrase ));
+ }
+ if (indexedTerm != null) { // NOTE(review): when this is false nothing advances startToken or clears longestPhraseField, so the outer loop looks like it would repeat forever - confirm termMap always covers every phrase found in fieldMap
+ indexedTerm = indexedTerm.replace( '_', ' ' );
+ if (indexedTerm.indexOf( " " ) > 0 ) { // multi-word value: wrap in double quotes
+ indexedTerm = "\"" + indexedTerm + "\"";
+ }
+ ArrayListvalList = fieldMap.get( longestPhraseField );
+ if (valList == null) {
+ valList = new ArrayList( );
+ fieldMap.put( longestPhraseField, valList );
+ }
+
+ Log.info( "indexedTerm: " + indexedTerm );
+ int[] entityPosition = null;
+ if (entityPositionMap != null) {
+ entityPosition = new int[2];
+ entityPosition[0] = startToken;
+ entityPosition[1] = endToken-1;
+ }
+
+ Log.debug( "indexedTerm: " + indexedTerm );
+ if (indexedTerm.indexOf( fieldDelim ) > 0) // several values packed into one string
+ {
+ String[] indexedTerms = indexedTerm.split( fieldSplitExpr );
+ for (int t = 0; t < indexedTerms.length; t++) {
+ valList.add( indexedTerms[t] );
+ if (entityPositionMap != null) entityPositionMap.put( indexedTerms[t], entityPosition );
+ }
+ }
+ else {
+ valList.add( indexedTerm );
+ if (entityPositionMap != null) entityPositionMap.put( indexedTerm, entityPosition );
+ }
+
+ // save startToken and lastEndToken so can use for boolean operator context
+ // for multi-value fields -save the min and max of all tokens positions for the field
+ int[] posArray = fieldPositionMap.get( longestPhraseField );
+ if (posArray == null)
+ {
+ posArray = new int[2];
+ posArray[0] = startToken;
+ posArray[1] = lastEndToken;
+ fieldPositionMap.put( longestPhraseField, posArray );
+ }
+ else
+ {
+ posArray[1] = lastEndToken; // later match for the same field: only the end of the span moves
+ }
+
+ longestPhraseField = null;
+ for (int i = startToken; i <= lastEndToken; i++) {
+ Log.debug( "adding used token at " + i );
+ usedTokens.add( new Integer( i ) );
+ }
+ startToken = lastEndToken + 1; // continue scanning after the consumed phrase
+ }
+ }
+ else {
+ ++startToken;
+ }
+ }
+
+ if (usedTokens.size( ) > 0) {
+
+ // filter field maps based on verbs here:
+ if (entityPositionMap != null) {
+ filterFieldMap( queryTokens, fieldMap, entityPositionMap, fieldPositionMap );
+ }
+
+ String useBoost = modParams.get( BOOST_PARAM );
+ Integer boostFactor = (useBoost != null) ? new Integer( useBoost ) : this.boostFactor; // request parameter wins over the configured value; a non-numeric value makes new Integer(...) throw NumberFormatException
+ if (boostFactor == null) { // filter mode
+ StringBuilder qbuilder = new StringBuilder( ); // collects the tokens that matched no field
+ if (usedTokens.size( ) < queryTokens.size( ) ) {
+ for (int i = 0; i < queryTokens.size(); i++) {
+ if (boostFactor != null || usedTokens.contains( new Integer( i ) ) == false ) { // boostFactor is always null in this branch, so only the usedTokens test decides
+ char[] token = queryTokens.get( i );
+ if (qbuilder.length() > 0) qbuilder.append( " " );
+ qbuilder.append( token );
+ }
+ }
+ }
+
+ Log.debug( "got qbuilder string = '" + qbuilder.toString() + "'" );
+ if (qbuilder.length() == 0 && fieldMap.size() > 0) { // every token matched: match-all query plus one fq per field
+ // build a filter query -
+ // EH: can't do this if dismax
+ Log.debug( "setting q = *:*" );
+ modParams.set( "q", "*:*" );
+ for (String fieldName : fieldMap.keySet() ) {
+ String fq = getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, "" );
+ Log.info( "adding filter query: " + fq );
+ modParams.add( "fq", fq );
+ }
+ }
+ else if (qbuilder.length() > 0 && fieldMap.size() > 0) { // leftover tokens remain: keep them in q and append the field clauses
+ // build a boolean query for the fielded data, OR with remainder of query
+ StringBuilder boolQ = new StringBuilder( );
+ for (String fieldName : fieldMap.keySet() ) {
+ if (boolQ.length() > 0) boolQ.append( " AND " );
+ boolQ.append( getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, "" ) );
+ }
+ String q = qbuilder.toString( ) + " (" + boolQ.toString() + ")";
+ Log.info( "setting q = '" + q + "'" );
+ modParams.set( "q", q );
+ }
+ }
+ else { // boostFactor is NOT null: boost mode, q is left unchanged
+ // use the bq field to add fielded boost clauses
+ StringBuilder bbuilder = new StringBuilder( );
+ String boostSuffix = "^" + boostFactor.toString( );
+ for (String fieldName : fieldMap.keySet( ) ) {
+ bbuilder.append( " " );
+ bbuilder.append( getFilterQuery( rb, fieldName, fieldMap.get( fieldName ), fieldPositionMap.get( fieldName ), queryTokens, boostSuffix ) );
+ }
+ Log.info( "adding bq = '" + bbuilder.toString() + "'" );
+ modParams.add( "bq", bbuilder.toString( ).trim() );
+ }
+ return true; // returned whenever at least one token was consumed
+ }
+
+ return false;
+ }
+
+  /** Joins tokens[startToken..endToken] (both inclusive) with "_". */
+  private String getPhrase( ArrayList<char[]> tokens, int startToken, int endToken ) {
+    return getPhrase( tokens, startToken, endToken, "_" );
+  }
+
+  /**
+   * Joins tokens[startToken..endToken] (both inclusive) with the given separator.
+   *
+   * @param tokens     the tokenized query
+   * @param startToken index of the first token of the phrase
+   * @param endToken   index of the last token of the phrase
+   * @param tokenSep   text placed between consecutive tokens
+   * @return the joined phrase; empty when endToken is smaller than startToken
+   */
+  private String getPhrase( ArrayList<char[]> tokens, int startToken, int endToken, String tokenSep ) {
+    StringBuilder phrase = new StringBuilder( );
+    int pos = startToken;
+    while (pos <= endToken) {
+      if (pos != startToken) {
+        phrase.append( tokenSep );
+      }
+      phrase.append( tokens.get( pos ) );
+      pos++;
+    }
+    String joined = phrase.toString( );
+    Log.debug( "getPhrase returns " + joined );
+    return joined;
+  }
+
+  /**
+   * Builds the query clause for one matched field.
+   *
+   * A fieldName that packs several names (joined by fieldDelim) is delegated to the
+   * array overload. A single value yields "field:value"; when that value is multi-word
+   * and a per-term equivalent exists, the two are OR-ed. Several values are joined with
+   * AND for multi-valued fields (unless AND is disabled or an "or" token sits between the
+   * matched positions) and with OR otherwise.
+   *
+   * @param rb           current request, used to reach the schema
+   * @param fieldName    field name, or several names joined by fieldDelim
+   * @param valList      values matched for the field
+   * @param termPosRange {first, last} token position of the matches for this field
+   * @param queryTokens  the tokenized query
+   * @param suffix       text appended to the clause (boost suffix or empty string)
+   * @return the query clause
+   */
+  private String getFilterQuery( ResponseBuilder rb, String fieldName, ArrayList<String> valList,
+                                 int[] termPosRange, ArrayList<char[]> queryTokens, String suffix) {
+    if (fieldName.indexOf( fieldDelim ) > 0) {
+      return getFilterQuery( rb, fieldName.split( fieldSplitExpr ), valList, termPosRange, queryTokens, suffix );
+    }
+
+    if (valList.size() != 1) {
+      SchemaField field = rb.req.getSearcher().getSchema().getField( fieldName );
+      boolean useAnd = field.multiValued() && useAndForMultiValuedFields;
+      // an 'or' token strictly between the first and last matched position switches to OR
+      int pos = useAnd ? termPosRange[0] + 1 : 0;
+      while (useAnd && pos < termPosRange[1]) {
+        char[] qToken = queryTokens.get( pos );
+        if (qToken.length == 2 && qToken[0] == 'o' && qToken[1] == 'r' ) {
+          useAnd = false;
+        }
+        pos++;
+      }
+
+      String operator = useAnd ? " AND " : " OR ";
+      StringBuilder joined = new StringBuilder( );
+      for (String val : valList ) {
+        if (joined.length() > 0) {
+          joined.append( operator );
+        }
+        joined.append( val );
+      }
+      return fieldName + ":(" + joined.toString() + ")" + suffix;
+    }
+
+    // check if valList[0] is multi-term - if so, check if there is a single term equivalent
+    // if this returns non-null, create an OR query with single term version
+    // example "white linen perfume" vs "white linen shirt" where "White Linen" is a brand
+    String term = valList.get( 0 );
+    if (term.indexOf( " " ) > 0) {
+      String singleTermQuery = getSingleTermQuery( term );
+      if (singleTermQuery != null) {
+        // EH: possible meta-escaping problem if value includes {!field f=}value
+        String composite = "(" + fieldName + ":" + term + " OR (" + singleTermQuery + "))" + suffix;
+        Log.debug( "returning composite query: " + composite );
+        return composite;
+      }
+    }
+
+    String query = fieldName + ":" + term + suffix;
+    Log.debug( "returning single query: " + query );
+    return query;
+  }
+
+  /**
+   * Builds one clause per field name and ORs them together inside parentheses.
+   *
+   * @param rb           current request
+   * @param fieldNames   the field names the values may belong to
+   * @param valList      values matched for these fields
+   * @param termPosRange {first, last} token position of the matches
+   * @param queryTokens  the tokenized query
+   * @param suffix       text appended to every per-field clause
+   * @return "(clause1 OR clause2 ...)"
+   */
+  private String getFilterQuery( ResponseBuilder rb, String[] fieldNames, ArrayList<String> valList,
+                                 int[] termPosRange, ArrayList<char[]> queryTokens, String suffix) {
+    StringBuilder clauses = new StringBuilder( "(" );
+    boolean first = true;
+    for (String name : fieldNames) {
+      if (!first) {
+        clauses.append( " OR " );
+      }
+      first = false;
+      clauses.append( getFilterQuery( rb, name, valList, termPosRange, queryTokens, suffix ) );
+    }
+    return clauses.append( ")" ).toString();
+  }
+
+  /**
+   * Looks up the field name(s) for the phrase formed by queryTokens[startToken..endToken],
+   * first using the lower-cased phrase and then, if that misses, its stemmed form.
+   *
+   * @return the mapped field name(s), or null when neither form is known
+   * @throws IOException if the lookup fails
+   */
+  private String getFieldNameFor( ArrayList<char[]> queryTokens, int startToken, int endToken ) throws IOException {
+    String phrase = getPhrase( queryTokens, startToken, endToken );
+    String direct = getFieldNameFor( phrase.toLowerCase( ) );
+    if (direct != null) {
+      return direct;
+    }
+
+    String stemmed = getStemmed( phrase );
+    Log.debug( "checking stemmed " + stemmed );
+    if (stemmed.equals( phrase )) {
+      // stemming changed nothing, a second lookup is skipped
+      return null;
+    }
+    return getFieldNameFor( stemmed );
+  }
+
+  /**
+   * Builds a query that ANDs one fielded clause per word of a multi-word value.
+   *
+   * @param multiTermValue the multi-word value, optionally wrapped in double quotes
+   * @return the per-term query, or null when any word maps to no field or the lookup
+   *         throws an IOException
+   */
+  private String getSingleTermQuery( String multiTermValue ) {
+
+    String multiTerm = multiTermValue.startsWith( "\"" )
+                     ? multiTermValue.substring( 1, multiTermValue.lastIndexOf( "\"" ) )
+                     : multiTermValue;
+    Log.debug( "getSingleTermQuery " + multiTerm + "" );
+
+    StringBuilder query = new StringBuilder( );
+    try {
+      boolean first = true;
+      for (String rawTerm : multiTerm.split( " " )) {
+        if (!first) {
+          query.append( " AND " );
+        }
+        first = false;
+
+        String term = rawTerm.toLowerCase( );
+        String fieldName = getFieldNameFor( term );
+        Log.debug( "fieldName for " + term + " is " + fieldName );
+        if (fieldName == null) {
+          return null;
+        }
+
+        if (fieldName.indexOf( fieldDelim ) > 0) {
+          // the word is known in several fields: OR them
+          String[] fields = fieldName.split( fieldSplitExpr );
+          query.append( "(" );
+          for (int f = 0; f < fields.length; f++) {
+            if (f > 0) {
+              query.append( " OR " );
+            }
+            query.append( fields[f] ).append( ":" ).append( getMappedFieldName( termMap, term ) );
+          }
+          query.append( ")" );
+        }
+        else {
+          query.append( fieldName ).append( ":" ).append( getMappedFieldName( termMap, term ) );
+        }
+      }
+    }
+    catch (IOException ioe ) {
+      return null;
+    }
+
+    String result = query.toString( );
+    Log.debug( "getSingleTermQuery returns: '" + result + "'" );
+    return result;
+  }
+
+  /**
+   * Looks up the field name(s) mapped to a phrase; the phrases "*" and "* *" never match.
+   *
+   * @param phrase the phrase to look up (lower-cased before the lookup)
+   * @return the mapped field name(s), or null
+   * @throws IOException if the lookup fails
+   */
+  private String getFieldNameFor( String phrase ) throws IOException {
+    Log.debug( "getFieldNameFor '" + phrase + "'" );
+    if ("*".equals( phrase ) || "* *".equals( phrase )) {
+      return null;
+    }
+    return getMappedFieldName( fieldMap, phrase.toLowerCase( ) );
+  }
+
+
+ // TODO: Return comma separated string if more than one (several outputs are currently joined with fieldDelim)
+ private String getMappedFieldName( SynonymMap termMap, String phrase ) throws IOException { // Looks phrase up in the given SynonymMap (the parameter shadows the termMap field); returns its output word(s), joined by fieldDelim when there are several, or null when phrase is not a complete key.
+ Log.debug( "getMappedFieldName: '" + phrase + "'" );
+ FST fst = termMap.fst;
+ if(fst != null) {
+ FST.BytesReader fstReader = fst.getBytesReader();
+ FST.Arc scratchArc = new FST.Arc<>();
+ BytesRef scratchBytes = new BytesRef();
+ CharsRefBuilder scratchChars = new CharsRefBuilder();
+ ByteArrayDataInput bytesReader = new ByteArrayDataInput();
+
+ BytesRef pendingOutput = fst.outputs.getNoOutput();
+ fst.getFirstArc(scratchArc);
+ BytesRef matchOutput = null;
+
+ String noSpPhrase = phrase.replace(' ', '_'); // the lookup key uses '_' where the phrase has spaces
+ int charPos = 0;
+ while (charPos < noSpPhrase.length()) { // follow the FST one code point at a time
+ final int codePoint = noSpPhrase.codePointAt(charPos);
+ if (fst.findTargetArc(codePoint, scratchArc, scratchArc, fstReader) == null) { // no arc for this code point: phrase is not in the map
+ Log.debug("No FieldName for " + phrase);
+ return null;
+ }
+
+ pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); // accumulate the output attached to the arcs followed so far
+ charPos += Character.charCount(codePoint);
+ }
+
+ if (scratchArc.isFinal()) { // only a final arc means the whole phrase is a key, not just a prefix of one
+ Log.debug("creating matchOutput");
+ matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
+ ArrayList mappedFields = new ArrayList();
+ bytesReader.reset(matchOutput.bytes, matchOutput.offset, matchOutput.length);
+
+ final int code = bytesReader.readVInt();
+ final int count = code >>> 1; // the lowest bit is dropped (presumably a flag - verify against SynonymMap); the rest is the number of outputs
+ for (int outputIDX = 0; outputIDX < count; outputIDX++) {
+ termMap.words.get(bytesReader.readVInt(), scratchBytes); // resolve the output id to its bytes
+ scratchChars.copyUTF8Bytes(scratchBytes);
+ int lastStart = 0;
+ final int chEnd = lastStart + scratchChars.length();
+ for (int chIDX = lastStart; chIDX <= chEnd; chIDX++) { // split the output at SynonymMap.WORD_SEPARATOR
+ if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
+ int outputLen = chIDX - lastStart;
+ assert outputLen > 0 : "output contains empty string: " + scratchChars;
+ mappedFields.add(new String(scratchChars.chars(), lastStart, outputLen));
+ lastStart = chIDX + 1;
+ }
+ }
+ }
+
+ if (mappedFields.size() == 1) {
+ Log.debug("returning mapped fieldName " + mappedFields.get(0));
+ return mappedFields.get(0);
+ } else { // zero or several outputs: join with fieldDelim (an empty list yields an empty string)
+ StringBuilder fieldBuilder = new StringBuilder();
+ for (String fieldName : mappedFields) {
+ if (fieldBuilder.length() > 0) fieldBuilder.append(fieldDelim);
+ fieldBuilder.append(fieldName);
+ }
+ Log.debug("returning mapped fieldName " + fieldBuilder.toString());
+ return fieldBuilder.toString();
+ }
+ }
+ } else {
+ Log.debug("Finite State Machine is null on Synonym Map -> ignored");
+ }
+
+ // Suppressing this message (debug level only) since it is very chatty in production.
+ Log.debug( "matchOutput but no FieldName for " + phrase );
+ return null;
+ }
+
+
+  /**
+   * Rebuilds fieldMap and termMap from the values of the candidate string fields.
+   *
+   * Values are read through an UninvertingReader over the current searcher; terms from
+   * addDistributedTerms are added before the maps are built.
+   *
+   * @param rb the current request, used to obtain the searcher
+   * @throws IOException if reading the index fails
+   */
+  private void buildFieldMap( ResponseBuilder rb ) throws IOException {
+    Log.debug( "buildFieldMap" );
+    SolrIndexSearcher searcher = rb.req.getSearcher();
+
+    // build a synonym map from the SortedDocValues -
+    // for each field value: lower case, stemmed, lookup synonyms from synonyms.txt - map to fieldValue
+    SynonymMap.Builder fieldBuilder = new SynonymMap.Builder( true );
+    SynonymMap.Builder termBuilder = new SynonymMap.Builder( true );
+
+    ArrayList<String> searchFields = getStringFields( searcher );
+
+    // every candidate field is uninverted as SORTED_SET_BINARY
+    HashMap<String,UninvertingReader.Type> fieldTypeMap = new HashMap<>( );
+    for (String searchField : searchFields ) {
+      fieldTypeMap.put( searchField, UninvertingReader.Type.SORTED_SET_BINARY );
+    }
+    UninvertingReader unvRead = new UninvertingReader( searcher.getLeafReader( ), fieldTypeMap );
+
+    for (String searchField : searchFields ) {
+      Log.debug( "adding searchField " + searchField );
+      CharsRef fieldChars = new CharsRef( searchField );
+      SortedSetDocValues sdv = unvRead.getSortedSetDocValues( searchField );
+      if (sdv != null) {
+        Log.debug( "got SortedSetDocValues for " + searchField );
+        TermsEnum te = sdv.termsEnum();
+        for (BytesRef next = te.next(); next != null; next = te.next()) {
+          String fieldValue = te.term().utf8ToString( );
+          if (StringUtils.isNotEmpty( fieldValue )) {
+            addTerm( fieldChars, fieldValue, fieldBuilder, termBuilder );
+          }
+        }
+      }
+    }
+
+    addDistributedTerms( rb, fieldBuilder, termBuilder, searchFields );
+
+    fieldMap = fieldBuilder.build( );
+    termMap = termBuilder.build( );
+  }
+
+ /**
+  * Collects the names of the fields whose values feed the field and term maps.
+  * When a whitelist is configured its entries are returned as they are; otherwise every
+  * field name reported by the searcher that is not excluded and whose schema field is a
+  * stored {@code StrField} is returned.
+  *
+  * @param searcher searcher providing the schema and the field names of the index
+  * @return the candidate field names; possibly empty, never {@code null}
+  */
+ // TODO: Filter this by the configuration fields ...
+ private ArrayList<String> getStringFields( SolrIndexSearcher searcher ) {
+
+   ArrayList<String> strFields = new ArrayList<>( );
+
+   if ( hasWhitelist() ) {
+     Log.info("Using whitelist fields instead of schema.");
+     for ( String fieldName: whitelistFields ) {
+       strFields.add( fieldName );
+     }
+     return strFields;
+   }
+
+   IndexSchema schema = searcher.getSchema();
+   Iterable<String> fieldNames = searcher.getFieldNames();
+   for ( String fieldName : fieldNames ) {
+     if (excludeFields != null && excludeFields.contains( fieldName )) continue;
+
+     // getField() throws for a name the schema does not declare, and the index may still
+     // hold such fields, so look the field up leniently and skip the unknown ones.
+     SchemaField field = schema.getFieldOrNull( fieldName );
+     if (field != null && field.stored() && field.getType() instanceof StrField ) {
+       strFields.add( fieldName );
+     }
+   }
+
+   return strFields;
+ }
+
+ /** Tells whether at least one whitelist field has been configured. */
+ private boolean hasWhitelist() {
+   if (this.whitelistFields == null) {
+     return false;
+   }
+   return this.whitelistFields.size() > 0;
+ }
+
+ /**
+  * Registers one field value in both builders under every variant that should resolve to it:
+  * the underscore-joined value, its lower-cased form, its stemmed form and, when a synonym
+  * map is configured, the synonyms of the value and of its lower-cased form.
+  *
+  * @param fieldChars   name of the field the value belongs to
+  * @param fieldValue   the field value as indexed
+  * @param fieldBuilder receives variant -> field name mappings
+  * @param termBuilder  receives variant -> original value mappings
+  * @throws IOException if a synonym lookup fails
+  */
+ private void addTerm( CharsRef fieldChars, String fieldValue, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder ) throws IOException {
+
+   Log.debug( "got fieldValue: '" + fieldValue + "'" );
+   String nospVal = fieldValue.replace( ' ', '_' );
+   Log.debug( "got nspace: '" + nospVal + "'" );
+   CharsRef valueChars = new CharsRef( fieldValue );
+
+   addVariant( nospVal, fieldChars, valueChars, fieldBuilder, termBuilder );
+
+   // lower case term
+   addVariant( nospVal.toLowerCase( ), fieldChars, valueChars, fieldBuilder, termBuilder );
+
+   // stem it - the stem is derived from nospVal, so that is what it must be compared with
+   // (comparing with fieldValue reported every multi-word value as "stemmed")
+   String stemmed = getStemmed( nospVal );
+   if (!stemmed.equals( nospVal )) {
+     Log.debug( "adding stemmed: " + stemmed );
+     addVariant( stemmed, fieldChars, valueChars, fieldBuilder, termBuilder );
+   }
+
+   if (this.synonyms != null) {
+     // get synonyms from synonyms.txt, for the value as indexed and for its lower-cased form
+     addSynonymVariants( fieldValue, fieldValue, fieldChars, valueChars, fieldBuilder, termBuilder );
+     String lowerValue = fieldValue.toLowerCase();
+     if (!lowerValue.equals( fieldValue )) {
+       addSynonymVariants( lowerValue, fieldValue, fieldChars, valueChars, fieldBuilder, termBuilder );
+     }
+   }
+ }
+
+ /** Maps one input variant to the field name and to the original field value. */
+ private void addVariant( String variant, CharsRef fieldChars, CharsRef valueChars,
+     SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder ) {
+   CharsRef variantChars = new CharsRef( variant );
+   fieldBuilder.add( variantChars, fieldChars, false );
+   termBuilder.add( variantChars, valueChars, false );
+ }
+
+ /** Adds every synonym found for {@code lookup} as a variant of {@code fieldValue}. */
+ private void addSynonymVariants( String lookup, String fieldValue, CharsRef fieldChars, CharsRef valueChars,
+     SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder ) throws IOException {
+   ArrayList<String> synonymLst = getSynonymsFor( this.synonyms, lookup );
+   if ( synonymLst == null ) return;
+   for (String synonym : synonymLst ) {
+     Log.debug( "adding: " + synonym + " -> " + fieldValue );
+     addVariant( synonym.replace( ' ', '_' ), fieldChars, valueChars, fieldBuilder, termBuilder );
+   }
+ }
+
+ /**
+  * When the request names shards, asks every shard for all terms of the search fields
+  * through the configured terms handler and feeds the answers to {@code addTerms}.
+  * Does nothing for a non-distributed request.
+  *
+  * @param rb           response builder of the current request
+  * @param fieldBuilder receives variant -> field name mappings
+  * @param termBuilder  receives variant -> original value mappings
+  * @param searchFields names of the fields whose terms are requested
+  * @throws IOException if adding the returned terms fails
+  */
+ private void addDistributedTerms( ResponseBuilder rb, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
+   final SolrParams distribParams = rb.req.getParams();
+   final boolean isDistrib = distribParams.get(ShardParams.SHARDS) != null;
+   Log.debug( "Is Distributed = " + isDistrib );
+   if( !isDistrib ) return;
+
+   SolrIndexSearcher searcher = rb.req.getSearcher();
+   CoreContainer container = searcher.getCore().getCoreDescriptor().getCoreContainer();
+   ShardHandlerFactory shardHandlerFactory = container.getShardHandlerFactory( );
+   ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+
+   shardHandler.prepDistributed( rb );
+   // create a ShardRequest that contains a Terms Request.
+   // don't send to this shard???
+   ShardRequest sreq = new ShardRequest();
+   sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
+   sreq.actualShards = rb.shards;
+   ModifiableSolrParams params = new ModifiableSolrParams( );
+
+   params.set( TermsParams.TERMS_LIMIT, -1);
+   params.set( TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
+   String[] fields = searchFields.toArray( new String[ searchFields.size( )] );
+   params.set( TermsParams.TERMS_FIELD, fields );
+
+   params.set( CommonParams.DISTRIB, "false" );
+   params.set( ShardParams.IS_SHARD, true );
+   params.set( ShardParams.SHARDS_PURPOSE, sreq.purpose );
+   params.set( CommonParams.QT, termsHandler );
+   params.set( TermsParams.TERMS, "true" );
+
+   if (rb.requestInfo != null) {
+     params.set("NOW", Long.toString(rb.requestInfo.getNOW().getTime()));
+   }
+   sreq.params = params;
+
+   for (String shard : rb.shards ) {
+     Log.debug( "sending request to shard " + shard );
+     params.set(ShardParams.SHARD_URL, shard );
+     shardHandler.submit( sreq, shard, params );
+   }
+
+   ShardResponse rsp = shardHandler.takeCompletedIncludingErrors( );
+   if (rsp == null) return;
+
+   Log.debug( "got " + rsp.getShardRequest().responses.size( ) + " responses" );
+   for ( ShardResponse srsp : rsp.getShardRequest().responses ) {
+     Log.debug( "Got terms response from " + srsp.getShard( ));
+
+     // A failed shard may carry no response body; skip it instead of dereferencing it.
+     if (srsp.getException() != null || srsp.getSolrResponse() == null) {
+       Log.warn( "ShardResponse Exception!! " + srsp.getException( ) );
+       continue;
+     }
+
+     @SuppressWarnings("unchecked")
+     NamedList<NamedList<Number>> terms = (NamedList<NamedList<Number>>) srsp.getSolrResponse().getResponse().get("terms");
+     if (terms != null) {
+       addTerms( terms, fieldBuilder, termBuilder, searchFields );
+     }
+     else {
+       Log.warn( "terms was NULL! - make sure that /terms request handler is defined in solrconfig.xml" );
+     }
+   }
+ }
+
+ /**
+  * Adds every term that a shard returned for the search fields to both builders.
+  *
+  * @param terms        the "terms" section of a shard's terms response
+  * @param fieldBuilder receives variant -> field name mappings
+  * @param termBuilder  receives variant -> original value mappings
+  * @param searchFields names of the fields to read from the response
+  * @throws IOException if a synonym lookup in {@code addTerm} fails
+  */
+ private void addTerms( NamedList<NamedList<Number>> terms, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
+   TermsResponse termsResponse = new TermsResponse( terms );
+   for (String fieldName : searchFields ) {
+     List<TermsResponse.Term> termList = termsResponse.getTerms( fieldName );
+     if (termList == null) continue;   // this shard has nothing for the field
+
+     CharsRef fieldChars = new CharsRef( fieldName );
+     for (TermsResponse.Term tc : termList) {
+       String term = tc.getTerm();
+       Log.debug( "Add distributed term: " + fieldName + " = " + term );
+       addTerm( fieldChars, term, fieldBuilder, termBuilder );
+     }
+   }
+ }
+
+
+ /**
+  * Looks up the synonyms registered for a whole term (one or more space separated words)
+  * by walking the synonym map's FST one code point at a time.
+  *
+  * @param synMap synonym map to search
+  * @param term   the term, words separated by single spaces
+  * @return the synonyms of the complete term, or {@code null} when the map has no entry
+  *         that matches the complete term
+  * @throws IOException if reading the FST fails
+  */
+ private ArrayList<String> getSynonymsFor( SynonymMap synMap, String term ) throws IOException {
+   Log.debug( "getSynonymsFor '" + term + "'" );
+
+   FST<BytesRef> fst = synMap.fst;
+   if (fst == null) {
+     // a synonym map without entries has no FST to walk
+     Log.debug( "No Synonym for " + term );
+     return null;
+   }
+   FST.BytesReader fstReader = fst.getBytesReader();
+   FST.Arc<BytesRef> scratchArc = new FST.Arc<>( );
+   BytesRef scratchBytes = new BytesRef();
+   CharsRefBuilder scratchChars = new CharsRefBuilder();
+   ByteArrayDataInput bytesReader = new ByteArrayDataInput();
+
+   BytesRef pendingOutput = fst.outputs.getNoOutput();
+   fst.getFirstArc( scratchArc );
+   BytesRef matchOutput = null;
+
+   String[] tokens = term.split( " " );
+   for (int i = 0; i < tokens.length; i++) {
+     // only a match that ends on the last word is a match for the whole term
+     matchOutput = null;
+
+     int charPos = 0;
+     while( charPos < tokens[i].length() ) {
+       final int codePoint = tokens[i].codePointAt( charPos );
+       if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) {
+         Log.debug( "No Synonym for " + term );
+         return null;
+       }
+
+       pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
+       charPos += Character.charCount(codePoint);
+     }
+
+     if (scratchArc.isFinal()) {
+       matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
+     }
+
+     if (i < tokens.length-1) {
+       // more words follow: the FST has to continue over the word separator
+       if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) {
+         // no longer entry exists, and scratchArc is no longer a valid position to resume from
+         Log.debug( "No Synonym for " + term );
+         return null;
+       }
+       // accumulate the output carried by the separator arc itself
+       pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
+     }
+   }
+
+   if (matchOutput == null) {
+     return null;
+   }
+
+   ArrayList<String> synonymLst = new ArrayList<>( );
+   bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length );
+
+   // the low bit of the header is a flag, the remaining bits are the number of outputs
+   final int code = bytesReader.readVInt();
+   final int count = code >>> 1;
+   for( int outputIDX = 0; outputIDX < count; outputIDX++ ) {
+     synMap.words.get( bytesReader.readVInt(), scratchBytes);
+     scratchChars.copyUTF8Bytes(scratchBytes);
+     int lastStart = 0;
+     final int chEnd = lastStart + scratchChars.length();
+     // each output is split on the word separator into individual entries
+     for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) {
+       if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
+         int outputLen = chIDX - lastStart;
+         assert outputLen > 0: "output contains empty string: " + scratchChars;
+         String synonym = new String( scratchChars.chars(), lastStart, outputLen );
+         Log.debug( "got synonym '" + synonym + "'" );
+         synonymLst.add( synonym );
+         lastStart = chIDX + 1;
+       }
+     }
+   }
+
+   return synonymLst;
+ }
+
+
+ // assume English for now ...
+ private String getStemmed( String input ) {
+ char[] inputChars = input.toCharArray( );
+
+ int lastCh = stem( inputChars, inputChars.length );
+ if (lastCh < inputChars.length) {
+ return new String( inputChars, 0, lastCh );
+ }
+
+ return input;
+ }
+
+ /**
+  * Minimal English plural stemmer that works in place on the first {@code len} chars of
+  * {@code s}; it may rewrite a trailing "ies" into "y".
+  *
+  * @param s   buffer holding the word
+  * @param len number of valid chars in {@code s}
+  * @return the length of the stem, at most {@code len}
+  */
+ // similar to EnglishMinimalStemmer - fixes "...hes" as in batches couches
+ public int stem(char s[], int len) {
+   // too short, or not ending in 's': nothing to strip
+   if (len < 3 || s[len-1] != 's') {
+     return len;
+   }
+
+   final char beforeLast = s[len-2];
+   if (beforeLast == 'u' || beforeLast == 's') {
+     return len;
+   }
+   if (beforeLast != 'e') {
+     return len - 1;
+   }
+
+   // the word ends in "es"
+   final char third = s[len-3];
+   if (len > 3) {
+     if (third == 'i' && s[len-4] != 'a' && s[len-4] != 'e') {
+       s[len - 3] = 'y';
+       return len - 2;
+     }
+     if (third == 'h') {
+       return len - 2;
+     }
+   }
+
+   final boolean keepWhole = third == 'i' || third == 'a' || third == 'o' || third == 'e';
+   return keepWhole ? len : len - 1;
+ }
+
+ /**
+  * Splits {@code input} into tokens with the tokenizer from {@code getTokenizerImpl}.
+  *
+  * @param input text to tokenize
+  * @return the tokens in order of appearance, each as its own char array
+  * @throws IOException if the tokenizer fails
+  */
+ private ArrayList<char[]> tokenize( String input ) throws IOException {
+
+   Log.debug( "tokenize '" + input + "'" );
+   ArrayList<char[]> tokens = new ArrayList<>( );
+
+   // follow the full TokenStream workflow: reset, incrementToken..., end, close
+   try (Tokenizer tk = getTokenizerImpl( input )) {
+     CharTermAttribute term = tk.addAttribute( CharTermAttribute.class );
+     tk.reset( );
+     while (tk.incrementToken( ) ) {
+       // the attribute buffer is reused between tokens, so keep a private copy
+       int bufLen = term.length();
+       char[] copy = new char[ bufLen ];
+       System.arraycopy(term.buffer( ), 0, copy, 0, bufLen );
+       tokens.add( copy );
+     }
+     tk.end( );
+   }
+
+   return tokens;
+ }
+
+ private Tokenizer getTokenizerImpl( String input ) throws IOException {
+ StandardTokenizer sttk = new StandardTokenizer( );
+ sttk.setReader( new StringReader( input ) );
+ return sttk;
+ }
+
+ /**
+  * Intentionally empty: this component must not execute the query at this stage.
+  */
+ @Override
+ public void process(ResponseBuilder rb) throws IOException {
+   // do nothing - needed so we don't execute the query here.
+ }
+
+ // ===========================================================================
+ // Verb Modifier Code
+ // Using the verb modifier map if a verb modifier is adjacent to a field mapped phrase (can have noise words between)
+ // restrict the field names in the list to the one that is linked to the verb modifier
+ // TODO - how to deal with 'and' and 'or' Between modifiers
+ // ===========================================================================
+ /**
+  * Applies the verb modifiers found in the query to the field map: the entities next to a
+  * modifier are re-keyed to the field linked to that modifier, and the modifier's filter
+  * fields are added once something has been remapped.
+  *
+  * @param queryTokens       the tokens of the query, in order
+  * @param fieldMap          field-name key -> entity values; modified in place
+  * @param entityPositionMap entity phrase -> {start, end} token positions
+  * @param fieldPositionMap  field-name key -> {start, end} token positions; modified in place
+  */
+ private void filterFieldMap( ArrayList<char[]> queryTokens, HashMap<String,ArrayList<String>> fieldMap,
+     HashMap<String,int[]> entityPositionMap, HashMap<String,int[]> fieldPositionMap ) {
+
+   Log.info( "filterFieldMap" );
+   // need to find the modifiers that are in THIS set of tokens by position, in the order used ...
+   ArrayList<ModifierInstance> usedModifiers = getOrderedModifierPositions( queryTokens );
+   if (usedModifiers == null || usedModifiers.size() == 0) {
+     return; // nothing to do ...
+   }
+
+   // NOTE(review): this flag is never reset, so once one modifier has remapped something the
+   // filter fields of every later modifier are applied as well - confirm this is intended.
+   boolean remapped = false;
+   for (ModifierInstance modInstance : usedModifiers) {
+     if (modInstance.templateRule != null) applyModifierTemplateRule( entityPositionMap, fieldMap, modInstance.templateRule );
+
+     HashMap<String,String> fieldNameKeys = getFieldKeysForFieldName( modInstance.modifierFields, fieldMap );
+     if (fieldNameKeys != null) {
+       // find the entity just before (maximum pos before) or after (minimum pos after) the modifier phrase from entityPositionMap
+       // assuming here that the modifiers can work bi-directionally
+       // as in 'songs Paul McCartney composed' or 'songs Paul McCartney has written' vs. 'songs composed by Paul McCartney'
+       // or 'Bands Paul McCartney was in' vs. 'who was in the Who'
+       for (String fieldNameKey : fieldNameKeys.keySet() ) {
+         String modifierField = fieldNameKeys.get( fieldNameKey );
+
+         // entities in front of the modifier win; only look behind it when there are none
+         HashSet<String> entityPhrases = findLastEntitiesBefore( entityPositionMap, modInstance, usedModifiers, fieldMap.get( fieldNameKey ) );
+         if ( entityPhrases == null ) {
+           entityPhrases = findFirstEntitiesAfter( entityPositionMap, modInstance, usedModifiers, fieldMap.get( fieldNameKey ) );
+         }
+         if ( entityPhrases != null ) {
+           remapEntity( fieldNameKey, entityPhrases, modifierField, fieldMap, fieldPositionMap, entityPositionMap );
+           remapped = true;
+         }
+       }
+     }
+
+     // add any filter fields for the verbs:
+     HashMap<String,String> filterFields = modInstance.filterFields;
+     if (remapped && filterFields != null) {
+       Log.info( "checking verb modifiers for " + modInstance.modifierFields );
+       for (String filtField : filterFields.keySet( ) ) {
+         ArrayList<String> valList = new ArrayList<>( );
+         valList.add( filterFields.get( filtField ) );
+         Log.info( "setting verb filter: " + filtField + ":" + filterFields.get( filtField ) );
+         fieldMap.put( filtField, valList );
+         fieldPositionMap.put( filtField, modInstance.modifierPos );
+       }
+     }
+   }
+ }
+
+ /**
+  * Scans the query tokens from left to right and records every verb modifier phrase found.
+  *
+  * @param queryTokens the tokens of the query, in order
+  * @return the modifier instances in order of appearance, or {@code null} when the query
+  *         contains no modifier
+  */
+ private ArrayList<ModifierInstance> getOrderedModifierPositions( ArrayList<char[]> queryTokens ) {
+   ArrayList<ModifierInstance> modifiers = null;
+   int i = 0;
+   while (i < queryTokens.size( ) ) {
+     ModifierDefinition modifier = findModifier( queryTokens.get( i ) );
+     if (modifier == null || !matchesModifier( modifier.modTokens, queryTokens, i )) {
+       ++i;
+       continue;
+     }
+
+     Log.info( "Adding Modifier Instance '" + modifier.modifierPhrase + "'" );
+     ArrayList<String> modFields = modifier.modifierFields;
+     ModifierInstance modInst = new ModifierInstance( );
+     modInst.modifierPhrase = modifier.modifierPhrase;
+     modInst.modifierFields = modFields;
+     Log.info( "fields: " );
+     for (String modField : modFields ) { Log.info( " " + modField ); }
+     // {first token index, last token index} covered by the modifier phrase
+     modInst.modifierPos = new int[] { i, i + modifier.modTokens.length - 1 };
+
+     modInst.filterFields = modifier.filterFields;
+     modInst.templateRule = modifier.templateRule;
+     if (modifiers == null) modifiers = new ArrayList<>( );
+     modifiers.add( modInst );
+     // continue after the phrase so its tokens are not matched again
+     i += modifier.modTokens.length;
+   }
+
+   return modifiers;
+ }
+
+ /**
+  * Finds the first configured verb modifier whose phrase starts with the given token,
+  * ignoring case so that the lookup agrees with {@code matchesModifier}.
+  *
+  * @param queryToken a single query token
+  * @return the first candidate modifier, or {@code null} when there is none
+  */
+ private ModifierDefinition findModifier( char[] queryToken ) {
+   if (verbModifierList == null || queryToken == null || queryToken.length == 0) {
+     return null;
+   }
+   // build the comparison key once instead of once per modifier
+   String token = new String( queryToken ).toLowerCase( );
+   for (ModifierDefinition modifier : verbModifierList ) {
+     if (modifier.modifierPhrase.toLowerCase( ).startsWith( token )) {
+       return modifier;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Tells whether the query tokens starting at {@code start} spell out the whole modifier
+  * phrase, compared case-insensitively token by token.
+  *
+  * @param modTokens   the tokens of the modifier phrase
+  * @param queryTokens the tokens of the query
+  * @param start       index of the query token to start comparing at
+  * @return {@code true} only when every modifier token matches; an empty modifier or one
+  *         that runs past the end of the query never matches
+  */
+ private boolean matchesModifier( String[] modTokens, ArrayList<char[]> queryTokens, int start ) {
+   if (modTokens.length == 0 || start + modTokens.length > queryTokens.size( )) {
+     return false;
+   }
+   for (int i = 0; i < modTokens.length; i++) {
+     String token = new String( queryTokens.get( start + i ) );
+     if (!token.toLowerCase( ).equals( modTokens[i].toLowerCase( ))) return false;
+   }
+   return true;
+ }
+
+
+ /**
+  * Finds the keys of {@code fieldMap} (field-name lists) that contain one of the modifier fields.
+  *
+  * @param modifierFields the field names linked to a modifier
+  * @param fieldMap       field-name key -> entity values
+  * @return field-name key -> the modifier field found in it, or {@code null} when no key
+  *         contains any modifier field
+  */
+ private HashMap<String,String> getFieldKeysForFieldName( ArrayList<String> modifierFields, HashMap<String,ArrayList<String>> fieldMap ) {
+   Log.info( "getFieldKeysForFieldName" );
+   HashMap<String,String> fieldKeys = null;
+   for (String modifierField : modifierFields ) {
+     Log.info( "testing modifierField: " + modifierField );
+     for (String fieldNameList : fieldMap.keySet() ) {
+       Log.info( "testing fieldNameList: " + fieldNameList );
+       // a key may name several fields; compare each of them exactly
+       for (String field : fieldNameList.split( fieldSplitExpr )) {
+         if ( !field.equals( modifierField )) continue;
+         if (fieldKeys == null) fieldKeys = new HashMap<>( );
+         Log.info( "adding field Key " + fieldNameList + ": " + modifierField );
+         fieldKeys.put( fieldNameList, modifierField );
+       }
+     }
+   }
+   return fieldKeys;
+ }
+
+
+
+ // find entities before the current mod pos but after the last one (if modPos is not first in the list of modifier positions)
+ // we also need to keep track of the operator (???)
+ /**
+  * Collects the entities of {@code fieldVals} that lie between the nearest preceding
+  * modifier (or the start of the query) and the given modifier.
+  *
+  * @param entityPositionMap entity phrase -> {start, end} token positions
+  * @param modifier          the modifier to look in front of
+  * @param usedModifiers     all modifiers found in the query
+  * @param fieldVals         the entity values of the field being considered; may be {@code null}
+  * @return the matching entity phrases, or {@code null} when there are none
+  */
+ private HashSet<String> findLastEntitiesBefore( HashMap<String,int[]> entityPositionMap, ModifierInstance modifier,
+     ArrayList<ModifierInstance> usedModifiers, ArrayList<String> fieldVals ) {
+   Log.info( "findLastEntitiesBefore" );
+   if (fieldVals == null) return null;
+
+   HashSet<String> entitySet = null;
+   int previousModifierPosition = -1;
+   int thisModPos = modifier.modifierPos[0];
+
+   // take the closest modifier in front of this one, not merely the first one in the list
+   for ( ModifierInstance mod : usedModifiers ) {
+     int modEnd = mod.modifierPos[1];
+     if (modEnd < thisModPos && modEnd > previousModifierPosition) {
+       previousModifierPosition = modEnd;
+     }
+   }
+
+   for (String entityPhrase : entityPositionMap.keySet( ) ) {
+     Log.info( " testing " + entityPhrase );
+     if (fieldVals.contains( entityPhrase)) {
+       int[] entityPos = entityPositionMap.get( entityPhrase );
+       Log.info( "entity is at " + entityPos[0] + "," + entityPos[1] );
+       Log.info( "mod is at " + thisModPos + " previous mod was " + previousModifierPosition );
+       if (entityPos[1] < thisModPos && entityPos[0] > previousModifierPosition ) {
+         if (entitySet == null) entitySet = new HashSet<>( );
+         Log.info( "adding " + entityPhrase );
+         entitySet.add( entityPhrase );
+       }
+     }
+   }
+
+   return entitySet;
+ }
+
+ // find entities after the current mod pos but before the next modifier
+ /**
+  * Collects the entities of {@code fieldVals} that lie between the given modifier and the
+  * nearest following modifier (or the end of the query).
+  *
+  * @param entityPositionMap entity phrase -> {start, end} token positions
+  * @param modifier          the modifier to look behind
+  * @param usedModifiers     all modifiers found in the query
+  * @param fieldVals         the entity values of the field being considered; may be {@code null}
+  * @return the matching entity phrases, or {@code null} when there are none
+  */
+ private HashSet<String> findFirstEntitiesAfter( HashMap<String,int[]> entityPositionMap, ModifierInstance modifier,
+     ArrayList<ModifierInstance> usedModifiers, ArrayList<String> fieldVals ) {
+   Log.info( "findFirstEntitiesAfter" );
+   if (fieldVals == null) return null;
+
+   HashSet<String> entitySet = null;
+   int nextModifierPosition = Integer.MAX_VALUE;
+   int thisModPos = modifier.modifierPos[1];
+
+   // the closest modifier behind this one bounds the search, whatever the list order is
+   for (ModifierInstance mod : usedModifiers ) {
+     int modStart = mod.modifierPos[0];
+     if (modStart > thisModPos && modStart < nextModifierPosition) {
+       nextModifierPosition = modStart;
+     }
+   }
+
+   for (String entityPhrase : entityPositionMap.keySet( ) ) {
+     Log.info( " testing " + entityPhrase );
+     if (fieldVals.contains( entityPhrase)) {
+       int[] entityPos = entityPositionMap.get( entityPhrase );
+       Log.info( "entity is at " + entityPos[0] + "," + entityPos[1] );
+       Log.info( "mod is at " + thisModPos + " next mod is " + nextModifierPosition );
+       if (entityPos[0] > thisModPos && entityPos[1] < nextModifierPosition ) {
+         if (entitySet == null) entitySet = new HashSet<>( );
+         Log.info( "adding " + entityPhrase );
+         entitySet.add( entityPhrase );
+       }
+     }
+   }
+
+   return entitySet;
+ }
+
+
+ /**
+  * Moves entity values from the {@code fieldNameKey} entry of the field map to the
+  * {@code modifierField} entry. When every value of the entry is in {@code entityValues}
+  * the whole entry is re-keyed; otherwise the entry is split into the moved values and
+  * the remaining ones, and the positions of both entries are recomputed.
+  *
+  * @param fieldNameKey      key of the field map entry to remap
+  * @param entityValues      the entity phrases the modifier applies to
+  * @param modifierField     the field linked to the modifier
+  * @param fieldMap          field-name key -> entity values; modified in place
+  * @param fieldPositionMap  field-name key -> {start, end} positions; modified in place
+  * @param entityPositionMap entity phrase -> {start, end} positions
+  */
+ private void remapEntity( String fieldNameKey, HashSet<String> entityValues, String modifierField,
+     HashMap<String,ArrayList<String>> fieldMap, HashMap<String,int[]> fieldPositionMap, HashMap<String,int[]> entityPositionMap ) {
+   // Remapping a key onto itself changes nothing. Without this check a partial match would
+   // store the matched values and then overwrite them with the remaining ones.
+   if (fieldNameKey.equals( modifierField )) return;
+
+   // find the fieldMap key that contains the fieldName
+   ArrayList<String> fieldVals = fieldMap.get( fieldNameKey );
+   if (fieldVals == null) return;
+
+   ArrayList<String> remaining = new ArrayList<>( );
+   ArrayList<String> modList = new ArrayList<>( );
+   for (String fieldVal : fieldVals ) {
+     if (entityValues.contains( fieldVal )) {
+       modList.add( fieldVal );
+     }
+     else {
+       remaining.add( fieldVal );
+     }
+   }
+
+   if ( remaining.isEmpty( ) ) {
+     // every value matched -- replace the fieldNameKey with the modifierField in the map
+     // NOTE(review): fieldPositionMap is left untouched on this path - confirm that is intended.
+     fieldMap.remove( fieldNameKey );
+     Log.info( "remapping: " + modifierField );
+     for( String val : fieldVals ) { Log.info( " " + val ); }
+     fieldMap.put( modifierField, fieldVals );
+   }
+   else {
+     // partial match - the matched values move to modifierField, the rest stay where they were
+     fieldMap.put( modifierField, modList );
+     fieldPositionMap.put( modifierField, getPosArrayFor( modList, entityPositionMap ) );
+
+     fieldMap.put( fieldNameKey, remaining );
+     fieldPositionMap.put( fieldNameKey, getPosArrayFor( remaining, entityPositionMap ) );
+   }
+ }
+
+ /**
+  * Applies a modifier template rule: when an entity of {@code entity_1_field} is directly
+  * followed by the rule's second entity value, that entity is stored under the rule's
+  * output field and taken out of the field list it came from.
+  *
+  * @param entityPositionMap entity phrase -> {start, end} token positions
+  * @param fieldMap          field-name key -> entity values; modified in place
+  * @param modifierRule      the rule to apply
+  */
+ private void applyModifierTemplateRule( HashMap<String,int[]> entityPositionMap, HashMap<String,ArrayList<String>> fieldMap, ModifierTemplateRule modifierRule ) {
+   Log.info( "applyModifierTemplateRule" );
+   // find entity_1_field - from field map - find entityPosition from values
+   ArrayList<String> firstEntityList = findEntityList( fieldMap, modifierRule.entity_1_field );
+   if (firstEntityList == null) return;
+   String firstFieldList = null;
+   String entityValue = null;
+
+   // none of this depends on the entity being tested, so evaluate it once
+   int[] secondPos = entityPositionMap.get( modifierRule.entity_2_value );
+   boolean ruleApplicable = secondPos != null
+       && findEntityList( fieldMap, modifierRule.entity_2_field ) != null
+       && "_ENTITY_".equals( modifierRule.entity_1_value );
+
+   for (String firstEntity : firstEntityList ) {
+     Log.info( "checking entity: " + firstEntity );
+     int[] firstPos = entityPositionMap.get( firstEntity );
+     // an entity without a recorded position cannot be adjacent to anything
+     if (!ruleApplicable || firstPos == null) continue;
+
+     if (secondPos[0] == firstPos[1] + 1) {
+       Log.info( "'" + firstEntity + "' matches pattern" );
+       entityValue = firstEntity;
+       ArrayList<String> outputList = new ArrayList<>( );
+       outputList.add( firstEntity );
+       firstFieldList = findFieldList( fieldMap, modifierRule.entity_1_field );
+       fieldMap.put( modifierRule.output_field, outputList );
+       break;
+     }
+   }
+
+   if ( firstFieldList == null ) return;
+
+   // remove remapped entity field from field list
+   Log.info( "removing " + modifierRule.entity_1_field + " from " + firstFieldList );
+   String[] fields = firstFieldList.split( "\\|" );
+   StringBuilder stb = new StringBuilder( );
+   for (int i = 0; i < fields.length; i++) {
+     if (fields[i].equals( modifierRule.entity_1_field) == false ) {
+       if (stb.length() > 0) stb.append( "," );
+       stb.append( fields[i] );
+     }
+   }
+
+   // remove entityValue from fieldMap arrayList
+   if (stb.length() > 0) {
+     Log.info( "new field list: " + stb.toString( ) );
+     ArrayList<String> remainder = new ArrayList<>( );
+     for (String firstEntity : firstEntityList ) {
+       if (firstEntity.equals( entityValue ) == false ) {
+         Log.info( "adding remaining value " + firstEntity );
+         remainder.add( firstEntity );
+       }
+     }
+
+     // drop the old key first: when the new key equals the old one, removing afterwards
+     // would delete the remainder that was just stored
+     Log.info( "removing field: " + firstFieldList );
+     fieldMap.remove( firstFieldList );
+
+     if (remainder.size( ) > 0) {
+       Log.info( "remainder fields: " + stb.toString( ) );
+       fieldMap.put( stb.toString( ), remainder );
+     }
+   }
+ }
+
+ /**
+  * Returns the entity values of the first field map key whose text contains {@code entityField}.
+  *
+  * @param fieldMap    field-name key -> entity values
+  * @param entityField field name to look for inside the keys (substring match)
+  * @return the values stored under the first matching key, or {@code null} when no key matches
+  */
+ private ArrayList<String> findEntityList( HashMap<String,ArrayList<String>> fieldMap, String entityField ) {
+   for (String fieldList : fieldMap.keySet() ) {
+     if (fieldList.contains( entityField )) {
+       return fieldMap.get( fieldList );
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Returns the first field map key whose text contains {@code entityField}.
+  *
+  * @param fieldMap    field-name key -> entity values
+  * @param entityField field name to look for inside the keys (substring match)
+  * @return the first matching key, or {@code null} when no key matches
+  */
+ private String findFieldList( HashMap<String,ArrayList<String>> fieldMap, String entityField ) {
+   for (String fieldList : fieldMap.keySet() ) {
+     if (fieldList.contains( entityField )) {
+       return fieldList;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Computes the token span that covers all given entities.
+  *
+  * @param entities          entity phrases to cover
+  * @param entityPositionMap entity phrase -> {start, end} token positions; never modified
+  * @return a new {start, end} array spanning every entity that has a position, or
+  *         {@code null} when none of them has one
+  */
+ private int[] getPosArrayFor( ArrayList<String> entities, HashMap<String,int[]> entityPositionMap ) {
+   int[] newPosArray = null;
+   for ( String entity : entities ) {
+     int[] entityPos = entityPositionMap.get( entity );
+     if (entityPos == null) continue;
+
+     if (newPosArray == null) {
+       // copy: widening the span must not alter the position stored for the entity itself
+       newPosArray = new int[] { entityPos[0], entityPos[1] };
+     }
+     else {
+       newPosArray[0] = Math.min( newPosArray[0], entityPos[0] );
+       newPosArray[1] = Math.max( newPosArray[1], entityPos[1] );
+     }
+   }
+
+   return newPosArray;
+ }
+
+ /** A configured verb modifier: the phrase to look for and what it maps to. */
+ private class ModifierDefinition
+ {
+   String modifierPhrase; // the phrase that will modify like 'was in'
+   ArrayList<String> modifierFields; // the field(s) that will be used like 'memberOfGroup_ss,groupMembers_ss'
+   String[] modTokens; // the tokens that are matched against the query tokens
+   HashMap<String,String> filterFields; // filter field name -> value, added to the field map after a remap
+   ModifierTemplateRule templateRule; // optional template rule, may be null
+ }
+
+ /** One occurrence of a verb modifier inside a query. */
+ private class ModifierInstance
+ {
+   String modifierPhrase;
+   ArrayList<String> modifierFields;
+   int[] modifierPos; // {first token index, last token index} of the phrase in the query
+   HashMap<String,String> filterFields; // filter field name -> value
+   ModifierTemplateRule templateRule; // optional template rule, may be null
+ }
+
+ // original_performer_s:_ENTITY_,recording_type_ss:Song=>original_performer_s:_ENTITY_
+ private class ModifierTemplateRule
+ {
+ String entity_1_field;
+ String entity_1_value;
+
+ String entity_2_field;
+ String entity_2_value;
+
+ String output_field;
+ String output_value;
+
+ ModifierTemplateRule( String templatePattern ) {
+ String leftSide = new String(templatePattern.substring( 0, templatePattern.indexOf( "=>" )));
+ String rightSide = new String(templatePattern.substring( templatePattern.indexOf( "=>" ) + 2 ));
+
+ String entity_1 = new String( leftSide.substring( 0, leftSide.indexOf( "," )));
+ String entity_2 = new String( leftSide.substring( leftSide.indexOf( "," ) + 1 ));
+
+ entity_1_field = new String( entity_1.substring( 0, entity_1.indexOf( ":" )));
+ entity_1_value = new String( entity_1.substring( entity_1.indexOf( ":" ) + 1 ));
+ entity_2_field = new String( entity_2.substring( 0, entity_2.indexOf( ":" )));
+ entity_2_value = new String( entity_2.substring( entity_2.indexOf( ":" ) + 1 ));
+
+ output_field = new String( rightSide.substring( 0, rightSide.indexOf( ":" )));
+ output_value = new String( rightSide.substring( rightSide.indexOf( ":" ) + 1 ));
+
+ Log.info( "entity_1_field: " + entity_1_field + " entity_1_value: " + entity_1_value );
+ Log.info( "entity_2_field: " + entity_2_field + " entity_2_value: " + entity_2_value );
+ Log.info( "output_field: " + output_field + " output_value: " + output_value );
+ }
+ }
+
+}
diff --git a/solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java b/solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java
new file mode 100644
index 0000000..1cbba6a
--- /dev/null
+++ b/solr6.x/src/test/com/lucidworks/solr/handler/component/DistributedQueryAutoFilteringTest.java
@@ -0,0 +1,75 @@
+package org.apache.solr.handler.component;
+
+import org.apache.solr.BaseDistributedSearchTestCase;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+
+public class DistributedQueryAutoFilteringTest extends BaseDistributedSearchTestCase {
+
+ public DistributedQueryAutoFilteringTest() {
+ stress = 0;
+ }
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ initCore( "solrconfig-autofilter.xml", "schema-autofilter.xml" );
+ }
+
+ @Test
+ @ShardsFixed(num = 3)
+ public void test() throws Exception {
+ del("*:*");
+
+ index( id, "1", "color", "red", "product", "shoes" );
+ index( id, "2", "color", "red", "product", "socks" );
+ index( id, "3", "color", "brown", "product", "socks" );
+ index( id, "4", "color", "green", "brand", "red lion", "product", "socks" );
+ index( id, "5", "color", "blue", "brand", "red lion", "product", "socks" );
+ index( id, "6", "color", "blue", "brand", "red dragon", "product", "socks" );
+ index( id, "7", "brand", "red baron", "product", "pizza" );
+ index( id, "8", "brand", "red label", "product", "whiskey" );
+ index( id, "9", "brand", "red light", "product", "smoke detector" );
+ index( id, "10", "brand", "red star", "product", "yeast" );
+ index( id, "11", "brand", "gallo", "product", "red wine" );
+ index( id, "12", "brand", "heinz", "product", "red wine vinegar" );
+ index( id, "13", "brand", "dole", "product", "red grapes" );
+ index( id, "14", "brand", "acme", "product", "red brick" );
+ commit();
+
+ handle.put("distrib", SKIP);
+ handle.put("shards", SKIP);
+
+ QueryResponse rsp;
+ rsp = query( CommonParams.Q, "red lion socks", "fl", "id", "rows", 20, "qt", "/select", "sort", "id asc" );
+ assertFieldValues(rsp.getResults(), id, "1", "10", "11", "12", "13", "14", "2", "3", "4", "5", "6", "7", "8", "9" );
+
+ rsp = query( CommonParams.Q, "red lion socks", "fl", "id", "qt", "/autofilter", "sort", "id asc" );
+ assertFieldValues(rsp.getResults(), id, "4", "5" );
+
+ rsp = query( CommonParams.Q, "blue red lion socks", "fl", "id", "qt", "/autofilter" );
+ assertFieldValues(rsp.getResults(), id, "5" );
+
+ rsp = query( CommonParams.Q, "red wine", "fl", "id", "qt", "/autofilter" );
+ assertFieldValues(rsp.getResults(), id, "11" );
+
+ rsp = query( CommonParams.Q, "red wine vinegar", "fl", "id", "qt", "/autofilter" );
+ assertFieldValues(rsp.getResults(), id, "12" );
+ }
+
+ @Override
+ protected QueryResponse query(Object... q) throws Exception {
+
+ final ModifiableSolrParams params = new ModifiableSolrParams();
+
+ for (int i = 0; i < q.length; i += 2) {
+ params.add(q[i].toString(), q[i + 1].toString());
+ }
+ params.set("shards", getShardsString());
+
+ return queryServer(params);
+ }
+}
diff --git a/solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java b/solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java
new file mode 100644
index 0000000..e5d3db4
--- /dev/null
+++ b/solr6.x/src/test/com/lucidworks/solr/handler/component/QueryAutoFilteringComponentTest.java
@@ -0,0 +1,486 @@
+package org.apache.solr.handler.component;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.CommonParams;
+
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class QueryAutoFilteringComponentTest extends SolrTestCaseJ4 {
+
+ /**
+  * Initializes the test core from {@code solrconfig-autofilter.xml} and
+  * {@code schema-autofilter.xml} before the tests of this class run.
+  *
+  * NOTE(review): among the test resources added alongside this class only
+  * {@code schema-autofilter.xml.bak} and {@code managed-schema} are visible;
+  * confirm that {@code schema-autofilter.xml} is actually available.
+  */
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig-autofilter.xml","schema-autofilter.xml");
+ }
+
+ /** Per-test setup hook; it only delegates to the superclass and adds nothing of its own. */
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ }
+
+ /** Per-test teardown hook; it only delegates to the superclass and adds nothing of its own. */
+ @Override
+ public void tearDown() throws Exception {
+ super.tearDown();
+ }
+
+ /**
+  * Compares the plain {@code /select} handler with the {@code /autofilter}
+  * handler on six documents carrying color, brand and product values.
+  *
+  * The same free-text queries are sent to both handlers: {@code /select} is
+  * expected to return the broad result set, {@code /autofilter} the narrowed one.
+  */
+ @Test
+ public void testColors( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "Red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "brown", "product", "socks" ));
+ assertU(adoc("id", "4", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "5", "color", "blue", "brand", "green dragon", "product", "socks" ));
+ assertU(adoc("id", "6", "color", "black", "brand", "buster brown", "product", "shoes" ));
+ assertU(commit());
+
+ // baseline: /select is expected to return five of the six documents (all but id 6)
+ assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/select" )
+ , "//*[@numFound='5']"
+ , "//doc[./str[@name='id']='4']"
+ , "//doc[./str[@name='id']='2']"
+ , "//doc[./str[@name='id']='1']"
+ , "//doc[./str[@name='id']='3']"
+ , "//doc[./str[@name='id']='5']");
+
+ assertQ("", req(CommonParams.Q, "red socks", CommonParams.QT, "/select" )
+ , "//*[@numFound='5']"
+ , "//doc[./str[@name='id']='2']"
+ , "//doc[./str[@name='id']='4']"
+ , "//doc[./str[@name='id']='1']"
+ , "//doc[./str[@name='id']='3']"
+ , "//doc[./str[@name='id']='5']");
+
+ // /autofilter: only the document whose brand is "red lion" and product is "socks" (id 4)
+ assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='4']" );
+
+ // /autofilter: only the document indexed with color "Red" and product "socks" (id 2)
+ assertQ("", req(CommonParams.Q, "red socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='2']" );
+
+ assertQ("", req(CommonParams.Q, "brown shoes", CommonParams.QT, "/select" )
+ , "//*[@numFound='3']"
+ , "//doc[./str[@name='id']='1']"
+ , "//doc[./str[@name='id']='3']"
+ , "//doc[./str[@name='id']='6']");
+
+ // no indexed document combines color "brown" with product "shoes", so nothing is expected
+ assertQ("", req(CommonParams.Q, "brown shoes", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='0']" );
+
+ }
+
+ /**
+  * Checks that {@code /autofilter} resolves query terms through synonyms.
+  *
+  * The queries use words that are not indexed ("couch", "rouge", "lounge chair",
+  * "crimson", "day bed") and are expected to match the documents indexed with
+  * "sofa", "red" and "chaise lounge". These pairings correspond to the groups
+  * listed in synonyms-autofilter.txt.
+  */
+ @Test
+ public void testSynonyms( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "chaise lounge" ));
+ assertU(adoc("id", "2", "color", "red", "product", "sofa" ));
+ assertU(adoc("id", "3", "color", "red", "product", "chair" ));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "red couch", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='2']" );
+
+ assertQ("", req(CommonParams.Q, "rouge sofa", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='2']" );
+
+ // "lounge chair" must resolve to the "chaise lounge" document (id 1), not to "chair" (id 3)
+ assertQ("", req(CommonParams.Q, "red lounge chair", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ assertQ("", req(CommonParams.Q, "rouge lounge chair", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ assertQ("", req(CommonParams.Q, "crimson day bed", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+ }
+
+ /**
+  * Checks that {@code /autofilter} matches brand values regardless of case:
+  * the brand is indexed as "Red Lion" and must be found both by the
+  * lower-case query and by the capitalized query.
+  */
+ @Test
+ public void testCaseInsensitive( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "brown", "product", "socks" ));
+ assertU(adoc("id", "4", "color", "green", "brand", "Red Lion", "product", "socks"));
+ assertU(adoc("id", "5", "color", "blue", "brand", "Green Dragon", "product", "socks" ));
+ assertU(adoc("id", "6", "color", "black", "brand", "Buster Brown", "product", "shoes" ));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='4']" );
+
+ assertQ("", req(CommonParams.Q, "Red Lion socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='4']" );
+ }
+
+ /**
+  * Combines synonym lookup with case differences: the product is indexed as
+  * "Chaise Lounge" and the queries use differently-cased synonym terms
+  * ("lounge chair", "scarlet Lounge Chair", "Crimson Couch").
+  */
+ @Test
+ public void testSynonymsCaseInsensitive( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "Chaise Lounge" ));
+ assertU(adoc("id", "2", "color", "red", "product", "sofa" ));
+ assertU(adoc("id", "3", "color", "red", "product", "chair" ));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "red lounge chair", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ assertQ("", req(CommonParams.Q, "scarlet Lounge Chair", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ assertQ("", req(CommonParams.Q, "Crimson Couch", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='2']" );
+
+ }
+
+
+ /**
+  * Checks that plural query terms still select the singular indexed value:
+  * "shirts" must find the "shirt" document, and "couches" must find the
+  * "sofa" document (plural form combined with the couch/sofa synonym pair).
+  */
+ @Test
+ public void testStemming( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shirt" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "red", "product", "pants" ));
+ assertU(adoc("id", "4", "color", "red", "product", "sofa" ));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "red shirts", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ assertQ("", req(CommonParams.Q, "red shirt", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ assertQ("", req(CommonParams.Q, "red couches", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='4']" );
+ }
+
+ /**
+  * Exercises the {@code mt} request parameter of {@code /autofilter}
+  * (presumably the minimum number of query tokens needed before filtering
+  * is applied; confirm against the component).
+  *
+  * Without {@code mt} the single-token query "red" returns only the two
+  * documents whose color is red; with {@code mt=2} the same query returns all
+  * four documents, while the two-token query "red shoes" is still narrowed.
+  */
+ @Test
+ public void testMinTokens( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "4", "brand", "red label", "product", "whiskey"));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "red", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='2']"
+ , "//doc[./str[@name='id']='1']"
+ , "//doc[./str[@name='id']='2']" );
+
+ assertQ("", req(CommonParams.Q, "red", CommonParams.QT, "/autofilter", "mt", "2" )
+ , "//*[@numFound='4']" );
+
+ assertQ("", req(CommonParams.Q, "red shoes", CommonParams.QT, "/autofilter", "mt", "2" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+ }
+
+ /**
+  * Queries the {@code /autofilterBQ} handler and expects all fourteen indexed
+  * documents to be returned rather than a narrowed subset.
+  *
+  * NOTE(review): the XPath expressions only assert that each id is present in
+  * the response; they do not assert document positions, so the ranking effect
+  * of the boost is not verified by this test.
+  */
+ @Test
+ public void testBoostFilter( ) {
+ // use autofilter handler configured with boostFactor
+ clearIndex();
+ assertU(commit());
+ assertU(adoc( "id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc( "id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc( "id", "3", "color", "brown", "product", "socks" ));
+ assertU(adoc( "id", "4", "color", "green", "brand", "red lion", "product", "socks" ));
+ assertU(adoc( "id", "5", "color", "blue", "brand", "red lion", "product", "socks" ));
+ assertU(adoc( "id", "6", "color", "blue", "brand", "red dragon", "product", "socks" ));
+ assertU(adoc( "id", "7", "brand", "red baron", "product", "pizza" ));
+ assertU(adoc( "id", "8", "brand", "red label", "product", "whiskey" ));
+ assertU(adoc( "id", "9", "brand", "red light", "product", "smoke detector" ));
+ assertU(adoc( "id", "10", "brand", "red star", "product", "yeast" ));
+ assertU(adoc( "id", "11", "brand", "gallo", "product", "red wine" ));
+ assertU(adoc( "id", "12", "brand", "heinz", "product", "red wine vinegar" ));
+ assertU(adoc( "id", "13", "brand", "dole", "product", "red grapes" ));
+ assertU(adoc( "id", "14", "brand", "acme", "product", "red brick" ));
+ assertU(commit());
+
+ // rows=20 so that all fourteen documents fit into a single response page
+ assertQ("", req(CommonParams.Q, "blue red dragon socks", CommonParams.QT, "/autofilterBQ", "rows", "20" )
+ , "//*[@numFound='14']"
+ , "//doc[./str[@name='id']='6']"
+ , "//doc[./str[@name='id']='5']"
+ , "//doc[./str[@name='id']='2']"
+ , "//doc[./str[@name='id']='4']"
+ , "//doc[./str[@name='id']='3']"
+ , "//doc[./str[@name='id']='1']"
+ , "//doc[./str[@name='id']='7']"
+ , "//doc[./str[@name='id']='8']"
+ , "//doc[./str[@name='id']='9']"
+ , "//doc[./str[@name='id']='10']"
+ , "//doc[./str[@name='id']='11']"
+ , "//doc[./str[@name='id']='12']"
+ , "//doc[./str[@name='id']='13']"
+ , "//doc[./str[@name='id']='14']" );
+ }
+
+ /**
+  * Compares {@code /autofilter} with {@code /autofilterEX} for the query "1":
+  * the former is expected to return the document with id 1, the latter
+  * (configured to exclude the {@code id} field) is expected to return nothing.
+  */
+ @Test
+ public void testExcludeFields( ) {
+ // use autofilter handler configured with excludeFields
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "4", "brand", "red label", "product", "whiskey"));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "1", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='1']" );
+
+ // removes 'id' as an autofilter field
+ assertQ("", req(CommonParams.Q, "1", CommonParams.QT, "/autofilterEX" )
+ , "//*[@numFound='0']" );
+
+ }
+
+ /**
+  * Checks stop-word handling of the {@code /autofilterSW} handler.
+  *
+  * A stop word between recognized phrases ("by") must not prevent the match,
+  * while stop words inside an indexed value ("of", "the" in
+  * "fruit of the loom") must be kept so the brand is still found.
+  */
+ @Test
+ public void testStopWords( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "4", "color", "red", "brand", "calvin klein", "product", "underwear"));
+ assertU(adoc("id", "5", "color", "red", "brand", "fruit of the loom", "product", "underwear"));
+ assertU(commit());
+
+ // reference case without any stop word, sent to the plain /autofilter handler
+ assertQ("", req(CommonParams.Q, "red calvin klein underwear", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='4']" );
+
+ // stop words should be removed: 'by' is not part of a brand name phrase
+ assertQ("", req(CommonParams.Q, "red underwear by calvin klein", CommonParams.QT, "/autofilterSW" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='4']");
+
+ // stop words should not be removed from within a matching phrase
+ assertQ("", req(CommonParams.Q, "red fruit of the loom underwear", CommonParams.QT, "/autofilterSW" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='5']");
+ }
+
+ /**
+  * Checks that the order of the recognized phrases in the query does not
+  * matter: "red lion socks" and "socks red lion" must both return only the
+  * document with brand "red lion" and product "socks" (id 3).
+  */
+ @Test
+ public void testRandomOrder( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "4", "brand", "red label", "product", "whiskey"));
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "red lion socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='3']" );
+
+ assertQ("", req(CommonParams.Q, "socks red lion", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='3']" );
+ }
+
+ /**
+  * Sends queries containing a term that matches no indexed field value
+  * ("tiger", "blahblah") and checks that the remaining recognized terms
+  * still narrow the result as described by the inline comments.
+  */
+ @Test
+ public void testBadQueries( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "4", "brand", "red label", "product", "whiskey"));
+ assertU(adoc("id", "5", "color", "blue", "brand", "green dragon", "product", "socks"));
+ assertU(commit());
+
+ // green red tiger socks -> tiger (color:(green OR red) AND product:socks)
+ assertQ("", req(CommonParams.Q, "green red tiger socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='2']"
+ , "//doc[./str[@name='id']='2']"
+ , "//doc[./str[@name='id']='3']");
+
+ // green red lion socks blahblah -> blahblah (color:green AND brand:"red lion" AND product:socks)
+ assertQ("", req(CommonParams.Q, "green red lion socks blahblah", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='3']" );
+ }
+
+ /**
+  * Checks a query naming two values of the same field ("red green") plus a
+  * product: both the red socks (id 2) and the green socks (id 3) are expected.
+  */
+ @Test
+ public void testMultipleFieldValues( ) {
+ clearIndex();
+ assertU(commit());
+ assertU(adoc("id", "1", "color", "red", "product", "shoes" ));
+ assertU(adoc("id", "2", "color", "red", "product", "socks" ));
+ assertU(adoc("id", "3", "color", "green", "brand", "red lion", "product", "socks"));
+ assertU(adoc("id", "4", "brand", "red label", "product", "whiskey"));
+ assertU(adoc("id", "5", "color", "blue", "brand", "green dragon", "product", "socks"));
+ assertU(commit());
+
+ // should create filter query: color:(red OR green) product:socks
+ assertQ("", req(CommonParams.Q, "red green socks", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='2']"
+ , "//doc[./str[@name='id']='2']"
+ , "//doc[./str[@name='id']='3']");
+ }
+
+ /**
+  * Indexes two documents whose {@code full_name} and {@code text} values hold
+  * the same two words in opposite order, then checks that the single term
+  * "Thomas" finds both while the phrase "Thomas Tucker" finds only id 2.
+  */
+ @Test
+ public void testMultipleFieldNames( ) {
+ clearIndex();
+ assertU(commit());
+ // earlier variant of the fixture with separate first_name / last_name fields, kept for reference
+ //assertU(adoc("id", "1", "first_name", "Tucker", "last_name", "Thomas", "full_name", "Tucker Thomas"));
+ //assertU(adoc("id", "2", "first_name", "Thomas", "last_name", "Tucker", "full_name", "Thomas Tucker"));
+ assertU(adoc("id", "1", "full_name", "Tucker Thomas", "text", "Tucker Thomas"));
+ assertU(adoc("id", "2", "full_name", "Thomas Tucker", "text", "Thomas Tucker"));
+ assertU(commit());
+
+ // originally: should create filter query (first_name:thomas OR last_name:thomas)
+ // NOTE(review): the documents indexed above carry full_name and text, not
+ // first_name / last_name - confirm which fields the filter is built from here
+ assertQ("", req(CommonParams.Q, "Thomas", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='2']" );
+
+ // uses longer contiguous phrase for full_name - creates fq=full_name:"thomas tucker"
+ // this breaks now because of "fix" for testAmbiguousFields
+ // NOTE(review): the line above reports this assertion as broken - confirm whether it currently passes
+ assertQ("", req(CommonParams.Q, "Thomas Tucker", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']"
+ , "//doc[./str[@name='id']='2']");
+ }
+
+ /**
+  * Indexes the {@code multiValueDocs} fixture and checks how two values of
+  * one field are combined: "fast stylish" and "fast and stylish" are expected
+  * to return one document, "fast or stylish" three.
+  */
+ @Test
+ public void testMultiValuedField( ) {
+ clearIndex();
+ assertU(commit());
+ assertU( multiValueDocs );
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "fast stylish", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']" );
+
+ assertQ("", req(CommonParams.Q, "fast and stylish", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']" );
+
+ assertQ("", req(CommonParams.Q, "fast or stylish", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='3']" );
+ }
+
+ /**
+  * Indexes the {@code whiteAmbiguousDocs} fixture, in which "White Linen"
+  * occurs both as one value and as two separate values, and checks the hit
+  * counts as further query terms are added: 3 for "white linen", 1 with
+  * "perfume", 2 with "shirt" and 1 with "mens ... shirt".
+  */
+ @Test
+ public void testAmbiguousFields( ) {
+ clearIndex();
+ assertU(commit());
+ assertU( whiteAmbiguousDocs );
+ assertU(commit());
+
+ // should create (brand_s:"white linen" OR (color:white AND material_s:linen))
+ assertQ("", req(CommonParams.Q, "white linen", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='3']" );
+
+ assertQ("", req(CommonParams.Q, "white linen perfume", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']" );
+
+ assertQ("", req(CommonParams.Q, "white linen shirt", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='2']" );
+
+ assertQ("", req(CommonParams.Q, "mens white linen shirt", CommonParams.QT, "/autofilter" )
+ , "//*[@numFound='1']" );
+
+ }
+
+
+ /**
+  * Indexes the {@code musicDocs} fixture and queries the
+  * {@code /autofilterVRB} handler with verbs after the artist name
+  * ("wrote", "performed", "covered"); each verb is expected to change the
+  * number of matching songs (3 without a verb, then 2, 2 and 1).
+  */
+ @Test
+ public void testVerbMappings( ) {
+ clearIndex();
+ assertU(commit());
+ assertU( musicDocs );
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "Bob Dylan Songs", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='3']" );
+
+ assertQ("", req(CommonParams.Q, "Songs Bob Dylan wrote", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='2']" );
+
+ assertQ("", req(CommonParams.Q, "Songs Bob Dylan performed", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='2']" );
+
+ assertQ("", req(CommonParams.Q, "Songs Bob Dylan covered", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='1']" );
+
+ }
+
+ /**
+  * Indexes the {@code beatlesDocs} fixture and queries the
+  * {@code /autofilterVRB} handler with "Beatles" combined with "Songs",
+  * "covered" and a second artist name, in different word orders; the
+  * expected hit counts are 3, 2, 1 and 1.
+  */
+ @Test
+ public void testNounPhraseMappings( ) {
+ clearIndex();
+ assertU(commit());
+ assertU( beatlesDocs );
+ assertU(commit());
+
+ assertQ("", req(CommonParams.Q, "Beatles Songs", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='3']" );
+
+ assertQ("", req(CommonParams.Q, "Beatles Songs covered", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='2']" );
+
+ assertQ("", req(CommonParams.Q, "Beatles Songs covered by Joan Baez", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='1']" );
+
+ assertQ("", req(CommonParams.Q, "Songs Beatles covered", CommonParams.QT, "/autofilterVRB" )
+ , "//*[@numFound='1']" );
+ }
+
+
+ // Update message handed to assertU in testMultiValuedField: three documents (ids 1-3)
+ // carrying the values fast/stylish, fast/powerful and stylish.
+ // NOTE(review): the literal shows only ids and values with no XML markup around them -
+ // confirm the add/doc/field tags were not lost from this string.
+ private static String multiValueDocs = "1fast"
+ + "stylish"
+ + "2fast"
+ + "powerful"
+ + "3stylish";
+
+ // Update message handed to assertU in testAmbiguousFields: one perfume document where
+ // "White Linen" is a single value, and two dress-shirt documents where "White" and
+ // "Linen" are separate values.
+ // NOTE(review): the literal shows only ids and values with no XML markup around them -
+ // confirm the add/doc/field tags were not lost from this string.
+ private static String whiteAmbiguousDocs = "1perfume"
+ + "fragrencesWhite Linen"
+ + "womens"
+ + "2dress shirt"
+ + "shirtWhite"
+ + "Linenwomens"
+ + "3dress shirt"
+ + "shirtWhite"
+ + "Linenmens";
+
+ // Update message handed to assertU in testVerbMappings: three song documents, each
+ // listing a title, two artist names and the markers Song plus Cover or Original.
+ // NOTE(review): the literal shows only ids and values with no XML markup around them, so
+ // which field each artist name belongs to cannot be read from here - confirm the
+ // add/doc/field tags were not lost from this string.
+ private static String musicDocs = "1All Along the Watchtower"
+ + "Bob DylanJimi Hendrix"
+ + "SongCover"
+ + "2The Mighty Quinn"
+ + "Bob DylanBob Dylan"
+ + "SongOriginal"
+ + "3This Land is Your Land"
+ + "Woody GuthrieBob Dylan"
+ + "SongCover";
+
+ // Update message handed to assertU in testNounPhraseMappings: three song documents, each
+ // listing a title, two artist names (one of them "Beatles") and the markers Cover and Song.
+ // NOTE(review): the literal shows only ids and values with no XML markup around them, so
+ // which field each artist name belongs to cannot be read from here - confirm the
+ // add/doc/field tags were not lost from this string.
+ private static String beatlesDocs = "1Let It Be"
+ + "Beatles"
+ + "Joan Baez"
+ + "Cover"
+ + "Song"
+ + "2Something"
+ + "Beatles"
+ + "Frank Sinatra"
+ + "Cover"
+ + "Song"
+ + "3Honey Don't"
+ + "Carl Perkins"
+ + "Beatles"
+ + "Cover"
+ + "Song";
+
+}
\ No newline at end of file
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/currency.xml b/solr6.x/src/test/resources/solr/collection1/conf/currency.xml
new file mode 100644
index 0000000..6a12b32
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/currency.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/managed-schema b/solr6.x/src/test/resources/solr/collection1/conf/managed-schema
new file mode 100644
index 0000000..f16a44b
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/managed-schema
@@ -0,0 +1,126 @@
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml.bak b/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml.bak
new file mode 100644
index 0000000..51269dc
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/schema-autofilter.xml.bak
@@ -0,0 +1,273 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml
new file mode 100644
index 0000000..785a177
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig-autofilter.xml
@@ -0,0 +1,242 @@
+
+
+
+
+
+ ${tests.luceneMatchVersion:LATEST}
+
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1024
+
+
+
+
+
+
+
+
+
+ true
+
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+ explicit
+ 10
+ text
+
+
+
+
+
+
+ explicit
+ text
+
+
+ autofilter
+
+
+
+
+ synonyms-autofilter.txt
+
+
+
+
+ explicit
+ text
+
+
+ autofilterSW
+
+
+
+
+ stopwords.txt
+
+
+
+
+
+ explicit
+ text
+
+
+ autofilterBQ
+
+
+
+
+ 100
+
+
+
+
+
+ explicit
+ text
+ 2
+
+
+ autofilter
+
+
+
+
+
+
+ explicit
+ text
+
+
+ autofilterEX
+
+
+
+
+
+ id
+
+
+
+
+
+ explicit
+ text
+
+
+ autofilterVRB
+
+
+
+
+
+ written,wrote,composed:composer_s
+ performed,played,sang,recorded:performer_s
+ covered,covers:performer_s|version_s:Cover|original_performer_s:_ENTITY_,recording_type_s:Song=>original_performer_s:_ENTITY_
+
+
+
+
+
+
+
+
+ termsComp
+
+
+
+
+
+
+
+ max-age=30, public
+
+
+
+
+ solr
+ solrconfig.xml schema.xml admin-extra.html
+
+
+
+ prefix-${solr.test.sys.prop2}-suffix
+
+
+
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
new file mode 100644
index 0000000..13f8214
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -0,0 +1,47 @@
+
+
+
+
+
+
+
+
+ ${useCompoundFile:false}
+
+ ${solr.tests.maxBufferedDocs}
+ ${solr.tests.maxIndexingThreads:8}
+ ${solr.tests.ramBufferSizeMB}
+
+
+
+ 1000
+ 10000
+
+
+ ${solr.tests.lockType:single}
+
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt b/solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt
new file mode 100644
index 0000000..b5824da
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+#Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt b/solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt
new file mode 100644
index 0000000..367e7a6
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/synonyms-autofilter.txt
@@ -0,0 +1,17 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+chaise lounge,lounge chair,daybed,day bed
+red,rouge,crimson,scarlet
+couch,sofa
+
diff --git a/solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt b/solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt
new file mode 100644
index 0000000..b0e31cb
--- /dev/null
+++ b/solr6.x/src/test/resources/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,31 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+