diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-tagger.xml b/solr/core/src/test-files/solr/collection1/conf/schema-tagger.xml
index 051cd10c7a5..976a3d1bb2d 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-tagger.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-tagger.xml
@@ -86,7 +86,7 @@
-
+
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml
index c55d86bfbf4..06f99a28d42 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml
@@ -205,7 +205,7 @@
-
@@ -213,7 +213,7 @@
-
diff --git a/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilterFactory.java b/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterGraphFilterFactory.java
similarity index 84%
rename from solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilterFactory.java
rename to solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterGraphFilterFactory.java
index 61d1bb5c4d7..2919197e4e6 100644
--- a/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterFilterFactory.java
+++ b/solr/core/src/test/org/apache/solr/analysis/TestWordDelimiterGraphFilterFactory.java
@@ -21,7 +21,7 @@
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.Version;
 import org.apache.solr.SolrTestCaseJ4;
@@ -30,10 +30,10 @@
 import org.junit.Test;
 
 /**
- * New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest
+ * New WordDelimiterGraphFilter tests... most of the tests are in ConvertedLegacyTest
  */
 // TODO: add a low-level test for this factory
-public class TestWordDelimiterFilterFactory extends SolrTestCaseJ4 {
+public class TestWordDelimiterGraphFilterFactory extends SolrTestCaseJ4 {
 
   @BeforeClass
   public static void beforeClass() throws Exception {
@@ -123,24 +123,6 @@ public void testPreserveOrignalTrue() {
     clearIndex();
   }
 
-  /*
-  public void testPerformance() throws IOException {
-    String s = "now is the time-for all good men to come to-the aid of their country.";
-    Token tok = new Token();
-    long start = System.currentTimeMillis();
-    int ret=0;
-    for (int i=0; i<1000000; i++) {
-      StringReader r = new StringReader(s);
-      TokenStream ts = new WhitespaceTokenizer(r);
-      ts = new WordDelimiterFilter(ts, 1,1,1,1,0);
-
-      while (ts.next(tok) != null) ret++;
-    }
-
-    System.out.println("ret="+ret+" time="+(System.currentTimeMillis()-start));
-  }
-  ***/
-
   @Test
   public void testAlphaNumericWords(){
     assertU(adoc("id", "68","numericsubword","Java/J2SE"));
@@ -207,16 +189,16 @@ public void testCustomTypes() throws Exception {
     args.put("splitOnCaseChange", "1");
 
     /* default behavior */
-    WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(args);
+    WordDelimiterGraphFilterFactory factoryDefault = new WordDelimiterGraphFilterFactory(args);
     factoryDefault.inform(loader);
 
     TokenStream ts = factoryDefault.create(whitespaceMockTokenizer(testText));
     BaseTokenStreamTestCase.assertTokenStreamContents(ts,
-        new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" });
+        new String[] { "I", "borrowed", "540000", "5", "400", "00", "at", "25", "interestrate", "interest", "rate" });
 
     ts = factoryDefault.create(whitespaceMockTokenizer("foo\u200Dbar"));
     BaseTokenStreamTestCase.assertTokenStreamContents(ts,
-        new String[] { "foo", "foobar", "bar" });
+        new String[] { "foobar", "foo", "bar" });
 
     /* custom behavior */
@@ -230,12 +212,12 @@ public void testCustomTypes() throws Exception {
     args.put("catenateAll", "0");
     args.put("splitOnCaseChange", "1");
     args.put("types", "wdftypes.txt");
-    WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args);
+    WordDelimiterGraphFilterFactory factoryCustom = new WordDelimiterGraphFilterFactory(args);
     factoryCustom.inform(loader);
 
     ts = factoryCustom.create(whitespaceMockTokenizer(testText));
     BaseTokenStreamTestCase.assertTokenStreamContents(ts,
-        new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" });
+        new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interestrate", "interest", "rate" });
 
     /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
     ts = factoryCustom.create(whitespaceMockTokenizer("foo\u200Dbar"));
diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
index 31aac34f121..a3e3aa7eac6 100644
--- a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
@@ -160,7 +160,7 @@ public void testCollationWithHypens() throws Exception
       List<String> collations = collationHolder.getAll("collation");
       assertTrue(collations.size()==1);
       String collation = collations.iterator().next();
-      assertTrue("Incorrect collation: " + collation,"lowerfilt:(hyphenated-word)".equals(collation));
+      assertTrue("Incorrect collation: " + collation,"lowerfilt:(hyphenword)".equals(collation));
     }
 
     params.remove(CommonParams.Q);
@@ -180,7 +180,7 @@ public void testCollationWithHypens() throws Exception
       List<String> collations = collationHolder.getAll("collation");
       assertTrue(collations.size()==1);
       String collation = collations.iterator().next();
-      assertTrue("Incorrect collation: " + collation,"hyphenated-word".equals(collation));
+      assertTrue("Incorrect collation: " + collation,"hyphenword".equals(collation));
     }
   }
diff --git a/solr/solr-ref-guide/src/filters.adoc b/solr/solr-ref-guide/src/filters.adoc
index 83a153e3450..c37aac96dfb 100644
--- a/solr/solr-ref-guide/src/filters.adoc
+++ b/solr/solr-ref-guide/src/filters.adoc
@@ -2875,20 +2875,6 @@ If *false*, or undefined, the file defined in `types` is used as a blacklist.
 ====
 --
 
-== Word Delimiter Filter
-
-This filter splits tokens at word delimiters.
-
-.Word Delimiter Filter has been Deprecated
-[WARNING]
-====
-Word Delimiter Filter has been deprecated in favor of Word Delimiter Graph Filter, which is required to produce a correct token graph so that e.g., phrase queries can work correctly.
-====
-
-*Factory class:* `solr.WordDelimiterFilterFactory`
-
-For a full description, including arguments and examples, see the Word Delimiter Graph Filter below.
-
 == Word Delimiter Graph Filter
 
 This filter splits tokens at word delimiters.
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java
index 4b024e98ef4..060d9c4be6c 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java
@@ -87,7 +87,7 @@ public boolean isTokenized() {
     boolean={fields=[inStock],tokenized=false,analyzer=org.apache.solr.schema.BoolField$1@354949},
     textTight={fields=[sku],tokenized=true,analyzer=TokenizerChain(org.apache.solr.analysis.WhitespaceTokenizerFactory@5e88f7,
       org.apache.solr.analysis.SynonymFilterFactory@723646, org.apache.solr.analysis.StopFilterFactory@492ff1,
-      org.apache.solr.analysis.WordDelimiterFilterFactory@eaabad, org.apache.solr.analysis.LowerCaseFilterFactory@ad1355,
+      org.apache.solr.analysis.WordDelimiterGraphFilterFactory@eaabad, org.apache.solr.analysis.LowerCaseFilterFactory@ad1355,
       org.apache.solr.analysis.EnglishPorterFilterFactory@d03a00, org.apache.solr.analysis.RemoveDuplicatesTokenFilterFactory@900079)},
     long={fields=null,tokenized=false,analyzer=org.apache.solr.schema.FieldType$DefaultAnalyzer@f3b83},
     double={fields=null,tokenized=false,analyzer=org.apache.solr.schema.FieldType$DefaultAnalyzer@c2b07},
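
For reviewers who want to exercise the renamed factory outside the Solr test framework, below is a minimal, self-contained sketch of driving WordDelimiterGraphFilterFactory directly with the same "default behavior" arguments used in TestWordDelimiterGraphFilterFactory above. This is not part of the patch: the class name WordDelimiterGraphDemo, the sample input string, and the explicit luceneMatchVersion entry are illustrative assumptions on a Lucene 9-style classpath.

import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class WordDelimiterGraphDemo {
  public static void main(String[] args) throws Exception {
    // Same "default behavior" arguments as in testCustomTypes() above.
    Map<String, String> params = new HashMap<>();
    params.put("luceneMatchVersion", Version.LATEST.toString());
    params.put("generateWordParts", "1");
    params.put("generateNumberParts", "1");
    params.put("catenateWords", "1");
    params.put("catenateNumbers", "1");
    params.put("catenateAll", "0");
    params.put("splitOnCaseChange", "1");

    WordDelimiterGraphFilterFactory factory = new WordDelimiterGraphFilterFactory(params);

    // Tokenize a small sample on whitespace, then run it through the graph filter.
    // (No inform() call is needed because no "types" or "protected" file is configured.)
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("I borrowed $5,400.00 at 25% interest-rate"));

    TokenStream ts = factory.create(tokenizer);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // Catenated tokens ("540000", "interestrate") are emitted before the
      // parts they cover, matching the reordered assertions in the test.
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }
}

Note the ordering difference the updated assertions capture: with the graph filter, catenated tokens such as "540000" and "interestrate" come out ahead of the word/number parts they span, which is what keeps the token graph consistent so that phrase queries work correctly, as described in the ref-guide section retained above.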