diff --git a/pom.xml b/pom.xml
index 40b37ef50..b0c57f67c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -66,6 +66,11 @@
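       <artifactId>lucene-analyzers-kuromoji</artifactId>
       <version>${lucene-version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-icu</artifactId>
+      <version>${lucene-version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-facet</artifactId>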
diff --git a/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala b/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala
index 3e2ae0121..a645a30b5 100644
--- a/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala
+++ b/src/main/scala/com/cloudant/clouseau/SupportedAnalyzers.scala
@@ -66,6 +66,7 @@ import org.apache.lucene.analysis.tr.TurkishAnalyzer
// Extras
import org.apache.lucene.analysis.ja.JapaneseTokenizer
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter
+import org.apache.lucene.analysis.icu.ICUFoldingFilter
import org.apache.lucene.analysis.core.LowerCaseFilter
import org.apache.lucene.analysis.core.LetterTokenizer
@@ -120,6 +121,13 @@ object SupportedAnalyzers {
new TokenStreamComponents(tokenizer, new ASCIIFoldingFilter(new LowerCaseFilter(IndexService.version, tokenizer)))
}
})
+ case "simple_icufolding" =>
+   Some(new Analyzer() {
+     override def createComponents(fieldName: String, reader: Reader): TokenStreamComponents = {
+       val letters = new LetterTokenizer(IndexService.version, reader) // token source that the folding filter wraps
+       new TokenStreamComponents(letters, new ICUFoldingFilter(letters)) // no LowerCaseFilter stage here, unlike simple_asciifolding; the spec still expects lowercased tokens
+     }
+   })
case "arabic" =>
options.get("stopwords") match {
case Some(stopwords: List[String]) =>
diff --git a/src/test/scala/com/cloudant/clouseau/AnalyzerServiceSpec.scala b/src/test/scala/com/cloudant/clouseau/AnalyzerServiceSpec.scala
index e1ad47fc7..2b64d3ae8 100644
--- a/src/test/scala/com/cloudant/clouseau/AnalyzerServiceSpec.scala
+++ b/src/test/scala/com/cloudant/clouseau/AnalyzerServiceSpec.scala
@@ -30,6 +30,10 @@ class AnalyzerServiceSpec extends SpecificationWithJUnit {
"demonstrate simple_asciifolding tokenization" in new analyzer_service {
service.handleCall(null, ('analyze, "simple_asciifolding", "Ayşegül Özbayır")) must be equalTo (('ok, List("aysegul", "ozbayir")))
}
+
+ "demonstrate simple_icufolding tokenization" in new analyzer_service {
+ service.handleCall(null, ('analyze, "simple_icufolding", "Ayşegül Özbayır")) must be equalTo (('ok, List("aysegul", "ozbayir")))
+ }
}
}
diff --git a/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala b/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala
index 87c6d099f..653bd3391 100644
--- a/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala
+++ b/src/test/scala/com/cloudant/clouseau/SupportedAnalyzersSpec.scala
@@ -78,6 +78,9 @@ class SupportedAnalyzersSpec extends SpecificationWithJUnit {
"simple_asciifolding" in {
createAnalyzer("simple_asciifolding") must haveClass[Some[Analyzer]]
}
+ "simple_icufolding" in {
+ createAnalyzer("simple_icufolding") must haveClass[Some[Analyzer]]
+ }
"email" in {
createAnalyzer("email") must haveClass[Some[UAX29URLEmailAnalyzer]]
}