Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
es_version: [8.9.2, 8.15.2]
es_version: [8.15.2]
steps:
- name: Checkout project sources
uses: actions/checkout@v2
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
.gradle
/build/
/bin/
*.class


# Ignore Gradle GUI config
gradle-app.setting
Expand Down
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Convenience wrapper around the Gradle build.
#
# Both variables can be overridden from the environment or the command line,
# e.g. `make build ES_VERSION=8.15.2 JAVA_HOME=/path/to/jdk`.

# Elasticsearch version handed to Gradle as -Pelasticsearch.version.
ES_VERSION ?= 8.15.2
# JDK used for the build; its bin/ directory is put first on PATH.
JAVA_HOME ?= /usr/lib/jvm/java-17-openjdk-amd64

.PHONY: all build

all: build

# The whole recipe is one shell command line (joined with `&& \`) so that the
# exported JAVA_HOME and PATH are still in effect when gradlew runs.
# Expansions are quoted so that a JAVA_HOME containing spaces does not split.
build:
	@echo "Building with Elasticsearch version $(ES_VERSION)"
	@export JAVA_HOME="$(JAVA_HOME)" && \
	export PATH="$$JAVA_HOME/bin:$$PATH" && \
	./gradlew build -Pelasticsearch.version="$(ES_VERSION)"
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@ in the [release](https://github.com/monitora-media/es-utils/releases/latest).

## Build

ES_VERSION=8.9.2
ES_VERSION=8.15.2
./gradlew build -Pelasticsearch.version=$ES_VERSION

export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 PATH=$JAVA_HOME/bin:$PATH ./gradlew build -Pelasticsearch.version=8.15.2


## Testing

gradle test --debug
gradle test --info --tests "Croatian*"

## Install

Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
elasticsearch.version=8.5.3
elasticsearch.version=8.15.2
plugin.version=1.2.0-SNAPSHOT
2 changes: 2 additions & 0 deletions src/main/java/cz/monitora/elasticsearch/MonitoraESPlugin.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package cz.monitora.elasticsearch;

import cz.monitora.elasticsearch.analyzer.croatian.CroatianStemFilterFactory;
import cz.monitora.elasticsearch.analyzer.czech.CzechStemFilterFactory;
import cz.monitora.elasticsearch.analyzer.lowercase.LowerCaseTokenFilterFactory;
import cz.monitora.elasticsearch.analyzer.slovak.SlovakStemFilterFactory;
Expand All @@ -20,6 +21,7 @@ public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getToken
AnalysisPlugin.requiresAnalysisSettings(LowerCaseTokenFilterFactory::new));
extra.put("monitora_czech_stem", CzechStemFilterFactory::new);
extra.put("monitora_slovak_stem", SlovakStemFilterFactory::new);
extra.put("monitora_croatian_stem", CroatianStemFilterFactory::new);
return extra;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cz.monitora.elasticsearch.analyzer.croatian;

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;

/**
 * Token filter that stems Croatian words using {@link CroatianStemmer}.
 *
 * <p>Tokens flagged through {@link KeywordAttribute} are passed on unchanged. To protect specific
 * terms from stemming, place a {@link SetKeywordMarkerFilter} (or any other {@link TokenFilter}
 * that sets the {@link KeywordAttribute}) earlier in the chain than this {@link TokenStream}.
 *
 * <p><b>NOTE</b>: Input is expected to be in lowercase, but with diacritical marks
 *
 * @see SetKeywordMarkerFilter
 */
public final class CroatianStemFilter extends TokenFilter {
  private final CroatianStemmer croatianStemmer = new CroatianStemmer();
  private final CharTermAttribute term = addAttribute(CharTermAttribute.class);
  private final KeywordAttribute keyword = addAttribute(KeywordAttribute.class);

  /**
   * Creates a stem filter reading from the given stream.
   *
   * @param input the upstream token stream to consume
   */
  public CroatianStemFilter(TokenStream input) {
    super(input);
  }

  /**
   * Advances the wrapped stream by one token and stems it unless it is marked as a keyword.
   *
   * @return {@code true} if a token was produced, {@code false} once the input is exhausted
   * @throws IOException if the wrapped stream fails to advance
   */
  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    if (keyword.isKeyword()) {
      // Keyword tokens are emitted exactly as received.
      return true;
    }
    // Hand the term buffer and its current length to the stemmer, then cut the term
    // down to the length the stemmer reports.
    final int stemmedLength = croatianStemmer.stem(term.buffer(), term.length());
    term.setLength(stemmedLength);
    return true;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package cz.monitora.elasticsearch.analyzer.croatian;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

/** Token filter factory that wraps token streams in a {@link CroatianStemFilter}. */
public class CroatianStemFilterFactory extends AbstractTokenFilterFactory {

  /**
   * Creates a new CroatianStemFilterFactory.
   *
   * @param indexSettings not read by this factory
   * @param env not read by this factory
   * @param name passed through to the superclass constructor
   * @param settings passed through to the superclass constructor
   */
  public CroatianStemFilterFactory(
      final IndexSettings indexSettings,
      final Environment env,
      final String name,
      final Settings settings) {
    super(name, settings);
  }

  /**
   * Wraps the given stream in a Croatian stem filter.
   *
   * @param input the token stream to decorate
   * @return a new {@link CroatianStemFilter} reading from {@code input}
   */
  @Override
  public TokenStream create(final TokenStream input) {
    final TokenStream stemmed = new CroatianStemFilter(input);
    return stemmed;
  }
}
Loading
Loading