Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better validation of language priorities passed #80

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,10 @@ public LanguageDetectorBuilder minimalConfidence(double minimalConfidence) {


/**
* TODO document exactly. Also explain how it influences the results.
* Maybe check for unsupported languages at some point, or not, but document whether it does throw or ignore.
* String key = language, Double value = priority (probably 0-1).
* Accepts a map of relative a priori probabilities, keyed by language.
* The values passed must be non-negative. Passing zero for a language (or not passing it at all)
* means this language will never be returned by detection.
* Languages that are not on the profile list are ignored.
*/
public LanguageDetectorBuilder languagePriorities(@Nullable Map<LdLocale, Double> langWeightingMap) {
this.langWeightingMap = langWeightingMap;
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/optimaize/langdetect/cybozu/util/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,13 @@ public static double[] makeInternalPrioMap(@NotNull Map<LdLocale, Double> langWe
LdLocale lang = langlist.get(i);
if (langWeightingMap.containsKey(lang)) {
double p = langWeightingMap.get(lang);
assert p>=0 : "Prior probability must be non-negative!";
if (p < 0 || Double.isNaN(p)) throw new IllegalArgumentException("Prior probability must be non-negative!");
priorMap[i] = p;
sump += p;
}
}
assert sump > 0 : "Sum must be greater than zero!";
if(sump <= 0 || Double.isNaN(sump)) throw new IllegalArgumentException("Sum of probabilities must be greater than zero!");
if(Double.isInfinite(sump)) throw new IllegalArgumentException("Sum of probabilities must be finite!");
for (int i=0;i<priorMap.length;++i) priorMap[i] /= sump;
return priorMap;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package com.optimaize.langdetect;

import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.junit.Assert.*;

/**
 * Tests how building a detector via {@link LanguageDetectorBuilder} reacts to the map given to
 * {@link LanguageDetectorBuilder#languagePriorities(Map)}: valid maps must build,
 * invalid weights must be rejected with an {@link IllegalArgumentException}.
 */
public class LanguageDetectorBuilderTest {
    /** Built-in language profiles, loaded once and shared by all tests. */
    private static List<LanguageProfile> languageProfiles;

    @BeforeClass
    public static void setUp() throws Exception {
        languageProfiles = new LanguageProfileReader().readAllBuiltIn();
    }

    /**
     * Builds a detector over the built-in profiles using the given priorities.
     *
     * @param priorityMap language weights handed to the builder
     * @return the detector produced by the builder
     */
    private static LanguageDetector buildWithPriorities(Map<LdLocale, Double> priorityMap) {
        return LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(languageProfiles)
                .languagePriorities(priorityMap)
                .build();
    }

    /**
     * Creates a priority map holding exactly one entry.
     *
     * @param language language code parsed with {@link LdLocale#fromString(String)}
     * @param priority weight assigned to that language
     * @return a new mutable map with the single entry
     */
    private static Map<LdLocale, Double> singlePriority(String language, double priority) {
        Map<LdLocale, Double> priorityMap = new HashMap<LdLocale, Double>();
        priorityMap.put(LdLocale.fromString(language), priority);
        return priorityMap;
    }

    @Test
    public void languagePrioritiesEmptyShouldNotThrow() {
        assertNotNull(buildWithPriorities(new HashMap<LdLocale, Double>()));
    }

    @Test
    public void languagePrioritiesShouldNotThrow() {
        assertNotNull(buildWithPriorities(singlePriority("en", 1.0)));
    }

    @Test(expected = IllegalArgumentException.class)
    public void languagePrioritiesNegativeShouldThrow() {
        buildWithPriorities(singlePriority("en", -1.0));
    }

    @Test(expected = IllegalArgumentException.class)
    public void languagePrioritiesOnlyUnknownLocalesShouldThrow() {
        // "xx" is expected not to match any built-in profile, so no usable weight remains.
        buildWithPriorities(singlePriority("xx", 1.0));
    }

    @Test(expected = IllegalArgumentException.class)
    public void languagePrioritiesAllZerosShouldThrow() {
        buildWithPriorities(singlePriority("en", 0.0));
    }

    @Test(expected = IllegalArgumentException.class)
    public void languagePrioritiesNANShouldThrow() {
        buildWithPriorities(singlePriority("en", Double.NaN));
    }

    @Test(expected = IllegalArgumentException.class)
    public void languagePrioritiesInfiniteShouldThrow() {
        // An infinite weight makes the sum of priorities infinite, which cannot be normalized.
        buildWithPriorities(singlePriority("en", Double.POSITIVE_INFINITY));
    }
}