From 4ea991a092ebceb98f7b4cf13ef4e9648d3c1d20 Mon Sep 17 00:00:00 2001 From: Daniel Jelinski Date: Thu, 27 Jul 2017 13:04:32 +0200 Subject: [PATCH] Better validation of language profiles passed Asserts are usually disabled in production environments, and passing invalid values results in misleading error messages. See https://github.com/optimaize/language-detector/issues/24 --- .../langdetect/LanguageDetectorBuilder.java | 7 +- .../langdetect/cybozu/util/Util.java | 5 +- .../LanguageDetectorBuilderTest.java | 81 +++++++++++++++++++ 3 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 src/test/java/com/optimaize/langdetect/LanguageDetectorBuilderTest.java diff --git a/src/main/java/com/optimaize/langdetect/LanguageDetectorBuilder.java b/src/main/java/com/optimaize/langdetect/LanguageDetectorBuilder.java index 4f26801..aa4965a 100644 --- a/src/main/java/com/optimaize/langdetect/LanguageDetectorBuilder.java +++ b/src/main/java/com/optimaize/langdetect/LanguageDetectorBuilder.java @@ -145,9 +145,10 @@ public LanguageDetectorBuilder minimalConfidence(double minimalConfidence) { /** - * TODO document exactly. Also explain how it influences the results. - * Maybe check for unsupported languages at some point, or not, but document whether it does throw or ignore. - * String key = language, Double value = priority (probably 0-1). + * Accepts a list of relative a priori probabilities of each language. + * The values passed have to be nonnegative. Passing a zero for a language (or not passing it at all) + * means this language will never be returned by detection. + * Languages that are not on profile list are ignored. */ public LanguageDetectorBuilder languagePriorities(@Nullable Map langWeightingMap) { this.langWeightingMap = langWeightingMap; diff --git a/src/main/java/com/optimaize/langdetect/cybozu/util/Util.java b/src/main/java/com/optimaize/langdetect/cybozu/util/Util.java index f0a2626..0a82550 100644 --- a/src/main/java/com/optimaize/langdetect/cybozu/util/Util.java +++ b/src/main/java/com/optimaize/langdetect/cybozu/util/Util.java @@ -120,12 +120,13 @@ public static double[] makeInternalPrioMap(@NotNull Map langWe LdLocale lang = langlist.get(i); if (langWeightingMap.containsKey(lang)) { double p = langWeightingMap.get(lang); - assert p>=0 : "Prior probability must be non-negative!"; + if (p < 0 || Double.isNaN(p)) throw new IllegalArgumentException("Prior probability must be non-negative!"); priorMap[i] = p; sump += p; } } - assert sump > 0 : "Sum must be greater than zero!"; + if(sump <= 0 || Double.isNaN(sump)) throw new IllegalArgumentException("Sum of probabilities must be greater than zero!"); + if(Double.isInfinite(sump)) throw new IllegalArgumentException("Sum of probabilities must be finite!"); for (int i=0;i languageProfiles; + + @BeforeClass + public static void setUp() throws Exception { + languageProfiles = new LanguageProfileReader().readAllBuiltIn(); + } + + @Test + public void languagePrioritiesEmptyShouldNotThrow() { + Map priorityMap = new HashMap(); + LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles) + .languagePriorities(priorityMap) + .build(); + } + @Test + public void languagePrioritiesShouldNotThrow() { + Map priorityMap = new HashMap(); + priorityMap.put(LdLocale.fromString("en"),1.0); + LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles) + .languagePriorities(priorityMap) + .build(); + } + @Test(expected = IllegalArgumentException.class) + public void languagePrioritiesNegativeShouldThrow() { + Map priorityMap = new HashMap(); + priorityMap.put(LdLocale.fromString("en"),-1.0); + LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles) + .languagePriorities(priorityMap) + .build(); + } + @Test(expected = IllegalArgumentException.class) + public void languagePrioritiesOnlyUnknownLocalesShouldThrow() { + Map priorityMap = new HashMap(); + priorityMap.put(LdLocale.fromString("xx"),1.0); + LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles) + .languagePriorities(priorityMap) + .build(); + } + @Test(expected = IllegalArgumentException.class) + public void languagePrioritiesAllZerosShouldThrow() { + Map priorityMap = new HashMap(); + priorityMap.put(LdLocale.fromString("en"),0.0); + LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles) + .languagePriorities(priorityMap) + .build(); + } + @Test(expected = IllegalArgumentException.class) + public void languagePrioritiesNANShouldThrow() { + Map priorityMap = new HashMap(); + priorityMap.put(LdLocale.fromString("en"),Double.NaN); + LanguageDetector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) + .withProfiles(languageProfiles) + .languagePriorities(priorityMap) + .build(); + } +} \ No newline at end of file