Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/upgrading dependencies jdk and code #113

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
This is a fork of the project Optimaize Language Detector https://github.com/optimaize/language-detector

License Apache 2.0 remains the same

Depending on the changes, the project might be renamed in the future to establish a clear difference.

Original version 0.7-SNAPSHOT

02/09/2025 0.8-SNAPSHOT

* Upgrading dependencies to address security vulnerabilities reported in Guava and Logback.
* Compiling with Java 11
* Upgrading testing dependencies
* Gradually replacing the use of Guava classes with built-in JDK alternatives.
* Replacing Cobertura with JaCoCo, as the former does not support Java 11
62 changes: 32 additions & 30 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
<groupId>com.optimaize.languagedetector</groupId>
<artifactId>language-detector</artifactId>
<name>language-detector</name>
<version>0.7-SNAPSHOT</version>
<version>0.8-SNAPSHOT</version>
<packaging>jar</packaging>

<url>https://github.com/optimaize/language-detector</url>
<url>https://github.com/hsolano1/language-detector</url>
<description>
Language Detection Library for Java.
</description>
Expand All @@ -23,8 +23,8 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<compiler.source>1.7</compiler.source>
<compiler.target>1.7</compiler.target>
<compiler.source>11</compiler.source>
<compiler.target>11</compiler.target>
</properties>

<developers>
Expand All @@ -40,12 +40,15 @@
<developer>
<name>Robert Theis</name>
</developer>
<developer>
<name>Humberto Solano</name>
</developer>
</developers>

<scm>
<connection>scm:git:https://github.com/optimaize/language-detector</connection>
<developerConnection>scm:git:https://github.com/optimaize/language-detector</developerConnection>
<url>https://github.com/optimaize/language-detector</url>
<connection>scm:git:https://github.com/hsolano1/language-detector</connection>
<developerConnection>scm:git:https://github.com/hsolano1/language-detector</developerConnection>
<url>https://github.com/hsolano1/language-detector</url>
<tag>HEAD</tag>
</scm>

Expand Down Expand Up @@ -92,31 +95,30 @@
</executions>
</plugin>

<!-- Required: javadoc JAR -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9.1</version>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.12</version>
<executions>
<execution>
<id>attach-javadoc</id>
<phase>verify</phase>
<goals>
<goal>jar</goal>
<goal>prepare-agent</goal>
</goals>
</execution>
<!-- attached to Maven test phase -->
<execution>
<id>report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>

<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>cobertura-maven-plugin</artifactId>
<version>2.6</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<version>3.3</version>
<version>3.21.0</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
Expand Down Expand Up @@ -228,12 +230,12 @@
<dependency><!-- used for the Optional and other things -->
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>19.0</version>
<version>33.4.0-jre</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.6</version>
<version>1.7.36</version>
</dependency>


Expand All @@ -248,32 +250,32 @@
</dependency>
<dependency><!-- was used in the past, there are still many tests -->
<groupId>junit</groupId>
<artifactId>junit-dep</artifactId>
<version>4.11</version>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<version>1.3</version>
<version>3.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-library</artifactId>
<version>1.3</version>
<version>3.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<artifactId>mockito-core</artifactId>
<version>5.15.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.1.1</version>
<version>1.5.16</version>
<scope>test</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.optimaize.langdetect;

import com.google.common.base.Optional;
import java.util.Optional;
import com.optimaize.langdetect.i18n.LdLocale;

import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.optimaize.langdetect;

import com.google.common.base.Optional;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.profiles.LanguageProfile;
Expand All @@ -25,6 +24,7 @@

import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;


Expand All @@ -43,7 +43,7 @@ public class LanguageDetectorBuilder {
private final NgramExtractor ngramExtractor;

private double alpha = ALPHA_DEFAULT;
private Optional<Long> seed = Optional.absent();
private Optional<Long> seed = Optional.empty();
private int shortTextAlgorithm = 50;
private double prefixFactor = 1.0d;
private double suffixFactor = 1.0d;
Expand Down
15 changes: 10 additions & 5 deletions src/main/java/com/optimaize/langdetect/LanguageDetectorImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,20 @@
package com.optimaize.langdetect;

import com.optimaize.langdetect.cybozu.util.Util;
import com.google.common.base.Optional;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractor;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;

/**
*
Expand Down Expand Up @@ -135,13 +140,13 @@ public int compare(DetectedLanguage a, DetectedLanguage b) {
public Optional<LdLocale> detect(CharSequence text) {
List<DetectedLanguage> probabilities = getProbabilities(text);
if (probabilities.isEmpty()) {
return Optional.absent();
return Optional.empty();
} else {
DetectedLanguage best = probabilities.get(0);
if (best.getProbability() >= minimalConfidence) {
return Optional.of(best.getLocale());
} else {
return Optional.absent();
return Optional.empty();
}
}
}
Expand Down Expand Up @@ -194,7 +199,7 @@ private double[] detectBlockShortText(Map<String, Integer> ngrams) {
private double[] detectBlockLongText(List<String> ngrams) {
assert !ngrams.isEmpty();
double[] langprob = new double[ngramFrequencyData.getLanguageList().size()];
Random rand = new Random(seed.or(DEFAULT_SEED));
Random rand = new Random(seed.orElse(DEFAULT_SEED));
for (int t = 0; t < N_TRIAL; ++t) {
double[] prob = initProbability();
double alpha = this.alpha + (rand.nextGaussian() * ALPHA_WIDTH);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import com.optimaize.langdetect.frma.LangProfileWriter;
import com.optimaize.langdetect.cybozu.util.LangProfile;
import com.google.common.base.Optional;
import com.optimaize.langdetect.DetectedLanguage;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
Expand All @@ -32,8 +31,20 @@
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.io.*;
import java.util.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

/**
* LangDetect Command Line Interface.
Expand Down Expand Up @@ -286,7 +297,7 @@ public void batchTest() throws IOException {
private LanguageDetector makeDetector() throws IOException {
double alpha = getParamDouble("alpha", DEFAULT_ALPHA);
String profileDirectory = requireParamString("directory") + "/";
Optional<Long> seed = Optional.fromNullable(getParamLongOrNull("seed"));
Optional<Long> seed = Optional.ofNullable(getParamLongOrNull("seed"));

List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAll(new File(profileDirectory));

Expand Down
6 changes: 3 additions & 3 deletions src/main/java/com/optimaize/langdetect/i18n/LdLocale.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@

package com.optimaize.langdetect.i18n;

import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import org.jetbrains.annotations.NotNull;

import java.util.List;
import java.util.Optional;

/**
* A language-detector implementation of a Locale, similar to the java.util.Locale.
Expand Down Expand Up @@ -106,8 +106,8 @@ public static LdLocale fromString(@NotNull String string) {
}
}
assert language != null;
if (script==null) script = Optional.absent();
if (region==null) region = Optional.absent();
if (script==null) script = Optional.empty();
if (region==null) region = Optional.empty();
return new LdLocale(language, script, region);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.optimaize.langdetect.ngram;

import com.google.common.collect.ImmutableList;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

Expand All @@ -39,7 +38,7 @@ public class NgramExtractor {
private final Character textPadding;

public static NgramExtractor gramLength(int gramLength) {
return new NgramExtractor(ImmutableList.of(gramLength), null, null);
return new NgramExtractor(List.of(gramLength), null, null);
}
public static NgramExtractor gramLengths(Integer... gramLength) {
return new NgramExtractor(Arrays.asList(gramLength), null, null);
Expand All @@ -65,7 +64,7 @@ public NgramExtractor textPadding(char textPadding) {

private NgramExtractor(@NotNull List<Integer> gramLengths, @Nullable NgramFilter filter, @Nullable Character textPadding) {
if (gramLengths.isEmpty()) throw new IllegalArgumentException();
this.gramLengths = ImmutableList.copyOf(gramLengths);
this.gramLengths = List.copyOf(gramLengths);
this.filter = filter;
this.textPadding = textPadding;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.optimaize.langdetect.profiles;

import com.google.common.collect.ImmutableList;
import com.optimaize.langdetect.i18n.LdLocale;

import java.util.ArrayList;
Expand Down Expand Up @@ -106,7 +105,7 @@ public class BuiltInLanguages {
names.add(LdLocale.fromString("zh-CN"));
names.add(LdLocale.fromString("zh-TW"));

languages = ImmutableList.copyOf(names);
languages = List.copyOf(names);
}

static {
Expand All @@ -128,7 +127,7 @@ public class BuiltInLanguages {
texts.add("sv");
texts.add("tr");
texts.add("vi");
shortTextLanguages = ImmutableList.copyOf(texts);
shortTextLanguages = List.copyOf(texts);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@

package com.optimaize.langdetect.profiles;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.optimaize.langdetect.i18n.LdLocale;
import org.jetbrains.annotations.NotNull;

import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* <p>This class is immutable.</p>
Expand Down Expand Up @@ -63,9 +66,9 @@ private static class Stats {
public Stats(@NotNull Map<Integer, Long> numOccurrences,
@NotNull Map<Integer, Long> minGramCounts,
@NotNull Map<Integer, Long> maxGramCounts) {
this.numOccurrences = ImmutableMap.copyOf(numOccurrences);
this.minGramCounts = ImmutableMap.copyOf(minGramCounts);
this.maxGramCounts = ImmutableMap.copyOf(maxGramCounts);
this.numOccurrences = Map.copyOf(numOccurrences);
this.minGramCounts = Map.copyOf(minGramCounts);
this.maxGramCounts = Map.copyOf(maxGramCounts);
}
}

Expand All @@ -76,7 +79,7 @@ public Stats(@NotNull Map<Integer, Long> numOccurrences,
LanguageProfileImpl(@NotNull LdLocale locale,
@NotNull Map<Integer, Map<String, Integer>> ngrams) {
this.locale = locale;
this.ngrams = ImmutableMap.copyOf(ngrams);
this.ngrams = Map.copyOf(ngrams);
this.stats = makeStats(ngrams);
}

Expand Down
Loading