Skip to content

Commit

Permalink
[BUGS#1251] feat: add regression test
Browse files Browse the repository at this point in the history
- Add FindMatchesThreadTest to reproduce BUGS#1251
- Add a test case to test FindMatches with the case of BUGS#1251.

Signed-off-by: Hiroshi Miura <[email protected]>
  • Loading branch information
miurahr committed Dec 17, 2024
1 parent ecb65e3 commit 6d6aad4
Show file tree
Hide file tree
Showing 5 changed files with 320 additions and 7 deletions.
26 changes: 20 additions & 6 deletions src/org/omegat/gui/matches/FindMatchesThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
2008 Alex Buloichik
2012 Thomas Cordonnier, Martin Fleurke
2013 Aaron Madlon-Kay
2024 Hiroshi Miura
Home page: https://www.omegat.org/
Support center: https://omegat.org/support
Expand Down Expand Up @@ -34,15 +35,17 @@

import org.omegat.core.data.IProject;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.events.IStopped;
import org.omegat.core.matching.NearString;
import org.omegat.core.statistics.FindMatches;
import org.omegat.gui.common.EntryInfoSearchThread;
import org.omegat.util.OConsts;

/**
* Find matches in separate thread then show result in the matches pane.
* Find matches in a separate thread, then show the result in the matches pane.
*
* @author Alex Buloichik ([email protected])
* @author Hiroshi Miura
*/
public class FindMatchesThread extends EntryInfoSearchThread<List<NearString>> {
private static final Logger LOGGER = Logger.getLogger(FindMatchesThread.class.getName());
Expand All @@ -52,9 +55,9 @@ public class FindMatchesThread extends EntryInfoSearchThread<List<NearString>> {

/**
* Entry which is processed currently.
*
* If entry in controller was changed, it means user has moved to another entry, and there is no sense to
* continue.
* <p>
* If the entry in the controller has changed, it means the user has moved to
* another entry, and there is no point in continuing.
*/
private final SourceTextEntry processedEntry;

Expand All @@ -79,12 +82,23 @@ protected List<NearString> search() throws Exception {
long before = System.currentTimeMillis();

try {
FindMatches finder = new FindMatches(project, OConsts.MAX_NEAR_STRINGS, true, false);
List<NearString> result = finder.search(processedEntry.getSrcText(), true, true, this::isEntryChanged);
List<NearString> result = finderSearch(project, processedEntry.getSrcText(), this::isEntryChanged);
LOGGER.finer(() -> "Time for find matches: " + (System.currentTimeMillis() - before));
return result;
} catch (FindMatches.StoppedException ex) {
throw new EntryChangedException();
}
}

/**
 * Runs the fuzzy-match lookup for a single source text.
 * <p>
 * Kept static so that tests can call it without constructing a thread.
 *
 * @param project
 *            OmegaT project handed to {@link FindMatches}.
 * @param srcText
 *            source text to look for.
 * @param isEntryChanged
 *            stop callback passed to the search; when it returns true the
 *            search is expected to abort with
 *            {@link FindMatches.StoppedException}.
 * @return the matches found, as a list of {@link NearString}.
 */
protected static List<NearString> finderSearch(IProject project, String srcText, IStopped isEntryChanged) {
    return new FindMatches(project, OConsts.MAX_NEAR_STRINGS, true, false)
            .search(srcText, true, true, isEntryChanged);
}
}
16 changes: 16 additions & 0 deletions test/data/tmx/penalty-010/segment_1.tmx
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE tmx PUBLIC "-//LISA OSCAR:1998//DTD for Translation Memory eXchange//EN" "tmx14.dtd">

<tmx version="1.4">
<header creationtoolversion="0.1" adminlang="en" segtype="paragraph" creationdate="20230930T155211Z" datatype="unknown" srclang="ja" creationtool="txt2tmx" o-tmf="TextEdit"></header>
<body>
<tu>
<tuv xml:lang="fr">
<seg>weird behavior</seg>
</tuv>
<tuv xml:lang="ja">
<seg>地力の搾取と浪費が現われる。(1)</seg>
</tuv>
</tu>
</body>
</tmx>
46 changes: 46 additions & 0 deletions test/data/tmx/test-multiple-entries.tmx
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE tmx SYSTEM "tmx14.dtd">
<tmx version="1.4">
<header datatype="plaintext" srclang="en-US" adminlang="EN-US" o-tmf="OmegaT TMX" segtype="sentence"
creationtoolversion="test" creationtool="test"/>
<body>
<!-- Default translations -->
<tu>
<tuv lang="en-US">
<seg>Other</seg>
</tuv>
<tuv lang="co" changeid="id" changedate="20200523T143256Z">
<seg>Altre</seg>
</tuv>
</tu>
<tu>
<tuv lang="en-US">
<seg>For installation on Linux.</seg>
</tuv>
<tuv lang="co" changeid="id" changedate="20200526T131725Z" creationid="id" creationdate="20200526T131725Z">
<seg>Per l’installazioni nant’à i sistemi Linux.</seg>
</tuv>
</tu>
<tu>
<tuv lang="en-US">
<seg>For installation on other operating systems (such as FreeBSD and Solaris).</seg>
</tuv>
<tuv lang="co" changeid="id" changedate="20200526T131840Z" creationid="id"
creationdate="20200526T131840Z">
<seg>Per l’installazioni nant’à d’altri sistemi (cum’è FreeBSD è Solaris).</seg>
</tuv>
</tu>
<!-- Alternative translations -->
<tu>
<prop type="file">website/download.html</prop>
<prop type="prev">For installation on Linux.</prop>
<prop type="next">For installation on other operating systems (such as FreeBSD and Solaris).&lt;br0/></prop>
<tuv lang="en-US">
<seg>Other</seg>
</tuv>
<tuv lang="co" changeid="id" changedate="20200526T131742Z" creationid="id" creationdate="20200526T131742Z">
<seg>Altri</seg>
</tuv>
</tu>
</body>
</tmx>
68 changes: 67 additions & 1 deletion test/src/org/omegat/core/statistics/FindMatchesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2021 Hiroshi Miura
Copyright (C) 2021-2024 Hiroshi Miura
Home page: https://www.omegat.org/
Support center: https://omegat.org/support
Expand All @@ -26,6 +26,8 @@
package org.omegat.core.statistics;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.File;
Expand Down Expand Up @@ -55,10 +57,13 @@
import org.omegat.core.events.IStopped;
import org.omegat.core.matching.NearString;
import org.omegat.core.segmentation.Rule;
import org.omegat.core.segmentation.SRX;
import org.omegat.core.segmentation.Segmenter;
import org.omegat.tokenizer.DefaultTokenizer;
import org.omegat.tokenizer.ITokenizer;
import org.omegat.tokenizer.LuceneCJKTokenizer;
import org.omegat.tokenizer.LuceneEnglishTokenizer;
import org.omegat.tokenizer.LuceneFrenchTokenizer;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OConsts;
Expand All @@ -71,6 +76,8 @@ public class FindMatchesTest {
private static final File TMX_MATCH_EN_CA = new File("test/data/tmx/test-match-stat-en-ca.tmx");
private static final File TMX_EN_US_SR = new File("test/data/tmx/en-US_sr.tmx");
private static final File TMX_EN_US_GB_SR = new File("test/data/tmx/en-US_en-GB_fr_sr.tmx");
private static final File TMX_SEGMENT = new File("test/data/tmx/penalty-010/segment_1.tmx");
private static final File TMX_MULTI = new File("test/data/tmx/test-multiple-entries.tmx");
private static Path tmpDir;


Expand Down Expand Up @@ -214,6 +221,59 @@ public void testSearchRFE1578_2() throws Exception {
assertEquals("ZZZ", result.get(2).translation); // sr
}

/**
 * Regression test for the case reported as BUGS#1251.
 * <p>
 * The project is ja -> fr with sentence segmenting disabled, while the
 * {@link FindMatches} instance is still given a segmenter that splits the
 * source text into two segments. The search is expected to return exactly
 * one TM match for the whole source text.
 */
@Test
public void testSearchBUGS1251() throws Exception {
    ProjectProperties prop = new ProjectProperties(tmpDir.toFile());
    prop.setSourceLanguage("ja");
    prop.setTargetLanguage("fr");
    prop.setSupportDefaultTranslations(true);
    prop.setSentenceSegmentingEnabled(false);
    Segmenter segmenter = new Segmenter(SRX.getDefault());
    IProject project = new TestProject(prop, null, TMX_SEGMENT, new LuceneCJKTokenizer(),
            new LuceneFrenchTokenizer(), segmenter);
    Core.setProject(project);
    // Entry at index 1 of TestProject#getAllEntries() is the Japanese segment.
    SourceTextEntry ste = project.getAllEntries().get(1);
    Language sourceLanguage = prop.getSourceLanguage();
    String srcText = ste.getSrcText();
    // Precondition: the segmenter splits the source text into two segments.
    List<StringBuilder> spaces = new ArrayList<>();
    List<Rule> brules = new ArrayList<>();
    List<String> segments = segmenter.segment(sourceLanguage, srcText, spaces, brules);
    assertEquals(2, segments.size());
    IStopped iStopped = () -> false;
    FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, true, false,
            true, 30);
    List<NearString> result = finder.search(srcText, true, true, iStopped);
    // Check the size before indexing so that an empty result is reported as
    // an assertion failure rather than an IndexOutOfBoundsException.
    assertEquals(1, result.size());
    NearString match = result.get(0);
    assertEquals(srcText, match.source);
    assertEquals("TM", match.comesFrom.name());
    assertEquals(90, match.scores[0].score);
    assertEquals("weird behavior", match.translation);
}

/**
 * Searches for "Other" in a TMX that holds both a default and an
 * alternative translation for that source text, and checks the three
 * matches returned together with their order.
 */
@Test
public void testSearchMulti() throws Exception {
    ProjectProperties props = new ProjectProperties(tmpDir.toFile());
    props.setSourceLanguage("en-US");
    props.setTargetLanguage("co");
    props.setSupportDefaultTranslations(true);
    props.setSentenceSegmentingEnabled(true);

    Segmenter srxSegmenter = new Segmenter(SRX.getDefault());
    IProject testProject = new TestProject(props, TMX_MULTI, null, new LuceneEnglishTokenizer(),
            new DefaultTokenizer(), srxSegmenter);
    IStopped neverStopped = () -> false;
    FindMatches matcher = new FindMatches(testProject, srxSegmenter, OConsts.MAX_NEAR_STRINGS, true, 85);
    List<NearString> matches = matcher.search("Other", false, neverStopped);

    assertEquals(3, matches.size());
    NearString defaultMatch = matches.get(0);
    NearString alternativeMatch = matches.get(1);
    NearString sourceMatch = matches.get(2);

    assertEquals("Other", defaultMatch.source);
    assertEquals("Altre", defaultMatch.translation); // default
    assertNull(defaultMatch.key);

    assertEquals("Altri", alternativeMatch.translation); // alternative
    assertNotNull(alternativeMatch.key);
    assertEquals("website/download.html", alternativeMatch.key.file);

    assertEquals("Other", sourceMatch.translation); // source translation
}


@BeforeClass
public static void setUpClass() throws Exception {
tmpDir = Files.createTempDirectory("omegat");
Expand Down Expand Up @@ -309,6 +369,12 @@ public List<SourceTextEntry> getAllEntries() {
List<SourceTextEntry> ste = new ArrayList<>();
ste.add(new SourceTextEntry(new EntryKey("source.txt", "XXX", null, "", "", null),
1, null, null, Collections.emptyList()));
ste.add(new SourceTextEntry(new EntryKey("source.txt", "地力の搾取と浪費が現われる。(1)", null, "", "", null),
1, null, null, Collections.emptyList()));
ste.add(new SourceTextEntry(new EntryKey("website/download.html", "Other", "id",
"For installation on Linux.",
"For installation on other operating systems (such as FreeBSD and Solaris).&lt;br0/>",
null), 1, null, "Other", Collections.emptyList()));
return ste;
}

Expand Down
Loading

0 comments on commit 6d6aad4

Please sign in to comment.