diff --git a/src/org/omegat/core/statistics/CalcMatchStatistics.java b/src/org/omegat/core/statistics/CalcMatchStatistics.java index f48f2dcf57..674e4ddabd 100644 --- a/src/org/omegat/core/statistics/CalcMatchStatistics.java +++ b/src/org/omegat/core/statistics/CalcMatchStatistics.java @@ -110,7 +110,7 @@ public class CalcMatchStatistics extends LongProcessThread { public CalcMatchStatistics(IStatsConsumer callback, boolean perFile) { this(Core.getProject(), Core.getSegmenter(), callback, perFile, Preferences.getPreferenceDefault(Preferences.EXT_TMX_FUZZY_MATCH_THRESHOLD, - OConsts.FUZZY_MATCH_THRESHOLD)); + OConsts.FUZZY_MATCH_THRESHOLD)); } public CalcMatchStatistics(IProject project, Segmenter segmenter, IStatsConsumer callback, @@ -119,8 +119,7 @@ public CalcMatchStatistics(IProject project, Segmenter segmenter, IStatsConsumer this.callback = callback; this.perFile = perFile; finder = ThreadLocal.withInitial( - () -> new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, true, - false, false, threshold)); + () -> new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, threshold)); } @Override @@ -313,7 +312,7 @@ Optional calcSimilarity(List untranslatedEntri int calcMaxSimilarity(SourceTextEntry ste) { String srcNoXmlTags = removeXmlTags(ste); FindMatches localFinder = finder.get(); - List nears = localFinder.search(srcNoXmlTags, true, false, this::isInterrupted); + List nears = localFinder.search(srcNoXmlTags, false, this::isInterrupted); final Token[] strTokensStem = localFinder.tokenizeAll(ste.getSrcText()); int maxSimilarity = 0; CACHE: for (NearString near : nears) { diff --git a/src/org/omegat/core/statistics/FindMatches.java b/src/org/omegat/core/statistics/FindMatches.java index 614cb2c8f4..ebffbd8f3b 100644 --- a/src/org/omegat/core/statistics/FindMatches.java +++ b/src/org/omegat/core/statistics/FindMatches.java @@ -126,27 +126,15 @@ public class FindMatches { /** Tokens for original string, includes numbers and tags. 
*/ private Token[] strTokensAll; - // This finder used for search separate segment matches - private FindMatches separateSegmentMatcher; - private final int fuzzyMatchThreshold; - private final boolean applyThreshold; - private final Segmenter segmenter; - /** - * @param searchExactlyTheSame - * allows to search similarities with the same text as source - * segment. This mode used only for separate sentence match in - * paragraph project, i.e. where source is just part of current - * source. - */ + @Deprecated(since = "6.1.0") public FindMatches(IProject project, int maxCount, boolean allowSeparateSegmentMatch, boolean searchExactlyTheSame) { - this(project, Core.getSegmenter(), maxCount, allowSeparateSegmentMatch, searchExactlyTheSame, true, - Preferences.getPreferenceDefault(Preferences.EXT_TMX_FUZZY_MATCH_THRESHOLD, - OConsts.FUZZY_MATCH_THRESHOLD)); + this(project, Core.getSegmenter(), maxCount, searchExactlyTheSame, Preferences.getPreferenceDefault( + Preferences.EXT_TMX_FUZZY_MATCH_THRESHOLD, OConsts.FUZZY_MATCH_THRESHOLD)); } /** @@ -166,19 +154,21 @@ public FindMatches(IProject project, int maxCount, boolean allowSeparateSegmentM * @param threshold * threshold to use. 
*/ - public FindMatches(IProject project, Segmenter segmenter, int maxCount, boolean allowSeparateSegmentMatch, - boolean searchExactlyTheSame, boolean applyThreshold, int threshold) { + public FindMatches(IProject project, Segmenter segmenter, int maxCount, + boolean searchExactlyTheSame, int threshold) { this.project = project; this.segmenter = segmenter; this.tok = project.getSourceTokenizer(); this.srcLocale = project.getProjectProperties().getSourceLanguage().getLocale(); this.maxCount = maxCount; this.searchExactlyTheSame = searchExactlyTheSame; - if (allowSeparateSegmentMatch && !project.getProjectProperties().isSentenceSegmentingEnabled()) { - separateSegmentMatcher = new FindMatches(project, segmenter, 1, false, true, true, threshold); - } this.fuzzyMatchThreshold = threshold; - this.applyThreshold = applyThreshold; + } + + @Deprecated(since = "6.1.0") + public List search(final String searchText, final boolean requiresTranslation, + final boolean fillSimilarityData, final IStopped stop) throws StoppedException { + return search(searchText, fillSimilarityData, stop); } /** @@ -195,8 +185,33 @@ public FindMatches(IProject project, Segmenter segmenter, int maxCount, boolean * @throws StoppedException * raised when stopped during a search process. */ - public List search(String searchText, boolean requiresTranslation, boolean fillSimilarityData, - IStopped stop) throws StoppedException { + public List search(String searchText, boolean fillSimilarityData, IStopped stop) + throws StoppedException { + return search(searchText, fillSimilarityData, stop, + !project.getProjectProperties().isSentenceSegmentingEnabled()); + } + + /** + * Search Translation memories. + *

+ * Internal method to handle search conditions. + * It is package-private so that tests can call it. + * + * @param searchText + * source text of the segment or term to search for. + * @param fillSimilarityData + * whether to fill similarity data into the resulting NearString objects. + * @param stop + * IStopped callback that signals when the search should be cancelled. + * @param runSeparateSegmentMatch + * when true, also search for matches of each separate segment. + * @return + * List of NearString objects. + * @throws StoppedException + * raised when the process is stopped during a search. + */ + List search(String searchText, boolean fillSimilarityData, IStopped stop, + boolean runSeparateSegmentMatch) throws StoppedException { result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1); srcText = searchText; removedText = ""; @@ -226,7 +241,7 @@ public List search(String searchText, boolean requiresTranslation, b // skip original==original entry comparison return; } - if (requiresTranslation && trans.translation == null) { + if (trans.translation == null) { return; } String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null; @@ -241,7 +256,7 @@ public List search(String searchText, boolean requiresTranslation, b // skip original==original entry comparison return; } - if (requiresTranslation && trans.translation == null) { + if (trans.translation == null) { return; } String fileName = project.isOrphaned(source) ? 
ORPHANED_FILE_NAME : null; @@ -255,7 +270,6 @@ public List search(String searchText, boolean requiresTranslation, b */ int foreignPenalty = Preferences.getPreferenceDefault(Preferences.PENALTY_FOR_FOREIGN_MATCHES, Preferences.PENALTY_FOR_FOREIGN_MATCHES_DEFAULT); - // travel by translation memories for (Map.Entry en : project.getTransMemories().entrySet()) { int penalty = 0; Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey()); @@ -265,11 +279,11 @@ public List search(String searchText, boolean requiresTranslation, b for (ITMXEntry tmen : en.getValue().getEntries()) { checkStopped(stop); if (tmen.getSourceText() == null) { - // Not all TMX entries have a source; in that case there can - // be no meaningful match, so skip. + // Not all TMX entries have a source; in that case there + // can be no meaningful match, so skip the entry. continue; } - if (requiresTranslation && tmen.getTranslationText() == null) { + if (tmen.getTranslationText() == null) { continue; } int tmenPenalty = penalty; @@ -290,7 +304,9 @@ public List search(String searchText, boolean requiresTranslation, b ste.isSourceTranslationFuzzy(), 0); } } - if (separateSegmentMatcher != null) { + if (runSeparateSegmentMatch) { + FindMatches separateSegmentMatcher = new FindMatches(project, segmenter, 1, true, + fuzzyMatchThreshold); // split paragraph even when segmentation disabled, then find // matches for every segment List spaces = new ArrayList<>(); @@ -303,9 +319,10 @@ public List search(String searchText, boolean requiresTranslation, b List ftrans = new ArrayList<>(segments.size()); // multiple segments for (String onesrc : segments) { - // find match for a separate segment - List segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation, - false, stop); + // find match for a separate segment. + // WARN: the 4th argument must be + // `false` to avoid infinite recursion. 
+ List segmentMatch = separateSegmentMatcher.search(onesrc, false, stop, false); if (!segmentMatch.isEmpty() && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) { fsrc.add(segmentMatch.get(0).source); @@ -415,7 +432,7 @@ public void processEntry(EntryKey key, ITMXEntry entry, String tmxName, } // BUGS#1236 - stat display does not use threshold config check - if (applyThreshold && similarityStem < fuzzyMatchThreshold + if (fuzzyMatchThreshold > 0 && similarityStem < fuzzyMatchThreshold && similarityNoStem < fuzzyMatchThreshold && simAdjusted < fuzzyMatchThreshold) { return; } diff --git a/src/org/omegat/gui/matches/FindMatchesThread.java b/src/org/omegat/gui/matches/FindMatchesThread.java index a83b1704f3..029c6c94dd 100644 --- a/src/org/omegat/gui/matches/FindMatchesThread.java +++ b/src/org/omegat/gui/matches/FindMatchesThread.java @@ -7,6 +7,7 @@ 2008 Alex Buloichik 2012 Thomas Cordonnier, Martin Fleurke 2013 Aaron Madlon-Kay + 2024 Hiroshi Miura Home page: https://www.omegat.org/ Support center: https://omegat.org/support @@ -32,17 +33,22 @@ import java.util.List; import java.util.logging.Logger; +import org.omegat.core.Core; import org.omegat.core.data.IProject; import org.omegat.core.data.SourceTextEntry; +import org.omegat.core.events.IStopped; import org.omegat.core.matching.NearString; +import org.omegat.core.segmentation.Segmenter; import org.omegat.core.statistics.FindMatches; import org.omegat.gui.common.EntryInfoSearchThread; import org.omegat.util.OConsts; +import org.omegat.util.Preferences; /** - * Find matches in separate thread then show result in the matches pane. + * Find matches in a separate thread, then show the result in the matches pane. 
* * @author Alex Buloichik (alex73mail@gmail.com) + * @author Hiroshi Miura */ public class FindMatchesThread extends EntryInfoSearchThread> { private static final Logger LOGGER = Logger.getLogger(FindMatchesThread.class.getName()); @@ -52,9 +58,9 @@ public class FindMatchesThread extends EntryInfoSearchThread> { /** * Entry which is processed currently. - * - * If entry in controller was changed, it means user has moved to another entry, and there is no sense to - * continue. + *

+ * If the entry in the controller has changed, it means the user has moved to + * another entry, and there is no point in continuing. */ private final SourceTextEntry processedEntry; @@ -79,12 +85,26 @@ protected List search() throws Exception { long before = System.currentTimeMillis(); try { - FindMatches finder = new FindMatches(project, OConsts.MAX_NEAR_STRINGS, true, false); - List result = finder.search(processedEntry.getSrcText(), true, true, this::isEntryChanged); + List result = finderSearch(project, Core.getSegmenter(), processedEntry.getSrcText(), + this::isEntryChanged, Preferences.getPreferenceDefault( + Preferences.EXT_TMX_FUZZY_MATCH_THRESHOLD, OConsts.FUZZY_MATCH_THRESHOLD)); LOGGER.finer(() -> "Time for find matches: " + (System.currentTimeMillis() - before)); return result; } catch (FindMatches.StoppedException ex) { throw new EntryChangedException(); } } + + /** + * Search matches (static so that tests can call it directly). + * @param project OmegaT project. + * @param srcText source text to look for. + * @param isEntryChanged the search stops and raises StoppedException when it returns true. + * @return result as a list of NearString. + */ + protected static List finderSearch(IProject project, Segmenter segmenter, String srcText, + IStopped isEntryChanged, int threshold) { + FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, threshold); + return finder.search(srcText, true, isEntryChanged); + } } diff --git a/test/data/tmx/penalty-010/segment_1.tmx b/test/data/tmx/penalty-010/segment_1.tmx new file mode 100644 index 0000000000..18e55fa724 --- /dev/null +++ b/test/data/tmx/penalty-010/segment_1.tmx @@ -0,0 +1,16 @@ + + + + +

+ + + + weird behavior + + + 地力の搾取と浪費が現われる。(1) + + + + diff --git a/test/data/tmx/test-multiple-entries.tmx b/test/data/tmx/test-multiple-entries.tmx new file mode 100644 index 0000000000..b2b9008c56 --- /dev/null +++ b/test/data/tmx/test-multiple-entries.tmx @@ -0,0 +1,46 @@ + + + +
+ + + + + Other + + + Altre + + + + + For installation on Linux. + + + Per l’installazioni nant’à i sistemi Linux. + + + + + For installation on other operating systems (such as FreeBSD and Solaris). + + + Per l’installazioni nant’à d’altri sistemi (cum’è FreeBSD è Solaris). + + + + + website/download.html + For installation on Linux. + For installation on other operating systems (such as FreeBSD and Solaris).<br0/> + + Other + + + Altri + + + + diff --git a/test/src/org/omegat/core/statistics/CalcMatchStatisticsTest.java b/test/src/org/omegat/core/statistics/CalcMatchStatisticsTest.java index 5da4dd3ffc..75e95739d8 100644 --- a/test/src/org/omegat/core/statistics/CalcMatchStatisticsTest.java +++ b/test/src/org/omegat/core/statistics/CalcMatchStatisticsTest.java @@ -48,6 +48,7 @@ import org.omegat.core.data.ProtectedPart; import org.omegat.core.data.SourceTextEntry; import org.omegat.core.data.TMXEntry; +import org.omegat.core.segmentation.SRX; import org.omegat.core.segmentation.Segmenter; import org.omegat.filters2.FilterContext; import org.omegat.filters2.IFilter; @@ -68,7 +69,7 @@ public class CalcMatchStatisticsTest { public void testCalcMatchStatics() throws Exception { TestProject project = new TestProject(new ProjectPropertiesTest()); IStatsConsumer callback = new TestStatsConsumer(); - Segmenter segmenter = new Segmenter(Preferences.getSRX()); + Segmenter segmenter = new Segmenter(SRX.getDefault()); CalcMatchStatisticsMock calcMatchStatistics = new CalcMatchStatisticsMock(project, segmenter, callback, 30); calcMatchStatistics.start(); @@ -123,7 +124,7 @@ public void testCalcMatchStatics() throws Exception { Assert.assertEquals("5699", result[7][4]); // change threshold - calcMatchStatistics = new CalcMatchStatisticsMock(project, segmenter, callback, 70); + calcMatchStatistics = new CalcMatchStatisticsMock(project, segmenter, callback, -1); calcMatchStatistics.start(); try { calcMatchStatistics.join(); diff --git 
a/test/src/org/omegat/core/statistics/FindMatchesTest.java b/test/src/org/omegat/core/statistics/FindMatchesTest.java index 46c1349c8c..128be5dc9c 100644 --- a/test/src/org/omegat/core/statistics/FindMatchesTest.java +++ b/test/src/org/omegat/core/statistics/FindMatchesTest.java @@ -3,7 +3,7 @@ with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. - Copyright (C) 2021 Hiroshi Miura + Copyright (C) 2021-2024 Hiroshi Miura Home page: https://www.omegat.org/ Support center: https://omegat.org/support @@ -26,6 +26,8 @@ package org.omegat.core.statistics; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.File; @@ -40,6 +42,7 @@ import org.junit.Before; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.omegat.core.Core; @@ -55,10 +58,13 @@ import org.omegat.core.events.IStopped; import org.omegat.core.matching.NearString; import org.omegat.core.segmentation.Rule; +import org.omegat.core.segmentation.SRX; import org.omegat.core.segmentation.Segmenter; import org.omegat.tokenizer.DefaultTokenizer; import org.omegat.tokenizer.ITokenizer; +import org.omegat.tokenizer.LuceneCJKTokenizer; import org.omegat.tokenizer.LuceneEnglishTokenizer; +import org.omegat.tokenizer.LuceneFrenchTokenizer; import org.omegat.util.Language; import org.omegat.util.Log; import org.omegat.util.OConsts; @@ -71,6 +77,8 @@ public class FindMatchesTest { private static final File TMX_MATCH_EN_CA = new File("test/data/tmx/test-match-stat-en-ca.tmx"); private static final File TMX_EN_US_SR = new File("test/data/tmx/en-US_sr.tmx"); private static final File TMX_EN_US_GB_SR = new File("test/data/tmx/en-US_en-GB_fr_sr.tmx"); + private static final File TMX_SEGMENT = new File("test/data/tmx/penalty-010/segment_1.tmx"); + private static final File TMX_MULTI = new 
File("test/data/tmx/test-multiple-entries.tmx"); private static Path tmpDir; @@ -107,22 +115,22 @@ public void testSegmented() throws Exception { + "han passat prou temps al lloc web per a convertir-se en usuaris bàsics." + " Una comunitat vibrant necessita una entrada regular de nouvinguts que hi participen habitualment" + " i aporten veus noves a les converses.\n"; - FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, false, - true, 30); - List result = finder.search(srcText, true, true, iStopped); + FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, 30); + // search without a separated segment match. + List result = finder.search(srcText, true, iStopped, false); assertEquals(OConsts.MAX_NEAR_STRINGS, result.size()); assertEquals(65, result.get(0).scores[0].score); assertEquals(62, result.get(0).scores[0].scoreNoStem); assertEquals(62, result.get(0).scores[0].adjustedScore); assertEquals(expectFirst, result.get(0).translation); assertEquals(expectNear, result.get(1).translation); - // + // search with a segmented match. 
List spaces = new ArrayList<>(); List brules = new ArrayList<>(); List segments = segmenter.segment(prop.getSourceLanguage(), srcText, spaces, brules); assertEquals(3, segments.size()); - finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, true, false, true, 30); - result = finder.search(srcText, true, true, iStopped); + finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, 30); + result = finder.search(srcText, false, iStopped); assertEquals(OConsts.MAX_NEAR_STRINGS, result.size()); assertEquals("Hit with segmented tmx record", 100, result.get(0).scores[0].score); assertEquals(100, result.get(0).scores[0].scoreNoStem); @@ -163,9 +171,8 @@ public void testSearchRFE1578() throws Exception { IProject project = new TestProject(prop, null, TMX_EN_US_SR, new LuceneEnglishTokenizer(), new DefaultTokenizer(), segmenter); IStopped iStopped = () -> false; - FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, true, false, - true, 30); - List result = finder.search("XXX", true, true, iStopped); + FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, 30); + List result = finder.search("XXX", false, iStopped); // Without the fix, the result has two entries, but it should one. assertEquals(1, result.size()); assertEquals("XXX", result.get(0).source); @@ -202,10 +209,9 @@ public void testSearchRFE1578_2() throws Exception { IProject project = new TestProject(prop, null, TMX_EN_US_GB_SR, new LuceneEnglishTokenizer(), new DefaultTokenizer(), segmenter); IStopped iStopped = () -> false; - FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, true, false, - true, 30); + FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, 30); // Search source "XXx" in en-US - List result = finder.search("XXX", true, true, iStopped); + List result = finder.search("XXX", false, iStopped); // There should be three entries. 
assertEquals(3, result.size()); assertEquals("XXx", result.get(0).source); // should be en-US. @@ -214,6 +220,59 @@ public void testSearchRFE1578_2() throws Exception { assertEquals("ZZZ", result.get(2).translation); // sr } + @Ignore("Should be enalbed when the bug fix proposed.") + @Test + public void testSearchBUGS1251() throws Exception { + ProjectProperties prop = new ProjectProperties(tmpDir.toFile()); + prop.setSourceLanguage("ja"); + prop.setTargetLanguage("fr"); + prop.setSupportDefaultTranslations(true); + prop.setSentenceSegmentingEnabled(false); + Segmenter segmenter = new Segmenter(SRX.getDefault()); + IProject project = new TestProject(prop, null, TMX_SEGMENT, new LuceneCJKTokenizer(), + new LuceneFrenchTokenizer(), segmenter); + Core.setProject(project); + SourceTextEntry ste = project.getAllEntries().get(1); + Language sourceLanguage = prop.getSourceLanguage(); + String srcText = ste.getSrcText(); + List spaces = new ArrayList<>(); + List brules = new ArrayList<>(); + List segments = segmenter.segment(sourceLanguage, srcText, spaces, brules); + assertEquals(2, segments.size()); + IStopped iStopped = () -> false; + FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, false, 30); + List result = finder.search(srcText, false, iStopped); + assertEquals(srcText, result.get(0).source); + assertEquals(1, result.size()); + assertEquals("TM", result.get(0).comesFrom.name()); + assertEquals(90, result.get(0).scores[0].score); + assertEquals("weird behavior", result.get(0).translation); + } + + @Test + public void testSearchMulti() throws Exception { + ProjectProperties prop = new ProjectProperties(tmpDir.toFile()); + prop.setSourceLanguage("en-US"); + prop.setTargetLanguage("co"); + prop.setSupportDefaultTranslations(true); + prop.setSentenceSegmentingEnabled(true); + Segmenter segmenter = new Segmenter(SRX.getDefault()); + IProject project = new TestProject(prop, TMX_MULTI, null, new LuceneEnglishTokenizer(), + new 
DefaultTokenizer(), segmenter); + IStopped iStopped = () -> false; + FindMatches finder = new FindMatches(project, segmenter, OConsts.MAX_NEAR_STRINGS, true, 85); + List result = finder.search("Other", false, iStopped); + assertEquals(3, result.size()); + assertEquals("Other", result.get(0).source); + assertEquals("Altre", result.get(0).translation); // default + assertNull(result.get(0).key); + assertEquals("Altri", result.get(1).translation); // alternative + assertNotNull(result.get(1).key); + assertEquals("website/download.html", result.get(1).key.file); + assertEquals("Other", result.get(2).translation); // source translation + } + + @BeforeClass public static void setUpClass() throws Exception { tmpDir = Files.createTempDirectory("omegat"); @@ -309,6 +368,12 @@ public List getAllEntries() { List ste = new ArrayList<>(); ste.add(new SourceTextEntry(new EntryKey("source.txt", "XXX", null, "", "", null), 1, null, null, Collections.emptyList())); + ste.add(new SourceTextEntry(new EntryKey("source.txt", "地力の搾取と浪費が現われる。(1)", null, "", "", null), + 1, null, null, Collections.emptyList())); + ste.add(new SourceTextEntry(new EntryKey("website/download.html", "Other", "id", + "For installation on Linux.", + "For installation on other operating systems (such as FreeBSD and Solaris).<br0/>", + null), 1, null, "Other", Collections.emptyList())); return ste; } diff --git a/test/src/org/omegat/gui/matches/FindMatchesThreadTest.java b/test/src/org/omegat/gui/matches/FindMatchesThreadTest.java new file mode 100644 index 0000000000..81246276e8 --- /dev/null +++ b/test/src/org/omegat/gui/matches/FindMatchesThreadTest.java @@ -0,0 +1,174 @@ +/******************************************************************************* + OmegaT - Computer Assisted Translation (CAT) tool + with fuzzy matching, translation memory, keyword search, + glossaries, and translation leveraging into updated projects. 
+ + Copyright (C) 2024 Hiroshi Miura + Home page: https://www.omegat.org/ + Support center: https://omegat.org/support + + This file is part of OmegaT. + + OmegaT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OmegaT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + ******************************************************************************/ + +package org.omegat.gui.matches; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.io.FileUtils; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import org.omegat.core.Core; +import org.omegat.core.data.EntryKey; +import org.omegat.core.data.ExternalTMFactory; +import org.omegat.core.data.ExternalTMX; +import org.omegat.core.data.IProject; +import org.omegat.core.data.NotLoadedProject; +import org.omegat.core.data.ProjectProperties; +import org.omegat.core.data.ProjectTMX; +import org.omegat.core.data.SourceTextEntry; +import org.omegat.core.matching.NearString; +import org.omegat.core.segmentation.SRX; +import org.omegat.core.segmentation.Segmenter; +import org.omegat.tokenizer.DefaultTokenizer; +import 
org.omegat.tokenizer.ITokenizer; +import org.omegat.tokenizer.LuceneCJKTokenizer; +import org.omegat.tokenizer.LuceneEnglishTokenizer; +import org.omegat.tokenizer.LuceneFrenchTokenizer; +import org.omegat.util.Language; +import org.omegat.util.Preferences; +import org.omegat.util.TestPreferencesInitializer; + +public class FindMatchesThreadTest { + private static final File TMX_SEGMENT = new File("test/data/tmx/penalty-010/segment_1.tmx"); + private static final String SOURCE_TEXT = "地力の搾取と浪費が現われる。(1)"; + private static Path tmpDir; + + @BeforeClass + public static void setUpClass() throws Exception { + tmpDir = Files.createTempDirectory("omegat"); + assertTrue(tmpDir.toFile().isDirectory()); + } + + @Before + public void setUp() throws Exception { + Core.initializeConsole(new TreeMap<>()); + TestPreferencesInitializer.init(); + Preferences.setPreference(Preferences.EXT_TMX_SHOW_LEVEL2, false); + Preferences.setPreference(Preferences.EXT_TMX_USE_SLASH, false); + Preferences.setPreference(Preferences.EXT_TMX_KEEP_FOREIGN_MATCH, true); + Core.registerTokenizerClass(DefaultTokenizer.class); + Core.registerTokenizerClass(LuceneEnglishTokenizer.class); + } + + @Ignore("Should be enalbed when the bug fix proposed.") + @Test + public void testSearchBUGS1248() throws Exception { + ProjectProperties prop = new ProjectProperties(tmpDir.toFile()); + prop.setSourceLanguage("ja"); + prop.setTargetLanguage("fr"); + prop.setSupportDefaultTranslations(true); + prop.setSentenceSegmentingEnabled(false); + IProject project = new TestProject(prop, TMX_SEGMENT, new LuceneCJKTokenizer(), new LuceneFrenchTokenizer()); + Core.setProject(project); + Segmenter segmenter = new Segmenter(SRX.getDefault()); + List result = FindMatchesThread.finderSearch(project, segmenter, SOURCE_TEXT, () -> false, + 30); + assertEquals(2, result.size()); + assertEquals(SOURCE_TEXT, result.get(0).source); + assertEquals("TM", result.get(0).comesFrom.name()); + assertEquals(90, result.get(0).scores[0].score); 
+ assertEquals("weird behavior", result.get(0).translation); + } + + static class TestProject extends NotLoadedProject implements IProject { + private final ProjectProperties prop; + private final File testTmx; + private final ITokenizer sourceTokenizer; + private final ITokenizer targetTokenizer; + + TestProject(ProjectProperties prop, File testTmx) { + this(prop, testTmx, new LuceneEnglishTokenizer(), new DefaultTokenizer()); + } + + TestProject(ProjectProperties prop, File testTmx, ITokenizer source, ITokenizer target) { + this.prop = prop; + this.testTmx = testTmx; + sourceTokenizer = source; + targetTokenizer = target; + } + + @Override + public ProjectProperties getProjectProperties() { + return prop; + } + + @Override + public List getAllEntries() { + List ste = new ArrayList<>(); + ste.add(new SourceTextEntry(new EntryKey("source.txt", SOURCE_TEXT, null, "", "", null), + 1, null, null, Collections.emptyList())); + return ste; + } + + @Override + public ITokenizer getSourceTokenizer() { + return sourceTokenizer; + }; + + @Override + public ITokenizer getTargetTokenizer() { + return targetTokenizer; + } + + @Override + public Map getOtherTargetLanguageTMs() { + return Collections.emptyMap(); + } + + @Override + public Map getTransMemories() { + Map transMemories = new TreeMap<>(); + try { + ExternalTMX newTMX = ExternalTMFactory.load(testTmx); + transMemories.put(testTmx.getPath(), newTMX); + } catch (Exception ignored) { + } + return Collections.unmodifiableMap(transMemories); + } + } + + @AfterClass + public static void tearDown() throws IOException { + FileUtils.deleteDirectory(tmpDir.toFile()); + assertFalse(tmpDir.toFile().exists()); + } +}