diff --git a/doc_src/en/App_FileFilters.xml b/doc_src/en/App_FileFilters.xml index 47f73f6fbc..13f34b0071 100644 --- a/doc_src/en/App_FileFilters.xml +++ b/doc_src/en/App_FileFilters.xml @@ -630,13 +630,13 @@ - Remove HTML comments in translated file + Remove HTML comments - Comments within an HTML file are generally addressed to - developers. + Comments in an HTML file are generally addressed to + developers. Use this option to remove them. If unchecked, the comments are displayed as tags. Text in HTML comments (between <!-- - and -->) will not be copied into the + and -->) is not copied into the translated document. @@ -652,7 +652,7 @@ Remove untranslated strings in the target files Having untranslated contents in the translated files - sometimes create compatibility issues. + sometimes creates compatibility issues. @@ -667,7 +667,7 @@ Remove untranslated strings in the target files Having untranslated contents in the translated files - sometimes create compatibility issues. + sometimes creates compatibility issues. 
diff --git a/src/org/omegat/Bundle.properties b/src/org/omegat/Bundle.properties index c0e948c11a..8535ba21d5 100644 --- a/src/org/omegat/Bundle.properties +++ b/src/org/omegat/Bundle.properties @@ -1660,7 +1660,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset) HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input) HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file -HTML_REMOVE_COMMENTS=Remove HTML comments in translated file +HTML_REMOVE_COMMENTS=Remove HTML comments HTML_PARAGRAPH_ON=Start a new segment on: HTML_PARAGRAPH_ON_BR=<&br> (breaks) diff --git a/src/org/omegat/Bundle_pl.properties b/src/org/omegat/Bundle_pl.properties index 7467d5d2db..dad4882d3c 100644 --- a/src/org/omegat/Bundle_pl.properties +++ b/src/org/omegat/Bundle_pl.properties @@ -1396,7 +1396,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset) HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input) HTML_COMPRESS_WHITESPACE=Compress whitespace in translated document -HTML_REMOVE_COMMENTS=Remove HTML comments in translated document +HTML_REMOVE_COMMENTS=Remove HTML comments HTML_PARAGRAPH_ON=Start a new paragraph on: HTML_PARAGRAPH_ON_BR=<&br> (breaks) diff --git a/src/org/omegat/Bundle_us.properties b/src/org/omegat/Bundle_us.properties index 881dcaf7e0..a7490a4ea7 100644 --- a/src/org/omegat/Bundle_us.properties +++ b/src/org/omegat/Bundle_us.properties @@ -1667,7 +1667,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset) HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input) HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file -HTML_REMOVE_COMMENTS=Remove HTML comments in translated file +HTML_REMOVE_COMMENTS=Remove HTML comments HTML_PARAGRAPH_ON=Start a new segment on: HTML_PARAGRAPH_ON_BR=<&br> (breaks) diff --git a/src/org/omegat/filters2/html2/FilterVisitor.java b/src/org/omegat/filters2/html2/FilterVisitor.java index 
dfc389261b..d2b815fb02 100644 --- a/src/org/omegat/filters2/html2/FilterVisitor.java +++ b/src/org/omegat/filters2/html2/FilterVisitor.java @@ -97,7 +97,7 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio protected boolean isTextUpForCollection = false; /** Did the PRE block start (it means we mustn't compress the spaces). */ - protected boolean preformatting = false; + protected boolean betweenPreformattingTags = false; /** * The list of non-paragraph tags before a chunk of text. @@ -118,13 +118,13 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio * */ protected List followingNodes; /** The tags behind the shortcuts */ - protected List sTags; + protected List sTags; /** The tag numbers of shorcutized tags */ protected List sTagNumbers; /** The list of all the tag shortcuts */ @@ -162,9 +162,8 @@ public boolean shouldRecurseChildren() { @Override public void visitTag(Tag tag) { - boolean keepIntact = isProtectedTag(tag); - if (keepIntact) { + if (isProtectedTag(tag)) { if (isTextUpForCollection) { endup(); } else { @@ -180,7 +179,7 @@ public void visitTag(Tag tag) { handleParagraphTag(); } if (isPreformattingTag(tag)) { - preformatting = true; + betweenPreformattingTags = true; } // Translate attributes of tags if they are not null. 
maybeTranslateAttribute(tag, "abbr"); @@ -287,19 +286,19 @@ public void visitStringNode(Text string) { recurseSelf = true; recurseChildren = true; // nbsp is special case - process it like usual spaces - String trimmedtext = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ').trim(); - if (!trimmedtext.isEmpty()) { + String textAsCleanedString = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' '); + if (hasMoreThanJustWhitepaces(textAsCleanedString)) { // Hack around HTMLParser not being able to handle XHTML - // RFE pending: + // RFE: // http://sourceforge.net/tracker/index.php?func=detail&aid=1227222&group_id=24399&atid=381402 - if (firstcall && PatternConsts.XML_HEADER.matcher(trimmedtext).matches()) { + if (firstcall && PatternConsts.XML_HEADER.matcher(textAsCleanedString.trim()).matches()) { writeout(string.toHtml()); return; } isTextUpForCollection = true; firstcall = false; - } else if (preformatting) { + } else if (betweenPreformattingTags) { isTextUpForCollection = true; } @@ -318,18 +317,25 @@ public void visitStringNode(Text string) { */ @Override public void visitRemarkNode(Remark remark) { - recurseSelf = true; - recurseChildren = true; - if (isTextUpForCollection) { - endup(); - } else { - writeOutPrecedingNodes(); - } - if (!options.getRemoveComments()) { - writeout(remark.toHtml()); + if (shouldKeepComments()) { + recurseSelf = true; + recurseChildren = true; + if (betweenPreformattingTags) { + isTextUpForCollection = true; + } + + if (isTextUpForCollection) { + queueTranslatable(remark); + } else { + queuePrefix(remark); + } } } + private boolean shouldKeepComments() { + return !options.getRemoveComments(); + } + /** * Called for each end Tag visited. 
* @@ -344,7 +350,7 @@ public void visitEndTag(Tag tag) { endup(); } if (isPreformattingTag(tag)) { - preformatting = false; + betweenPreformattingTags = false; } queuePrefix(tag); } @@ -580,6 +586,8 @@ protected void endup() { Node node = allNodesInParagraph.get(i); if (node instanceof Tag) { writeout("<" + node.getText() + ">"); + } else if (node instanceof Remark) { + writeout(node.toHtml()); } else { writeout(compressWhitespace(node.getText())); } @@ -591,7 +599,9 @@ protected void endup() { for (int i = firstTagToIncludeFromPreceding; i <= lastTagKeptInFollowing; i++) { Node node = allNodesInParagraph.get(i); if (node instanceof Tag) { - shortcut((Tag) node, paragraph); + assignShortcut((Tag) node, paragraph); + } else if (node instanceof Remark) { + assignShortcut((Remark) node, paragraph); } else { // node instanceof Text paragraph.append(HTMLUtils.entitiesToChars(node.toHtml())); } @@ -613,7 +623,7 @@ protected void endup() { // (This changes the layout, therefore it is an option. NB: an alternative implementation is to compress by // default, and use Core.getFilterMaster().getConfig().isPreserveSpaces() option instead to compress if // not checked.) 
- if (!preformatting) { + if (!betweenPreformattingTags) { spacePrefix = HTMLUtils.getSpacePrefix(uncompressed, options.getCompressWhitespace()); spacePostfix = HTMLUtils.getSpacePostfix(uncompressed, options.getCompressWhitespace()); @@ -640,7 +650,7 @@ protected void endup() { // note that this doesn't change < and > of tag shortcuts translation = HTMLUtils.charsToEntities(translation, filter.getTargetEncoding(), sShortcuts); // expands tag shortcuts into full-blown tags - translation = unshorcutize(translation); + translation = revertShortcut(translation); // writing out the paragraph into target file writeout(spacePrefix); writeout(translation); @@ -651,6 +661,8 @@ protected void endup() { Node node = allNodesInParagraph.get(i); if (node instanceof Tag) { writeout("<" + node.getText() + ">"); + } else if (node instanceof Remark) { + writeout(node.toHtml()); } else { writeout(compressWhitespace(node.getText())); } @@ -678,7 +690,7 @@ private void cleanup() { /** * Creates and stores a shortcut for the tag. */ - private void shortcut(Tag tag, StringBuilder paragraph) { + private void assignShortcut(Tag tag, StringBuilder paragraph) { StringBuilder result = new StringBuilder(); result.append('<'); int n = -1; @@ -687,17 +699,18 @@ private void shortcut(Tag tag, StringBuilder paragraph) { // trying to lookup for appropriate starting tag int recursion = 1; for (int i = sTags.size() - 1; i >= 0; i--) { - Tag othertag = sTags.get(i); - if (othertag.getTagName().equals(tag.getTagName())) { - if (othertag.isEndTag()) { - recursion++; - } else { - recursion--; - if (recursion == 0) { - // we've found a starting tag for this ending one - // !!! 
- n = sTagNumbers.get(i); - break; + if (sTags.get(i) instanceof Tag) { + Tag othertag = (Tag) sTags.get(i); + if (othertag.getTagName().equals(tag.getTagName())) { + if (othertag.isEndTag()) { + recursion++; + } else { + recursion--; + if (recursion == 0) { + // found starting tag for this endTag + n = sTagNumbers.get(i); + break; + } } } } @@ -742,23 +755,51 @@ private void shortcut(Tag tag, StringBuilder paragraph) { paragraph.append(shortcut); } + /** + * Creates and stores a shortcut for the comment (Remark node). + */ + private void assignShortcut(Remark remark, StringBuilder paragraph) { + StringBuilder result = new StringBuilder(); + int n = sNumShortcuts++; + result.append(""); + String shortcut = result.toString(); + sTags.add(remark); + sTagNumbers.add(n); + sShortcuts.add(shortcut); + paragraph.append(shortcut); + } + /** * Recovers tag shortcuts into full tags. */ - private String unshorcutize(String str) { + private String revertShortcut(String str) { for (int i = 0; i < sShortcuts.size(); i++) { String shortcut = sShortcuts.get(i); int pos = -1; while ((pos = str.indexOf(shortcut, pos + 1)) >= 0) { - Tag tag = sTags.get(i); - try { - str = str.substring(0, pos) + "<" + tag.getText() + ">" - + str.substring(pos + shortcut.length()); - } catch (StringIndexOutOfBoundsException sioobe) { - // nothing, string doesn't change - // but prevent endless loop - break; - } + if (sTags.get(i) instanceof Tag) { + Tag tag = (Tag) sTags.get(i); + try { + str = str.substring(0, pos) + "<" + tag.getText() + ">" + + str.substring(pos + shortcut.length()); + } catch (StringIndexOutOfBoundsException sioobe) { + // nothing, string doesn't change + // but prevent endless loop + break; + } + } else if (sTags.get(i) instanceof Remark) { + Remark comment = (Remark) sTags.get(i); + try { + str = str.substring(0, pos) + comment.toHtml() + + str.substring(pos + shortcut.length()); + } catch (StringIndexOutOfBoundsException sioobe) { + // nothing, string doesn't change + // but 
prevent endless loop + break; + } + } } } return str; @@ -773,7 +814,7 @@ private String unshorcutize(String str) { * Whitespace text is simply added to the queue. */ private void queueTranslatable(Text txt) { - if (!txt.toHtml().trim().isEmpty() || preformatting) { + if (hasMoreThanJustWhitepaces(txt.toHtml()) || betweenPreformattingTags) { translatableNodes.addAll(followingNodes); followingNodes.clear(); translatableNodes.add(txt); @@ -782,6 +823,20 @@ private void queueTranslatable(Text txt) { } } + private boolean hasMoreThanJustWhitepaces(String string) { + return !string.trim().isEmpty(); + } + + private void queueTranslatable(Remark remark) { + if (betweenPreformattingTags) { + translatableNodes.addAll(followingNodes); + followingNodes.clear(); + translatableNodes.add(remark); + } else { + followingNodes.add(remark); + } + } + /** * Queues the tag to the translatable paragraph. *

@@ -810,20 +865,28 @@ protected void queuePrefix(Tag tag) { } /** - * Queues up some text, possibly before a meaningful text. If the text is - * collected now, the tag is queued up as translatable by calling - * {@link #queueTranslatable(Tag)}, otherwise it's collected to a special - * list that is inspected when the translatable text is sent to OmegaT core. + * Queues up some Text node, possibly before more meaningful text. + * The Text node is added to the precedingNodes list. */ private void queuePrefix(Text txt) { precedingNodes.add(txt); } + /** + * Queues up some Remark node (HTML comment), possibly before more meaningful + * text. The Remark node is added to the precedingNodes list. + */ + private void queuePrefix(Remark remark) { + precedingNodes.add(remark); + } + /** Saves "precedingNodes" to output stream and cleans the list. */ private void writeOutPrecedingNodes() { for (Node node : precedingNodes) { if (node instanceof Tag) { writeout("<" + node.getText() + ">"); + } else if (node instanceof Remark) { + writeout(node.toHtml()); } else { writeout(compressWhitespace(node.getText())); } diff --git a/test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html b/test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html new file mode 100644 index 0000000000..3d79a73477 --- /dev/null +++ b/test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html @@ -0,0 +1,11 @@ + + + +

+ This is the first line. +

+ This is the second line. +

+ + + \ No newline at end of file diff --git a/test/src/org/omegat/filters/HTMLFilter2Test.java b/test/src/org/omegat/filters/HTMLFilter2Test.java index 975dc361a4..9b13d5dae2 100644 --- a/test/src/org/omegat/filters/HTMLFilter2Test.java +++ b/test/src/org/omegat/filters/HTMLFilter2Test.java @@ -54,6 +54,17 @@ public void testParse() throws Exception { assertEquals("This is second line.", entries.get(2)); } + @Test + public void testIgnoreCommentParse() throws Exception { + Map options = new HashMap<>(); + options.put(HTMLOptions.OPTION_REMOVE_COMMENTS, "true"); + List entries = parse(new HTMLFilter2(), "test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html", options); + assertEquals(3, entries.size()); + assertEquals("en", entries.get(0)); + assertEquals("This is the first line.", entries.get(1)); + assertEquals("This is the second line.", entries.get(2)); + } + @Test public void testParseAllBlockElements() throws Exception { List entries = parse(new HTMLFilter2(), @@ -77,7 +88,7 @@ public void testParseRegression() throws Exception { assertEquals(3, entries.size()); entries = parse(new HTMLFilter2(), "test/data/filters/html/file-HTMLFilter2-OmegaT.html"); - assertEquals(166, entries.size()); + assertEquals(165, entries.size()); } @Test