diff --git a/doc_src/en/App_FileFilters.xml b/doc_src/en/App_FileFilters.xml
index 47f73f6fbc..13f34b0071 100644
--- a/doc_src/en/App_FileFilters.xml
+++ b/doc_src/en/App_FileFilters.xml
@@ -630,13 +630,13 @@
- Remove HTML comments in translated file
+ Remove HTML comments
- Comments within an HTML file are generally addressed to
- developers.
+ Comments in an HTML file are generally addressed to
+ developers. Use this option to remove them. If unchecked, the comments are displayed as tags.
Text in HTML comments (between <!--
- and -->) will not be copied into the
+ and -->) is not copied into the
translated document.
@@ -652,7 +652,7 @@
Remove untranslated strings in the target files
Having untranslated contents in the translated files
- sometimes create compatibility issues.
+ sometimes creates compatibility issues.
@@ -667,7 +667,7 @@
Remove untranslated strings in the target files
Having untranslated contents in the translated files
- sometimes create compatibility issues.
+ sometimes creates compatibility issues.
diff --git a/src/org/omegat/Bundle.properties b/src/org/omegat/Bundle.properties
index c0e948c11a..8535ba21d5 100644
--- a/src/org/omegat/Bundle.properties
+++ b/src/org/omegat/Bundle.properties
@@ -1660,7 +1660,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)
HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file
-HTML_REMOVE_COMMENTS=Remove HTML comments in translated file
+HTML_REMOVE_COMMENTS=Remove HTML comments
HTML_PARAGRAPH_ON=Start a new segment on:
HTML_PARAGRAPH_ON_BR=<&br> (breaks)
diff --git a/src/org/omegat/Bundle_pl.properties b/src/org/omegat/Bundle_pl.properties
index 7467d5d2db..dad4882d3c 100644
--- a/src/org/omegat/Bundle_pl.properties
+++ b/src/org/omegat/Bundle_pl.properties
@@ -1396,7 +1396,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)
HTML_COMPRESS_WHITESPACE=Compress whitespace in translated document
-HTML_REMOVE_COMMENTS=Remove HTML comments in translated document
+HTML_REMOVE_COMMENTS=Remove HTML comments
HTML_PARAGRAPH_ON=Start a new paragraph on:
HTML_PARAGRAPH_ON_BR=<&br> (breaks)
diff --git a/src/org/omegat/Bundle_us.properties b/src/org/omegat/Bundle_us.properties
index 881dcaf7e0..a7490a4ea7 100644
--- a/src/org/omegat/Bundle_us.properties
+++ b/src/org/omegat/Bundle_us.properties
@@ -1667,7 +1667,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)
HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file
-HTML_REMOVE_COMMENTS=Remove HTML comments in translated file
+HTML_REMOVE_COMMENTS=Remove HTML comments
HTML_PARAGRAPH_ON=Start a new segment on:
HTML_PARAGRAPH_ON_BR=<&br> (breaks)
diff --git a/src/org/omegat/filters2/html2/FilterVisitor.java b/src/org/omegat/filters2/html2/FilterVisitor.java
index dfc389261b..d2b815fb02 100644
--- a/src/org/omegat/filters2/html2/FilterVisitor.java
+++ b/src/org/omegat/filters2/html2/FilterVisitor.java
@@ -97,7 +97,7 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio
protected boolean isTextUpForCollection = false;
/** Did the PRE block start (it means we mustn't compress the spaces). */
- protected boolean preformatting = false;
+ protected boolean betweenPreformattingTags = false;
/**
* The list of non-paragraph tags before a chunk of text.
@@ -118,13 +118,13 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio
*
* - If another chunk of text follows, they get appended to the
* translatable paragraph,
- *
- Otherwise (paragraph tag follows), they are written out directly.
+ *
- Otherwise (e.g. if a paragraph tag follows), they are written out directly.
*
*/
protected List followingNodes;
/** The tags behind the shortcuts */
- protected List sTags;
+ protected List sTags;
/** The tag numbers of shorcutized tags */
protected List sTagNumbers;
/** The list of all the tag shortcuts */
@@ -162,9 +162,8 @@ public boolean shouldRecurseChildren() {
@Override
public void visitTag(Tag tag) {
- boolean keepIntact = isProtectedTag(tag);
- if (keepIntact) {
+ if (isProtectedTag(tag)) {
if (isTextUpForCollection) {
endup();
} else {
@@ -180,7 +179,7 @@ public void visitTag(Tag tag) {
handleParagraphTag();
}
if (isPreformattingTag(tag)) {
- preformatting = true;
+ betweenPreformattingTags = true;
}
// Translate attributes of tags if they are not null.
maybeTranslateAttribute(tag, "abbr");
@@ -287,19 +286,19 @@ public void visitStringNode(Text string) {
recurseSelf = true;
recurseChildren = true;
// nbsp is special case - process it like usual spaces
- String trimmedtext = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ').trim();
- if (!trimmedtext.isEmpty()) {
+ String textAsCleanedString = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ');
+ if (hasMoreThanJustWhitepaces(textAsCleanedString)) {
// Hack around HTMLParser not being able to handle XHTML
- // RFE pending:
+ // RFE:
// http://sourceforge.net/tracker/index.php?func=detail&aid=1227222&group_id=24399&atid=381402
- if (firstcall && PatternConsts.XML_HEADER.matcher(trimmedtext).matches()) {
+ if (firstcall && PatternConsts.XML_HEADER.matcher(textAsCleanedString.trim()).matches()) {
writeout(string.toHtml());
return;
}
isTextUpForCollection = true;
firstcall = false;
- } else if (preformatting) {
+ } else if (betweenPreformattingTags) {
isTextUpForCollection = true;
}
@@ -318,18 +317,25 @@ public void visitStringNode(Text string) {
*/
@Override
public void visitRemarkNode(Remark remark) {
- recurseSelf = true;
- recurseChildren = true;
- if (isTextUpForCollection) {
- endup();
- } else {
- writeOutPrecedingNodes();
- }
- if (!options.getRemoveComments()) {
- writeout(remark.toHtml());
+ // When comments are to be removed, the node is ignored entirely here:
+ // nothing is queued or written, and endup() is not triggered.
+ if (shouldKeepComments()) {
+ recurseSelf = true;
+ recurseChildren = true;
+ // Between preformatting tags the comment is collected together with the text.
+ if (betweenPreformattingTags) {
+ isTextUpForCollection = true;
+ }
+
+ // Queue the comment with the text being collected, or with the preceding nodes.
+ if (isTextUpForCollection) {
+ queueTranslatable(remark);
+ } else {
+ queuePrefix(remark);
+ }
}
}
+ /** Returns true when the "remove comments" filter option is not set, i.e. comments are kept. */
+ private boolean shouldKeepComments() {
+ return !options.getRemoveComments();
+ }
+
/**
* Called for each end Tag
visited.
*
@@ -344,7 +350,7 @@ public void visitEndTag(Tag tag) {
endup();
}
if (isPreformattingTag(tag)) {
- preformatting = false;
+ betweenPreformattingTags = false;
}
queuePrefix(tag);
}
@@ -580,6 +586,8 @@ protected void endup() {
Node node = allNodesInParagraph.get(i);
if (node instanceof Tag) {
writeout("<" + node.getText() + ">");
+ } else if (node instanceof Remark) {
+ writeout(node.toHtml());
} else {
writeout(compressWhitespace(node.getText()));
}
@@ -591,7 +599,9 @@ protected void endup() {
for (int i = firstTagToIncludeFromPreceding; i <= lastTagKeptInFollowing; i++) {
Node node = allNodesInParagraph.get(i);
if (node instanceof Tag) {
- shortcut((Tag) node, paragraph);
+ assignShortcut((Tag) node, paragraph);
+ } else if (node instanceof Remark) {
+ assignShortcut((Remark) node, paragraph);
} else { // node instanceof Text
paragraph.append(HTMLUtils.entitiesToChars(node.toHtml()));
}
@@ -613,7 +623,7 @@ protected void endup() {
// (This changes the layout, therefore it is an option. NB: an alternative implementation is to compress by
// default, and use Core.getFilterMaster().getConfig().isPreserveSpaces() option instead to compress if
// not checked.)
- if (!preformatting) {
+ if (!betweenPreformattingTags) {
spacePrefix = HTMLUtils.getSpacePrefix(uncompressed, options.getCompressWhitespace());
spacePostfix = HTMLUtils.getSpacePostfix(uncompressed, options.getCompressWhitespace());
@@ -640,7 +650,7 @@ protected void endup() {
// note that this doesn't change < and > of tag shortcuts
translation = HTMLUtils.charsToEntities(translation, filter.getTargetEncoding(), sShortcuts);
// expands tag shortcuts into full-blown tags
- translation = unshorcutize(translation);
+ translation = revertShortcut(translation);
// writing out the paragraph into target file
writeout(spacePrefix);
writeout(translation);
@@ -651,6 +661,8 @@ protected void endup() {
Node node = allNodesInParagraph.get(i);
if (node instanceof Tag) {
writeout("<" + node.getText() + ">");
+ } else if (node instanceof Remark) {
+ writeout(node.toHtml());
} else {
writeout(compressWhitespace(node.getText()));
}
@@ -678,7 +690,7 @@ private void cleanup() {
/**
* Creates and stores a shortcut for the tag.
*/
- private void shortcut(Tag tag, StringBuilder paragraph) {
+ private void assignShortcut(Tag tag, StringBuilder paragraph) {
StringBuilder result = new StringBuilder();
result.append('<');
int n = -1;
@@ -687,17 +699,18 @@ private void shortcut(Tag tag, StringBuilder paragraph) {
// trying to lookup for appropriate starting tag
int recursion = 1;
for (int i = sTags.size() - 1; i >= 0; i--) {
- Tag othertag = sTags.get(i);
- if (othertag.getTagName().equals(tag.getTagName())) {
- if (othertag.isEndTag()) {
- recursion++;
- } else {
- recursion--;
- if (recursion == 0) {
- // we've found a starting tag for this ending one
- // !!!
- n = sTagNumbers.get(i);
- break;
+ if (sTags.get(i) instanceof Tag) {
+ Tag othertag = (Tag) sTags.get(i);
+ if (othertag.getTagName().equals(tag.getTagName())) {
+ if (othertag.isEndTag()) {
+ recursion++;
+ } else {
+ recursion--;
+ if (recursion == 0) {
+ // found starting tag for this endTag
+ n = sTagNumbers.get(i);
+ break;
+ }
}
}
}
@@ -742,23 +755,51 @@ private void shortcut(Tag tag, StringBuilder paragraph) {
paragraph.append(shortcut);
}
+ /**
+ * Creates and stores a shortcut for the comment (Remark node).
+ * The shortcut is a numbered standalone placeholder such as {@code <c0/>}.
+ */
+ private void assignShortcut(Remark remark, StringBuilder paragraph) {
+ int n = sNumShortcuts++;
+ // Must be non-empty and unique: revertShortcut() locates it with indexOf().
+ String shortcut = "<c" + n + "/>";
+ sTags.add(remark);
+ sTagNumbers.add(n);
+ sShortcuts.add(shortcut);
+ paragraph.append(shortcut);
+ }
+
/**
* Recovers tag shortcuts into full tags.
*/
- private String unshorcutize(String str) {
+ private String revertShortcut(String str) {
for (int i = 0; i < sShortcuts.size(); i++) {
String shortcut = sShortcuts.get(i);
int pos = -1;
while ((pos = str.indexOf(shortcut, pos + 1)) >= 0) {
- Tag tag = sTags.get(i);
- try {
- str = str.substring(0, pos) + "<" + tag.getText() + ">"
- + str.substring(pos + shortcut.length());
- } catch (StringIndexOutOfBoundsException sioobe) {
- // nothing, string doesn't change
- // but prevent endless loop
- break;
- }
+ if (sTags.get(i) instanceof Tag) {
+ Tag tag = (Tag) sTags.get(i);
+ try {
+ str = str.substring(0, pos) + "<" + tag.getText() + ">"
+ + str.substring(pos + shortcut.length());
+ } catch (StringIndexOutOfBoundsException sioobe) {
+ // nothing, string doesn't change
+ // but prevent endless loop
+ break;
+ }
+ } else if (sTags.get(i) instanceof Remark) {
+ Remark comment = (Remark) sTags.get(i);
+ try {
+ str = str.substring(0, pos) + comment.toHtml()
+ + str.substring(pos + shortcut.length());
+ } catch (StringIndexOutOfBoundsException sioobe) {
+ // nothing, string doesn't change
+ // but prevent endless loop
+ break;
+ }
+ }
}
}
return str;
@@ -773,7 +814,7 @@ private String unshorcutize(String str) {
* Whitespace text is simply added to the queue.
*/
private void queueTranslatable(Text txt) {
- if (!txt.toHtml().trim().isEmpty() || preformatting) {
+ if (hasMoreThanJustWhitepaces(txt.toHtml()) || betweenPreformattingTags) {
translatableNodes.addAll(followingNodes);
followingNodes.clear();
translatableNodes.add(txt);
@@ -782,6 +823,20 @@ private void queueTranslatable(Text txt) {
}
}
+ /** Returns true if the string is not empty after {@link String#trim()}. */
+ private boolean hasMoreThanJustWhitepaces(String string) {
+ return !string.trim().isEmpty();
+ }
+
+ /**
+ * Queues a comment (Remark node) while text is being collected.
+ * Between preformatting tags, the pending followingNodes are moved into
+ * translatableNodes and the comment is appended after them; otherwise the
+ * comment is only added to followingNodes.
+ */
+ private void queueTranslatable(Remark remark) {
+ if (betweenPreformattingTags) {
+ translatableNodes.addAll(followingNodes);
+ followingNodes.clear();
+ translatableNodes.add(remark);
+ } else {
+ followingNodes.add(remark);
+ }
+ }
+
/**
* Queues the tag to the translatable paragraph.
*
@@ -810,20 +865,28 @@ protected void queuePrefix(Tag tag) {
}
/**
- * Queues up some text, possibly before a meaningful text. If the text is
- * collected now, the tag is queued up as translatable by calling
- * {@link #queueTranslatable(Tag)}, otherwise it's collected to a special
- * list that is inspected when the translatable text is sent to OmegaT core.
+ * Queues up some Text node, possibly before more meaningful text.
+ * The Text node is added to the precedingNodes list.
*/
private void queuePrefix(Text txt) {
precedingNodes.add(txt);
}
+ /**
+ * Queues up some Remark node (HTML comment), possibly before more meaningful
+ * text. The Remark node is added to the precedingNodes list.
+ */
+ private void queuePrefix(Remark remark) {
+ precedingNodes.add(remark);
+ }
+
/** Saves "precedingNodes" to output stream and cleans the list. */
private void writeOutPrecedingNodes() {
for (Node node : precedingNodes) {
if (node instanceof Tag) {
writeout("<" + node.getText() + ">");
+ } else if (node instanceof Remark) {
+ writeout(node.toHtml());
} else {
writeout(compressWhitespace(node.getText()));
}
diff --git a/test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html b/test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html
new file mode 100644
index 0000000000..3d79a73477
--- /dev/null
+++ b/test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html
@@ -0,0 +1,11 @@
+
+
+
+
+ This is the first line.
+
+ This is the second line.
+
+
+
+
\ No newline at end of file
diff --git a/test/src/org/omegat/filters/HTMLFilter2Test.java b/test/src/org/omegat/filters/HTMLFilter2Test.java
index 975dc361a4..9b13d5dae2 100644
--- a/test/src/org/omegat/filters/HTMLFilter2Test.java
+++ b/test/src/org/omegat/filters/HTMLFilter2Test.java
@@ -54,6 +54,17 @@ public void testParse() throws Exception {
assertEquals("This is second line.", entries.get(2));
}
+ /**
+ * Parses the SF610 fixture with the remove-comments option enabled and
+ * expects exactly three entries: "en" and the two text lines.
+ */
+ @Test
+ public void testIgnoreCommentParse() throws Exception {
+ Map options = new HashMap<>();
+ options.put(HTMLOptions.OPTION_REMOVE_COMMENTS, "true");
+ List entries = parse(new HTMLFilter2(), "test/data/filters/html/file-HTMLFilter2-ignored-comments-no-break-SF610.html", options);
+ assertEquals(3, entries.size());
+ assertEquals("en", entries.get(0));
+ assertEquals("This is the first line.", entries.get(1));
+ assertEquals("This is the second line.", entries.get(2));
+ }
+
@Test
public void testParseAllBlockElements() throws Exception {
List entries = parse(new HTMLFilter2(),
@@ -77,7 +88,7 @@ public void testParseRegression() throws Exception {
assertEquals(3, entries.size());
entries = parse(new HTMLFilter2(),
"test/data/filters/html/file-HTMLFilter2-OmegaT.html");
- assertEquals(166, entries.size());
+ assertEquals(165, entries.size());
}
@Test