Merge pull request #6 from Kazephil/topic/kaze/SF610-do-not-split-segment-on-removed-comment

Update UI string and documentation for removing HTML comments
omegat-sonar · Feb 15, 2023 · e17720e · e17720e
2 parents 437dcb7 + ecaba15
commit e17720e
Show file tree

Hide file tree

Showing 7 changed files with 146 additions and 61 deletions.
diff --git a/doc_src/en/App_FileFilters.xml b/doc_src/en/App_FileFilters.xml
@@ -630,13 +630,13 @@
 			  </listitem>
 			</varlistentry>
 			<varlistentry>
-			  <term>Remove HTML comments in translated file</term>
+			  <term>Remove HTML comments</term>
 			  <listitem>
-				<para>Comments within an HTML file are generally addressed to
-				developers.</para>
+				<para>Comments in an HTML file are generally addressed to
+				developers. Use this option to remove them. If unchecked, the comments are displayed as tags.</para>
 
 				<para>Text in HTML comments (between <literal>&lt;!--</literal>
-				and <literal>--&gt;</literal>) will not be copied into the
+				and <literal>--&gt;</literal>) are not copied into the
 				translated document.</para>
 			  </listitem>
 			</varlistentry>
@@ -652,7 +652,7 @@
 			  <term>Remove untranslated strings in the target files</term>
 			  <listitem>
 				<para>Having untranslated contents in the translated files
-				sometimes create compatibility issues.</para>
+				sometimes creates compatibility issues.</para>
 			  </listitem>
 			</varlistentry>
 		  </variablelist>
@@ -667,7 +667,7 @@
 			  <term>Remove untranslated strings in the target files</term>
 			  <listitem>
 				  <para>Having untranslated contents in the translated files
-				  sometimes create compatibility issues.</para>
+				  sometimes creates compatibility issues.</para>
 			  </listitem>
 			</varlistentry>
 		  </variablelist>

diff --git a/src/org/omegat/Bundle.properties b/src/org/omegat/Bundle.properties
@@ -1660,7 +1660,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
 HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)
 
 HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file
-HTML_REMOVE_COMMENTS=Remove HTML comments in translated file
+HTML_REMOVE_COMMENTS=Remove HTML comments
 
 HTML_PARAGRAPH_ON=Start a new segment on:
 HTML_PARAGRAPH_ON_BR=<&br> (breaks)

diff --git a/src/org/omegat/Bundle_pl.properties b/src/org/omegat/Bundle_pl.properties
@@ -1396,7 +1396,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
 HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)
 
 HTML_COMPRESS_WHITESPACE=Compress whitespace in translated document
-HTML_REMOVE_COMMENTS=Remove HTML comments in translated document
+HTML_REMOVE_COMMENTS=Remove HTML comments
 
 HTML_PARAGRAPH_ON=Start a new paragraph on:
 HTML_PARAGRAPH_ON_BR=<&br> (breaks)

diff --git a/src/org/omegat/Bundle_us.properties b/src/org/omegat/Bundle_us.properties
@@ -1667,7 +1667,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
 HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)
 
 HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file
-HTML_REMOVE_COMMENTS=Remove HTML comments in translated file
+HTML_REMOVE_COMMENTS=Remove HTML comments
 
 HTML_PARAGRAPH_ON=Start a new segment on:
 HTML_PARAGRAPH_ON_BR=<&br> (breaks)

diff --git a/src/org/omegat/filters2/html2/FilterVisitor.java b/src/org/omegat/filters2/html2/FilterVisitor.java
@@ -97,7 +97,7 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio
     protected boolean isTextUpForCollection = false;
 
     /** Did the PRE block start (it means we mustn't compress the spaces). */
-    protected boolean preformatting = false;
+    protected boolean betweenPreformattingTags = false;
 
     /**
      * The list of non-paragraph tags before a chunk of text.
@@ -118,13 +118,13 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio
      * <ul>
      * <li>If another chunk of text follows, they get appended to the
      * translatable paragraph,
-     * <li>Otherwise (paragraph tag follows), they are written out directly.
+     * <li>Otherwise (eg if a paragraph tag follows), they are written out directly.
      * </ul>
      */
     protected List<Node> followingNodes;
 
     /** The tags behind the shortcuts */
-    protected List<Tag> sTags;
+    protected List<Node> sTags;
     /** The tag numbers of shorcutized tags */
     protected List<Integer> sTagNumbers;
     /** The list of all the tag shortcuts */
@@ -162,9 +162,8 @@ public boolean shouldRecurseChildren() {
     @Override
     public void visitTag(Tag tag) {
 
-        boolean keepIntact = isProtectedTag(tag);
 
-        if (keepIntact) {
+        if (isProtectedTag(tag)) {
             if (isTextUpForCollection) {
                 endup();
             } else {
@@ -180,7 +179,7 @@ public void visitTag(Tag tag) {
                 handleParagraphTag();
             }
             if (isPreformattingTag(tag)) {
-                preformatting = true;
+                betweenPreformattingTags = true;
             }
             // Translate attributes of tags if they are not null.
             maybeTranslateAttribute(tag, "abbr");
@@ -287,19 +286,19 @@ public void visitStringNode(Text string) {
         recurseSelf = true;
         recurseChildren = true;
         // nbsp is special case - process it like usual spaces
-        String trimmedtext = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ').trim();
-        if (!trimmedtext.isEmpty()) {
+        String textAsCleanedString = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ');
+        if (hasMoreThanJustWhitepaces(textAsCleanedString)) {
             // Hack around HTMLParser not being able to handle XHTML
-            // RFE pending:
+            // RFE:
             // http://sourceforge.net/tracker/index.php?func=detail&aid=1227222&group_id=24399&atid=381402
-            if (firstcall && PatternConsts.XML_HEADER.matcher(trimmedtext).matches()) {
+            if (firstcall && PatternConsts.XML_HEADER.matcher(textAsCleanedString.trim()).matches()) {
                 writeout(string.toHtml());
                 return;
             }
 
             isTextUpForCollection = true;
             firstcall = false;
-        } else if (preformatting) {
+        } else if (betweenPreformattingTags) {
             isTextUpForCollection = true;
         }
 
@@ -318,18 +317,25 @@ public void visitStringNode(Text string) {
      */
     @Override
     public void visitRemarkNode(Remark remark) {
-        recurseSelf = true;
-        recurseChildren = true;
-        if (isTextUpForCollection) {
-            endup();
-        } else {
-            writeOutPrecedingNodes();
-        }
-        if (!options.getRemoveComments()) {
-            writeout(remark.toHtml());
+        if (shouldKeepComments()) {
+            recurseSelf = true;
+            recurseChildren = true;
+            if (betweenPreformattingTags) {
+                isTextUpForCollection = true;
+            }
+
+            if (isTextUpForCollection) {
+                queueTranslatable(remark);
+            } else {
+                queuePrefix(remark);
+            }
         }
     }
 
+    private boolean shouldKeepComments() {
+        return !options.getRemoveComments();
+    }
+
     /**
      * Called for each end <code>Tag</code> visited.
      *
@@ -344,7 +350,7 @@ public void visitEndTag(Tag tag) {
             endup();
         }
         if (isPreformattingTag(tag)) {
-            preformatting = false;
+            betweenPreformattingTags = false;
         }
         queuePrefix(tag);
     }
@@ -580,6 +586,8 @@ protected void endup() {
             Node node = allNodesInParagraph.get(i);
             if (node instanceof Tag) {
                 writeout("<" + node.getText() + ">");
+            } else if (node instanceof Remark) {
+                writeout(node.toHtml());
             } else {
                 writeout(compressWhitespace(node.getText()));
             }
@@ -591,7 +599,9 @@ protected void endup() {
         for (int i = firstTagToIncludeFromPreceding; i <= lastTagKeptInFollowing; i++) {
             Node node = allNodesInParagraph.get(i);
             if (node instanceof Tag) {
-                shortcut((Tag) node, paragraph);
+                assignShortcut((Tag) node, paragraph);
+            } else if (node instanceof Remark) {
+                assignShortcut((Remark) node, paragraph);
             } else { // node instanceof Text
                 paragraph.append(HTMLUtils.entitiesToChars(node.toHtml()));
             }
@@ -613,7 +623,7 @@ protected void endup() {
         // (This changes the layout, therefore it is an option. NB: an alternative implementation is to compress by
         // default, and use Core.getFilterMaster().getConfig().isPreserveSpaces() option instead to compress if
         // not checked.)
-        if (!preformatting) {
+        if (!betweenPreformattingTags) {
 
             spacePrefix = HTMLUtils.getSpacePrefix(uncompressed, options.getCompressWhitespace());
             spacePostfix = HTMLUtils.getSpacePostfix(uncompressed, options.getCompressWhitespace());
@@ -640,7 +650,7 @@ protected void endup() {
         // note that this doesn't change < and > of tag shortcuts
         translation = HTMLUtils.charsToEntities(translation, filter.getTargetEncoding(), sShortcuts);
         // expands tag shortcuts into full-blown tags
-        translation = unshorcutize(translation);
+        translation = revertShortcut(translation);
         // writing out the paragraph into target file
         writeout(spacePrefix);
         writeout(translation);
@@ -651,6 +661,8 @@ protected void endup() {
             Node node = allNodesInParagraph.get(i);
             if (node instanceof Tag) {
                 writeout("<" + node.getText() + ">");
+            } else if (node instanceof Remark) {
+                writeout(node.toHtml());
             } else {
                 writeout(compressWhitespace(node.getText()));
             }
@@ -678,7 +690,7 @@ private void cleanup() {
     /**
      * Creates and stores a shortcut for the tag.
      */
-    private void shortcut(Tag tag, StringBuilder paragraph) {
+    private void assignShortcut(Tag tag, StringBuilder paragraph) {
         StringBuilder result = new StringBuilder();
         result.append('<');
         int n = -1;
@@ -687,17 +699,18 @@ private void shortcut(Tag tag, StringBuilder paragraph) {
             // trying to lookup for appropriate starting tag
             int recursion = 1;
             for (int i = sTags.size() - 1; i >= 0; i--) {
-                Tag othertag = sTags.get(i);
-                if (othertag.getTagName().equals(tag.getTagName())) {
-                    if (othertag.isEndTag()) {
-                        recursion++;
-                    } else {
-                        recursion--;
-                        if (recursion == 0) {
-                            // we've found a starting tag for this ending one
-                            // !!!
-                            n = sTagNumbers.get(i);
-                            break;
+                if (sTags.get(i) instanceof Tag) {
+                    Tag othertag = (Tag) sTags.get(i);
+                    if (othertag.getTagName().equals(tag.getTagName())) {
+                        if (othertag.isEndTag()) {
+                            recursion++;
+                        } else {
+                            recursion--;
+                            if (recursion == 0) {
+                                // found starting tag for this endTag
+                                n = sTagNumbers.get(i);
+                                break;
+                            }
                         }
                     }
                 }
@@ -742,23 +755,51 @@ private void shortcut(Tag tag, StringBuilder paragraph) {
         paragraph.append(shortcut);
     }
 
+    /**
+     * Creates and stores a shortcut for the comment (Remark node).
+     */
+    private void assignShortcut(Remark remark, StringBuilder paragraph) {
+        StringBuilder result = new StringBuilder();
+        int n = sNumShortcuts++;
+        result.append("<c");
+        result.append(n);
+        result.append("/>");
+        String shortcut = result.toString();
+        sTags.add(remark);
+        sTagNumbers.add(n);
+        sShortcuts.add(shortcut);
+        paragraph.append(shortcut);
+    }
+
     /**
      * Recovers tag shortcuts into full tags.
      */
-    private String unshorcutize(String str) {
+    private String revertShortcut(String str) {
         for (int i = 0; i < sShortcuts.size(); i++) {
             String shortcut = sShortcuts.get(i);
             int pos = -1;
             while ((pos = str.indexOf(shortcut, pos + 1)) >= 0) {
-                Tag tag = sTags.get(i);
-                try {
-                    str = str.substring(0, pos) + "<" + tag.getText() + ">"
-                            + str.substring(pos + shortcut.length());
-                } catch (StringIndexOutOfBoundsException sioobe) {
-                    // nothing, string doesn't change
-                    // but prevent endless loop
-                    break;
-                }
+                if (sTags.get(i) instanceof Tag) {
+                    Tag tag = (Tag) sTags.get(i);
+                    try {
+                        str = str.substring(0, pos) + "<" + tag.getText() + ">"
+                                + str.substring(pos + shortcut.length());
+                    } catch (StringIndexOutOfBoundsException sioobe) {
+                        // nothing, string doesn't change
+                        // but prevent endless loop
+                        break;
+                    }
+                } else if (sTags.get(i) instanceof Remark) {
+                     Remark comment = (Remark) sTags.get(i);
+                     try {
+                         str = str.substring(0, pos) + comment.toHtml()
+                                 + str.substring(pos + shortcut.length());
+                     } catch (StringIndexOutOfBoundsException sioobe) {
+                         // nothing, string doesn't change
+                         // but prevent endless loop
+                         break;
+                     }
+                 }
             }
         }
         return str;
@@ -773,7 +814,7 @@ private String unshorcutize(String str) {
      * Whitespace text is simply added to the queue.
      */
     private void queueTranslatable(Text txt) {
-        if (!txt.toHtml().trim().isEmpty() || preformatting) {
+        if (hasMoreThanJustWhitepaces(txt.toHtml()) || betweenPreformattingTags) {
             translatableNodes.addAll(followingNodes);
             followingNodes.clear();
             translatableNodes.add(txt);
@@ -782,6 +823,20 @@ private void queueTranslatable(Text txt) {
         }
     }
 
+    private boolean hasMoreThanJustWhitepaces(String string) {
+        return !string.trim().isEmpty();
+    }
+
+    private void queueTranslatable(Remark remark) {
+        if (betweenPreformattingTags) {
+            translatableNodes.addAll(followingNodes);
+            followingNodes.clear();
+            translatableNodes.add(remark);
+        } else {
+            followingNodes.add(remark);
+        }
+    }
+
     /**
      * Queues the tag to the translatable paragraph.
      * <p>
@@ -810,20 +865,28 @@ protected void queuePrefix(Tag tag) {
     }
 
     /**
-     * Queues up some text, possibly before a meaningful text. If the text is
-     * collected now, the tag is queued up as translatable by calling
-     * {@link #queueTranslatable(Tag)}, otherwise it's collected to a special
-     * list that is inspected when the translatable text is sent to OmegaT core.
+     * Queues up some Text node, possibly before more meaningful text.
+     * The Text node is added to the precedingNodes list.
      */
     private void queuePrefix(Text txt) {
         precedingNodes.add(txt);
     }
 
+    /**
+     * Queues up some Remark node (HTML comment), possibly before more meaningful
+     * text. The Remark node is added to the precedingNodes list.
+     */
+    private void queuePrefix(Remark remark) {
+        precedingNodes.add(remark);
+    }
+
     /** Saves "precedingNodes" to output stream and cleans the list. */
     private void writeOutPrecedingNodes() {
         for (Node node : precedingNodes) {
             if (node instanceof Tag) {
                 writeout("<" + node.getText() + ">");
+            } else if (node instanceof Remark) {
+                writeout(node.toHtml());
             } else {
                 writeout(compressWhitespace(node.getText()));
             }