Skip to content

Commit

Permalink
Merge pull request #6 from Kazephil/topic/kaze/SF610-do-not-split-seg…
Browse files Browse the repository at this point in the history
…ment-on-removed-comment

Update UI string and documentation for removing HTML comments
  • Loading branch information
damien-rembert authored Feb 15, 2023
2 parents 437dcb7 + ecaba15 commit e17720e
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 61 deletions.
12 changes: 6 additions & 6 deletions doc_src/en/App_FileFilters.xml
Original file line number Diff line number Diff line change
Expand Up @@ -630,13 +630,13 @@
</listitem>
</varlistentry>
<varlistentry>
<term>Remove HTML comments in translated file</term>
<term>Remove HTML comments</term>
<listitem>
<para>Comments within an HTML file are generally addressed to
developers.</para>
<para>Comments in an HTML file are generally addressed to
developers. Use this option to remove them. If this option is unchecked, the comments are kept and displayed as tags.</para>

<para>Text in HTML comments (between <literal>&lt;!--</literal>
and <literal>--&gt;</literal>) will not be copied into the
and <literal>--&gt;</literal>) is not copied into the
translated document.</para>
</listitem>
</varlistentry>
Expand All @@ -652,7 +652,7 @@
<term>Remove untranslated strings in the target files</term>
<listitem>
<para>Having untranslated contents in the translated files
sometimes create compatibility issues.</para>
sometimes creates compatibility issues.</para>
</listitem>
</varlistentry>
</variablelist>
Expand All @@ -667,7 +667,7 @@
<term>Remove untranslated strings in the target files</term>
<listitem>
<para>Having untranslated contents in the translated files
sometimes create compatibility issues.</para>
sometimes creates compatibility issues.</para>
</listitem>
</varlistentry>
</variablelist>
Expand Down
2 changes: 1 addition & 1 deletion src/org/omegat/Bundle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1660,7 +1660,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)

HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file
HTML_REMOVE_COMMENTS=Remove HTML comments in translated file
HTML_REMOVE_COMMENTS=Remove HTML comments

HTML_PARAGRAPH_ON=Start a new segment on:
HTML_PARAGRAPH_ON_BR=<&br> (breaks)
Expand Down
2 changes: 1 addition & 1 deletion src/org/omegat/Bundle_pl.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1396,7 +1396,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)

HTML_COMPRESS_WHITESPACE=Compress whitespace in translated document
HTML_REMOVE_COMMENTS=Remove HTML comments in translated document
HTML_REMOVE_COMMENTS=Remove HTML comments

HTML_PARAGRAPH_ON=Start a new paragraph on:
HTML_PARAGRAPH_ON_BR=<&br> (breaks)
Expand Down
2 changes: 1 addition & 1 deletion src/org/omegat/Bundle_us.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,7 @@ HTML_TRANSLATE_VALUE=&value (of form input, including button, submit and reset)
HTML_TRANSLATE_BUTTON_VALUE=value (of button, &submit and reset input)

HTML_COMPRESS_WHITESPACE=Compress whitespace in translated file
HTML_REMOVE_COMMENTS=Remove HTML comments in translated file
HTML_REMOVE_COMMENTS=Remove HTML comments

HTML_PARAGRAPH_ON=Start a new segment on:
HTML_PARAGRAPH_ON_BR=<&br> (breaks)
Expand Down
165 changes: 114 additions & 51 deletions src/org/omegat/filters2/html2/FilterVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio
protected boolean isTextUpForCollection = false;

/** Did the PRE block start (it means we mustn't compress the spaces). */
protected boolean preformatting = false;
protected boolean betweenPreformattingTags = false;

/**
* The list of non-paragraph tags before a chunk of text.
Expand All @@ -118,13 +118,13 @@ public FilterVisitor(HTMLFilter2 htmlfilter, BufferedWriter bufwriter, HTMLOptio
* <ul>
* <li>If another chunk of text follows, they get appended to the
* translatable paragraph,
* <li>Otherwise (paragraph tag follows), they are written out directly.
* <li>Otherwise (e.g. if a paragraph tag follows), they are written out directly.
* </ul>
*/
protected List<Node> followingNodes;

/** The tags behind the shortcuts */
protected List<Tag> sTags;
protected List<Node> sTags;
/** The tag numbers of shortcutized tags */
protected List<Integer> sTagNumbers;
/** The list of all the tag shortcuts */
Expand Down Expand Up @@ -162,9 +162,8 @@ public boolean shouldRecurseChildren() {
@Override
public void visitTag(Tag tag) {

boolean keepIntact = isProtectedTag(tag);

if (keepIntact) {
if (isProtectedTag(tag)) {
if (isTextUpForCollection) {
endup();
} else {
Expand All @@ -180,7 +179,7 @@ public void visitTag(Tag tag) {
handleParagraphTag();
}
if (isPreformattingTag(tag)) {
preformatting = true;
betweenPreformattingTags = true;
}
// Translate attributes of tags if they are not null.
maybeTranslateAttribute(tag, "abbr");
Expand Down Expand Up @@ -287,19 +286,19 @@ public void visitStringNode(Text string) {
recurseSelf = true;
recurseChildren = true;
// nbsp is special case - process it like usual spaces
String trimmedtext = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ').trim();
if (!trimmedtext.isEmpty()) {
String textAsCleanedString = HTMLUtils.entitiesToChars(string.getText()).replace((char) 160, ' ');
if (hasMoreThanJustWhitepaces(textAsCleanedString)) {
// Hack around HTMLParser not being able to handle XHTML
// RFE pending:
// RFE:
// http://sourceforge.net/tracker/index.php?func=detail&aid=1227222&group_id=24399&atid=381402
if (firstcall && PatternConsts.XML_HEADER.matcher(trimmedtext).matches()) {
if (firstcall && PatternConsts.XML_HEADER.matcher(textAsCleanedString.trim()).matches()) {
writeout(string.toHtml());
return;
}

isTextUpForCollection = true;
firstcall = false;
} else if (preformatting) {
} else if (betweenPreformattingTags) {
isTextUpForCollection = true;
}

Expand All @@ -318,18 +317,25 @@ public void visitStringNode(Text string) {
*/
@Override
public void visitRemarkNode(Remark remark) {
recurseSelf = true;
recurseChildren = true;
if (isTextUpForCollection) {
endup();
} else {
writeOutPrecedingNodes();
}
if (!options.getRemoveComments()) {
writeout(remark.toHtml());
if (shouldKeepComments()) {
recurseSelf = true;
recurseChildren = true;
if (betweenPreformattingTags) {
isTextUpForCollection = true;
}

if (isTextUpForCollection) {
queueTranslatable(remark);
} else {
queuePrefix(remark);
}
}
}

/**
 * Tells whether HTML comments are to be kept.
 *
 * @return the negation of {@code options.getRemoveComments()}, i.e.
 *         {@code true} when comment removal is not requested
 */
private boolean shouldKeepComments() {
return !options.getRemoveComments();
}

/**
* Called for each end <code>Tag</code> visited.
*
Expand All @@ -344,7 +350,7 @@ public void visitEndTag(Tag tag) {
endup();
}
if (isPreformattingTag(tag)) {
preformatting = false;
betweenPreformattingTags = false;
}
queuePrefix(tag);
}
Expand Down Expand Up @@ -580,6 +586,8 @@ protected void endup() {
Node node = allNodesInParagraph.get(i);
if (node instanceof Tag) {
writeout("<" + node.getText() + ">");
} else if (node instanceof Remark) {
writeout(node.toHtml());
} else {
writeout(compressWhitespace(node.getText()));
}
Expand All @@ -591,7 +599,9 @@ protected void endup() {
for (int i = firstTagToIncludeFromPreceding; i <= lastTagKeptInFollowing; i++) {
Node node = allNodesInParagraph.get(i);
if (node instanceof Tag) {
shortcut((Tag) node, paragraph);
assignShortcut((Tag) node, paragraph);
} else if (node instanceof Remark) {
assignShortcut((Remark) node, paragraph);
} else { // node instanceof Text
paragraph.append(HTMLUtils.entitiesToChars(node.toHtml()));
}
Expand All @@ -613,7 +623,7 @@ protected void endup() {
// (This changes the layout, therefore it is an option. NB: an alternative implementation is to compress by
// default, and use Core.getFilterMaster().getConfig().isPreserveSpaces() option instead to compress if
// not checked.)
if (!preformatting) {
if (!betweenPreformattingTags) {

spacePrefix = HTMLUtils.getSpacePrefix(uncompressed, options.getCompressWhitespace());
spacePostfix = HTMLUtils.getSpacePostfix(uncompressed, options.getCompressWhitespace());
Expand All @@ -640,7 +650,7 @@ protected void endup() {
// note that this doesn't change < and > of tag shortcuts
translation = HTMLUtils.charsToEntities(translation, filter.getTargetEncoding(), sShortcuts);
// expands tag shortcuts into full-blown tags
translation = unshorcutize(translation);
translation = revertShortcut(translation);
// writing out the paragraph into target file
writeout(spacePrefix);
writeout(translation);
Expand All @@ -651,6 +661,8 @@ protected void endup() {
Node node = allNodesInParagraph.get(i);
if (node instanceof Tag) {
writeout("<" + node.getText() + ">");
} else if (node instanceof Remark) {
writeout(node.toHtml());
} else {
writeout(compressWhitespace(node.getText()));
}
Expand Down Expand Up @@ -678,7 +690,7 @@ private void cleanup() {
/**
* Creates and stores a shortcut for the tag.
*/
private void shortcut(Tag tag, StringBuilder paragraph) {
private void assignShortcut(Tag tag, StringBuilder paragraph) {
StringBuilder result = new StringBuilder();
result.append('<');
int n = -1;
Expand All @@ -687,17 +699,18 @@ private void shortcut(Tag tag, StringBuilder paragraph) {
// trying to lookup for appropriate starting tag
int recursion = 1;
for (int i = sTags.size() - 1; i >= 0; i--) {
Tag othertag = sTags.get(i);
if (othertag.getTagName().equals(tag.getTagName())) {
if (othertag.isEndTag()) {
recursion++;
} else {
recursion--;
if (recursion == 0) {
// we've found a starting tag for this ending one
// !!!
n = sTagNumbers.get(i);
break;
if (sTags.get(i) instanceof Tag) {
Tag othertag = (Tag) sTags.get(i);
if (othertag.getTagName().equals(tag.getTagName())) {
if (othertag.isEndTag()) {
recursion++;
} else {
recursion--;
if (recursion == 0) {
// found starting tag for this endTag
n = sTagNumbers.get(i);
break;
}
}
}
}
Expand Down Expand Up @@ -742,23 +755,51 @@ private void shortcut(Tag tag, StringBuilder paragraph) {
paragraph.append(shortcut);
}

/**
 * Registers a shortcut for an HTML comment (Remark node) and appends that
 * shortcut to the paragraph being built.
 * <p>
 * The shortcut has the form {@code <cN/>}, where N is the current value of
 * the shortcut counter (which is then incremented). The node, its number
 * and the shortcut text are appended to {@code sTags}, {@code sTagNumbers}
 * and {@code sShortcuts} respectively.
 *
 * @param remark
 *            the comment node the shortcut stands for
 * @param paragraph
 *            the paragraph text under construction; receives the shortcut
 */
private void assignShortcut(Remark remark, StringBuilder paragraph) {
    final int shortcutNumber = sNumShortcuts++;
    final String placeholder = "<c" + shortcutNumber + "/>";

    // Record the node, its number and its placeholder side by side.
    sTags.add(remark);
    sTagNumbers.add(shortcutNumber);
    sShortcuts.add(placeholder);

    paragraph.append(placeholder);
}

/**
* Recovers tag shortcuts into full tags.
*/
private String unshorcutize(String str) {
private String revertShortcut(String str) {
for (int i = 0; i < sShortcuts.size(); i++) {
String shortcut = sShortcuts.get(i);
int pos = -1;
while ((pos = str.indexOf(shortcut, pos + 1)) >= 0) {
Tag tag = sTags.get(i);
try {
str = str.substring(0, pos) + "<" + tag.getText() + ">"
+ str.substring(pos + shortcut.length());
} catch (StringIndexOutOfBoundsException sioobe) {
// nothing, string doesn't change
// but prevent endless loop
break;
}
if (sTags.get(i) instanceof Tag) {
Tag tag = (Tag) sTags.get(i);
try {
str = str.substring(0, pos) + "<" + tag.getText() + ">"
+ str.substring(pos + shortcut.length());
} catch (StringIndexOutOfBoundsException sioobe) {
// nothing, string doesn't change
// but prevent endless loop
break;
}
} else if (sTags.get(i) instanceof Remark) {
Remark comment = (Remark) sTags.get(i);
try {
str = str.substring(0, pos) + comment.toHtml()
+ str.substring(pos + shortcut.length());
} catch (StringIndexOutOfBoundsException sioobe) {
// nothing, string doesn't change
// but prevent endless loop
break;
}
}
}
}
return str;
Expand All @@ -773,7 +814,7 @@ private String unshorcutize(String str) {
* Whitespace text is simply added to the queue.
*/
private void queueTranslatable(Text txt) {
if (!txt.toHtml().trim().isEmpty() || preformatting) {
if (hasMoreThanJustWhitepaces(txt.toHtml()) || betweenPreformattingTags) {
translatableNodes.addAll(followingNodes);
followingNodes.clear();
translatableNodes.add(txt);
Expand All @@ -782,6 +823,20 @@ private void queueTranslatable(Text txt) {
}
}

/**
 * Checks whether anything is left of the string once it has been trimmed
 * with {@link String#trim()}.
 *
 * @param string
 *            the text to inspect
 * @return {@code true} if the trimmed string is not empty
 */
private boolean hasMoreThanJustWhitepaces(String string) {
return !string.trim().isEmpty();
}

/**
 * Queues an HTML comment (Remark node) while text is being collected.
 * <p>
 * Outside preformatted content the comment is only appended to
 * {@code followingNodes}. Between preformatting tags, the pending
 * {@code followingNodes} are first moved to {@code translatableNodes} and
 * the comment is then added to {@code translatableNodes} as well.
 *
 * @param remark
 *            the comment node to queue
 */
private void queueTranslatable(Remark remark) {
    if (!betweenPreformattingTags) {
        followingNodes.add(remark);
        return;
    }

    // Preformatted content: flush the pending nodes, then the comment itself.
    translatableNodes.addAll(followingNodes);
    followingNodes.clear();
    translatableNodes.add(remark);
}

/**
* Queues the tag to the translatable paragraph.
* <p>
Expand Down Expand Up @@ -810,20 +865,28 @@ protected void queuePrefix(Tag tag) {
}

/**
* Queues up some text, possibly before a meaningful text. If the text is
* collected now, the tag is queued up as translatable by calling
* {@link #queueTranslatable(Tag)}, otherwise it's collected to a special
* list that is inspected when the translatable text is sent to OmegaT core.
* Queues up some Text node, possibly before more meaningful text.
* The Text node is added to the precedingNodes list.
*/
private void queuePrefix(Text txt) {
// Held back instead of being written immediately; writeOutPrecedingNodes()
// is one place where this list gets written out.
precedingNodes.add(txt);
}

/**
 * Queues up a Remark node (HTML comment), possibly before more meaningful
 * text, by appending it to the {@code precedingNodes} list.
 *
 * @param remark
 *            the comment node to queue
 */
private void queuePrefix(Remark remark) {
precedingNodes.add(remark);
}

/** Saves "precedingNodes" to output stream and cleans the list. */
private void writeOutPrecedingNodes() {
for (Node node : precedingNodes) {
if (node instanceof Tag) {
writeout("<" + node.getText() + ">");
} else if (node instanceof Remark) {
writeout(node.toHtml());
} else {
writeout(compressWhitespace(node.getText()));
}
Expand Down
Loading

0 comments on commit e17720e

Please sign in to comment.