Skip to content

Commit 5162fed

Browse files
committed
DefaultUserAgent: use XMLDocumentBuilder with v.nu's HtmlParser instead of HtmlDocumentBuilder.
This is due to a DOCTYPE processing bug in HtmlDocumentBuilder.
1 parent 472b563 commit 5162fed

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

src/io/sf/carte/doc/agent/net/DefaultUserAgent.java

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,6 @@
2020
import java.nio.charset.StandardCharsets;
2121
import java.util.EnumSet;
2222

23-
import javax.xml.parsers.DocumentBuilder;
24-
2523
import org.w3c.dom.DocumentType;
2624
import org.w3c.dom.Element;
2725
import org.w3c.dom.NodeList;
@@ -39,7 +37,8 @@
3937
import io.sf.carte.doc.style.css.nsac.Parser;
4038
import io.sf.carte.doc.xml.dtd.DefaultEntityResolver;
4139
import io.sf.carte.util.agent.AgentUtil;
42-
import nu.validator.htmlparser.dom.HtmlDocumentBuilder;
40+
import nu.validator.htmlparser.common.XmlViolationPolicy;
41+
import nu.validator.htmlparser.sax.HtmlParser;
4342

4443
/**
4544
* Default User Agent.
@@ -129,15 +128,15 @@ public DOMDocument readURL(URL url) throws IOException, io.sf.carte.doc.Document
129128
}
130129
isHtml = mimeType.equals("text/html");
131130
}
132-
DocumentBuilder builder;
131+
XMLDocumentBuilder builder = new XMLDocumentBuilder(domImpl);
133132
if (isHtml) {
134-
builder = new HtmlDocumentBuilder(domImpl);
135-
((HtmlDocumentBuilder) builder).setIgnoringComments(false);
133+
HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET);
134+
parser.setReportingDoctype(true);
135+
parser.setCommentPolicy(XmlViolationPolicy.ALLOW);
136+
builder.setXMLReader(parser);
136137
} else {
137-
XMLDocumentBuilder xmlbuilder = new XMLDocumentBuilder(domImpl);
138-
xmlbuilder.setIgnoreElementContentWhitespace(true);
139-
xmlbuilder.setEntityResolver(resolver);
140-
builder = xmlbuilder;
138+
builder.setIgnoreElementContentWhitespace(true);
139+
builder.setEntityResolver(resolver);
141140
}
142141
try {
143142
is = openInputStream(con);

0 commit comments

Comments (0)