|
20 | 20 | import java.nio.charset.StandardCharsets;
|
21 | 21 | import java.util.EnumSet;
|
22 | 22 |
|
23 |
| -import javax.xml.parsers.DocumentBuilder; |
24 |
| - |
25 | 23 | import org.w3c.dom.DocumentType;
|
26 | 24 | import org.w3c.dom.Element;
|
27 | 25 | import org.w3c.dom.NodeList;
|
|
39 | 37 | import io.sf.carte.doc.style.css.nsac.Parser;
|
40 | 38 | import io.sf.carte.doc.xml.dtd.DefaultEntityResolver;
|
41 | 39 | import io.sf.carte.util.agent.AgentUtil;
|
42 |
| -import nu.validator.htmlparser.dom.HtmlDocumentBuilder; |
| 40 | +import nu.validator.htmlparser.common.XmlViolationPolicy; |
| 41 | +import nu.validator.htmlparser.sax.HtmlParser; |
43 | 42 |
|
44 | 43 | /**
|
45 | 44 | * Default User Agent.
|
@@ -129,15 +128,15 @@ public DOMDocument readURL(URL url) throws IOException, io.sf.carte.doc.Document
|
129 | 128 | }
|
130 | 129 | isHtml = mimeType.equals("text/html");
|
131 | 130 | }
|
132 |
| - DocumentBuilder builder; |
| 131 | + XMLDocumentBuilder builder = new XMLDocumentBuilder(domImpl); |
133 | 132 | if (isHtml) {
|
134 |
| - builder = new HtmlDocumentBuilder(domImpl); |
135 |
| - ((HtmlDocumentBuilder) builder).setIgnoringComments(false); |
| 133 | + HtmlParser parser = new HtmlParser(XmlViolationPolicy.ALTER_INFOSET); |
| 134 | + parser.setReportingDoctype(true); |
| 135 | + parser.setCommentPolicy(XmlViolationPolicy.ALLOW); |
| 136 | + builder.setXMLReader(parser); |
136 | 137 | } else {
|
137 |
| - XMLDocumentBuilder xmlbuilder = new XMLDocumentBuilder(domImpl); |
138 |
| - xmlbuilder.setIgnoreElementContentWhitespace(true); |
139 |
| - xmlbuilder.setEntityResolver(resolver); |
140 |
| - builder = xmlbuilder; |
| 138 | + builder.setIgnoreElementContentWhitespace(true); |
| 139 | + builder.setEntityResolver(resolver); |
141 | 140 | }
|
142 | 141 | try {
|
143 | 142 | is = openInputStream(con);
|
|
0 commit comments