Skip to content

Commit 07a65cd

Browse files
[3.12] gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284) (#128583)
gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284) * Allow DOMBuilder.parse() to correctly handle DOMInputSource instances that only have a systemId attribute set. * Fix DOMEntityResolver.resolveEntity(), which was broken by the Python 3.0 transition. * Add Lib/test/test_xml_dom_xmlbuilder.py with a few tests. (cherry picked from commit 6ea04da) Co-authored-by: Stephen Morton <[email protected]>
1 parent b55c404 commit 07a65cd

File tree

4 files changed

+100
-5
lines changed

4 files changed

+100
-5
lines changed

Lib/test/test_xml_dom_xmlbuilder.py

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import io
2+
import unittest
3+
from http import client
4+
from test.test_httplib import FakeSocket
5+
from unittest import mock
6+
from xml.dom import getDOMImplementation, minidom, xmlbuilder
7+
8+
SMALL_SAMPLE = b"""<?xml version="1.0"?>
9+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
10+
<!-- A comment -->
11+
<title>Introduction to XSL</title>
12+
<hr/>
13+
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
14+
</html>"""
15+
16+
17+
class XMLBuilderTest(unittest.TestCase):
18+
def test_entity_resolver(self):
19+
body = (
20+
b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
21+
+ SMALL_SAMPLE
22+
)
23+
24+
sock = FakeSocket(body)
25+
response = client.HTTPResponse(sock)
26+
response.begin()
27+
attrs = {"open.return_value": response}
28+
opener = mock.Mock(**attrs)
29+
30+
resolver = xmlbuilder.DOMEntityResolver()
31+
32+
with mock.patch("urllib.request.build_opener") as mock_build:
33+
mock_build.return_value = opener
34+
source = resolver.resolveEntity(None, "http://example.com/2000/svg")
35+
36+
self.assertIsInstance(source, xmlbuilder.DOMInputSource)
37+
self.assertIsNone(source.publicId)
38+
self.assertEqual(source.systemId, "http://example.com/2000/svg")
39+
self.assertEqual(source.baseURI, "http://example.com/2000/")
40+
self.assertEqual(source.encoding, "utf-8")
41+
self.assertIs(source.byteStream, response)
42+
43+
self.assertIsNone(source.characterStream)
44+
self.assertIsNone(source.stringData)
45+
46+
def test_builder(self):
47+
imp = getDOMImplementation()
48+
self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS)
49+
50+
builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
51+
self.assertIsInstance(builder, xmlbuilder.DOMBuilder)
52+
53+
def test_parse_uri(self):
54+
body = (
55+
b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
56+
+ SMALL_SAMPLE
57+
)
58+
59+
sock = FakeSocket(body)
60+
response = client.HTTPResponse(sock)
61+
response.begin()
62+
attrs = {"open.return_value": response}
63+
opener = mock.Mock(**attrs)
64+
65+
with mock.patch("urllib.request.build_opener") as mock_build:
66+
mock_build.return_value = opener
67+
68+
imp = getDOMImplementation()
69+
builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
70+
document = builder.parseURI("http://example.com/2000/svg")
71+
72+
self.assertIsInstance(document, minidom.Document)
73+
self.assertEqual(len(document.childNodes), 1)
74+
75+
def test_parse_with_systemId(self):
76+
response = io.BytesIO(SMALL_SAMPLE)
77+
78+
with mock.patch("urllib.request.urlopen") as mock_open:
79+
mock_open.return_value = response
80+
81+
imp = getDOMImplementation()
82+
source = imp.createDOMInputSource()
83+
builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
84+
source.systemId = "http://example.com/2000/svg"
85+
document = builder.parse(source)
86+
87+
self.assertIsInstance(document, minidom.Document)
88+
self.assertEqual(len(document.childNodes), 1)

Lib/xml/dom/xmlbuilder.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def parse(self, input):
189189
options.filter = self.filter
190190
options.errorHandler = self.errorHandler
191191
fp = input.byteStream
192-
if fp is None and options.systemId:
192+
if fp is None and input.systemId:
193193
import urllib.request
194194
fp = urllib.request.urlopen(input.systemId)
195195
return self._parse_bytestream(fp, options)
@@ -247,10 +247,12 @@ def _create_opener(self):
247247

248248
def _guess_media_encoding(self, source):
249249
info = source.byteStream.info()
250-
if "Content-Type" in info:
251-
for param in info.getplist():
252-
if param.startswith("charset="):
253-
return param.split("=", 1)[1].lower()
250+
# import email.message
251+
# assert isinstance(info, email.message.Message)
252+
charset = info.get_param('charset')
253+
if charset is not None:
254+
return charset.lower()
255+
return None
254256

255257

256258
class DOMInputSource(object):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Allow :meth:`!xml.dom.xmlbuilder.DOMBuilder.parse` to correctly handle
2+
:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a
3+
:attr:`!systemId` attribute set.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was
2+
broken by the Python 3.0 transition.

0 commit comments

Comments
 (0)