Skip to content

Commit a2773de

Browse files
authored
fix(py_wheel): Quote wheel RECORD file entry elements if needed (bazel-contrib#2269)
The `RECORD` file written when wheels are patched using `pip.override()` is not quoting filenames as needed, so wheels that (unfortunately) include files whose name contain commas would be written in such a way that https://pypi.org/project/installer/ would fail to parse them, resulting in an error like: ``` installer.records.InvalidRecordEntry: Row Index 360: expected 3 elements, got 5 ``` This PR fixes that by using `csv` to read and write `RECORD` file entries which takes care of quoting elements of record entries as needed. See PEP376 for more info about the `RECORD` file format here: https://peps.python.org/pep-0376/#record Fixes bazel-contrib#2261
1 parent d85a392 commit a2773de

File tree

7 files changed

+60
-23
lines changed

7 files changed

+60
-23
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ A brief description of the categories of changes:
4040
{attr}`pip.parse.experimental_index_url`. See
4141
[#2239](https://github.com/bazelbuild/rules_python/issues/2239).
4242
* (whl_filegroup): Provide per default also the `RECORD` file
43+
* (py_wheel): `RECORD` file entry elements are now quoted if necessary when a
44+
wheel is created
4345

4446
### Added
4547
* (py_wheel) Now supports `compress = (True|False)` to allow disabling

examples/wheel/lib/BUILD.bazel

+10-1
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,20 @@ py_library(
2626
py_library(
2727
name = "module_with_data",
2828
srcs = ["module_with_data.py"],
29-
data = [":data.txt"],
29+
data = [
30+
"data,with,commas.txt",
31+
":data.txt",
32+
],
3033
)
3134

3235
genrule(
3336
name = "make_data",
3437
outs = ["data.txt"],
3538
cmd = "echo foo bar baz > $@",
3639
)
40+
41+
genrule(
42+
name = "make_data_with_commas_in_name",
43+
outs = ["data,with,commas.txt"],
44+
cmd = "echo foo bar baz > $@",
45+
)

examples/wheel/wheel_test.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ def test_py_package_wheel(self):
9595
self.assertEqual(
9696
zf.namelist(),
9797
[
98+
"examples/wheel/lib/data,with,commas.txt",
9899
"examples/wheel/lib/data.txt",
99100
"examples/wheel/lib/module_with_data.py",
100101
"examples/wheel/lib/simple_module.py",
@@ -105,7 +106,7 @@ def test_py_package_wheel(self):
105106
],
106107
)
107108
self.assertFileSha256Equal(
108-
filename, "b4815a1d3a17cc6a5ce717ed42b940fa7788cb5168f5c1de02f5f50abed7083e"
109+
filename, "82370bf61310e2d3c7b1218368457dc7e161bf5dc1a280d7d45102b5e56acf43"
109110
)
110111

111112
def test_customized_wheel(self):
@@ -117,6 +118,7 @@ def test_customized_wheel(self):
117118
self.assertEqual(
118119
zf.namelist(),
119120
[
121+
"examples/wheel/lib/data,with,commas.txt",
120122
"examples/wheel/lib/data.txt",
121123
"examples/wheel/lib/module_with_data.py",
122124
"examples/wheel/lib/simple_module.py",
@@ -140,6 +142,7 @@ def test_customized_wheel(self):
140142
record_contents,
141143
# The entries are guaranteed to be sorted.
142144
b"""\
145+
"examples/wheel/lib/data,with,commas.txt",sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
143146
examples/wheel/lib/data.txt,sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
144147
examples/wheel/lib/module_with_data.py,sha256=8s0Khhcqz3yVsBKv2IB5u4l4TMKh7-c_V6p65WVHPms,637
145148
examples/wheel/lib/simple_module.py,sha256=z2hwciab_XPNIBNH8B1Q5fYgnJvQTeYf0ZQJpY8yLLY,637
@@ -194,7 +197,7 @@ def test_customized_wheel(self):
194197
second = second.main:s""",
195198
)
196199
self.assertFileSha256Equal(
197-
filename, "27f3038be6e768d28735441a1bc567eca2213bd3568d18b22a414e6399a2d48e"
200+
filename, "706e8dd45884d8cb26e92869f7d29ab7ed9f683b4e2d08f06c03dbdaa12191b8"
198201
)
199202

200203
def test_filename_escaping(self):
@@ -205,6 +208,7 @@ def test_filename_escaping(self):
205208
self.assertEqual(
206209
zf.namelist(),
207210
[
211+
"examples/wheel/lib/data,with,commas.txt",
208212
"examples/wheel/lib/data.txt",
209213
"examples/wheel/lib/module_with_data.py",
210214
"examples/wheel/lib/simple_module.py",
@@ -241,6 +245,7 @@ def test_custom_package_root_wheel(self):
241245
self.assertEqual(
242246
zf.namelist(),
243247
[
248+
"wheel/lib/data,with,commas.txt",
244249
"wheel/lib/data.txt",
245250
"wheel/lib/module_with_data.py",
246251
"wheel/lib/simple_module.py",
@@ -260,7 +265,7 @@ def test_custom_package_root_wheel(self):
260265
for line in record_contents.splitlines():
261266
self.assertFalse(line.startswith("/"))
262267
self.assertFileSha256Equal(
263-
filename, "f034b3278781f4df32a33df70d794bb94170b450e477c8bd9cd42d2d922476ae"
268+
filename, "568922541703f6edf4b090a8413991f9fa625df2844e644dd30bdbe9deb660be"
264269
)
265270

266271
def test_custom_package_root_multi_prefix_wheel(self):
@@ -273,6 +278,7 @@ def test_custom_package_root_multi_prefix_wheel(self):
273278
self.assertEqual(
274279
zf.namelist(),
275280
[
281+
"data,with,commas.txt",
276282
"data.txt",
277283
"module_with_data.py",
278284
"simple_module.py",
@@ -291,7 +297,7 @@ def test_custom_package_root_multi_prefix_wheel(self):
291297
for line in record_contents.splitlines():
292298
self.assertFalse(line.startswith("/"))
293299
self.assertFileSha256Equal(
294-
filename, "ff19f5e4540948247742716338bb4194d619cb56df409045d1a99f265ce8e36c"
300+
filename, "a8b91ce9d6f570e97b40a357a292a6f595d3470f07c479cb08550257cc9c8306"
295301
)
296302

297303
def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
@@ -304,6 +310,7 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
304310
self.assertEqual(
305311
zf.namelist(),
306312
[
313+
"lib/data,with,commas.txt",
307314
"lib/data.txt",
308315
"lib/module_with_data.py",
309316
"lib/simple_module.py",
@@ -322,7 +329,7 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
322329
for line in record_contents.splitlines():
323330
self.assertFalse(line.startswith("/"))
324331
self.assertFileSha256Equal(
325-
filename, "4331e378ea8b8148409ae7c02177e4eb24d151a85ef937bb44b79ff5258d634b"
332+
filename, "8f44e940731757c186079a42cfe7ea3d43cd96b526e3fb2ca2a3ea3048a9d489"
326333
)
327334

328335
def test_python_requires_wheel(self):
@@ -347,7 +354,7 @@ def test_python_requires_wheel(self):
347354
""",
348355
)
349356
self.assertFileSha256Equal(
350-
filename, "b34676828f93da8cd898d50dcd4f36e02fe273150e213aacb999310a05f5f38c"
357+
filename, "ba32493f5e43e481346384aaab9e8fa09c23884276ad057c5f432096a0350101"
351358
)
352359

353360
def test_python_abi3_binary_wheel(self):

python/private/pypi/repack_whl.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from __future__ import annotations
2323

2424
import argparse
25+
import csv
2526
import difflib
2627
import logging
2728
import pathlib
@@ -65,8 +66,8 @@ def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
6566
# First get existing files by using the RECORD file
6667
got_files = []
6768
got_distinfos = []
68-
for line in want_record.splitlines():
69-
rec, _, _ = line.partition(",")
69+
for row in csv.reader(want_record.splitlines()):
70+
rec = row[0]
7071
path = dir / rec
7172

7273
if not path.exists():

python/private/whl_filegroup/extract_wheel_files.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Extract files from a wheel's RECORD."""
22

3+
import csv
34
import re
45
import sys
56
import zipfile
@@ -20,10 +21,7 @@ def get_record(whl_path: Path) -> WhlRecord:
2021
except ValueError:
2122
raise RuntimeError(f"{whl_path} doesn't contain exactly one .dist-info/RECORD")
2223
record_lines = zipf.read(record_file).decode().splitlines()
23-
return (
24-
line.split(",")[0]
25-
for line in record_lines
26-
)
24+
return (row[0] for row in csv.reader(record_lines))
2725

2826

2927
def get_files(whl_record: WhlRecord, regex_pattern: str) -> list[str]:

tests/whl_filegroup/extract_wheel_files_test.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class WheelRecordTest(unittest.TestCase):
1111
def test_get_wheel_record(self) -> None:
1212
record = extract_wheel_files.get_record(_WHEEL)
1313
expected = (
14+
"examples/wheel/lib/data,with,commas.txt",
1415
"examples/wheel/lib/data.txt",
1516
"examples/wheel/lib/module_with_data.py",
1617
"examples/wheel/lib/simple_module.py",
@@ -26,11 +27,19 @@ def test_get_files(self) -> None:
2627
pattern = "(examples/wheel/lib/.*\.txt$|.*main)"
2728
record = extract_wheel_files.get_record(_WHEEL)
2829
files = extract_wheel_files.get_files(record, pattern)
29-
expected = ["examples/wheel/lib/data.txt", "examples/wheel/main.py"]
30+
expected = [
31+
"examples/wheel/lib/data,with,commas.txt",
32+
"examples/wheel/lib/data.txt",
33+
"examples/wheel/main.py",
34+
]
3035
self.assertEqual(files, expected)
3136

3237
def test_extract(self) -> None:
33-
files = {"examples/wheel/lib/data.txt", "examples/wheel/main.py"}
38+
files = {
39+
"examples/wheel/lib/data,with,commas.txt",
40+
"examples/wheel/lib/data.txt",
41+
"examples/wheel/main.py",
42+
}
3443
with tempfile.TemporaryDirectory() as tmpdir:
3544
outdir = Path(tmpdir)
3645
extract_wheel_files.extract_files(_WHEEL, files, outdir)

tools/wheelmaker.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616

1717
import argparse
1818
import base64
19+
import csv
1920
import hashlib
21+
import io
2022
import os
2123
import re
2224
import stat
@@ -208,14 +210,23 @@ def add_recordfile(self):
208210
"""Write RECORD file to the distribution."""
209211
record_path = self.distinfo_path("RECORD")
210212
entries = self._record + [(record_path, b"", b"")]
211-
contents = b""
212-
for filename, digest, size in entries:
213-
if isinstance(filename, str):
214-
filename = filename.lstrip("/").encode("utf-8", "surrogateescape")
215-
contents += b"%s,%s,%s\n" % (filename, digest, size)
216-
217-
self.add_string(record_path, contents)
218-
return contents
213+
with io.StringIO() as contents_io:
214+
writer = csv.writer(contents_io, lineterminator="\n")
215+
for filename, digest, size in entries:
216+
if isinstance(filename, str):
217+
filename = filename.lstrip("/")
218+
writer.writerow(
219+
(
220+
c
221+
if isinstance(c, str)
222+
else c.decode("utf-8", "surrogateescape")
223+
for c in (filename, digest, size)
224+
)
225+
)
226+
227+
contents = contents_io.getvalue()
228+
self.add_string(record_path, contents)
229+
return contents.encode("utf-8", "surrogateescape")
219230

220231

221232
class WheelMaker(object):

0 commit comments

Comments
 (0)