Skip to content

Commit 1befb25

Browse files
committed
Add file opener brotli.open
1 parent f83aa51 commit 1befb25

File tree

2 files changed

+389
-0
lines changed

2 files changed

+389
-0
lines changed

python/brotli_file.py

+321
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
"""Functions that read and write brotli-compressed files.
2+
3+
The user of the file doesn't have to worry about the compression,
4+
but random access is not allowed."""
5+
6+
# based on Andrew Kuchling's minigzip.py distributed with the zlib module
7+
8+
import os
9+
import builtins
10+
import io
11+
import _compression
12+
import brotli
13+
14+
# Names exported by ``from brotli_file import *``.
__all__ = ["BrotliFile", "open"]
15+
16+
# Direction flags stored in BrotliFile.mode.
READ = 1
WRITE = 2
17+
18+
19+
def open(filename, mode="rb", quality=11, lgwin=22, lgblock=0,
         encoding=None, errors=None, newline=None):
    """Open a brotli-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes or
    os.PathLike object), or an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode
    is "rb".

    The quality, lgwin and lgblock arguments are forwarded unchanged to the
    BrotliFile constructor; they default to 11, 22 and 0 respectively.

    For binary mode, this function is equivalent to the BrotliFile
    constructor: BrotliFile(filename, mode, quality, lgwin, lgblock). In this
    case, the encoding, errors and newline arguments must not be provided.

    For text mode, a BrotliFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if the mode mixes "t" and "b", or if a text-only
    argument is given in binary mode. Raises TypeError if filename is neither
    a path nor an object with a read() or write() method.
    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # BrotliFile itself only understands binary modes; the text layer, if
    # requested, is added on top of it below.
    binary_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = BrotliFile(filename, binary_mode, quality, lgwin, lgblock)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = BrotliFile(
            None, binary_mode, quality, lgwin, lgblock, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file
62+
63+
64+
class BrotliFile(_compression.BaseStream):
    """The BrotliFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open
    a compressed file in text mode, use the open() function of this module.
    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 quality=11, lgwin=22, lgblock=0,
                 fileobj=None):
        """Constructor for the BrotliFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x',
        or 'xb' depending on whether the file will be read or written. The
        default is the mode of fileobj if discernible; otherwise, the default
        is 'rb'. A mode of 'r' is equivalent to one of 'rb', and similarly for
        'w' and 'wb', 'a' and 'ab', and 'x' and 'xb'.

        The quality, lgwin and lgblock arguments are only used when writing;
        they are passed as keyword arguments to brotli.Compressor.

        Raises ValueError if mode requests text or universal-newline mode, or
        does not start with one of 'r', 'w', 'a' or 'x'.
        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            # Remember the file we opened ourselves so close() can close it;
            # caller-supplied file objects are left open.
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        if filename is None:
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, (str, bytes)):
                filename = ''
        else:
            filename = os.fspath(filename)
        if mode is None:
            mode = getattr(fileobj, 'mode', 'rb')

        if mode.startswith('r'):
            self.mode = READ
            raw = _BrotliReader(fileobj, _BrotliDecompressor)
            self._buffer = io.BufferedReader(raw)
            self.name = filename

        elif mode.startswith(('w', 'a', 'x')):
            self.mode = WRITE
            self.size = 0
            self.offset = 0
            self.name = filename
            self.compress = brotli.Compressor(
                quality=quality, lgwin=lgwin, lgblock=lgblock)
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        self.fileobj = fileobj

    def _require_mode(self, expected, message):
        """Raise OSError(EBADF, message) unless the file is in mode expected."""
        if self.mode != expected:
            import errno
            raise OSError(errno.EBADF, message)

    @property
    def mtime(self):
        """Always None.

        The attribute exists for interface parity with gzip.GzipFile. The
        reader used by this class records no modification time, and the
        brotli stream format has no field for one.
        """
        return None

    def __repr__(self):
        s = repr(self.fileobj)
        return '<brotli ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def write(self, data):
        """Compress data and write it to the underlying file object.

        data may be bytes or any object supporting the buffer protocol.
        Returns the number of uncompressed bytes consumed. Raises ValueError
        if the file is closed and OSError if it was opened for reading.
        """
        self._check_not_closed()
        self._require_mode(WRITE, "write() on read-only BrotliFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.process(data))
            self.size += length
            self.offset += length

        return length

    def read(self, size=-1):
        """Read and return up to size uncompressed bytes (all if negative)."""
        self._check_not_closed()
        self._require_mode(READ, "read() on write-only BrotliFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative."""
        self._check_not_closed()
        self._require_mode(READ, "read1() on write-only BrotliFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Return buffered uncompressed bytes without advancing the position."""
        self._check_not_closed()
        self._require_mode(READ, "peek() on write-only BrotliFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        """True once close() has detached the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file.

        In write mode the compressor is flushed and finished into the file
        object first. Only a file opened by this object is closed; a file
        object supplied by the caller is left open. Calling close() more
        than once is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                fileobj.write(self.compress.finish())
            elif self.mode == READ:
                self._buffer.close()
        finally:
            # Close our own file even if finishing the stream failed.
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self):
        """In write mode, push pending compressed data to the file object."""
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush())
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        """Return True if the file was opened for reading."""
        return self.mode == READ

    def writable(self):
        """Return True if the file was opened for writing."""
        return self.mode == WRITE

    def seekable(self):
        """Return True; see seek() for the restrictions that apply."""
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the uncompressed stream position and return it.

        In write mode only forward seeks are possible; the gap is filled by
        writing zero bytes, and seeking relative to the end raises
        ValueError. In read mode the request is delegated to the buffered
        reader.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            # Pad in fixed-size chunks so a large gap never needs one huge
            # bytes object.
            chunk = b'\0' * 1024
            for _ in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read and return one line of uncompressed bytes.

        Raises OSError if the file was opened for writing.
        """
        self._check_not_closed()
        self._require_mode(READ, "readline() on write-only BrotliFile object")
        return self._buffer.readline(size)
263+
264+
265+
class _BrotliDecompressor:
    """Wrap brotli.Decompressor for use as a DecompressReader decompressor.

    brotli.Decompressor.process() returns everything it can decode at once,
    while the reader asks for at most ``size`` bytes per call. Output that
    does not fit in a request is parked in ``_buffer[:_pos]`` and handed out
    first on later calls.
    """

    # Never set to True by this class.
    eof = False

    def __init__(self):
        self.decompressor = brotli.Decompressor()
        self.needs_input = True
        # Holding area for decoded bytes not yet returned; grows on demand.
        self._buffer = bytearray(1)
        self._bufview = memoryview(self._buffer)
        self._buflen = len(self._buffer)
        self._pos = 0

    def _check_buffer(self, new_len):
        """Grow the holding area if it has room for fewer than new_len bytes."""
        if new_len <= self._buflen:
            return
        # A bytearray cannot be resized while a memoryview is exported from
        # it, so drop the view, extend, and take a fresh view.
        del self._bufview
        self._buffer.extend(bytes(2 * new_len))
        self._bufview = memoryview(self._buffer)
        self._buflen = len(self._buffer)

    def decompress(self, raw, size):
        """Feed raw to the decoder and return up to size decoded bytes.

        Parked bytes are returned before newly decoded ones. needs_input is
        cleared whenever raw is non-empty and set again when a call could
        not fill the request.
        """
        if raw:
            fresh = self.decompressor.process(raw)
            self.needs_input = False
        else:
            fresh = b''
        fresh_len = len(fresh)
        held = self._pos

        if held >= size:
            # The parked bytes alone satisfy the request: return their head,
            # slide the tail to the front and park the new output behind it.
            out = bytes(self._bufview[:size])
            kept = held - size
            self._check_buffer(kept + fresh_len)
            self._bufview[:kept] = self._bufview[size:held]
            self._bufview[kept:kept + fresh_len] = fresh
            self._pos = kept + fresh_len
            return out

        parked = bytes(self._bufview[:held])
        if held + fresh_len >= size:
            # Parked bytes plus part of the new output fill the request; the
            # remainder of the new output becomes the parked data.
            take = size - held
            out = parked + fresh[:take]
            spill = fresh_len - take
            self._check_buffer(spill)
            self._bufview[:spill] = fresh[take:]
            self._pos = spill
            return out

        # Not enough data: return whatever exists and ask for more input.
        self._pos = 0
        self.needs_input = True
        return parked + fresh
314+
315+
316+
class _BrotliReader(_compression.DecompressReader):
    """DecompressReader that treats exhausted input as end of stream.

    The decompressor used with this reader never reports ``eof``, so the
    base class raises EOFError once the underlying file runs out of data.
    That error is translated into a normal end-of-file here.
    """

    def read(self, size=-1):
        """Return up to size decompressed bytes, or b'' at end of input."""
        try:
            return super().read(size)
        except EOFError:
            # Record the end of the stream the same way the base class does
            # on a clean EOF. Without this the decompressed size stays
            # unknown (-1) and seek(offset, io.SEEK_END) computes a wrong
            # target position.
            self._eof = True
            self._size = self._pos
            return b''

python/tests/file_test.py

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2016 The Brotli Authors. All rights reserved.
2+
#
3+
# Distributed under MIT license.
4+
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5+
6+
import unittest
7+
8+
from . import _test_utils
9+
import brotli
10+
import brotli_file
11+
12+
13+
class TestCompress(_test_utils.TestCase):
    """Compress test data with brotli_file.open and verify the round trip."""

    VARIANTS = {'quality': (1, 6, 9, 11), 'lgwin': (10, 15, 20, 24)}

    def _check_decompression(self, test_data, **kwargs):
        # The compression variant in kwargs is accepted so callers can pass
        # it uniformly, but nothing is forwarded to brotli.decompress().
        uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
        compressed_path = _test_utils.get_temp_compressed_name(test_data)
        # Decompress into a temp file and verify it matches the original.
        with open(uncompressed_path, 'wb') as out_file, \
                open(compressed_path, 'rb') as in_file:
            out_file.write(brotli.decompress(in_file.read()))
        self.assertFilesMatch(uncompressed_path, test_data)

    def _compress(self, test_data, **kwargs):
        # Write test_data through brotli_file.open using the given variant.
        compressed_path = _test_utils.get_temp_compressed_name(test_data)
        with brotli_file.open(compressed_path, 'w', **kwargs) as out_file, \
                open(test_data, 'rb') as in_file:
            out_file.write(in_file.read())

    def _test_compress(self, test_data, **kwargs):
        self._compress(test_data, **kwargs)
        self._check_decompression(test_data, **kwargs)


_test_utils.generate_test_methods(TestCompress, variants=TestCompress.VARIANTS)
40+
41+
42+
def _get_original_name(test_data):
    """Return the part of test_data before the first '.compressed'."""
    original, _, _ = test_data.partition('.compressed')
    return original
44+
45+
46+
class TestDecompress(_test_utils.TestCase):
    """Decompress test data with brotli_file.open and compare to the source."""

    def _check_decompression(self, test_data):
        # Verify decompression matches the original.
        uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
        self.assertFilesMatch(uncompressed_path, _get_original_name(test_data))

    def _decompress(self, test_data):
        # Read test_data through brotli_file.open into a temp file.
        uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
        with open(uncompressed_path, 'wb') as out_file, \
                brotli_file.open(test_data) as in_file:
            out_file.write(in_file.read())

    def _test_decompress(self, test_data):
        self._decompress(test_data)
        self._check_decompression(test_data)


_test_utils.generate_test_methods(TestDecompress, for_decompression=True)
66+
67+
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)