@@ -94,6 +94,16 @@ def _init_decomp(self, decomp_type):
         self.decomp_type = None
         self.decompressor = None
 
+    def _fillbuff_has_more_data(self):
+        """Returns T/F to indicate if there is more data
+        to be read by _fillbuff.
+
+        :return: T/F to indicate if there is more data
+        to be read
+        :rtype: bool
+        """
+        return self.decompressor and not self.decompressor.unused_data and self.empty()
+
     def _fillbuff(self, block_size=None):
         if not self.empty():
             return
@@ -112,12 +122,17 @@ def _fillbuff(self, block_size=None):
 
         self._process_read(data)
 
+        # perf references in order to avoid the cost of dot property lookup
+        self_stream_read = self.stream.read
+        self_process_read = self._process_read
+        self_fillbuff_has_more_data = self._fillbuff_has_more_data
+
         # if raw data is not empty and decompressor set, but
         # decompressed buff is empty, keep reading --
         # decompressor likely needs more data to decompress
-        while data and self.decompressor and not self.decompressor.unused_data and self.empty():
-            data = self.stream.read(block_size)
-            self._process_read(data)
+        while data and self_fillbuff_has_more_data():
+            data = self_stream_read(block_size)
+            self_process_read(data)
 
     def _process_read(self, data):
         # don't process if no raw data read
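
The "perf references" pattern above caches attribute lookups in local names
before a hot loop, skipping the repeated attribute lookup on every iteration.
A minimal, self-contained sketch of the technique, using hypothetical
stream-draining functions rather than code from this patch:

    import io
    import timeit

    def drain_with_lookups(stream, block_size=64):
        chunks = []
        while True:
            data = stream.read(block_size)   # attribute lookup every pass
            if not data:
                return b''.join(chunks)
            chunks.append(data)              # attribute lookup every pass

    def drain_with_locals(stream, block_size=64):
        chunks = []
        stream_read = stream.read            # bind the bound methods once...
        chunks_append = chunks.append
        while True:
            data = stream_read(block_size)   # ...then call plain locals
            if not data:
                return b''.join(chunks)
            chunks_append(data)

    payload = b'x' * (1 << 20)
    print(timeit.timeit(lambda: drain_with_lookups(io.BytesIO(payload)), number=100))
    print(timeit.timeit(lambda: drain_with_locals(io.BytesIO(payload)), number=100))

The gain per cached lookup is small, so the trick only pays off in loops that
run many thousands of iterations, as the buffer-filling loops here can.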
@@ -149,6 +164,14 @@ def _decompress(self, data):
             return b''
         return data
 
+    def _buff_read(self, length):
+        """Utility method for read that returns
+        the results of self.buff.read(length).
+
+        :param int length: The amount to be read
+        """
+        return self.buff.read(length)
+
     def read(self, length=None):
         """
         Fill bytes and read some number of bytes
@@ -158,19 +181,32 @@ def read(self, length=None):
         specified length is read
         """
         all_buffs = []
+
+        # perf references in order to avoid the cost of dot property lookup
+        all_buffs_append = all_buffs.append
+        self_fillbuff = self._fillbuff
+        self_empty = self.empty
+        self_buff_read = self._buff_read
+
         while length is None or length > 0:
-            self._fillbuff()
-            if self.empty():
+            self_fillbuff()
+            if self_empty():
                 break
 
-            buff = self.buff.read(length)
-            all_buffs.append(buff)
+            buff = self_buff_read(length)
+            all_buffs_append(buff)
             if length:
                 length -= len(buff)
 
         return b''.join(all_buffs)
 
+    def _buff_readline(self, length):
+        """Utility method for readline that returns
+        the results of self.buff.readline(length).
 
+        :param int length: The amount to be read
+        """
+        return self.buff.readline(length)
 
     def readline(self, length=None):
         """
@@ -189,21 +225,35 @@ def readline(self, length=None):
 
         linebuff = self.buff.readline(length)
 
+        # perf references in order to avoid the cost of dot property lookup
+        self_fillbuff = self._fillbuff
+        self_empty = self.empty
+        self_buff_readline = self._buff_readline
+
+        # string concatenation using += is expensive because each += copies the
+        # accumulated string; appending the parts to a list and joining once is the pythonic way
+        # https://wiki.python.org/moin/PythonSpeed/PerformanceTips#String_Concatenation
+        current_full_line_buff_len = len(linebuff)
+        full_line_buff = [linebuff]
+        full_line_buff_append = full_line_buff.append
+
         # we may be at a boundary
         while not linebuff.endswith(b'\n'):
             if length:
-                length -= len(linebuff)
+                length -= current_full_line_buff_len
                 if length <= 0:
                     break
 
-            self._fillbuff()
+            self_fillbuff()
 
-            if self.empty():
+            if self_empty():
                 break
 
-            linebuff += self.buff.readline(length)
+            linebuff = self_buff_readline(length)
+            full_line_buff_append(linebuff)
+            current_full_line_buff_len += len(linebuff)
 
-        return linebuff
+        return b''.join(full_line_buff)
 
     def empty(self):
         if not self.buff or self.buff.tell() >= self.buff_size:
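
The readline() rewrite above swaps += accumulation for the list-append-and-join
idiom linked in the comment. A standalone comparison sketch (illustrative names,
not part of the patch); on CPython, each bytes += may copy the whole accumulated
buffer, making the loop quadratic in total output size:

    import timeit

    def concat_inplace(parts):
        out = b''
        for part in parts:
            out += part                # may copy the accumulated buffer each time
        return out

    def concat_join(parts):
        buff = []
        buff_append = buff.append      # same local-binding trick as the patch
        for part in parts:
            buff_append(part)          # amortized O(1) append
        return b''.join(buff)          # one final allocation

    parts = [b'line\n'] * 10000
    print(timeit.timeit(lambda: concat_inplace(parts), number=20))
    print(timeit.timeit(lambda: concat_join(parts), number=20))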
@@ -292,29 +342,53 @@ def _fillbuff(self, block_size=None):
         if self.not_chunked:
             return super(ChunkedDataReader, self)._fillbuff(block_size)
 
+        length_header = None
+
+        # perf references in order to avoid the cost of dot property lookup
+        self_chunked_fillbuff_has_more_data = self._chunked_fillbuff_has_more_data
+        self_stream_readline = self.stream.readline
+        self_try_decode = self._try_decode
+        self_chunked_fillbuff_handle_exception = self._chunked_fillbuff_handle_exception
+
         # Loop over chunks until there is some data (not empty())
         # In particular, gzipped data may require multiple chunks to
         # return any decompressed result
-        while (self.empty() and
-               not self.all_chunks_read and
-               not self.not_chunked):
-
+        while self_chunked_fillbuff_has_more_data():
             try:
-                length_header = self.stream.readline(64)
-                self._try_decode(length_header)
+                length_header = self_stream_readline(64)
+                self_try_decode(length_header)
             except ChunkedDataException as e:
-                if self.raise_chunked_data_exceptions:
-                    raise
+                self_chunked_fillbuff_handle_exception(e, length_header, block_size)
 
-                # Can't parse the data as chunked.
-                # It's possible that non-chunked data is served
-                # with a Transfer-Encoding: chunked.
-                # Treat this as non-chunk encoded from here on.
-                self._process_read(length_header + e.data)
-                self.not_chunked = True
+    def _chunked_fillbuff_has_more_data(self):
+        """Determines whether there is more chunked data to be read
+        when filling the buffer.
 
-                # parse as block as non-chunked
-                return super(ChunkedDataReader, self)._fillbuff(block_size)
+        :return: T/F indicating if there is more data in the stream
+        :rtype: bool
+        """
+        return self.empty() and not self.all_chunks_read and not self.not_chunked
+
+    def _chunked_fillbuff_handle_exception(self, e, length_header, block_size):
+        """Handles the ChunkedDataException raised by _try_decode while
+        attempting to fill the buffer.
+
+        :param ChunkedDataException e: The exception raised by _try_decode
+        :param bytes length_header: The length header read for the current chunk
+        :param int block_size: The block size for the non-chunked fallback read
+        """
+        if self.raise_chunked_data_exceptions:
+            raise e
+
+        # Can't parse the data as chunked.
+        # It's possible that non-chunked data is served
+        # with a Transfer-Encoding: chunked.
+        # Treat this as non-chunk encoded from here on.
+        self._process_read(length_header + e.data)
+        self.not_chunked = True
+
+        # parse as block as non-chunked
+        return super(ChunkedDataReader, self)._fillbuff(block_size)
 
     def _try_decode(self, length_header):
         # decode length header
@@ -336,36 +410,53 @@ def _try_decode(self, length_header):
             return
 
         data_len = 0
-        data = b''
+
+        # string concatenation perf
+        data = []
+
+        # perf references in order to avoid the cost of dot property lookup
+        data_append = data.append
+        self_stream_read = self.stream.read
+        self_try_decode_no_new_data = self._try_decode_no_new_data
 
         # read chunk
         while data_len < chunk_size:
-            new_data = self.stream.read(chunk_size - data_len)
+            new_data = self_stream_read(chunk_size - data_len)
 
             # if we unexpectedly run out of data,
            # either raise an exception or just stop reading,
             # assuming file was cut off
             if not new_data:
-                if self.raise_chunked_data_exceptions:
-                    msg = 'Ran out of data before end of chunk'
-                    raise ChunkedDataException(msg, data)
-                else:
-                    chunk_size = data_len
-                    self.all_chunks_read = True
+                # if self_try_decode_no_new_data does not raise an exception,
+                # set chunk_size to the current data_len in order to stop reading
+                self_try_decode_no_new_data(data)
+                chunk_size = data_len
 
-            data += new_data
-            data_len = len(data)
+            data_append(new_data)
+            data_len += len(new_data)
 
         # if we successfully read a block without running out,
         # it should end in \r\n
         if not self.all_chunks_read:
             clrf = self.stream.read(2)
             if clrf != b'\r\n':
-                raise ChunkedDataException(b"Chunk terminator not found.",
-                                           data)
+                raise ChunkedDataException(b"Chunk terminator not found.", b''.join(data))
 
         # hand to base class for further processing
-        self._process_read(data)
+        self._process_read(b''.join(data))
+
+    def _try_decode_no_new_data(self, data_buffer):
+        """If we unexpectedly run out of data, either raise an exception
+        or just stop reading, assuming the file was cut off.
+
+        :param list[bytes] data_buffer: The list of byte strings
+            read for the current chunk so far
+        """
+        if self.raise_chunked_data_exceptions:
+            msg = 'Ran out of data before end of chunk'
+            raise ChunkedDataException(msg, b''.join(data_buffer))
+        else:
+            self.all_chunks_read = True
 
 
 #=================================================================
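
For context, a quick usage sketch of the chunked path these hunks touch. The
import assumes this patch targets warcio's bufferedreaders module; adjust the
path if the code lives elsewhere:

    from io import BytesIO

    from warcio.bufferedreaders import ChunkedDataReader

    # a well-formed chunked body: two chunks, then the zero-length terminator
    stream = BytesIO(b'4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n')
    reader = ChunkedDataReader(stream)
    assert reader.read() == b'Wikipedia'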