@@ -19,6 +19,7 @@ import (
19
19
"github.com/jpillora/backoff"
20
20
p2phttp "github.com/libp2p/go-libp2p-http"
21
21
"github.com/libp2p/go-libp2p/core/host"
22
+ "golang.org/x/sync/errgroup"
22
23
)
23
24
24
25
const (
@@ -30,6 +31,13 @@ const (
30
31
maxBackOff = 10 * time .Minute
31
32
factor = 1.5
32
33
maxReconnectAttempts = 15
34
+
35
+ nChunks = 5
36
+ // minChunkBytes is the minimum size of a chunk. If splitting the download into nChunks results in a chunk being smaller than minChunkBytes
37
+ // then the total number of chunks will be reduced. For example if nChunks is 5, number of remaining bytes is 100 and minChunkBytes is 50
38
+ // then download will be performed in 2 parallel streams.
39
+ // 1 MiB
40
+ minChunkBytes = 1048576
33
41
)
34
42
35
43
type httpError struct {
@@ -50,6 +58,15 @@ func BackOffRetryOpt(minBackoff, maxBackoff time.Duration, factor, maxReconnectA
50
58
}
51
59
}
52
60
61
+ func NChunksOpt (nChunks int ) Option {
62
+ return func (h * httpTransport ) {
63
+ if nChunks < 1 || nChunks > 16 {
64
+ return
65
+ }
66
+ h .nChunks = nChunks
67
+ }
68
+ }
69
+
53
70
type httpTransport struct {
54
71
libp2pHost host.Host
55
72
libp2pClient * http.Client
@@ -59,6 +76,8 @@ type httpTransport struct {
59
76
backOffFactor float64
60
77
maxReconnectAttempts float64
61
78
79
+ nChunks int
80
+
62
81
dl * logs.DealLogger
63
82
}
64
83
@@ -69,6 +88,7 @@ func New(host host.Host, dealLogger *logs.DealLogger, opts ...Option) *httpTrans
69
88
maxBackoffWait : maxBackOff ,
70
89
backOffFactor : factor ,
71
90
maxReconnectAttempts : maxReconnectAttempts ,
91
+ nChunks : nChunks ,
72
92
dl : dealLogger .Subsystem ("http-transport" ),
73
93
}
74
94
for _ , o := range opts {
@@ -120,6 +140,12 @@ func (h *httpTransport) Execute(ctx context.Context, transportInfo []byte, dealI
120
140
}
121
141
h .dl .Infow (duuid , "existing file size" , "file size" , fileSize , "deal size" , dealInfo .DealSize )
122
142
143
+ // default to a single stream for libp2p urls as libp2p server doesn't support range requests
144
+ nChunks := h .nChunks
145
+ if u .Scheme == "libp2p" {
146
+ nChunks = 1
147
+ }
148
+
123
149
// construct the transfer instance that will act as the transfer handler
124
150
tctx , cancel := context .WithCancel (ctx )
125
151
t := & transfer {
@@ -136,6 +162,7 @@ func (h *httpTransport) Execute(ctx context.Context, transportInfo []byte, dealI
136
162
},
137
163
maxReconnectAttempts : h .maxReconnectAttempts ,
138
164
dl : h .dl ,
165
+ nChunks : nChunks ,
139
166
}
140
167
141
168
cleanupFns := []func (){
@@ -216,16 +243,13 @@ type transfer struct {
216
243
217
244
client * http.Client
218
245
dl * logs.DealLogger
246
+
247
+ nChunks int
219
248
}
220
249
221
250
func (t * transfer ) execute (ctx context.Context ) error {
222
251
duuid := t .dealInfo .DealUuid
223
252
for {
224
- // construct request
225
- req , err := http .NewRequest ("GET" , t .tInfo .URL , nil )
226
- if err != nil {
227
- return fmt .Errorf ("failed to create http req: %w" , err )
228
- }
229
253
230
254
// get the number of bytes already received (the size of the output file)
231
255
st , err := os .Stat (t .dealInfo .OutputFile )
@@ -234,15 +258,6 @@ func (t *transfer) execute(ctx context.Context) error {
234
258
}
235
259
t .nBytesReceived = st .Size ()
236
260
237
- // add request headers
238
- for name , val := range t .tInfo .Headers {
239
- req .Header .Set (name , val )
240
- }
241
-
242
- // add range req to start reading from the last byte we have in the output file
243
- req .Header .Set ("Range" , fmt .Sprintf ("bytes=%d-" , t .nBytesReceived ))
244
- // init the request with the transfer context
245
- req = req .WithContext (ctx )
246
261
// open output file in append-only mode for writing
247
262
of , err := os .OpenFile (t .dealInfo .OutputFile , os .O_APPEND | os .O_WRONLY , 0644 )
248
263
if err != nil {
@@ -252,7 +267,75 @@ func (t *transfer) execute(ctx context.Context) error {
252
267
253
268
// start the http transfer
254
269
remaining := t .dealInfo .DealSize - t .nBytesReceived
255
- reqErr := t .doHttp (ctx , req , of , remaining )
270
+
271
+ // split download into chunks
272
+ // each chunk should be bigger than minChunkBytes
273
+ nChunks := int (remaining / minChunkBytes ) + 1
274
+ if nChunks > t .nChunks {
275
+ nChunks = t .nChunks
276
+ }
277
+
278
+ chunkLen := remaining / int64 (nChunks )
279
+ var pch , nch chan bool
280
+ group := errgroup.Group {}
281
+
282
+ nBytesReceived := t .nBytesReceived
283
+
284
+ for i := 0 ; i < nChunks ; i ++ {
285
+ isLast := i == nChunks - 1
286
+
287
+ // construct request
288
+ req , err := http .NewRequest ("GET" , t .tInfo .URL , nil )
289
+ if err != nil {
290
+ return fmt .Errorf ("failed to create http req: %w" , err )
291
+ }
292
+
293
+ // add request headers
294
+ for name , val := range t .tInfo .Headers {
295
+ req .Header .Set (name , val )
296
+ }
297
+
298
+ chunkStart := nBytesReceived + int64 (i )* chunkLen
299
+ var chunkEnd int64
300
+ var srange string
301
+ if isLast {
302
+ chunkEnd = t .dealInfo .DealSize
303
+ srange = fmt .Sprintf ("bytes=%d-" , chunkStart )
304
+ } else {
305
+ chunkEnd = chunkStart + chunkLen
306
+ srange = fmt .Sprintf ("bytes=%d-%d" , chunkStart , chunkEnd )
307
+ }
308
+
309
+ // add range req to start reading from the last byte we have in the output file
310
+ req .Header .Set ("Range" , srange )
311
+ // init the request with the transfer context
312
+ req = req .WithContext (ctx )
313
+
314
+ if isLast {
315
+ nch = nil
316
+ } else {
317
+ nch = make (chan bool , 1 )
318
+ }
319
+ cpch := pch
320
+ cnch := nch
321
+ group .Go (func () error {
322
+ err , success := t .doHttp (ctx , req , of , chunkEnd - chunkStart , cpch )
323
+ // signal to the next download goroutine to either proceed writing into the file if the current download has finished successfully
324
+ // or to abandon if it has not
325
+ if cnch != nil {
326
+ cnch <- (err == nil && success )
327
+ close (cnch )
328
+ }
329
+ return err
330
+ })
331
+
332
+ pch = nch
333
+ }
334
+ var reqErr * httpError
335
+ if err := group .Wait (); err != nil {
336
+ reqErr = err .(* httpError )
337
+ }
338
+
256
339
if reqErr == nil {
257
340
t .dl .Infow (duuid , "http transfer completed successfully" )
258
341
// if there's no error, transfer was successful
@@ -326,22 +409,45 @@ func (t *transfer) execute(ctx context.Context) error {
326
409
return nil
327
410
}
328
411
329
- func (t * transfer ) doHttp (ctx context.Context , req * http.Request , dst io.Writer , toRead int64 ) * httpError {
412
+ func (t * transfer ) doHttp (ctx context.Context , req * http.Request , dst io.Writer , toRead int64 , pch chan bool ) ( * httpError , bool ) {
330
413
duid := t .dealInfo .DealUuid
331
414
t .dl .Infow (duid , "sending http request" , "received" , t .nBytesReceived , "remaining" ,
332
415
toRead , "range-rq" , req .Header .Get ("Range" ))
333
416
334
417
// send http request and validate response
335
418
resp , err := t .client .Do (req )
336
419
if err != nil {
337
- return & httpError {error : fmt .Errorf ("failed to send http req: %w" , err )}
420
+ return & httpError {error : fmt .Errorf ("failed to send http req: %w" , err )}, false
338
421
}
339
422
// we should either get back a 200 or a 206 -> anything else means something has gone wrong and we return an error.
340
423
defer resp .Body .Close () // nolint
424
+
425
+ // do not fail the whole download if one of the goroutines' range couldn't be satisfied
426
+ // if not enough bytes have been downloaded, then a length mismatch error will be raised up the stack
427
+ if resp .StatusCode == http .StatusRequestedRangeNotSatisfiable {
428
+ return nil , false
429
+ }
430
+
341
431
if resp .StatusCode != http .StatusOK && resp .StatusCode != http .StatusPartialContent {
342
432
return & httpError {
343
433
error : fmt .Errorf ("http req failed: code: %d, status: %s" , resp .StatusCode , resp .Status ),
344
434
code : resp .StatusCode ,
435
+ }, false
436
+ }
437
+
438
+ // if previous download goroutine has failed - don't write into the file. Downloads can be performed in parallel while writing
439
+ // must be done strictly in sequential order
440
+ if pch != nil {
441
+ select {
442
+ case success := <- pch :
443
+ if ! success {
444
+ return nil , false
445
+ }
446
+ case <- ctx .Done ():
447
+ if ctx .Err () != nil {
448
+ return & httpError {error : ctx .Err ()}, false
449
+ }
450
+ return nil , false
345
451
}
346
452
}
347
453
@@ -351,7 +457,7 @@ func (t *transfer) doHttp(ctx context.Context, req *http.Request, dst io.Writer,
351
457
for {
352
458
if ctx .Err () != nil {
353
459
t .dl .LogError (duid , "stopped reading http response: context canceled" , ctx .Err ())
354
- return & httpError {error : ctx .Err ()}
460
+ return & httpError {error : ctx .Err ()}, false
355
461
}
356
462
nr , readErr := limitR .Read (buf )
357
463
@@ -362,9 +468,9 @@ func (t *transfer) doHttp(ctx context.Context, req *http.Request, dst io.Writer,
362
468
// if the number of read and written bytes don't match -> something has gone wrong, abort the http req.
363
469
if nw < 0 || nr != nw {
364
470
if writeErr != nil {
365
- return & httpError {error : fmt .Errorf ("failed to write to output file: %w" , writeErr )}
471
+ return & httpError {error : fmt .Errorf ("failed to write to output file: %w" , writeErr )}, false
366
472
}
367
- return & httpError {error : fmt .Errorf ("read-write mismatch writing to the output file, read=%d, written=%d" , nr , nw )}
473
+ return & httpError {error : fmt .Errorf ("read-write mismatch writing to the output file, read=%d, written=%d" , nr , nw )}, false
368
474
}
369
475
370
476
t .nBytesReceived = t .nBytesReceived + int64 (nw )
@@ -375,10 +481,10 @@ func (t *transfer) doHttp(ctx context.Context, req *http.Request, dst io.Writer,
375
481
// the http stream we're reading from has sent us an EOF, nothing to do here.
376
482
if readErr == io .EOF {
377
483
t .dl .Infow (duid , "http server sent EOF" , "received" , t .nBytesReceived , "deal-size" , t .dealInfo .DealSize )
378
- return nil
484
+ return nil , true
379
485
}
380
486
if readErr != nil {
381
- return & httpError {error : fmt .Errorf ("error reading from http response stream: %w" , readErr )}
487
+ return & httpError {error : fmt .Errorf ("error reading from http response stream: %w" , readErr )}, false
382
488
}
383
489
}
384
490
}
0 commit comments