parallel.go
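
// Package s3hash computes S3-style ETags: the plain MD5 of the data when it
// fits in a single part, or the MD5 of the concatenated part MD5s followed by
// a "-<parts>" suffix when it spans multiple parts.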
package s3hash

import (
	"context"
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"io"
	"os"
	"strconv"
	"sync"
)
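
// work is a single hashing task: a reader over one chunk of the input and the
// chunk's 1-based part number.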
type work struct {
	io.Reader
	partNum int
}
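
// result carries the MD5 sum of one part back to the collector, or the error
// that prevented it from being computed.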
type result struct {
	partNum int
	sum     []byte
	err     error
}

// ReaderAtSeeker is both an io.ReaderAt and an io.Seeker. *os.File satisfies
// it. To satisfy it from a byte slice, io.NewSectionReader() can be used.
type ReaderAtSeeker interface {
	io.ReaderAt
	io.Seeker
}

// CalculateForFileInParallel calculates the S3 hash of a given file with the given chunk size and number of workers.
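//
// Example (illustrative sketch; the file name and 5 MiB chunk size below are
// placeholders, not values defined by this package):
//
//	sum, err := CalculateForFileInParallel(ctx, "data.bin", 5*1024*1024, runtime.NumCPU())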
func CalculateForFileInParallel(ctx context.Context, filename string, chunkSize int64, numWorkers int) (sum string, err error) {
	f, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer f.Close()

	return CalculateInParallel(ctx, f, chunkSize, numWorkers)
}

// CalculateInParallel calculates the S3 hash of the given ReaderAtSeeker with
// the given chunk size and number of workers.
// io.NewSectionReader() can be used to create the input from a byte slice.
//
// Example:
//
//	data := []byte("test data")
//	rdr := io.NewSectionReader(bytes.NewReader(data), 0, int64(len(data)))
//	sum, err := CalculateInParallel(context.Background(), rdr, 5*1024*1024, runtime.NumCPU())
func CalculateInParallel(ctx context.Context, input ReaderAtSeeker, chunkSize int64, numWorkers int) (sum string, err error) {
	ctx, cancelFunc := context.WithCancel(ctx)
	defer cancelFunc()
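
	// Determine the total input size by seeking to the end. Each part is read
	// through its own io.SectionReader, so the seek offset itself is never
	// used for reading.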
	var dataSize int64
	dataSize, err = input.Seek(0, io.SeekEnd)
	if err != nil {
		return
	}
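
	// Fan out: start numWorkers goroutines that consume parts from ch and
	// send the per-part MD5 sums on results.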
	var wg sync.WaitGroup
	ch := make(chan work)
	results := make(chan result)

	wg.Add(numWorkers)
	for i := 0; i < numWorkers; i++ {
		go worker(ctx, &wg, ch, results)
	}
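
	// A single collector goroutine gathers the per-part sums into resultMap.
	// On the first failed part it stores the error in err (unless err already
	// holds context.Canceled), cancels the context so the dispatch loop and
	// the remaining workers stop early, and exits.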
	resultMap := make(map[int][]byte)
	var resultWg sync.WaitGroup
	resultWg.Add(1)
	go func() {
		defer resultWg.Done()
		for r := range results {
			if r.err != nil {
				if err == nil || err != context.Canceled {
					err = r.err
				}
				cancelFunc()
				return
			}
			resultMap[r.partNum] = r.sum
		}
	}()
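
	// Dispatch loop: split the input into chunkSize-long sections (the last
	// one may be shorter) and hand each to a worker as an io.SectionReader,
	// bailing out if the context is cancelled first.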
	parts := 0
	for i := int64(0); i < dataSize; i += chunkSize {
		parts++
		length := chunkSize
		if i+chunkSize > dataSize {
			length = dataSize - i
		}

		select {
		case <-ctx.Done():
			if err != nil {
				return
			}
			err = ctx.Err()
			return
		case ch <- work{
			io.NewSectionReader(input, i, length),
			parts,
		}:
		}
	}
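
	// All parts are dispatched: let the workers drain ch and exit, then close
	// results so the collector goroutine finishes.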
	close(ch)
	wg.Wait()
	close(results)
	resultWg.Wait()
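
	// Stitch the per-part sums together in part order. A missing entry means
	// a part was never hashed, e.g. because the work was cancelled.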
	var sumOfSums []byte
	for i := 1; i <= parts; i++ {
		sum, ok := resultMap[i]
		if !ok || sum == nil {
			return "", fmt.Errorf("resultMap incomplete %d", i)
		}
		sumOfSums = append(sumOfSums, sum...)
	}
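
	// S3 ETag convention: a single part's hash is just that part's MD5; for
	// multiple parts it is the MD5 of the concatenated part sums, suffixed
	// below with "-<number of parts>".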
	var finalSum []byte
	if parts == 1 {
		finalSum = sumOfSums
	} else {
		h := md5.New()
		_, err := h.Write(sumOfSums)
		if err != nil {
			return "", err
		}
		finalSum = h.Sum(nil)
	}
	sumHex := hex.EncodeToString(finalSum)
	if parts > 1 {
		sumHex += "-" + strconv.Itoa(parts)
	}

	return sumHex, err
}
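
// worker consumes parts from ch until it is closed, hashing each one and
// sending the outcome on results. It returns early if the context is
// cancelled while it is waiting to send.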
func worker(ctx context.Context, wg *sync.WaitGroup, ch chan work, results chan result) {
	defer wg.Done()
	for w := range ch {
		select {
		case <-ctx.Done():
			return
		case results <- singleWork(w):
		}
	}
}
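
// singleWork computes the MD5 sum of a single part by copying its reader into
// an md5 hash.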
func singleWork(w work) result {
	r := result{partNum: w.partNum}
	h := md5.New()
	if _, err := io.Copy(h, w); err != nil {
		r.err = err
		return r
	}
	r.sum = h.Sum(nil)
	return r
}