Skip to content

Commit 1803f02

Browse files
authored
Add PutObject checksums (#1690)
For single-part uploads, checksums must be provided manually for now, until trailing headers are supported. For multipart uploads, CRC32C is added when no other checksum is already being sent and the content is read before sending. This also serves as a test for minio/minio#15433, but should remain backwards compatible.
1 parent def1174 commit 1803f02

9 files changed

+378
-80
lines changed

api-datatypes.go

+12
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ type UploadInfo struct {
8484
// not to be confused with `Expires` HTTP header.
8585
Expiration time.Time
8686
ExpirationRuleID string
87+
88+
// Verified checksum values, if any.
89+
ChecksumCRC32 string
90+
ChecksumCRC32C string
91+
ChecksumSHA1 string
92+
ChecksumSHA256 string
8793
}
8894

8995
// RestoreInfo contains information of the restore operation of an archived object
@@ -148,6 +154,12 @@ type ObjectInfo struct {
148154

149155
Restore *RestoreInfo
150156

157+
// Checksum values
158+
ChecksumCRC32 string
159+
ChecksumCRC32C string
160+
ChecksumSHA1 string
161+
ChecksumSHA256 string
162+
151163
// Error
152164
Err error `json:"-"`
153165
}

api-get-options.go

+9
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ type GetObjectOptions struct {
3838
ServerSideEncryption encrypt.ServerSide
3939
VersionID string
4040
PartNumber int
41+
42+
// Include any checksums, if object was uploaded with checksum.
43+
// For multipart objects this is a checksum of part checksums.
44+
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
45+
Checksum bool
46+
4147
// Not to be used by external applications
4248
Internal AdvancedGetOptions
4349
}
@@ -60,6 +66,9 @@ func (o GetObjectOptions) Header() http.Header {
6066
if o.Internal.ReplicationProxyRequest != "" {
6167
headers.Set(minIOBucketReplicationProxyRequest, o.Internal.ReplicationProxyRequest)
6268
}
69+
if o.Checksum {
70+
headers.Set("x-amz-checksum-mode", "ENABLED")
71+
}
6372
return headers
6473
}
6574

api-put-object-multipart.go

+53-19
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"encoding/hex"
2525
"encoding/xml"
2626
"fmt"
27+
"hash/crc32"
2728
"io"
2829
"io/ioutil"
2930
"net/http"
@@ -79,11 +80,23 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
7980
return UploadInfo{}, err
8081
}
8182

83+
// Choose hash algorithms to be calculated by hashCopyN,
84+
// avoid sha256 with non-v4 signature request or
85+
// HTTPS connection.
86+
hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256)
87+
if len(hashSums) == 0 {
88+
if opts.UserMetadata == nil {
89+
opts.UserMetadata = make(map[string]string, 1)
90+
}
91+
opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
92+
}
93+
8294
// Initiate a new multipart upload.
8395
uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts)
8496
if err != nil {
8597
return UploadInfo{}, err
8698
}
99+
delete(opts.UserMetadata, "X-Amz-Checksum-Algorithm")
87100

88101
defer func() {
89102
if err != nil {
@@ -100,12 +113,12 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
100113
// Create a buffer.
101114
buf := make([]byte, partSize)
102115

116+
// Create checksums
117+
// CRC32C is ~50% faster on AMD64 @ 30GB/s
118+
var crcBytes []byte
119+
customHeader := make(http.Header)
120+
crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
103121
for partNumber <= totalPartsCount {
104-
// Choose hash algorithms to be calculated by hashCopyN,
105-
// avoid sha256 with non-v4 signature request or
106-
// HTTPS connection.
107-
hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256)
108-
109122
length, rErr := readFull(reader, buf)
110123
if rErr == io.EOF && partNumber > 1 {
111124
break
@@ -131,18 +144,23 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
131144
md5Base64 string
132145
sha256Hex string
133146
)
147+
134148
if hashSums["md5"] != nil {
135149
md5Base64 = base64.StdEncoding.EncodeToString(hashSums["md5"])
136150
}
137151
if hashSums["sha256"] != nil {
138152
sha256Hex = hex.EncodeToString(hashSums["sha256"])
139153
}
154+
if len(hashSums) == 0 {
155+
crc.Reset()
156+
crc.Write(buf[:length])
157+
cSum := crc.Sum(nil)
158+
customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum))
159+
crcBytes = append(crcBytes, cSum...)
160+
}
140161

141162
// Proceed to upload the part.
142-
objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, rd, partNumber,
143-
md5Base64, sha256Hex, int64(length),
144-
opts.ServerSideEncryption,
145-
!opts.DisableContentSha256)
163+
objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, rd, partNumber, md5Base64, sha256Hex, int64(length), opts.ServerSideEncryption, !opts.DisableContentSha256, customHeader)
146164
if uerr != nil {
147165
return UploadInfo{}, uerr
148166
}
@@ -171,15 +189,25 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
171189
return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
172190
}
173191
complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
174-
ETag: part.ETag,
175-
PartNumber: part.PartNumber,
192+
ETag: part.ETag,
193+
PartNumber: part.PartNumber,
194+
ChecksumCRC32: part.ChecksumCRC32,
195+
ChecksumCRC32C: part.ChecksumCRC32C,
196+
ChecksumSHA1: part.ChecksumSHA1,
197+
ChecksumSHA256: part.ChecksumSHA256,
176198
})
177199
}
178200

179201
// Sort all completed parts.
180202
sort.Sort(completedParts(complMultipartUpload.Parts))
181-
182-
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, PutObjectOptions{})
203+
opts = PutObjectOptions{}
204+
if len(crcBytes) > 0 {
205+
// Add hash of hashes.
206+
crc.Reset()
207+
crc.Write(crcBytes)
208+
opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
209+
}
210+
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
183211
if err != nil {
184212
return UploadInfo{}, err
185213
}
@@ -242,9 +270,7 @@ func (c *Client) initiateMultipartUpload(ctx context.Context, bucketName, object
242270
}
243271

244272
// uploadPart - Uploads a part in a multipart upload.
245-
func (c *Client) uploadPart(ctx context.Context, bucketName, objectName, uploadID string, reader io.Reader,
246-
partNumber int, md5Base64, sha256Hex string, size int64, sse encrypt.ServerSide, streamSha256 bool,
247-
) (ObjectPart, error) {
273+
func (c *Client) uploadPart(ctx context.Context, bucketName string, objectName string, uploadID string, reader io.Reader, partNumber int, md5Base64 string, sha256Hex string, size int64, sse encrypt.ServerSide, streamSha256 bool, customHeader http.Header) (ObjectPart, error) {
248274
// Input validation.
249275
if err := s3utils.CheckValidBucketName(bucketName); err != nil {
250276
return ObjectPart{}, err
@@ -273,7 +299,9 @@ func (c *Client) uploadPart(ctx context.Context, bucketName, objectName, uploadI
273299
urlValues.Set("uploadId", uploadID)
274300

275301
// Set encryption headers, if any.
276-
customHeader := make(http.Header)
302+
if customHeader == nil {
303+
customHeader = make(http.Header)
304+
}
277305
// https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html
278306
// Server-side encryption is supported by the S3 Multipart Upload actions.
279307
// Unless you are using a customer-provided encryption key, you don't need
@@ -306,11 +334,17 @@ func (c *Client) uploadPart(ctx context.Context, bucketName, objectName, uploadI
306334
}
307335
}
308336
// Once successfully uploaded, return completed part.
309-
objPart := ObjectPart{}
337+
h := resp.Header
338+
objPart := ObjectPart{
339+
ChecksumCRC32: h.Get("x-amz-checksum-crc32"),
340+
ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
341+
ChecksumSHA1: h.Get("x-amz-checksum-sha1"),
342+
ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
343+
}
310344
objPart.Size = size
311345
objPart.PartNumber = partNumber
312346
// Trim off the odd double quotes from ETag in the beginning and end.
313-
objPart.ETag = trimEtag(resp.Header.Get("ETag"))
347+
objPart.ETag = trimEtag(h.Get("ETag"))
314348
return objPart, nil
315349
}
316350

api-put-object-streaming.go

+66-42
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"context"
2323
"encoding/base64"
2424
"fmt"
25+
"hash/crc32"
2526
"io"
2627
"net/http"
2728
"net/url"
@@ -38,9 +39,8 @@ import (
3839
//
3940
// Following code handles these types of readers.
4041
//
41-
// - *minio.Object
42-
// - Any reader which has a method 'ReadAt()'
43-
//
42+
// - *minio.Object
43+
// - Any reader which has a method 'ReadAt()'
4444
func (c *Client) putObjectMultipartStream(ctx context.Context, bucketName, objectName string,
4545
reader io.Reader, size int64, opts PutObjectOptions,
4646
) (info UploadInfo, err error) {
@@ -184,12 +184,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
184184
sectionReader := newHook(io.NewSectionReader(reader, readOffset, partSize), opts.Progress)
185185

186186
// Proceed to upload the part.
187-
objPart, err := c.uploadPart(ctx, bucketName, objectName,
188-
uploadID, sectionReader, uploadReq.PartNum,
189-
"", "", partSize,
190-
opts.ServerSideEncryption,
191-
!opts.DisableContentSha256,
192-
)
187+
objPart, err := c.uploadPart(ctx, bucketName, objectName, uploadID, sectionReader, uploadReq.PartNum, "", "", partSize, opts.ServerSideEncryption, !opts.DisableContentSha256, nil)
193188
if err != nil {
194189
uploadedPartsCh <- uploadedPartRes{
195190
Error: err,
@@ -260,6 +255,13 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
260255
return UploadInfo{}, err
261256
}
262257

258+
if !opts.SendContentMd5 {
259+
if opts.UserMetadata == nil {
260+
opts.UserMetadata = make(map[string]string, 1)
261+
}
262+
opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
263+
}
264+
263265
// Calculate the optimal parts info for a given size.
264266
totalPartsCount, partSize, lastPartSize, err := OptimalPartInfo(size, opts.PartSize)
265267
if err != nil {
@@ -270,6 +272,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
270272
if err != nil {
271273
return UploadInfo{}, err
272274
}
275+
delete(opts.UserMetadata, "X-Amz-Checksum-Algorithm")
273276

274277
// Aborts the multipart upload if the function returns
275278
// any error, since we do not resume we should purge
@@ -281,6 +284,14 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
281284
}
282285
}()
283286

287+
// Create checksums
288+
// CRC32C is ~50% faster on AMD64 @ 30GB/s
289+
var crcBytes []byte
290+
customHeader := make(http.Header)
291+
crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
292+
md5Hash := c.md5Hasher()
293+
defer md5Hash.Close()
294+
284295
// Total data read and written to server. Should be equal to 'size' at the end of the call.
285296
var totalUploadedSize int64
286297

@@ -292,7 +303,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
292303

293304
// Avoid declaring variables in the for loop
294305
var md5Base64 string
295-
var hookReader io.Reader
296306

297307
// Part number always starts with '1'.
298308
var partNumber int
@@ -303,37 +313,34 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
303313
partSize = lastPartSize
304314
}
305315

306-
if opts.SendContentMd5 {
307-
length, rerr := readFull(reader, buf)
308-
if rerr == io.EOF && partNumber > 1 {
309-
break
310-
}
311-
312-
if rerr != nil && rerr != io.ErrUnexpectedEOF && err != io.EOF {
313-
return UploadInfo{}, rerr
314-
}
316+
length, rerr := readFull(reader, buf)
317+
if rerr == io.EOF && partNumber > 1 {
318+
break
319+
}
315320

316-
// Calculate md5sum.
317-
hash := c.md5Hasher()
318-
hash.Write(buf[:length])
319-
md5Base64 = base64.StdEncoding.EncodeToString(hash.Sum(nil))
320-
hash.Close()
321+
if rerr != nil && rerr != io.ErrUnexpectedEOF && err != io.EOF {
322+
return UploadInfo{}, rerr
323+
}
321324

322-
// Update progress reader appropriately to the latest offset
323-
// as we read from the source.
324-
hookReader = newHook(bytes.NewReader(buf[:length]), opts.Progress)
325+
// Calculate md5sum.
326+
if opts.SendContentMd5 {
327+
md5Hash.Reset()
328+
md5Hash.Write(buf[:length])
329+
md5Base64 = base64.StdEncoding.EncodeToString(md5Hash.Sum(nil))
325330
} else {
326-
// Update progress reader appropriately to the latest offset
327-
// as we read from the source.
328-
hookReader = newHook(reader, opts.Progress)
331+
// Add CRC32C instead.
332+
crc.Reset()
333+
crc.Write(buf[:length])
334+
cSum := crc.Sum(nil)
335+
customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum))
336+
crcBytes = append(crcBytes, cSum...)
329337
}
330338

331-
objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID,
332-
io.LimitReader(hookReader, partSize),
333-
partNumber, md5Base64, "", partSize,
334-
opts.ServerSideEncryption,
335-
!opts.DisableContentSha256,
336-
)
339+
// Update progress reader appropriately to the latest offset
340+
// as we read from the source.
341+
hooked := newHook(bytes.NewReader(buf[:length]), opts.Progress)
342+
343+
objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, hooked, partNumber, md5Base64, "", partSize, opts.ServerSideEncryption, !opts.DisableContentSha256, customHeader)
337344
if uerr != nil {
338345
return UploadInfo{}, uerr
339346
}
@@ -363,15 +370,26 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
363370
return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
364371
}
365372
complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
366-
ETag: part.ETag,
367-
PartNumber: part.PartNumber,
373+
ETag: part.ETag,
374+
PartNumber: part.PartNumber,
375+
ChecksumCRC32: part.ChecksumCRC32,
376+
ChecksumCRC32C: part.ChecksumCRC32C,
377+
ChecksumSHA1: part.ChecksumSHA1,
378+
ChecksumSHA256: part.ChecksumSHA256,
368379
})
369380
}
370381

371382
// Sort all completed parts.
372383
sort.Sort(completedParts(complMultipartUpload.Parts))
373384

374-
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, PutObjectOptions{})
385+
opts = PutObjectOptions{}
386+
if len(crcBytes) > 0 {
387+
// Add hash of hashes.
388+
crc.Reset()
389+
crc.Write(crcBytes)
390+
opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
391+
}
392+
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
375393
if err != nil {
376394
return UploadInfo{}, err
377395
}
@@ -490,14 +508,20 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string,
490508

491509
// extract lifecycle expiry date and rule ID
492510
expTime, ruleID := amzExpirationToExpiryDateRuleID(resp.Header.Get(amzExpiration))
493-
511+
h := resp.Header
494512
return UploadInfo{
495513
Bucket: bucketName,
496514
Key: objectName,
497-
ETag: trimEtag(resp.Header.Get("ETag")),
498-
VersionID: resp.Header.Get(amzVersionID),
515+
ETag: trimEtag(h.Get("ETag")),
516+
VersionID: h.Get(amzVersionID),
499517
Size: size,
500518
Expiration: expTime,
501519
ExpirationRuleID: ruleID,
520+
521+
// Checksum values
522+
ChecksumCRC32: h.Get("x-amz-checksum-crc32"),
523+
ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
524+
ChecksumSHA1: h.Get("x-amz-checksum-sha1"),
525+
ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
502526
}, nil
503527
}

0 commit comments

Comments
 (0)