-
Notifications
You must be signed in to change notification settings - Fork 50
Dynamic default part size #575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 23 commits
d804f65
5fb1c44
59c18b2
f0ee675
772c4a0
a12d2cb
3796c1e
350281d
cbcc4e5
3eadf8a
2141cfc
d196a83
2982e3e
a58305c
66424cc
1686881
1a334d0
2e900b4
e0fe6d8
c7ebc80
bb61f3b
2a4f6bb
72e6976
e6a395b
d1fe086
587c3f9
982fcce
c0de491
64a6e5a
f0199da
af29d4f
287b507
54d5c38
5d7d268
fadd3dc
bd5f363
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -122,8 +122,19 @@ struct aws_s3_request_metrics { | |||
| int error_code; | ||||
| /* Retry attempt. */ | ||||
| uint32_t retry_attempt; | ||||
| /* Is the memory for the request allocated from the buffer pool or not. */ | ||||
| bool memory_allocated_from_pool; | ||||
| } crt_info_metrics; | ||||
|
|
||||
| struct { | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thinking out loud:
we can probably do some of the same sanity checks in upload path as well |
||||
| /* Beginning range of this part. */ | ||||
| uint64_t part_range_start; | ||||
| /* Last byte of this part. */ | ||||
| uint64_t part_range_end; | ||||
| /* Part number that this request refers to. */ | ||||
| uint32_t part_number; | ||||
| } part_info_metrics; | ||||
|
|
||||
| struct aws_ref_count ref_count; | ||||
| }; | ||||
|
|
||||
|
|
||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,6 +62,7 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_get_new( | |
| struct aws_allocator *allocator, | ||
| struct aws_s3_client *client, | ||
| size_t part_size, | ||
| bool part_size_set, | ||
| const struct aws_s3_meta_request_options *options) { | ||
| AWS_PRECONDITION(allocator); | ||
| AWS_PRECONDITION(client); | ||
|
|
@@ -91,6 +92,8 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_get_new( | |
| return NULL; | ||
| } | ||
|
|
||
| auto_ranged_get->part_size_set = part_size_set; | ||
| auto_ranged_get->force_dynamic_part_size = options->force_dynamic_part_size; | ||
| struct aws_http_headers *headers = aws_http_message_get_headers(auto_ranged_get->base.initial_request_message); | ||
| AWS_ASSERT(headers != NULL); | ||
|
|
||
|
|
@@ -110,6 +113,7 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_get_new( | |
| } | ||
| } | ||
| auto_ranged_get->initial_message_has_if_match_header = aws_http_headers_has(headers, g_if_match_header_name); | ||
|
|
||
| auto_ranged_get->synced_data.first_part_size = auto_ranged_get->base.part_size; | ||
| if (options->object_size_hint != NULL) { | ||
| auto_ranged_get->object_size_hint_available = true; | ||
|
|
@@ -200,6 +204,13 @@ static bool s_s3_auto_ranged_get_update( | |
| { | ||
| aws_s3_meta_request_lock_synced_data(meta_request); | ||
|
|
||
| #ifdef AWS_C_S3_ENABLE_TEST_STUBS | ||
| if (meta_request->vtable->synced_update_stub && meta_request->vtable->synced_update_stub(meta_request)) { | ||
| /* TEST ONLY, allow test to stub here. */ | ||
| aws_s3_meta_request_unlock_synced_data(meta_request); | ||
| return true; | ||
| } | ||
| #endif | ||
| /* If nothing has set the "finish result" then this meta request is still in progress, and we can potentially | ||
| * send additional requests. */ | ||
| if (!aws_s3_meta_request_has_finish_result_synced(meta_request)) { | ||
|
|
@@ -319,8 +330,12 @@ static bool s_s3_auto_ranged_get_update( | |
| * we could end up stuck in a situation where the user is | ||
| * waiting for more bytes before they'll open the window, | ||
| * and this implementation is waiting for more window before it will send more parts. */ | ||
| uint64_t read_data_requested = | ||
| auto_ranged_get->synced_data.num_parts_requested * meta_request->part_size; | ||
| uint64_t read_data_requested = 0; | ||
| if (auto_ranged_get->synced_data.num_parts_requested > 0) { | ||
| read_data_requested = | ||
| (auto_ranged_get->synced_data.num_parts_requested - 1) * meta_request->part_size + | ||
| auto_ranged_get->synced_data.first_part_size; | ||
| } | ||
| if (read_data_requested >= meta_request->synced_data.read_window_running_total) { | ||
|
|
||
| /* Avoid spamming users with this DEBUG message */ | ||
|
|
@@ -757,29 +772,74 @@ static void s_s3_auto_ranged_get_request_finished( | |
|
|
||
| goto update_synced_data; | ||
| } | ||
| if ((!request_failed || first_part_size_mismatch) && !auto_ranged_get->initial_message_has_if_match_header) { | ||
| AWS_ASSERT(auto_ranged_get->etag == NULL); | ||
| /* Always extract ETag header for part size estimation */ | ||
| if (!request_failed || first_part_size_mismatch) { | ||
| struct aws_byte_cursor etag_header_value; | ||
| AWS_ASSERT(auto_ranged_get->etag == NULL); | ||
| if (aws_http_headers_get(request->send_data.response_headers, g_etag_header_name, &etag_header_value) == | ||
| AWS_OP_SUCCESS) { | ||
| AWS_LOGF_TRACE( | ||
| AWS_LS_S3_META_REQUEST, | ||
| "id=%p ETag received for the meta request. value is: " PRInSTR "", | ||
| (void *)meta_request, | ||
| AWS_BYTE_CURSOR_PRI(etag_header_value)); | ||
|
|
||
| if (aws_http_headers_get(request->send_data.response_headers, g_etag_header_name, &etag_header_value)) { | ||
| if (!auto_ranged_get->initial_message_has_if_match_header) { | ||
| /* Store ETag if needed for If-Match header */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why only if needed? isnt it easier to just always store it (i.e. the old way)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the old way also only store the etag when |
||
| auto_ranged_get->etag = | ||
| aws_string_new_from_cursor(auto_ranged_get->base.allocator, &etag_header_value); | ||
| } | ||
| } else { | ||
| AWS_LOGF_ERROR(AWS_LS_S3_META_REQUEST, "id=%p ETag headers are missing", (void *)meta_request); | ||
| aws_raise_error(AWS_ERROR_S3_MISSING_ETAG); | ||
| error_code = AWS_ERROR_S3_MISSING_ETAG; | ||
| goto update_synced_data; | ||
| } | ||
| /* Extract number of parts from ETag and calculate estimated part size */ | ||
| uint32_t num_parts = 0; | ||
| if (aws_s3_extract_parts_from_etag(etag_header_value, &num_parts) == AWS_OP_SUCCESS && num_parts > 0) { | ||
| auto_ranged_get->estimated_object_stored_part_size = object_size / num_parts; | ||
|
|
||
| AWS_LOGF_TRACE( | ||
| AWS_LS_S3_META_REQUEST, | ||
| "id=%p Etag received for the meta request. value is: " PRInSTR "", | ||
| (void *)meta_request, | ||
| AWS_BYTE_CURSOR_PRI(etag_header_value)); | ||
| auto_ranged_get->etag = aws_string_new_from_cursor(auto_ranged_get->base.allocator, &etag_header_value); | ||
| AWS_LOGF_DEBUG( | ||
| AWS_LS_S3_META_REQUEST, | ||
| "id=%p Estimated object stored part size: object_size=%" PRIu64 ", num_parts=%" PRIu32 | ||
| ", estimated_part_size=%" PRIu64, | ||
| (void *)meta_request, | ||
| object_size, | ||
| num_parts, | ||
| auto_ranged_get->estimated_object_stored_part_size); | ||
| } else { | ||
| /* Failed to parse ETags */ | ||
| aws_raise_error(AWS_ERROR_S3_MISSING_ETAG); | ||
| error_code = AWS_ERROR_S3_MISSING_ETAG; | ||
TingDaoK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| goto update_synced_data; | ||
| } | ||
| } | ||
|
|
||
| /* If we were able to discover the object-range/content length successfully, then any error code that was passed | ||
| * into this function is being handled and does not indicate an overall failure.*/ | ||
| error_code = AWS_ERROR_SUCCESS; | ||
| found_object_size = true; | ||
|
|
||
| if (auto_ranged_get->force_dynamic_part_size || | ||
| (!auto_ranged_get->part_size_set && !meta_request->client->part_size_set)) { | ||
| /* No part size has been set from user. Now we use the optimal part size based on the throughput and memory | ||
| * limit */ | ||
| uint64_t out_request_optimal_range_size = 0; | ||
| int error = aws_s3_calculate_request_optimal_range_size( | ||
TingDaoK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| meta_request->client->optimal_range_size, | ||
| auto_ranged_get->estimated_object_stored_part_size, | ||
| &out_request_optimal_range_size); | ||
| if (!error) { | ||
| /* Override the part size to be optimal */ | ||
| *((size_t *)&meta_request->part_size) = (size_t)out_request_optimal_range_size; | ||
| if (request->request_tag == AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_HEAD_OBJECT) { | ||
| /* Update the first part size as well */ | ||
| first_part_size = meta_request->part_size; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /* Check for checksums if requested to */ | ||
| if (meta_request->checksum_config.validate_response_checksum) { | ||
| if (aws_s3_check_headers_for_checksum( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any concern adding this def here? Or it's better to keep as a separate one.
pro: Nothing will break.
con: Maybe people built with tests in prod and don't want to have this stub enabled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we enable this ourselves when we package java, python, etc...?
probably not a big concern in practice, it add a private api. we already expose a bunch of private apis used by tests only
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/awslabs/aws-crt-python/blob/main/crt/CMakeLists.txt#L26
https://github.com/awslabs/aws-crt-java/blob/main/pom.xml#L185
Yeah, we do set this to off when we build the bindings.
Also, we should not build the C tests when we package for the CRT bindings.