-
Notifications
You must be signed in to change notification settings - Fork 53
Dynamic Scaling using a token based implementation #603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
ec77588
72fcc1f
5b6a70a
6e25a4f
258edb9
73f3acb
37a0ff6
3185861
8799bc4
0d6368c
562da66
8e063c7
d0c0f01
cdbdcb8
a30d417
714d235
64025f1
640088c
e2a3f86
417433d
982ba7f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,6 +44,9 @@ | |
| #include <inttypes.h> | ||
| #include <math.h> | ||
|
|
||
/* Ideal part size in MiB used to size the token-bucket floor.
 * Currently 8 MiB; subject to change as part-size tuning evolves. */
#define S_IDEAL_PART_SIZE 8
/* Minimum number of concurrent requests the client should always be able to
 * run regardless of the configured target throughput; keeps the token bucket
 * from starving small-throughput configurations. */
#define S_S3_CLIENT_MINIMUM_CONCURRENT_REQUESTS 8
|
|
||
| #ifdef _MSC_VER | ||
| # pragma warning(disable : 4232) /* function pointer to dll symbol */ | ||
| #endif /* _MSC_VER */ | ||
|
|
@@ -75,6 +78,33 @@ const uint32_t g_min_num_connections = 10; /* Magic value based on: 10 was old b | |
| * be 2500 Gbps. */ | ||
| const uint32_t g_max_num_connections = 10000; | ||
|
|
||
| /* This is a first pass at a token based implementation, the calculations are approximate and can be improved in the | ||
| * future. The idea is to scale the number of connections we require up and down based on the different requests we | ||
| * receive and hence dynamically scale the maximum number of connections we need to open. One token is equivalent to | ||
| * 1Mbps of throughput. */ | ||
|
|
||
| /* All throughput values are in MBps and provided by S3 team */ | ||
|
|
||
| // 90 MBps | ||
| const uint32_t s_s3_download_throughput_per_connection_mbps = 90 * 8; | ||
| // 20 MBps | ||
| const uint32_t s_s3_upload_throughput_per_connection_mbps = 20 * 8; | ||
| // 150 MBps | ||
| const uint32_t s_s3_express_download_throughput_per_connection_mbps = 150 * 8; | ||
| // 100 MBps | ||
| const uint32_t s_s3_express_upload_throughput_per_connection_mbps = 100 * 8; | ||
|
|
||
| /* All latency values are in milliseconds (ms) and provided by S3 team */ | ||
| // 30ms | ||
| const double s_s3_p50_request_latency_ms = 0.03; | ||
| // 4ms | ||
| const double s_s3_express_p50_request_latency_ms = 0.004; | ||
|
|
||
| /* Currently the ideal part size is 8MB and hence the value set. | ||
| * However, this is subject to change due to newer part sizes and adjustments. */ | ||
|
|
||
| const uint32_t s_s3_minimum_tokens = S_IDEAL_PART_SIZE * 8 * S_S3_CLIENT_MINIMUM_CONCURRENT_REQUESTS; | ||
|
|
||
| /** | ||
| * Default max part size is 5GiB as the server limit. | ||
| */ | ||
|
|
@@ -206,6 +236,115 @@ uint32_t aws_s3_client_get_max_active_connections( | |
| return max_active_connections; | ||
| } | ||
|
|
||
| /* Initialize token bucket based on target throughput */ | ||
| void s_s3_client_init_tokens(struct aws_s3_client *client, double target_throughput_gbps) { | ||
| AWS_PRECONDITION(client); | ||
| aws_atomic_store_int( | ||
| &client->token_bucket, aws_max_u32((uint32_t)target_throughput_gbps * 1024, s_s3_minimum_tokens)); | ||
| } | ||
|
|
||
| /* Releases tokens back after request is complete. */ | ||
| void s_s3_client_release_tokens(struct aws_s3_client *client, struct aws_s3_request *request) { | ||
| AWS_PRECONDITION(client); | ||
| AWS_PRECONDITION(request); | ||
|
|
||
| uint32_t tokens = 0; | ||
|
|
||
| switch (request->request_type) { | ||
|
||
| case AWS_S3_REQUEST_TYPE_GET_OBJECT: { | ||
| if (request->meta_request->is_express) { | ||
| tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_express_p50_request_latency_ms)), | ||
| s_s3_express_download_throughput_per_connection_mbps); | ||
| } else { | ||
| tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_p50_request_latency_ms)), | ||
| s_s3_download_throughput_per_connection_mbps); | ||
| } | ||
| break; | ||
| } | ||
| case AWS_S3_REQUEST_TYPE_UPLOAD_PART: { | ||
| if (request->meta_request->is_express) { | ||
| tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_express_p50_request_latency_ms)), | ||
| s_s3_express_upload_throughput_per_connection_mbps); | ||
| } else { | ||
| tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_p50_request_latency_ms)), | ||
| s_s3_upload_throughput_per_connection_mbps); | ||
| } | ||
| break; | ||
| } | ||
| default: { | ||
| tokens = s_s3_minimum_tokens; | ||
| } | ||
| } | ||
|
|
||
| // do we need error handling here? | ||
| aws_atomic_fetch_add(&client->token_bucket, tokens); | ||
| } | ||
|
|
||
| /* Returns true or false based on whether the request was able to avail the required amount of tokens. | ||
| * TODO: try to introduce a scalability factor instead of using pure latency. */ | ||
| bool s_s3_client_acquire_tokens(struct aws_s3_client *client, struct aws_s3_request *request) { | ||
| AWS_PRECONDITION(client); | ||
| AWS_PRECONDITION(request); | ||
|
|
||
| // We ensure we do not violate the user set max-connections limit | ||
| if ((uint32_t)aws_atomic_load_int(&client->stats.num_requests_network_total) >= | ||
| client->max_active_connections_override && | ||
| client->max_active_connections_override > 0) { | ||
| return false; | ||
| } | ||
|
|
||
| struct aws_s3_meta_request *meta_request = request->meta_request; | ||
| if (meta_request && | ||
| (uint32_t)aws_atomic_load_int(&meta_request->num_requests_network) >= | ||
| meta_request->max_active_connections_override && | ||
| meta_request->max_active_connections_override > 0) { | ||
| return false; | ||
| } | ||
azkrishpy marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| uint32_t required_tokens = 0; | ||
|
|
||
| switch (request->request_type) { | ||
azkrishpy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| case AWS_S3_REQUEST_TYPE_GET_OBJECT: { | ||
| if (request->meta_request->is_express) { | ||
| required_tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_express_p50_request_latency_ms)), | ||
| s_s3_express_download_throughput_per_connection_mbps); | ||
| } else { | ||
| required_tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_p50_request_latency_ms)), | ||
| s_s3_download_throughput_per_connection_mbps); | ||
| } | ||
| break; | ||
| } | ||
| case AWS_S3_REQUEST_TYPE_UPLOAD_PART: { | ||
| if (request->meta_request->is_express) { | ||
| required_tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_express_p50_request_latency_ms)), | ||
| s_s3_express_upload_throughput_per_connection_mbps); | ||
| } else { | ||
| required_tokens = aws_min_u32( | ||
| (uint32_t)ceil(request->buffer_size * 8 / (MB_TO_BYTES(1) * s_s3_p50_request_latency_ms)), | ||
| s_s3_upload_throughput_per_connection_mbps); | ||
azkrishpy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| break; | ||
| } | ||
| default: { | ||
| required_tokens = s_s3_minimum_tokens; | ||
| } | ||
| } | ||
|
|
||
| if ((uint32_t)aws_atomic_load_int(&client->token_bucket) > required_tokens) { | ||
| // do we need error handling here? | ||
| aws_atomic_fetch_sub(&client->token_bucket, required_tokens); | ||
| return true; | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| /* Returns the max number of requests allowed to be in memory */ | ||
| uint32_t aws_s3_client_get_max_requests_in_flight(struct aws_s3_client *client) { | ||
| AWS_PRECONDITION(client); | ||
|
|
@@ -421,6 +560,8 @@ struct aws_s3_client *aws_s3_client_new( | |
| *(uint32_t *)&client->ideal_connection_count = aws_max_u32( | ||
azkrishpy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| g_min_num_connections, s_get_ideal_connection_number_from_throughput(client->throughput_target_gbps)); | ||
|
|
||
| s_s3_client_init_tokens(client, client->throughput_target_gbps); | ||
|
||
|
|
||
| size_t part_size = (size_t)g_default_part_size_fallback; | ||
| if (client_config->part_size != 0) { | ||
| if (client_config->part_size > SIZE_MAX) { | ||
|
|
@@ -2266,7 +2407,7 @@ void aws_s3_client_update_connections_threaded(struct aws_s3_client *client) { | |
|
|
||
| struct aws_s3_request *request = aws_s3_client_dequeue_request_threaded(client); | ||
| struct aws_s3_meta_request *meta_request = request->meta_request; | ||
| const uint32_t max_active_connections = aws_s3_client_get_max_active_connections(client, meta_request); | ||
|
|
||
| if (request->is_noop) { | ||
| /* If request is no-op, finishes and cleans up the request */ | ||
| s_s3_client_meta_request_finished_request(client, meta_request, request, AWS_ERROR_SUCCESS); | ||
|
|
@@ -2286,7 +2427,7 @@ void aws_s3_client_update_connections_threaded(struct aws_s3_client *client) { | |
|
|
||
| s_s3_client_meta_request_finished_request(client, meta_request, request, AWS_ERROR_S3_CANCELED); | ||
| request = aws_s3_request_release(request); | ||
| } else if ((uint32_t)aws_atomic_load_int(&meta_request->num_requests_network) < max_active_connections) { | ||
| } else if (s_s3_client_acquire_tokens(client, request)) { | ||
|
||
| /* Make sure it's above the max request level limitation. */ | ||
| s_s3_client_create_connection_for_request(client, request); | ||
| } else { | ||
|
|
@@ -2336,6 +2477,7 @@ static void s_s3_client_create_connection_for_request_default( | |
|
|
||
| aws_atomic_fetch_add(&meta_request->num_requests_network, 1); | ||
| aws_atomic_fetch_add(&client->stats.num_requests_network_io[meta_request->type], 1); | ||
| aws_atomic_fetch_add(&client->stats.num_requests_network_total, 1); | ||
|
|
||
| struct aws_s3_connection *connection = aws_mem_calloc(client->allocator, 1, sizeof(struct aws_s3_connection)); | ||
|
|
||
|
|
@@ -2612,6 +2754,9 @@ void aws_s3_client_notify_connection_finished( | |
| request->send_data.metrics->time_metrics.s3_request_last_attempt_end_timestamp_ns - | ||
| request->send_data.metrics->time_metrics.s3_request_first_attempt_start_timestamp_ns; | ||
|
|
||
| // release tokens acquired for the request | ||
| s_s3_client_release_tokens(client, request); | ||
|
|
||
| if (connection->retry_token != NULL) { | ||
| /* If we have a retry token and successfully finished, record that success. */ | ||
| if (finish_code == AWS_S3_CONNECTION_FINISH_CODE_SUCCESS) { | ||
|
|
@@ -2631,6 +2776,7 @@ void aws_s3_client_notify_connection_finished( | |
| } | ||
| aws_atomic_fetch_sub(&meta_request->num_requests_network, 1); | ||
| aws_atomic_fetch_sub(&client->stats.num_requests_network_io[meta_request->type], 1); | ||
| aws_atomic_fetch_sub(&client->stats.num_requests_network_total, 1); | ||
|
|
||
| s_s3_client_meta_request_finished_request(client, meta_request, request, error_code); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand — why is `S_S3_CLIENT_MINIMUM_CONCURRENT_REQUESTS` set to 8?