
Commit 332dd22

update the default behaviors on the fio options (#577)
1 parent e4ba8f1 commit 332dd22

File tree

3 files changed: 32 additions, 10 deletions


include/aws/s3/s3_client.h

Lines changed: 7 additions & 6 deletions
@@ -315,27 +315,28 @@ enum aws_s3_recv_file_options {
     AWS_S3_RECV_FILE_WRITE_TO_POSITION,
 };
 
-/* Controls how client performance file I/O operations. Only applies to the file based workload. */
+/**
+ * WARNING: experimental/unstable:
+ * Controls how client performance file I/O operations. Only applies to the file based workload.
+ **/
 struct aws_s3_file_io_options {
     /**
      * Skip buffering the part in memory before sending the request.
-     * If set, set the `disk_throughput_gbps` to be reasonable align with the available disk throughput.
-     * Otherwise, the transfer may fail with connection starvation.
      *
-     * Default to false.
+     * Default to false on small objects, and true when the object size exceed a certain threshold
+     * `g_streaming_object_size_threshold`.
      **/
     bool should_stream;
 
     /**
      * The estimated disk throughput. Only be applied when `streaming_upload` is true.
      * in gigabits per second (Gbps).
      *
-     * When doing upload with streaming, it's important to set the disk throughput to prevent the connection starvation.
      * Notes: There are possibilities that cannot reach the all available disk throughput:
      * 1. Disk is busy with other applications
      * 2. OS Cache may cap the throughput, use `direct_io` to get around this.
      *
-     * Note: When `streaming_upload` is true, this default to 10 Gbps.
+     * Default to throughput_target_gbps.
      **/
     double disk_throughput_gbps;
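For illustration, a minimal sketch (not part of the commit) of how a caller could opt in to streaming file I/O explicitly. The struct and field names come from the header above; leaving `disk_throughput_gbps` at 0 lets the client-level `throughput_target_gbps` take over as the default, as shown in the `s3_meta_request.c` hunk below.

#include <aws/s3/s3_client.h>
#include <stdbool.h>

/* Sketch only: explicitly opt in to streaming file I/O. */
struct aws_s3_file_io_options fio_opts = {
    /* Stream parts from disk instead of buffering each part in memory first. */
    .should_stream = true,
    /* 0 means "not set"; the client's throughput_target_gbps is used instead
     * (see the s3_meta_request.c change below). */
    .disk_throughput_gbps = 0.0,
};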

source/s3_meta_request.c

Lines changed: 1 addition & 1 deletion
@@ -194,7 +194,7 @@ int aws_s3_meta_request_init_base(
 
     if (meta_request->fio_opts.should_stream && meta_request->fio_opts.disk_throughput_gbps == 0) {
         /* If disk throughput is not set, set it to the default. */
-        meta_request->fio_opts.disk_throughput_gbps = g_default_throughput_target_gbps;
+        meta_request->fio_opts.disk_throughput_gbps = client->throughput_target_gbps;
     }
 
     /* Set up reference count. */
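As a concrete illustration (not from the commit): a meta request that enables streaming but leaves `disk_throughput_gbps` unset previously always got the fixed 10 Gbps default (`g_default_throughput_target_gbps`); after this change it inherits whatever the client was configured with. A hedged sketch, assuming the standard `throughput_target_gbps` field on `aws_s3_client_config`:

#include <aws/s3/s3_client.h>

/* Sketch only: the client-level throughput target now also seeds the
 * per-meta-request disk_throughput_gbps default when streaming is enabled,
 * so this client would assume 100 Gbps of disk throughput, not 10. */
struct aws_s3_client_config config = {
    .throughput_target_gbps = 100.0,
    /* ... other required client configuration omitted ... */
};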

source/s3_util.c

Lines changed: 24 additions & 3 deletions
@@ -64,10 +64,31 @@ const struct aws_byte_cursor g_user_agent_header_unknown = AWS_BYTE_CUR_INIT_FRO
 
 const uint32_t g_s3_max_num_upload_parts = 10000;
 const size_t g_s3_min_upload_part_size = MB_TO_BYTES(5);
-const size_t g_streaming_buffer_size = MB_TO_BYTES(8);
+
 const double g_default_throughput_target_gbps = 10.0;
-/* TODO: disable this threshold until we have a better option for threshold */
-const uint64_t g_streaming_object_size_threshold = UINT64_MAX;
+
+/**
+ * Streaming buffer size selection based on experimental results on EBS:
+ *
+ * - Too small buffer sizes (e.g., 16KiB) impact disk read performance,
+ *   achieving only 6.73 Gbps throughput from EBS.
+ * - Too large buffer sizes cause network connections to starve more easily
+ *   when disk reads cannot provide data fast enough.
+ * - 1MiB buffer size provides optimal balance: sufficient disk read throughput
+ *   while maintaining reasonable retry rates due to connection starvation.
+ */
+const size_t g_streaming_buffer_size = MB_TO_BYTES(1);
+
+/**
+ * The streaming approach reduces memory consumption without introducing unexpected errors
+ * or performance degradation.
+ *
+ * We start streaming for objects larger than 1TiB, with plans to lower this threshold in future iterations.
+ *
+ * The 1TiB threshold was chosen to minimize the blast radius of this behavioral change
+ * while still providing meaningful memory usage improvements for large objects.
+ */
+const uint64_t g_streaming_object_size_threshold = TB_TO_BYTES(1);
 
 void copy_http_headers(const struct aws_http_headers *src, struct aws_http_headers *dest) {
     AWS_PRECONDITION(src);
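For reference, a small sketch of what the new constants work out to. The `MB_TO_BYTES`/`TB_TO_BYTES` definitions below are assumptions (KiB-based, as the "MiB"/"TiB" wording in the comments suggests), and `default_should_stream` is a hypothetical helper illustrating the threshold behavior documented in `s3_client.h`, not code from this commit.

#include <stdbool.h>
#include <stdint.h>

/* Assumed definitions; the real macros live in the aws-c-s3 util headers. */
#define MB_TO_BYTES(mb) ((uint64_t)(mb) * 1024 * 1024)
#define TB_TO_BYTES(tb) ((uint64_t)(tb) * 1024 * 1024 * 1024 * 1024)

/* g_streaming_buffer_size           = MB_TO_BYTES(1) -> 1,048,576 bytes (1 MiB)
 * g_streaming_object_size_threshold = TB_TO_BYTES(1) -> 1,099,511,627,776 bytes (1 TiB) */

/* Hypothetical illustration of the documented default: stream only when the
 * object size exceeds the threshold, otherwise buffer parts in memory. */
static bool default_should_stream(uint64_t object_size_bytes) {
    return object_size_bytes > TB_TO_BYTES(1);
}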

0 commit comments