Skip to content

Commit b642136

Browse files
committed
perf: reduce latency via timing constant tuning
Tighten all relay timing constants to cut dead-wait time and flow-control stalls without touching any logic paths. tunnel_client.rs: - REPLY_TIMEOUT 35s -> 20s: GAS hard execution limit is 30s, so 35s can never catch a live-but-killed session; 20s still covers slow legitimate responses (~5-10s) with margin. - Pre-fill poll stagger 1s -> 100ms per slot: eliminates 1s dead time at every session startup (INFLIGHT_OPTIMIST=2 means 1 slot was always delayed by 1s). domain_fronter.rs: - POOL_TTL_SECS 60 -> 30: faster turnover when IP/DNS changes. - POOL_REFILL_INTERVAL_SECS 5 -> 2: halves h1 pool recovery window after an h2 outage. - H2_READY_TIMEOUT_SECS 5 -> 3: faster h1 fallback on saturated h2 connections. - H1_KEEPALIVE_INTERVAL_SECS 240 -> 60: keeps GAS containers warm after 1-min idle instead of 4-min; eliminates 1-3s cold-start penalty for users who pause streaming. Quota cost is ~1,080 extra invocations/day (1,440/day at a 60s interval vs. 360/day at 240s), well under the free-tier 6M/day limit. - H2 flow-control windows 4MB/8MB -> 16MB/32MB: eliminates flow-control stalls during range-parallel streaming (256 KB chunks). Memory overhead is zero on idle pooled connections. - Body Vec pre-sized from content-length header: avoids O(log n) realloc-and-copy cycles on large GAS responses (up to 40 MB).
1 parent b37f7be commit b642136

2 files changed

Lines changed: 22 additions & 12 deletions

File tree

src/domain_fronter.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,9 @@ impl FronterError {
9595
}
9696

9797
type PooledStream = TlsStream<TcpStream>;
98-
const POOL_TTL_SECS: u64 = 60;
98+
const POOL_TTL_SECS: u64 = 30;
9999
const POOL_MIN: usize = 8;
100-
const POOL_REFILL_INTERVAL_SECS: u64 = 5;
100+
const POOL_REFILL_INTERVAL_SECS: u64 = 2;
101101
const POOL_MAX: usize = 80;
102102
const REQUEST_TIMEOUT_SECS: u64 = 25;
103103
const RANGE_PARALLEL_CHUNK_BYTES: u64 = 256 * 1024;
@@ -118,7 +118,7 @@ const H2_CONN_TTL_SECS: u64 = 540;
118118
/// `h2_round_trip`. This way a slow but legitimate Apps Script call
119119
/// isn't cut off at an arbitrary fixed cap, and Full-mode batches can
120120
/// honor the user's `request_timeout_secs` setting.
121-
const H2_READY_TIMEOUT_SECS: u64 = 5;
121+
const H2_READY_TIMEOUT_SECS: u64 = 3;
122122
/// Default response-phase deadline used by `relay_uncoalesced` callers
123123
/// (the Apps-Script direct path). Sized to be just under the outer
124124
/// `REQUEST_TIMEOUT_SECS` (25 s) so an h2 timeout still leaves a few
@@ -147,7 +147,7 @@ const H1_OPEN_TIMEOUT_SECS: u64 = 8;
147147
/// containers go cold after ~5min idle and cost 1-3s on the first
148148
/// request to wake back up — most painful on YouTube / streaming where
149149
/// the first chunk after a quiet pause stalls the player.
150-
const H1_KEEPALIVE_INTERVAL_SECS: u64 = 240;
150+
const H1_KEEPALIVE_INTERVAL_SECS: u64 = 60;
151151
/// Largest response body Apps Script's `UrlFetchApp` will deliver before
152152
/// the script gets killed mid-execution. The hard wire ceiling is ~50 MiB;
153153
/// after base64 / envelope overhead and edge variance, the practical raw
@@ -1405,10 +1405,13 @@ impl DomainFronter {
14051405
// `release_capacity` on every chunk for typical Apps Script
14061406
// payloads (usually < 1 MB; range chunks are 256 KB). We still
14071407
// release capacity in the body-read loop for safety on larger
1408-
// bodies.
1408+
// bodies. 16/32 MB windows eliminate stalls for range-parallel
1409+
// streaming (256 KB chunks × many streams) without adding memory
1410+
// overhead on idle connections (the window is just a counter until
1411+
// data flows).
14091412
let (send, conn) = h2::client::Builder::new()
1410-
.initial_window_size(4 * 1024 * 1024)
1411-
.initial_connection_window_size(8 * 1024 * 1024)
1413+
.initial_window_size(16 * 1024 * 1024)
1414+
.initial_connection_window_size(32 * 1024 * 1024)
14121415
.handshake(tls)
14131416
.await
14141417
.map_err(|e| OpenH2Error::Handshake(e.to_string()))?;
@@ -1626,9 +1629,15 @@ impl DomainFronter {
16261629
// through Apps Script (where a 256 KB range chunk can take 30-90s
16271630
// of wall-clock time) are not killed by the tighter `batch_timeout`.
16281631
// Release flow-control credit per chunk so large responses don't
1629-
// stall after the initial 4 MB window.
1632+
// stall after the initial window.
1633+
// Pre-size from content-length to avoid O(log n) realloc cycles
1634+
// on large GAS responses (up to 40 MB).
16301635
let stream_timeout = self.stream_timeout();
1631-
let mut buf: Vec<u8> = Vec::new();
1636+
let body_hint: usize = headers.iter()
1637+
.find(|(k, _)| k.eq_ignore_ascii_case("content-length"))
1638+
.and_then(|(_, v)| v.parse().ok())
1639+
.unwrap_or(0);
1640+
let mut buf: Vec<u8> = Vec::with_capacity(body_hint.min(APPS_SCRIPT_BODY_MAX_BYTES as usize));
16321641
loop {
16331642
match tokio::time::timeout(stream_timeout, body.data()).await {
16341643
Ok(None) => break,

src/tunnel_client.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ const REPLY_TIMEOUT_SLACK: Duration = Duration::from_secs(5);
5656
/// Per-inflight reply timeout used by the pipelined poll loop. Each
5757
/// in-flight future independently times out after this duration so a
5858
/// dead target on the tunnel-node side doesn't block the session.
59-
const REPLY_TIMEOUT: Duration = Duration::from_secs(35);
59+
const REPLY_TIMEOUT: Duration = Duration::from_secs(20);
6060

6161
/// How long we'll briefly hold the client socket after the local
6262
/// CONNECT/SOCKS5 handshake, waiting for the client's first bytes (the
@@ -1543,13 +1543,14 @@ async fn tunnel_loop(
15431543
}
15441544

15451545
// Send initial pre-fill empty polls (optimist depth), staggered
1546-
// 1s apart so they land in separate batches. The pending data op
1546+
// 100ms apart so they land in separate batches without blocking
1547+
// session startup for a full second per slot. The pending data op
15471548
// (if any) already occupies one slot.
15481549
{
15491550
let polls_to_send = max_inflight.saturating_sub(inflight.len());
15501551
for i in 0..polls_to_send {
15511552
if i > 0 {
1552-
tokio::time::sleep(Duration::from_secs(1)).await;
1553+
tokio::time::sleep(Duration::from_millis(100)).await;
15531554
}
15541555
let (meta, reply_rx) = send_empty_poll(sid, &mut next_send_seq, mux);
15551556
tracing::debug!(

0 commit comments

Comments
 (0)