Skip to content

Commit f67d5c0

Browse files
sanityclaude
andcommitted
fix: add timeout to WaitingForResponses state (30s no progress = fail)
The JoinerState tracks last_progress but never enforced a timeout. If a connect operation received no acceptances, it would wait indefinitely.

Now we check for a timeout at the start of process_message:
- If gateway is set (joiner) and the operation is in the WaitingForResponses state
- And the time since last_progress has reached JOINER_PROGRESS_TIMEOUT (30s)
- Return OpError::Timeout to fail the operation

This prevents indefinitely stalled connect operations from blocking new connection attempts.

Fixes the WaitingForResponses timeout issue from #2173.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent ef6a5e5 commit f67d5c0

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

crates/core/src/operations/connect.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ use freenet_stdlib::client_api::HostResponse;
2828

2929
const FORWARD_ATTEMPT_TIMEOUT: Duration = Duration::from_secs(20);
3030
const RECENCY_COOLDOWN: Duration = Duration::from_secs(30);
31+
/// Timeout for joiner waiting for responses. If no progress (acceptances or
32+
/// observed address updates) is made within this duration, the operation fails.
33+
const JOINER_PROGRESS_TIMEOUT: Duration = Duration::from_secs(30);
3134

3235
/// Top-level message envelope used by the new connect handshake.
3336
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -512,6 +515,11 @@ impl JoinerState {
512515
self.observed_address = Some(address);
513516
self.last_progress = now;
514517
}
518+
519+
/// Returns true if no progress has been made within the timeout period.
///
/// The check is inclusive: the result is `true` once the time elapsed from
/// `self.last_progress` to `now` is greater than or equal to
/// `JOINER_PROGRESS_TIMEOUT`. The caller supplies `now`, so this method does
/// not read the clock itself.
///
/// NOTE(review): if `now` is earlier than `last_progress`, the outcome
/// depends on `Instant::duration_since` semantics (saturating to zero on
/// current Rust, which would yield `false`) — confirm against the MSRV.
520+
pub(crate) fn has_timed_out(&self, now: Instant) -> bool {
521+
now.duration_since(self.last_progress) >= JOINER_PROGRESS_TIMEOUT
522+
}
515523
}
516524

517525
/// Placeholder operation wrapper so we can exercise the logic in isolation in
@@ -806,6 +814,21 @@ impl Operation for ConnectOp {
806814
Box<dyn std::future::Future<Output = Result<OperationResult, OpError>> + Send + 'a>,
807815
> {
808816
Box::pin(async move {
817+
// Check for joiner timeout before processing any message
818+
if self.gateway.is_some() {
819+
if let Some(ConnectState::WaitingForResponses(ref state)) = self.state {
820+
if state.has_timed_out(Instant::now()) {
821+
tracing::warn!(
822+
tx = %self.id,
823+
last_progress_secs = state.last_progress.elapsed().as_secs(),
824+
accepted_count = state.accepted.len(),
825+
"connect: joiner timed out waiting for responses"
826+
);
827+
return Err(OpError::Timeout);
828+
}
829+
}
830+
}
831+
809832
match msg {
810833
ConnectMsg::Request { payload, .. } => {
811834
let env = RelayEnv::new(op_manager);

crates/core/src/operations/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,8 @@ pub(crate) enum OpError {
324324
OpNotPresent(Transaction),
325325
#[error("max number of retries for tx {0} of op type `{1}` reached")]
326326
MaxRetriesExceeded(Transaction, TransactionType),
327+
#[error("operation timed out waiting for progress")]
328+
Timeout,
327329
#[error("op not available")]
328330
OpNotAvailable(#[from] OpNotAvailable),
329331

0 commit comments

Comments
 (0)