diff --git a/crates/data/src/network.rs b/crates/data/src/network.rs index f77a7a1b..d5946a4e 100644 --- a/crates/data/src/network.rs +++ b/crates/data/src/network.rs @@ -87,7 +87,7 @@ impl Network { let (action_sender, action_receiver) = mpsc::channel(64); let meter = metrics::global::meter(); let request_timeout = - (Duration::from_millis(network_config.rtt_ms()) * 10).max(Duration::from_secs(10)); + (Duration::from_millis(network_config.rtt_ms()) * 100).min(Duration::from_secs(30)); let swarm = SwarmBuilder::with_existing_identity(cert_chain, private_key, ca_certs, crls) .with_tokio() diff --git a/crates/scheduler/src/network.rs b/crates/scheduler/src/network.rs index f6c1c474..16f10a6b 100644 --- a/crates/scheduler/src/network.rs +++ b/crates/scheduler/src/network.rs @@ -113,7 +113,7 @@ impl Network { let (action_sender, action_receiver) = mpsc::channel(512); let meter = metrics::global::meter(); let request_timeout = - (Duration::from_millis(network_config.rtt_ms()) * 10).max(Duration::from_secs(10)); + (Duration::from_millis(network_config.rtt_ms()) * 100).min(Duration::from_secs(30)); // Build libp2p Swarm using the derived identity and mTLS config let swarm = SwarmBuilder::with_existing_identity(cert_chain, private_key, ca_certs, crls) diff --git a/crates/worker/src/arbiter.rs b/crates/worker/src/arbiter.rs index e44cc4d9..bcd0b4f0 100644 --- a/crates/worker/src/arbiter.rs +++ b/crates/worker/src/arbiter.rs @@ -25,9 +25,9 @@ const WORKER_TOPIC: &str = "hypha/worker"; // This allows proper handling of multiple schedulers by batching advertisements const WINDOW_LIMIT: usize = 100; const WINDOW_WAIT: std::time::Duration = std::time::Duration::from_millis(200); -const OFFER_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(5); +const OFFER_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); const PRUNE_INTERVAL: std::time::Duration = std::time::Duration::from_millis(250); -const LEASE_TIMEOUT: std::time::Duration = 
std::time::Duration::from_secs(60); +const LEASE_TIMEOUT: std::time::Duration = std::time::Duration::from_mins(3); #[derive(Debug, Error)] #[error("lease error")] diff --git a/crates/worker/src/network.rs b/crates/worker/src/network.rs index c25cb61d..c0624abf 100644 --- a/crates/worker/src/network.rs +++ b/crates/worker/src/network.rs @@ -107,7 +107,7 @@ impl Network { let (action_sender, action_receiver) = mpsc::channel(512); let meter = metrics::global::meter(); let request_timeout = - (Duration::from_millis(network_config.rtt_ms()) * 10).max(Duration::from_secs(10)); + (Duration::from_millis(network_config.rtt_ms()) * 100).min(Duration::from_secs(30)); let swarm = SwarmBuilder::with_existing_identity(cert_chain, private_key, ca_certs, crls) .with_tokio() diff --git a/scripts/network-sim.sh b/scripts/network-sim.sh index abd3c87d..fe74cb19 100755 --- a/scripts/network-sim.sh +++ b/scripts/network-sim.sh @@ -6,15 +6,15 @@ # between Hypha components. # # Usage: -# sudo ./network-sim.sh start [delay_ms] [packet_loss_%] [bandwidth_kbit] +# sudo ./network-sim.sh start [options] # sudo ./network-sim.sh status # sudo ./network-sim.sh stop # # Examples: -# sudo ./network-sim.sh start 100 5 1000 # 100ms delay, 5% loss, 1Mbps -# sudo ./network-sim.sh start 50 0 10000 # 50ms delay, no loss, 10Mbps -# sudo ./network-sim.sh start 200 # 200ms delay only -# sudo ./network-sim.sh stop # Remove all rules +# sudo ./network-sim.sh start --min-delay 50 --max-delay 200 --loss 5 --bandwidth 1000 +# sudo ./network-sim.sh start --min-delay 100 --max-delay 100 # Static 100ms delay only +# sudo ./network-sim.sh start --spike-pct 10 --spike-mult 4 # More frequent spikes +# sudo ./network-sim.sh stop # Remove all rules set -euo pipefail @@ -23,20 +23,36 @@ set -euo pipefail ANCHOR="com.apple/hypha-test" PIPE_NUM=1 +# Jitter defaults (configure fluctuating RTT) +JITTER_MIN_DELAY_MS=20 +JITTER_MAX_DELAY_MS=120 +JITTER_INTERVAL_SEC=5 +JITTER_SPIKE_PCT=5 +JITTER_SPIKE_MULT=3 + +# Background 
jitter PID + metadata +PID_FILE="/var/run/hypha-network-sim.pid" +META_FILE="/var/run/hypha-network-sim.meta" + show_usage() { cat < Minimum RTT delay (default: 20) + --max-delay Maximum RTT delay (default: 120; set min=max for static delay) + --jitter-interval How often to randomize delay (default: 5) + --spike-pct <0-100> Chance (percent) to inject a spike (default: 5) + --spike-mult Multiplier applied when a spike occurs (default: 3) + --loss <0-100> Packet loss percentage (default: 0) + --bandwidth Bandwidth cap (default: unlimited) Examples: - sudo $0 start 100 5 1000 # 100ms delay, 5% loss, 1Mbps - sudo $0 start 50 # 50ms delay only + sudo $0 start --min-delay 100 --max-delay 100 --loss 5 --bandwidth 1000 + sudo $0 start --min-delay 20 --max-delay 120 --jitter-interval 1 + sudo $0 start --min-delay 50 --max-delay 200 --spike-pct 10 --spike-mult 4 status Show current simulation configuration @@ -46,6 +62,10 @@ Commands: Traffic affected: All IPv4 and IPv6 traffic on localhost (lo0). + +Behavior: + The start command now runs in the foreground. Press Ctrl+C to stop and clean up. + The explicit 'stop' command remains available as a fallback. EOF } @@ -56,19 +76,14 @@ check_root() { fi } -start_simulation() { - local delay_ms=${1:-100} - local loss_pct=${2:-0} - local bw_kbit=${3:-0} +log() { + echo "[$(date +'%H:%M:%S')] $*" +} - echo "Starting network simulation..." - echo " Delay: ${delay_ms}ms" - echo " Packet loss: ${loss_pct}%" - if [[ $bw_kbit -gt 0 ]]; then - echo " Bandwidth: ${bw_kbit}kbit/s" - else - echo " Bandwidth: unlimited" - fi +configure_pipe() { + local delay_ms=$1 + local loss_pct=$2 + local bw_kbit=$3 # Build dnctl pipe configuration local pipe_config="delay ${delay_ms}ms" @@ -89,8 +104,245 @@ start_simulation() { # NOTE: Split stats by flow (src/dst/proto/ports) pipe_config="$pipe_config mask all" - echo "Configuring dummynet pipe $PIPE_NUM..." 
dnctl pipe "$PIPE_NUM" config $pipe_config +} + +jitter_pid_running() { + [[ -f "$PID_FILE" ]] || return 1 + local pid + pid=$(cat "$PID_FILE" 2>/dev/null || true) + [[ -n "$pid" ]] || return 1 + kill -0 "$pid" 2>/dev/null +} + +stop_jitter_loop() { + if jitter_pid_running; then + local pid + pid=$(cat "$PID_FILE") + log "Stopping jitter loop (pid $pid)..." + kill "$pid" 2>/dev/null || true + wait "$pid" 2>/dev/null || true + fi + rm -f "$PID_FILE" "$META_FILE" +} + +random_delay_in_range() { + local min_ms=$1 + local max_ms=$2 + + if [[ $min_ms -ge $max_ms ]]; then + echo "$min_ms" + return + fi + + local range=$((max_ms - min_ms + 1)) + echo $((min_ms + RANDOM % range)) +} + +next_jitter_delay() { + local min_ms=$1 + local max_ms=$2 + local spike_pct=$3 + local spike_mult=$4 + + local delay_ms + delay_ms=$(random_delay_in_range "$min_ms" "$max_ms") + + if [[ $spike_pct -gt 0 && $spike_mult -gt 1 ]]; then + local roll=$((RANDOM % 100)) + if [[ $roll -lt $spike_pct ]]; then + delay_ms=$((delay_ms * spike_mult)) + fi + fi + + echo "$delay_ms" +} + +start_jitter_loop() { + local min_ms=$1 + local max_ms=$2 + local interval_sec=$3 + local loss_pct=$4 + local bw_kbit=$5 + local spike_pct=$6 + local spike_mult=$7 + local jitter_enabled=$8 + + ( + set -euo pipefail + trap "exit 0" TERM INT + + while true; do + if [[ $jitter_enabled -eq 1 ]]; then + local delay_ms + delay_ms=$(next_jitter_delay "$min_ms" "$max_ms" "$spike_pct" "$spike_mult") + log "Applying delay ${delay_ms}ms (loss ${loss_pct}%, bw ${bw_kbit}kbit/s)" + configure_pipe "$delay_ms" "$loss_pct" "$bw_kbit" + sleep "$interval_sec" + else + sleep "$interval_sec" + fi + done + ) & + + local loop_pid=$! + echo "$loop_pid" > "$PID_FILE" + JITTER_LOOP_PID=$loop_pid + + cat > "$META_FILE" <= min-delay" + exit 1 + fi + + # Clean up any existing jitter loop to avoid conflicting writers + if jitter_pid_running; then + echo "Stopping existing jitter loop (pid $(cat "$PID_FILE"))..." 
+ stop_jitter_loop + fi + + local jitter_enabled=0 + if [[ $min_delay_ms -ne $max_delay_ms ]]; then + jitter_enabled=1 + elif [[ $spike_pct -gt 0 && $spike_mult -gt 1 ]]; then + jitter_enabled=1 + fi + + if [[ $jitter_interval_sec -le 0 ]]; then + echo "Error: jitter-interval must be > 0" + exit 1 + fi + + # Compute initial delay (randomized if jitter is enabled so we don't start flat) + local applied_delay_ms + if [[ $jitter_enabled -eq 1 ]]; then + applied_delay_ms=$(next_jitter_delay "$min_delay_ms" "$max_delay_ms" "$spike_pct" "$spike_mult") + else + applied_delay_ms=$min_delay_ms + fi + + echo "Starting network simulation..." + echo " Delay range: ${min_delay_ms}ms - ${max_delay_ms}ms (initial ${applied_delay_ms}ms)" + if [[ $jitter_enabled -eq 1 ]]; then + echo " Jitter interval: ${jitter_interval_sec}s, spikes: ${spike_pct}% @ x${spike_mult}" + else + echo " Jitter: disabled (static delay)" + fi + echo " Packet loss: ${loss_pct}%" + if [[ $bw_kbit -gt 0 ]]; then + echo " Bandwidth: ${bw_kbit}kbit/s" + else + echo " Bandwidth: unlimited" + fi + + echo "Configuring dummynet pipe $PIPE_NUM..." + configure_pipe "$applied_delay_ms" "$loss_pct" "$bw_kbit" echo "Configuring packet filter rules (anchor: $ANCHOR) for localhost traffic..." @@ -114,10 +366,24 @@ start_simulation() { pfctl -E >/dev/null 2>&1 || pfctl -e >/dev/null 2>&1 || true fi - echo "Network simulation started successfully!" - echo "" - echo "To adjust settings, run: $0 stop && sudo $0 start [new_params]" - echo "To stop simulation, run: sudo $0 stop" + # Start loop (keeps process in foreground). Even when jitter is disabled we + # keep a sleep loop running so Ctrl+C can trigger cleanup. + start_jitter_loop "$min_delay_ms" "$max_delay_ms" "$jitter_interval_sec" "$loss_pct" "$bw_kbit" "$spike_pct" "$spike_mult" "$jitter_enabled" + echo "Loop started (pid $(cat "$PID_FILE")). Running in foreground; press Ctrl+C to stop." 
+ + cleanup_and_exit() { + echo "" + log "Signal received, stopping simulation..." + stop_simulation + exit 0 + } + trap cleanup_and_exit INT TERM + + # Wait for loop to exit (e.g., killed by stop command or errors) + wait "$JITTER_LOOP_PID" || true + + # If we get here without a signal, ensure cleanup to avoid stale rules. + stop_simulation } show_status() { @@ -135,6 +401,28 @@ show_status() { fi echo "" + echo "Jitter loop:" + if jitter_pid_running; then + echo " ✓ Running (pid $(cat "$PID_FILE"))" + if [[ -f "$META_FILE" ]]; then + # shellcheck disable=SC1090 + source "$META_FILE" + if [[ ${jitter_enabled:-1} -eq 1 ]]; then + echo " Jitter: enabled" + else + echo " Jitter: disabled (static delay)" + fi + echo " Delay range: ${min_delay_ms}ms - ${max_delay_ms}ms" + echo " Interval: ${jitter_interval_sec}s" + echo " Spikes: ${spike_pct}% @ x${spike_mult}" + fi + elif [[ -f "$PID_FILE" ]]; then + echo " ✗ Not running (stale pid $(cat "$PID_FILE"))" + else + echo " ✗ Not running" + fi + echo "" + # Show our dummynet rules in the anchor echo "Hypha dummynet rules (anchor: $ANCHOR):" if pfctl -a "$ANCHOR" -s dummynet 2>/dev/null | grep -q .; then @@ -156,6 +444,9 @@ show_status() { stop_simulation() { echo "Stopping network simulation..." + # Stop jitter writer first to avoid racing with teardown + stop_jitter_loop + # Flush rules from our anchor (only this anchor, not system rules) pfctl -q -a "$ANCHOR" -F all 2>/dev/null || true