diff --git a/bin/network-monitor/assets/index.js b/bin/network-monitor/assets/index.js index b3740d532..741850e63 100644 --- a/bin/network-monitor/assets/index.js +++ b/bin/network-monitor/assets/index.js @@ -367,8 +367,20 @@ function updateDisplay() { rpcService?.details?.RpcStatus?.block_producer_status?.chain_tip ?? null; + // Compute effective health for a service, considering all signals for remote provers. + const isServiceHealthy = (s) => { + if (s.details && s.details.RemoteProverStatus) { + const statusOk = s.status === 'Healthy'; + const testOk = s.testStatus == null || s.testStatus === 'Healthy'; + const probeResult = grpcWebProbeResults.get(s.details.RemoteProverStatus.url); + const probeOk = !probeResult || probeResult.ok; + return statusOk && testOk && probeOk; + } + return s.status === 'Healthy'; + }; + // Count healthy vs unhealthy services - const healthyServices = processedServices.filter(s => s.status === 'Healthy').length; + const healthyServices = processedServices.filter(isServiceHealthy).length; const totalServices = processedServices.length; const allHealthy = healthyServices === totalServices; @@ -388,7 +400,7 @@ function updateDisplay() { // Generate status cards const serviceCardsHtml = processedServices.map(service => { - const isHealthy = service.status === 'Healthy'; + const isHealthy = isServiceHealthy(service); const statusColor = isHealthy ? '#22C55D' : '#ff5500'; const statusIcon = isHealthy ? '✓' : '✗'; const numOrDash = value => isHealthy ? (value?.toLocaleString?.() ?? value ?? 
'-') : '-'; diff --git a/bin/network-monitor/src/faucet.rs b/bin/network-monitor/src/faucet.rs index 1e50a173d..603cad103 100644 --- a/bin/network-monitor/src/faucet.rs +++ b/bin/network-monitor/src/faucet.rs @@ -102,6 +102,7 @@ pub async fn run_faucet_test_task( let mut success_count = 0u64; let mut failure_count = 0u64; let mut last_tx_id = None; + let mut last_error: Option<String>; let mut faucet_metadata = None; let mut interval = tokio::time::interval(test_interval); @@ -118,11 +119,13 @@ pub async fn run_faucet_test_task( Ok((minted_tokens, metadata)) => { success_count += 1; last_tx_id = Some(minted_tokens.tx_id.clone()); + last_error = None; faucet_metadata = Some(metadata); info!("Faucet test successful: tx_id={}", minted_tokens.tx_id); }, Err(e) => { failure_count += 1; + last_error = Some(format!("{e:#}")); warn!("Faucet test failed: {}", e); }, } @@ -140,13 +143,13 @@ pub async fn run_faucet_test_task( let status = ServiceStatus { name: "Faucet".to_string(), - status: if success_count > 0 || failure_count == 0 { - Status::Healthy - } else { + status: if last_error.is_some() { Status::Unhealthy + } else { + Status::Healthy }, last_checked: current_time, - error: None, + error: last_error.clone(), details: ServiceDetails::FaucetTest(test_details), }; diff --git a/bin/network-monitor/src/status.rs b/bin/network-monitor/src/status.rs index 1657bf7fc..a15f4c3d1 100644 --- a/bin/network-monitor/src/status.rs +++ b/bin/network-monitor/src/status.rs @@ -590,10 +590,11 @@ pub(crate) async fn check_remote_prover_status( // Use the new method to convert gRPC status to domain type let remote_prover_details = RemoteProverStatusDetails::from_proxy_status(status, url); - // Determine overall health based on worker statuses + // Determine overall health based on worker statuses. + // All workers must be healthy for the prover to be considered healthy. 
let overall_health = if remote_prover_details.workers.is_empty() { Status::Unknown - } else if remote_prover_details.workers.iter().any(|w| w.status == Status::Healthy) { + } else if remote_prover_details.workers.iter().all(|w| w.status == Status::Healthy) { Status::Healthy } else { Status::Unhealthy