diff --git a/crates/api-core/src/cfg/file.rs b/crates/api-core/src/cfg/file.rs index 3043917898..f1dbb299d5 100644 --- a/crates/api-core/src/cfg/file.rs +++ b/crates/api-core/src/cfg/file.rs @@ -738,6 +738,10 @@ pub struct CarbideConfig { /// chain and write target are operator-configured (defaulting to the same /// env -> file -> vault behavior as when it is absent); see `SecretsConfig`. pub secrets: Option, + + /// IP cleanup on lease expiry + #[serde(default)] + pub dhcp_lease_expiry_handling: bool, } #[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/crates/api-core/src/dhcp/expire.rs b/crates/api-core/src/dhcp/expire.rs index 761c247ba6..e403bb3b60 100644 --- a/crates/api-core/src/dhcp/expire.rs +++ b/crates/api-core/src/dhcp/expire.rs @@ -33,11 +33,28 @@ pub async fn expire_dhcp_lease( mac_address, } = request.into_inner(); let ip_address: IpAddr = ip_address.parse()?; + let mac_address: Option = mac_address .as_deref() .map(|m| m.parse::().map_err(CarbideError::from)) .transpose()?; + if !api.runtime_config.dhcp_lease_expiry_handling { + // Controlled by the `dhcp_lease_expiry_handling` runtime config flag (default: disabled). + // The problem with lease expiry handling is that + // 1. If a BMC IP is released, there is no way to update it in machine_topologies table, + // which causes a mismatch between machine_interface and topology entry. + // 2. Since this might cauase BMC IP to change, DPF right now does not support BMC IP + // change. Again a mismatch between DPF and NICo. + // 3. State machine can't process the host since there is no address attached to a interface. + // Blocking this handling for now and will revisit once DPF releases the fix. + tracing::info!("Expire lease handling for DHCP is disabled."); + return Ok(Response::new(rpc::ExpireDhcpLeaseResponse { + ip_address: ip_address.to_string(), + status: rpc::ExpireDhcpLeaseStatus::FeatureDisabled.into(), + })); + } + let mut txn = api.txn_begin().await?; // Look up the interface that owns this IP before deleting so we can clear diff --git a/crates/api-core/src/test_support/default_config.rs b/crates/api-core/src/test_support/default_config.rs index 9c1cfe6766..62cfe84eaf 100644 --- a/crates/api-core/src/test_support/default_config.rs +++ b/crates/api-core/src/test_support/default_config.rs @@ -243,6 +243,7 @@ pub fn get() -> CarbideConfig { tracing: TracingConfig::default(), ntp_servers: vec![], secrets: None, + dhcp_lease_expiry_handling: false, } } diff --git a/crates/api-core/src/tests/common/api_fixtures/mod.rs b/crates/api-core/src/tests/common/api_fixtures/mod.rs index 128f1df651..7cc3d27b54 100644 --- a/crates/api-core/src/tests/common/api_fixtures/mod.rs +++ b/crates/api-core/src/tests/common/api_fixtures/mod.rs @@ -184,6 +184,7 @@ pub struct TestEnvOverrides { pub redfish_overrides: Option, pub nras_should_fail_parsing: Option>, pub vpc_prefixes_drain_period: Option, + pub dhcp_lease_expiry_handling: Option, } #[derive(Clone, Debug, Default)] @@ -1304,6 +1305,10 @@ pub async fn create_test_env_with_overrides( config.compute_allocation_enforcement = overrides.compute_allocation_enforcement.unwrap_or_default(); + if let Some(val) = overrides.dhcp_lease_expiry_handling { + config.dhcp_lease_expiry_handling = val; + } + let config = Arc::new(config); let site_fabric_networks = overrides diff --git a/crates/api-core/src/tests/dhcp_lease_expiration.rs b/crates/api-core/src/tests/dhcp_lease_expiration.rs index 76f6444417..ba8e616f3e 100644 --- a/crates/api-core/src/tests/dhcp_lease_expiration.rs +++ b/crates/api-core/src/tests/dhcp_lease_expiration.rs @@ -18,7 +18,9 @@ use std::net::IpAddr; use std::str::FromStr; -use common::api_fixtures::{FIXTURE_DHCP_RELAY_ADDRESS, create_test_env}; +use common::api_fixtures::{ + FIXTURE_DHCP_RELAY_ADDRESS, TestEnvOverrides, create_test_env, create_test_env_with_overrides, +}; use mac_address::MacAddress; use model::address_selection_strategy::AddressSelectionStrategy; use rpc::forge::forge_server::Forge; @@ -48,7 +50,7 @@ async fn test_expire_releases_allocation( let ip = interface.addresses[0]; txn.commit().await?; - // Expire the lease via the RPC endpoint + // Expire the lease via the RPC endpoint — currently blocked, returns FeatureDisabled. let response = env .api .expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest { @@ -59,20 +61,20 @@ async fn test_expire_releases_allocation( let resp = response.into_inner(); assert_eq!(resp.ip_address, ip.to_string()); - assert_eq!(resp.status(), ExpireDhcpLeaseStatus::Released); + assert_eq!(resp.status(), ExpireDhcpLeaseStatus::FeatureDisabled); - // Verify the address was deleted but the interface preserved + // Address and interface must both still exist (expiry is blocked). let mut txn = env.pool.begin().await?; - let result = - db::machine_interface_address::find_ipv4_for_interface(&mut txn, interface.id).await; - assert!(result.is_err(), "address should have been deleted"); - - let iface = db::machine_interface::find_one(&mut *txn, interface.id).await?; + let addr = + db::machine_interface_address::find_ipv4_for_interface(&mut txn, interface.id).await?; assert_eq!( - iface.id, interface.id, - "interface should still exist (only the address is removed)" + addr.address, ip, + "address should still exist while expiry is blocked" ); + let iface = db::machine_interface::find_one(&mut *txn, interface.id).await?; + assert_eq!(iface.id, interface.id, "interface should still exist"); + Ok(()) } @@ -80,7 +82,14 @@ async fn test_expire_releases_allocation( async fn test_expire_nonexistent_address_returns_not_found( pool: sqlx::PgPool, ) -> Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let response = env .api @@ -118,7 +127,14 @@ async fn test_expire_invalid_address_fails( #[crate::sqlx_test] async fn test_expire_ipv6_address(pool: sqlx::PgPool) -> Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let response = env .api @@ -139,7 +155,14 @@ async fn test_expire_ipv6_address(pool: sqlx::PgPool) -> Result<(), Box Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let mac_address = "aa:bb:cc:dd:ee:07"; // First, we do the initial DHCP discover, which @@ -156,10 +179,6 @@ async fn test_discover_reallocates_after_expiration( !original_ip.is_empty(), "should get an IP on first discover" ); - - // Now, expire the lease by actually sending an `expire_dhcp_lease()` - // API call. This deletes the address, BUT keeps the interface (which - // now doesn't have an address). let expire_response = env .api .expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest { @@ -198,7 +217,14 @@ async fn test_discover_reallocates_after_expiration( async fn test_expire_does_not_delete_static_allocation( pool: sqlx::PgPool, ) -> Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let static_ip: IpAddr = "192.0.2.200".parse().unwrap(); // Create an interface with a static IP via the proper create path. @@ -249,7 +275,14 @@ async fn test_expire_does_not_delete_static_allocation( async fn test_static_address_survives_expiration_and_rediscover( pool: sqlx::PgPool, ) -> Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let mac = MacAddress::from_str("aa:bb:cc:dd:ee:09").unwrap(); let static_ip: IpAddr = "192.0.2.201".parse().unwrap(); @@ -315,7 +348,14 @@ async fn test_static_address_survives_expiration_and_rediscover( async fn test_expire_with_matching_mac_releases( pool: sqlx::PgPool, ) -> Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let relay: std::net::IpAddr = FIXTURE_DHCP_RELAY_ADDRESS.parse().unwrap(); let mac = MacAddress::from_str("aa:bb:cc:dd:ee:0a").unwrap(); @@ -330,7 +370,6 @@ async fn test_expire_with_matching_mac_releases( .await?; let ip = interface.addresses[0]; txn.commit().await?; - let response = env .api .expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest { @@ -348,7 +387,14 @@ async fn test_expire_with_matching_mac_releases( async fn test_expire_resets_hostname_and_discover_restores_it( pool: sqlx::PgPool, ) -> Result<(), Box> { - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let mac_address = "aa:bb:cc:dd:ee:0d"; // Initial discover: interface is created and an IP-derived hostname is set. @@ -376,7 +422,7 @@ async fn test_expire_resets_hostname_and_discover_restores_it( ); drop(txn); - // Expire the lease. + // Expire the lease: address is removed and hostname resets to dormant format. let expire_response = env .api .expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest { @@ -387,14 +433,16 @@ async fn test_expire_resets_hostname_and_discover_restores_it( .into_inner(); assert_eq!(expire_response.status(), ExpireDhcpLeaseStatus::Released); - // After expiry the hostname must be the dormant no-IP placeholder. + // Hostname should have reset to the dormant n- placeholder. let mut txn = env.pool.begin().await?; let iface_after_expiry = db::machine_interface::find_one(&mut *txn, interface_id).await?; - let expected_dormant = format!("noip-{}", mac_address.replace(':', "-")); - assert_eq!( - iface_after_expiry.hostname.to_lowercase(), - expected_dormant.to_lowercase(), - "hostname should be reset to dormant placeholder after lease expiry" + assert!( + iface_after_expiry + .hostname + .to_lowercase() + .starts_with("noip"), + "hostname should reset to dormant format after expiry, got: {}", + iface_after_expiry.hostname, ); drop(txn); @@ -428,7 +476,14 @@ async fn test_expire_with_mismatched_mac_is_no_op( // Simulates a race where MacAddressA expires after the IP was already // re-allocated to MacAddressB. This late expiration hook should not // delete MacAddressB's record/row. - let env = create_test_env(pool).await; + let env = create_test_env_with_overrides( + pool, + TestEnvOverrides { + dhcp_lease_expiry_handling: Some(true), + ..Default::default() + }, + ) + .await; let relay: std::net::IpAddr = FIXTURE_DHCP_RELAY_ADDRESS.parse().unwrap(); let mac_b = MacAddress::from_str("aa:bb:cc:dd:ee:0b").unwrap(); let mac_a_stale = MacAddress::from_str("aa:bb:cc:dd:ee:0c").unwrap(); diff --git a/crates/dhcp/src/lease_expiration.rs b/crates/dhcp/src/lease_expiration.rs index 224ade09ad..c98fbdfa35 100644 --- a/crates/dhcp/src/lease_expiration.rs +++ b/crates/dhcp/src/lease_expiration.rs @@ -102,6 +102,9 @@ fn expire_lease_at( rpc::ExpireDhcpLeaseStatus::NotFound => { log::info!("No allocation found for expired lease {ip_str}"); } + rpc::ExpireDhcpLeaseStatus::FeatureDisabled => { + log::info!("Feature is disabled at NICo"); + } } LeaseExpirationResult::Success } diff --git a/crates/rpc/proto/forge.proto b/crates/rpc/proto/forge.proto index c6c414dedd..a5eb6dc5ca 100644 --- a/crates/rpc/proto/forge.proto +++ b/crates/rpc/proto/forge.proto @@ -3937,6 +3937,7 @@ message ExpireDhcpLeaseRequest { enum ExpireDhcpLeaseStatus { EXPIRE_DHCP_LEASE_STATUS_RELEASED = 0; EXPIRE_DHCP_LEASE_STATUS_NOT_FOUND = 1; + EXPIRE_DHCP_LEASE_STATUS_FEATURE_DISABLED = 2; } message ExpireDhcpLeaseResponse { diff --git a/rest-api/flow/internal/nicoapi/nicoproto/nico.proto b/rest-api/flow/internal/nicoapi/nicoproto/nico.proto index 66d0663f5f..5f80029f68 100644 --- a/rest-api/flow/internal/nicoapi/nicoproto/nico.proto +++ b/rest-api/flow/internal/nicoapi/nicoproto/nico.proto @@ -3804,6 +3804,7 @@ message ExpireDhcpLeaseRequest { enum ExpireDhcpLeaseStatus { EXPIRE_DHCP_LEASE_STATUS_RELEASED = 0; EXPIRE_DHCP_LEASE_STATUS_NOT_FOUND = 1; + EXPIRE_DHCP_LEASE_STATUS_FEATURE_DISABLED = 2; } message ExpireDhcpLeaseResponse {