Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/api-core/src/cfg/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,10 @@ pub struct CarbideConfig {
/// encrypted in Postgres and vault leaves the credential chain
/// entirely; when absent, vault remains the credential store.
pub secrets: Option<SecretsConfig>,

/// IP cleanup on lease expiry
#[serde(default)]
pub dhcp_lease_expiry_handling: bool,
}

#[derive(Clone, Debug, Deserialize, Serialize)]
Expand Down
17 changes: 17 additions & 0 deletions crates/api-core/src/dhcp/expire.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,28 @@ pub async fn expire_dhcp_lease(
mac_address,
} = request.into_inner();
let ip_address: IpAddr = ip_address.parse()?;

let mac_address: Option<MacAddress> = mac_address
.as_deref()
.map(|m| m.parse::<MacAddress>().map_err(CarbideError::from))
.transpose()?;

if !api.runtime_config.dhcp_lease_expiry_handling {
// Controlled by the `dhcp_lease_expiry_handling` runtime config flag (default: disabled).
// The problem with lease expiry handling is that
// 1. If a BMC IP is released, there is no way to update it in machine_topologies table,
// which causes a mismatch between machine_interface and topology entry.
// 2. Since this might cauase BMC IP to change, DPF right now does not support BMC IP
// change. Again a mismatch between DPF and NICo.
// 3. State machine can't process the host since there is no address attached to a interface.
// Blocking this handling for now and will revisit once DPF releases the fix.
tracing::info!("Expire lease handling for DHCP is disabled.");

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this going to spam the log?

return Ok(Response::new(rpc::ExpireDhcpLeaseResponse {
ip_address: ip_address.to_string(),
status: rpc::ExpireDhcpLeaseStatus::FeatureDisabled.into(),
}));
}

let mut txn = api.txn_begin().await?;

// Look up the interface that owns this IP before deleting so we can clear
Expand Down
1 change: 1 addition & 0 deletions crates/api-core/src/test_support/default_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ pub fn get() -> CarbideConfig {
tracing: TracingConfig::default(),
ntp_servers: vec![],
secrets: None,
dhcp_lease_expiry_handling: false,
}
}

Expand Down
5 changes: 5 additions & 0 deletions crates/api-core/src/tests/common/api_fixtures/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ pub struct TestEnvOverrides {
pub redfish_overrides: Option<RedfishOverrides>,
pub nras_should_fail_parsing: Option<Arc<AtomicBool>>,
pub vpc_prefixes_drain_period: Option<chrono::Duration>,
pub dhcp_lease_expiry_handling: Option<bool>,
}

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -1304,6 +1305,10 @@ pub async fn create_test_env_with_overrides(
config.compute_allocation_enforcement =
overrides.compute_allocation_enforcement.unwrap_or_default();

if let Some(val) = overrides.dhcp_lease_expiry_handling {
config.dhcp_lease_expiry_handling = val;
}

let config = Arc::new(config);

let site_fabric_networks = overrides
Expand Down
117 changes: 86 additions & 31 deletions crates/api-core/src/tests/dhcp_lease_expiration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
use std::net::IpAddr;
use std::str::FromStr;

use common::api_fixtures::{FIXTURE_DHCP_RELAY_ADDRESS, create_test_env};
use common::api_fixtures::{
FIXTURE_DHCP_RELAY_ADDRESS, TestEnvOverrides, create_test_env, create_test_env_with_overrides,
};
use mac_address::MacAddress;
use model::address_selection_strategy::AddressSelectionStrategy;
use rpc::forge::forge_server::Forge;
Expand Down Expand Up @@ -48,7 +50,7 @@ async fn test_expire_releases_allocation(
let ip = interface.addresses[0];
txn.commit().await?;

// Expire the lease via the RPC endpoint
// Expire the lease via the RPC endpoint — currently blocked, returns FeatureDisabled.
let response = env
.api
.expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest {
Expand All @@ -59,28 +61,35 @@ async fn test_expire_releases_allocation(

let resp = response.into_inner();
assert_eq!(resp.ip_address, ip.to_string());
assert_eq!(resp.status(), ExpireDhcpLeaseStatus::Released);
assert_eq!(resp.status(), ExpireDhcpLeaseStatus::FeatureDisabled);

// Verify the address was deleted but the interface preserved
// Address and interface must both still exist (expiry is blocked).
let mut txn = env.pool.begin().await?;
let result =
db::machine_interface_address::find_ipv4_for_interface(&mut txn, interface.id).await;
assert!(result.is_err(), "address should have been deleted");

let iface = db::machine_interface::find_one(&mut *txn, interface.id).await?;
let addr =
db::machine_interface_address::find_ipv4_for_interface(&mut txn, interface.id).await?;
assert_eq!(
iface.id, interface.id,
"interface should still exist (only the address is removed)"
addr.address, ip,
"address should still exist while expiry is blocked"
);

let iface = db::machine_interface::find_one(&mut *txn, interface.id).await?;
assert_eq!(iface.id, interface.id, "interface should still exist");

Ok(())
}

#[crate::sqlx_test]
async fn test_expire_nonexistent_address_returns_not_found(
pool: sqlx::PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;

let response = env
.api
Expand Down Expand Up @@ -118,7 +127,14 @@ async fn test_expire_invalid_address_fails(

#[crate::sqlx_test]
async fn test_expire_ipv6_address(pool: sqlx::PgPool) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;

let response = env
.api
Expand All @@ -139,7 +155,14 @@ async fn test_expire_ipv6_address(pool: sqlx::PgPool) -> Result<(), Box<dyn std:
async fn test_discover_reallocates_after_expiration(
pool: sqlx::PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;
let mac_address = "aa:bb:cc:dd:ee:07";

// First, we do the initial DHCP discover, which
Expand All @@ -156,10 +179,6 @@ async fn test_discover_reallocates_after_expiration(
!original_ip.is_empty(),
"should get an IP on first discover"
);

// Now, expire the lease by actually sending an `expire_dhcp_lease()`
// API call. This deletes the address, BUT keeps the interface (which
// now doesn't have an address).
let expire_response = env
.api
.expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest {
Expand Down Expand Up @@ -198,7 +217,14 @@ async fn test_discover_reallocates_after_expiration(
async fn test_expire_does_not_delete_static_allocation(
pool: sqlx::PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;
let static_ip: IpAddr = "192.0.2.200".parse().unwrap();

// Create an interface with a static IP via the proper create path.
Expand Down Expand Up @@ -249,7 +275,14 @@ async fn test_expire_does_not_delete_static_allocation(
async fn test_static_address_survives_expiration_and_rediscover(
pool: sqlx::PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;
let mac = MacAddress::from_str("aa:bb:cc:dd:ee:09").unwrap();
let static_ip: IpAddr = "192.0.2.201".parse().unwrap();

Expand Down Expand Up @@ -315,7 +348,14 @@ async fn test_static_address_survives_expiration_and_rediscover(
async fn test_expire_with_matching_mac_releases(
pool: sqlx::PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;
let relay: std::net::IpAddr = FIXTURE_DHCP_RELAY_ADDRESS.parse().unwrap();
let mac = MacAddress::from_str("aa:bb:cc:dd:ee:0a").unwrap();

Expand All @@ -330,7 +370,6 @@ async fn test_expire_with_matching_mac_releases(
.await?;
let ip = interface.addresses[0];
txn.commit().await?;

let response = env
.api
.expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest {
Expand All @@ -348,7 +387,14 @@ async fn test_expire_with_matching_mac_releases(
async fn test_expire_resets_hostname_and_discover_restores_it(
pool: sqlx::PgPool,
) -> Result<(), Box<dyn std::error::Error>> {
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;
let mac_address = "aa:bb:cc:dd:ee:0d";

// Initial discover: interface is created and an IP-derived hostname is set.
Expand Down Expand Up @@ -376,7 +422,7 @@ async fn test_expire_resets_hostname_and_discover_restores_it(
);
drop(txn);

// Expire the lease.
// Expire the lease: address is removed and hostname resets to dormant format.
let expire_response = env
.api
.expire_dhcp_lease(Request::new(ExpireDhcpLeaseRequest {
Expand All @@ -387,14 +433,16 @@ async fn test_expire_resets_hostname_and_discover_restores_it(
.into_inner();
assert_eq!(expire_response.status(), ExpireDhcpLeaseStatus::Released);

// After expiry the hostname must be the dormant no-IP placeholder.
// Hostname should have reset to the dormant n-<mac> placeholder.
let mut txn = env.pool.begin().await?;
let iface_after_expiry = db::machine_interface::find_one(&mut *txn, interface_id).await?;
let expected_dormant = format!("noip-{}", mac_address.replace(':', "-"));
assert_eq!(
iface_after_expiry.hostname.to_lowercase(),
expected_dormant.to_lowercase(),
"hostname should be reset to dormant placeholder after lease expiry"
assert!(
iface_after_expiry
.hostname
.to_lowercase()
.starts_with("noip"),
"hostname should reset to dormant format after expiry, got: {}",
iface_after_expiry.hostname,
);
drop(txn);

Expand Down Expand Up @@ -428,7 +476,14 @@ async fn test_expire_with_mismatched_mac_is_no_op(
// Simulates a race where MacAddressA expires after the IP was already
// re-allocated to MacAddressB. This late expiration hook should not
// delete MacAddressB's record/row.
let env = create_test_env(pool).await;
let env = create_test_env_with_overrides(
pool,
TestEnvOverrides {
dhcp_lease_expiry_handling: Some(true),
..Default::default()
},
)
.await;
let relay: std::net::IpAddr = FIXTURE_DHCP_RELAY_ADDRESS.parse().unwrap();
let mac_b = MacAddress::from_str("aa:bb:cc:dd:ee:0b").unwrap();
let mac_a_stale = MacAddress::from_str("aa:bb:cc:dd:ee:0c").unwrap();
Expand Down
3 changes: 3 additions & 0 deletions crates/dhcp/src/lease_expiration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ fn expire_lease_at(
rpc::ExpireDhcpLeaseStatus::NotFound => {
log::info!("No allocation found for expired lease {ip_str}");
}
rpc::ExpireDhcpLeaseStatus::FeatureDisabled => {
log::info!("Feature is disabled at NICo");

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spam?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should not, we don't get expiry too much. A machine will renew its IP before getting expired.

}
Comment thread
abvarshney-nv marked this conversation as resolved.
}
LeaseExpirationResult::Success
}
Expand Down
1 change: 1 addition & 0 deletions crates/rpc/proto/forge.proto
Original file line number Diff line number Diff line change
Expand Up @@ -3928,6 +3928,7 @@
enum ExpireDhcpLeaseStatus {
EXPIRE_DHCP_LEASE_STATUS_RELEASED = 0;
EXPIRE_DHCP_LEASE_STATUS_NOT_FOUND = 1;
EXPIRE_DHCP_LEASE_STATUS_FEATURE_DISABLED = 2;
}

message ExpireDhcpLeaseResponse {
Expand Down Expand Up @@ -6071,17 +6072,17 @@
uint32 total = 4;
uint32 completed_tests = 5;
}
message MachineValidationRun {
common.MachineValidationId validation_id = 1;
common.MachineId machine_id = 2;
google.protobuf.Timestamp start_time = 3;
google.protobuf.Timestamp end_time = 4;
string name = 5;
optional string context = 6;
MachineValidationStatus status = 7;
google.protobuf.Duration duration_to_complete = 8;

}

Check failure on line 6085 in crates/rpc/proto/forge.proto

View workflow job for this annotation

GitHub Actions / Proto Breaking Changes Check

Previously present field "9" with name "last_heartbeat_at" on message "MachineValidationRun" was deleted without reserving the number "9".

Check failure on line 6085 in crates/rpc/proto/forge.proto

View workflow job for this annotation

GitHub Actions / Proto Breaking Changes Check

Previously present field "9" with name "last_heartbeat_at" on message "MachineValidationRun" was deleted without reserving the name "last_heartbeat_at".

message MachineSetAutoUpdateRequest {
enum SetAutoupdateAction {
Expand Down
1 change: 1 addition & 0 deletions rest-api/flow/internal/nicoapi/nicoproto/nico.proto
Original file line number Diff line number Diff line change
Expand Up @@ -3804,6 +3804,7 @@ message ExpireDhcpLeaseRequest {
enum ExpireDhcpLeaseStatus {
EXPIRE_DHCP_LEASE_STATUS_RELEASED = 0;
EXPIRE_DHCP_LEASE_STATUS_NOT_FOUND = 1;
EXPIRE_DHCP_LEASE_STATUS_FEATURE_DISABLED = 2;
}

message ExpireDhcpLeaseResponse {
Expand Down
Loading