From 2847c4e6162b7afba49c274e595d1a68e401c643 Mon Sep 17 00:00:00 2001 From: Srikar Date: Wed, 24 Jun 2026 09:42:05 -0700 Subject: [PATCH 1/3] feat: track per-device credential rotation convergence --- crates/api-core/src/dpa/lockdown.rs | 12 +- crates/api-core/src/handlers/credential.rs | 7 + crates/api-core/src/handlers/uefi.rs | 50 +++- crates/api-core/src/setup.rs | 5 + crates/api-core/src/test_support/builder.rs | 1 + .../20260623120000_credential_rotation.sql | 57 ++++ ...623130000_credential_rotation_backfill.sql | 111 ++++++++ crates/api-db/src/credential_rotation.rs | 166 +++++++++++ .../src/credential_rotation_backfill.rs | 265 ++++++++++++++++++ crates/api-db/src/lib.rs | 3 + crates/bmc-explorer-cli/src/main.rs | 2 + crates/dpa-manager/src/card_handler/svpc.rs | 25 +- crates/machine-controller/src/handler.rs | 66 ++++- crates/secrets/src/credentials.rs | 99 +++---- .../src/bmc_endpoint_explorer.rs | 36 +++ crates/site-explorer/src/lib.rs | 2 + crates/switch-controller/src/configuring.rs | 131 +++++---- 17 files changed, 918 insertions(+), 120 deletions(-) create mode 100644 crates/api-db/migrations/20260623120000_credential_rotation.sql create mode 100644 crates/api-db/migrations/20260623130000_credential_rotation_backfill.sql create mode 100644 crates/api-db/src/credential_rotation.rs create mode 100644 crates/api-db/src/credential_rotation_backfill.rs diff --git a/crates/api-core/src/dpa/lockdown.rs b/crates/api-core/src/dpa/lockdown.rs index 97995adbe3..8d563acf60 100644 --- a/crates/api-core/src/dpa/lockdown.rs +++ b/crates/api-core/src/dpa/lockdown.rs @@ -25,8 +25,10 @@ use sha2::Sha256; use sqlx::PgPool; // CURRENT_LOCKDOWN_IKM_VERSION is the site-wide lockdown IKM version the -// lock/unlock flow currently derives keys from. We will leave it hardcoded to 0 until -// we introduce rotation logic. +// lock/unlock flow derives keys from, and the version recorded as each card's +// convergence target. Hardcoded to 0 until the rotation engine lands: rotating +// the IKM (v0 -> v1) is what advances this, and that logic will own making newly +// ingested NICs lock under the new IKM while already-locked cards migrate. pub const CURRENT_LOCKDOWN_IKM_VERSION: u32 = 0; // LOCKDOWN_KEY_LENGTH is the max length of the supported @@ -144,9 +146,9 @@ fn lockdown_ikm_key(version: u32) -> CredentialKey { } } -// fetch_kdf_secret fetches the IKM for the KDF from the -// dedicated site-wide lockdown credential, decoupled from the BMC root so the -// two can be rotated independently. +// fetch_kdf_secret fetches the IKM for the KDF from the dedicated site-wide +// lockdown credential, decoupled from the BMC root so the two can be rotated +// independently. async fn fetch_kdf_secret( credential_reader: &dyn CredentialReader, ) -> Result { diff --git a/crates/api-core/src/handlers/credential.rs b/crates/api-core/src/handlers/credential.rs index 17d0f94c8f..4fee793c20 100644 --- a/crates/api-core/src/handlers/credential.rs +++ b/crates/api-core/src/handlers/credential.rs @@ -620,6 +620,13 @@ pub(crate) async fn delete_bmc_root_credentials_by_mac( CarbideError::internal(format!("Error deleting credential for BMC: {e:?} ")) })?; + // TODO(credential-rotation): delete the matching `device_credential_rotation` + // row (device_mac = bmc_mac_address, credential_type = 'bmc') here, alongside + // the Vault secret. Once NICo discards the per-device BMC secret it can no + // longer authenticate or rotate the device, so the convergence marker is + // meaningless and should die with the secret it depends on. Deferred to the + // rotation-engine MR that introduces the live reader/writer for that table. + api.bmc_session_manager.flush_mac(bmc_mac_address).await; Ok(()) diff --git a/crates/api-core/src/handlers/uefi.rs b/crates/api-core/src/handlers/uefi.rs index c691382cea..8863bdc08c 100644 --- a/crates/api-core/src/handlers/uefi.rs +++ b/crates/api-core/src/handlers/uefi.rs @@ -195,6 +195,16 @@ pub(crate) async fn set_host_uefi_password( CarbideError::InvalidArgument("Specified machine does not have BMC address".into()) })?; + // A known BMC MAC is a hard precondition for setting the UEFI password: it + // keys the host_uefi rotation bookkeeping recorded below, so reject the + // request up front rather than driving the device and only then discovering + // we cannot track its convergence. + let host_bmc_mac = snapshot.host_snapshot.bmc_info.mac.ok_or_else(|| { + CarbideError::InvalidArgument( + "Specified machine does not have a known BMC MAC address".into(), + ) + })?; + let bmc_access_info = db::machine_interface::lookup_bmc_access_info(&mut txn, addr.ip(), Some(addr.port())) .await?; @@ -222,14 +232,38 @@ pub(crate) async fn set_host_uefi_password( tracing::error!(%e, "Failed to run uefi_setup call"); CarbideError::internal(format!("Failed redfish uefi_setup subtask: {e}")) })?; - api.with_txn(|txn| db::machine::update_bios_password_set_time(&machine_id, txn).boxed()) - .await? - .map_err(|e| { - tracing::error!("Failed to update bios_password_set_time: {}", e); - CarbideError::Internal { - message: format!("Failed to update BIOS password timestamp: {e}"), - } - })?; + // uefi_setup returns a BMC job_id; the password change completes + // asynchronously on the device and we do not poll it here. We optimistically + // stamp bios_password_set_time and, in the same transaction, record host_uefi + // convergence (keyed by the host BMC MAC, mirroring the backfill) so the two + // always agree -- convergence rides along with the pre-existing marker. If + // the dispatched job ultimately fails on the BMC, both are inaccurate. + // + // TODO(credential-rotation): gate both the bios_password_set_time stamp and + // the host_uefi convergence record on confirmed job_id completion (poll the + // BMC job rather than trusting dispatch). Whatever confirms completion should + // perform both updates together -- convergence does not need its own separate + // write path or operator-facing API; it follows bios_password_set_time. + api.with_txn(|txn| { + async move { + db::machine::update_bios_password_set_time(&machine_id, txn).await?; + db::credential_rotation::record_device_converged( + txn, + host_bmc_mac, + db::credential_rotation::CredentialRotationType::HostUefi, + ) + .await?; + Ok::<(), db::DatabaseError>(()) + } + .boxed() + }) + .await? + .map_err(|e| { + tracing::error!("Failed to update bios_password_set_time: {}", e); + CarbideError::Internal { + message: format!("Failed to update BIOS password timestamp: {e}"), + } + })?; Ok(Response::new(rpc::SetHostUefiPasswordResponse { job_id })) } diff --git a/crates/api-core/src/setup.rs b/crates/api-core/src/setup.rs index 88252f8ca6..e2da222fda 100644 --- a/crates/api-core/src/setup.rs +++ b/crates/api-core/src/setup.rs @@ -392,6 +392,10 @@ pub async fn start_api( // lockdown key without operator action. No-op once seeded or if the BMC // root is not yet configured. crate::dpa::lockdown::ensure_lockdown_ikm_seeded(&*credential_manager).await?; + + // Initial credential-rotation bookkeeping is backfilled by the + // `*_credential_rotation_backfill` data migration (see its header for the + // ordering invariants), not seeded here. }; let common_pools = @@ -471,6 +475,7 @@ pub async fn start_api( .rotate_switch_nvos_credentials .clone(), carbide_config.site_explorer.explore_mode, + db_pool.clone(), ); let nvlink_config = carbide_config.nvlink_config.clone().unwrap_or_default(); diff --git a/crates/api-core/src/test_support/builder.rs b/crates/api-core/src/test_support/builder.rs index 47b755f52f..923d485105 100644 --- a/crates/api-core/src/test_support/builder.rs +++ b/crates/api-core/src/test_support/builder.rs @@ -217,6 +217,7 @@ impl TestApiBuilder { Arc::new(std::sync::atomic::AtomicBool::new(false)), // Tests use MockEndpointExplorer. So this doesn't affect anything. SiteExplorerExploreMode::NvRedfish, + self.db_pool.clone(), ); let metric_emitter = self.metric_emitter.unwrap_or_else(|| { diff --git a/crates/api-db/migrations/20260623120000_credential_rotation.sql b/crates/api-db/migrations/20260623120000_credential_rotation.sql new file mode 100644 index 0000000000..58987fea77 --- /dev/null +++ b/crates/api-db/migrations/20260623120000_credential_rotation.sql @@ -0,0 +1,57 @@ +-- Credential rotation bookkeeping. +-- +-- Two tables track the rotation of device credentials that NICo is +-- authoritative for (BMC root, host/DPU UEFI, switch NVOS, and the SuperNIC +-- lockdown IKM). The site-wide table records the current rotation target per +-- credential type; the per-device table records each device's convergence +-- toward that target. Secret material itself lives in Vault, never here. + +-- Credential type shared by both rotation tables. +CREATE TYPE credential_rotation_type AS ENUM + ('bmc', 'host_uefi', 'dpu_uefi', 'nvos', 'lockdown_ikm'); + +-- One row per credential type: the site-wide rotation target / intent. +CREATE TABLE sitewide_credential_rotation ( + credential_type credential_rotation_type PRIMARY KEY, + target_version integer NOT NULL, + started_at timestamptz NOT NULL DEFAULT now(), + -- Free-form initiator/reason metadata; must never contain secrets. + request_meta jsonb NOT NULL DEFAULT '{}'::jsonb +); + +-- One row per (device_mac, credential_type): per-device convergence state. +-- +-- No foreign key: device_mac is a logical key (it mirrors the Vault per-device +-- path) shared across machines, switches, power shelves, and SuperNICs, which +-- live in separate tables with heterogeneous primary keys. +CREATE TABLE device_credential_rotation ( + device_mac macaddr NOT NULL, + credential_type credential_rotation_type NOT NULL, + -- Version currently live on the hardware (the convergence marker). NULL + -- means "not yet established" (e.g. an unlocked card for lockdown_ikm). + current_version integer, + -- Non-NULL while a rotation is mid-flight on this device; doubles as the + -- crash-safety marker and pins that version against sweep. + rotating_to_version integer, + rotate_attempts integer NOT NULL DEFAULT 0, + rotate_last_attempt_at timestamptz, + -- Redacted last-error string for observability; never contains secrets. + rotate_last_error_redacted text, + rotate_quarantined_until timestamptz, + PRIMARY KEY (device_mac, credential_type) +); + +-- Hot path: "which devices for this credential type still need rotation" +-- (current_version < target) and completion counting. +CREATE INDEX device_credential_rotation_type_version_idx + ON device_credential_rotation (credential_type, current_version); + +-- In-flight rows scanned during crash recovery and sweep pinning. +CREATE INDEX device_credential_rotation_in_flight_idx + ON device_credential_rotation (credential_type) + WHERE rotating_to_version IS NOT NULL; + +-- Quarantined rows scanned by the backoff sweep. +CREATE INDEX device_credential_rotation_quarantined_idx + ON device_credential_rotation (credential_type) + WHERE rotate_quarantined_until IS NOT NULL; diff --git a/crates/api-db/migrations/20260623130000_credential_rotation_backfill.sql b/crates/api-db/migrations/20260623130000_credential_rotation_backfill.sql new file mode 100644 index 0000000000..6aea863eda --- /dev/null +++ b/crates/api-db/migrations/20260623130000_credential_rotation_backfill.sql @@ -0,0 +1,111 @@ +-- Backfill the initial credential-rotation bookkeeping for existing sites. +-- +-- Rotation does not exist yet, so every already-ingested device is, by +-- definition, at version 0 of its site-wide credential. This one-time data +-- migration records that baseline so the future rotation engine starts from a +-- correct picture of reality: +-- +-- * sitewide_credential_rotation: target_version = 0 per credential type. +-- * device_credential_rotation: current_version = 0 for each device that +-- already carries the credential. +-- +-- New sites have no ingested devices yet, so every statement below is a no-op +-- there. Secret material itself lives in Vault, never here. Three invariants +-- make this safe without reading Vault (which SQL cannot do): +-- +-- 1. Every ingested machine, switch, and power shelf authenticates its BMC +-- with the site-wide BMC root (per-device entries, when present, were set +-- from that same root at ingestion). NICo has never rotated a BMC root, so +-- every BMC-bearing device is at v0. +-- 2. The dedicated lockdown IKM v0 is copied from the BMC root by +-- `ensure_lockdown_ikm_seeded` during API startup, before the server +-- serves and before anything consumes these rows. Cards currently locked +-- were locked under that value, so they are already at v0. +-- 3. Site exploration / ingestion refuses to run unless the site-wide host +-- and DPU UEFI defaults are configured (REQUIRED_SITE_DEFAULT_CREDENTIAL +-- _KEYS), so any ingested host whose UEFI password is set, and every DPU, +-- is at v0 of its UEFI default. +-- +-- NVOS is deliberately NOT backfilled. NICo does not change the switch NVOS +-- password from the factory default on ingestion today (REQ-6 "set NVOS from +-- factory" is not yet implemented), so there is no NICo-authoritative "v0" for +-- a switch's NVOS to converge to. NVOS rows are seeded once set-from-factory +-- lands and NICo owns that value. + +-- Site-wide rotation targets (one row per backfilled credential type). +INSERT INTO sitewide_credential_rotation (credential_type, target_version) +VALUES ('bmc', 0), ('lockdown_ikm', 0), ('host_uefi', 0), ('dpu_uefi', 0) +ON CONFLICT (credential_type) DO NOTHING; + +-- BMC root: every ingested machine (host or DPU) is at v0, keyed by its +-- (earliest) BMC interface MAC. +INSERT INTO device_credential_rotation (device_mac, credential_type, current_version) +SELECT mac_address, 'bmc', 0 FROM ( + SELECT DISTINCT ON (m.id) mi.mac_address AS mac_address + FROM machines m + JOIN machine_interfaces mi + ON mi.machine_id = m.id AND mi.interface_type = 'Bmc' + ORDER BY m.id, mi.created ASC +) machine_bmcs +ON CONFLICT (device_mac, credential_type) DO NOTHING; + +-- BMC root: every live switch is at v0, keyed by its BMC MAC. +INSERT INTO device_credential_rotation (device_mac, credential_type, current_version) +SELECT bmc_mac_address, 'bmc', 0 +FROM switches +WHERE deleted IS NULL + AND bmc_mac_address IS NOT NULL +ON CONFLICT (device_mac, credential_type) DO NOTHING; + +-- BMC root: every live power shelf is at v0, keyed by its (PMC) BMC MAC. +INSERT INTO device_credential_rotation (device_mac, credential_type, current_version) +SELECT bmc_mac_address, 'bmc', 0 +FROM power_shelves +WHERE deleted IS NULL + AND bmc_mac_address IS NOT NULL +ON CONFLICT (device_mac, credential_type) DO NOTHING; + +-- Lockdown IKM: every currently-locked SuperNIC card is converged at v0, +-- keyed by its card (NIC) MAC. +INSERT INTO device_credential_rotation (device_mac, credential_type, current_version) +SELECT mac_address, 'lockdown_ikm', 0 +FROM dpa_interfaces +WHERE deleted IS NULL + AND card_state->>'lockmode' = 'Locked' +ON CONFLICT (device_mac, credential_type) DO NOTHING; + +-- A machine's type is encoded in its id (see crates/uuid/src/machine): the +-- prefix is "fm100" followed by a type character -- 'h' host, 'd' DPU, 'p' +-- predicted host. These literals are exactly MachineType::id_prefix(), so +-- matching them is the canonical way to discriminate type in SQL (there is no +-- separate machine_type column). + +-- Host UEFI: every non-DPU machine whose UEFI password has been set is at v0, +-- keyed by its (earliest) BMC MAC. This deliberately covers both real hosts +-- ('fm100h') and predicted hosts ('fm100p') -- anything that is not a DPU -- +-- so a predicted host that already carries a UEFI password is captured too. +-- The bios_password_set_time filter excludes machines with no UEFI password. +INSERT INTO device_credential_rotation (device_mac, credential_type, current_version) +SELECT mac_address, 'host_uefi', 0 FROM ( + SELECT DISTINCT ON (m.id) mi.mac_address AS mac_address + FROM machines m + JOIN machine_interfaces mi + ON mi.machine_id = m.id AND mi.interface_type = 'Bmc' + WHERE m.bios_password_set_time IS NOT NULL + AND NOT starts_with(m.id, 'fm100d') + ORDER BY m.id, mi.created ASC +) hosts +ON CONFLICT (device_mac, credential_type) DO NOTHING; + +-- DPU UEFI: managed site-wide and guaranteed set by ingestion preconditions, +-- so every DPU is at v0, keyed by its (earliest) BMC MAC. +INSERT INTO device_credential_rotation (device_mac, credential_type, current_version) +SELECT mac_address, 'dpu_uefi', 0 FROM ( + SELECT DISTINCT ON (m.id) mi.mac_address AS mac_address + FROM machines m + JOIN machine_interfaces mi + ON mi.machine_id = m.id AND mi.interface_type = 'Bmc' + WHERE starts_with(m.id, 'fm100d') + ORDER BY m.id, mi.created ASC +) dpus +ON CONFLICT (device_mac, credential_type) DO NOTHING; diff --git a/crates/api-db/src/credential_rotation.rs b/crates/api-db/src/credential_rotation.rs new file mode 100644 index 0000000000..058ee2599a --- /dev/null +++ b/crates/api-db/src/credential_rotation.rs @@ -0,0 +1,166 @@ +//! Runtime writer for the credential-rotation bookkeeping tables. +//! +//! `device_credential_rotation` records, per device and credential type, the +//! version of the site-wide credential currently applied on the hardware -- the +//! convergence marker the rotation engine drives toward +//! `sitewide_credential_rotation.target_version`. +//! +//! Already-ingested devices are populated once by the +//! `*_credential_rotation_backfill` data migration. This module records the +//! same fact at runtime, at the moment NICo actually sets a credential on a +//! device (factory -> site-wide at ingestion), so the table does not go stale +//! as new sites and new hardware are adopted. The migration handles "before the +//! upgrade"; these hooks handle "ever after". +//! +//! Ingestion hooks wired today (calling [`record_device_converged`]) record the +//! fact at the moment NICo writes the credential, not when the device row is +//! later persisted: +//! +//! * `bmc` -- at `site-explorer` `BmcEndpointExplorer::set_bmc_root_credentials`, +//! the single point where every host, DPU, switch, and power-shelf BMC is +//! moved onto (or confirmed on) the site-wide root and its per-device Vault +//! secret is written. +//! * `host_uefi` -- when the host UEFI password is set on the device +//! (`api-core` `set_host_uefi_password` and the machine-controller UEFI-setup +//! state, alongside stamping `machines.bios_password_set_time`). +//! * `dpu_uefi` -- when the DPU UEFI password is set on the device, at the +//! machine-controller `DpuInitState::WaitingForPlatformConfiguration` state +//! right after `uefi_setup(dpu = true)` succeeds (keyed by the DPU BMC MAC, +//! mirroring the backfill). +//! * `lockdown_ikm` -- when a SuperNIC card is confirmed locked, at the +//! dpa-manager `handle_locking` state (`card_state.lockmode == Locked`), keyed +//! by the card (NIC) MAC. Recorded at the current site-wide target, which today +//! is `CURRENT_LOCKDOWN_IKM_VERSION` (0) -- the same version the lock key is +//! derived from. The rotation engine will own advancing that version. +//! +//! Deferred to the work that owns those write paths: +//! +//! * `nvos` -- the hook is wired in the switch controller at +//! `configuring::handle_rotate_os_password` (the `RotateOsPassword` state) but +//! gated off, because NICo only copies the operator-provided NVOS credential +//! into Vault today; it does not change the switch password (REQ-6, +//! set-NVOS-from-factory, is not implemented). The gate flips on with REQ-6. + +use mac_address::MacAddress; +use sqlx::PgConnection; + +use crate::DatabaseError; + +/// Mirrors the `credential_rotation_type` Postgres enum +/// (`20260623120000_credential_rotation.sql`). +#[derive(Debug, Clone, Copy, PartialEq, Eq, sqlx::Type)] +#[sqlx(type_name = "credential_rotation_type", rename_all = "snake_case")] +pub enum CredentialRotationType { + Bmc, + HostUefi, + DpuUefi, + Nvos, + LockdownIkm, +} + +/// Records that `device_mac` now carries the current site-wide `credential_type` +/// credential, i.e. it has converged to the active `target_version`. +/// +/// Call this right after NICo sets the credential on the device (the factory -> +/// site-wide step at ingestion). The recorded `current_version` is the +/// credential type's current site-wide `target_version` (0 before any rotation +/// has occurred), so a device ingested during or after a rotation is recorded +/// at the version it actually received rather than a stale 0. +/// +/// Idempotent: an existing row (re-ingestion, retry, or the backfill migration) +/// is left untouched, so this never clobbers a version the rotation engine is +/// tracking -- the engine owns all subsequent version transitions. +pub async fn record_device_converged( + conn: &mut PgConnection, + device_mac: MacAddress, + credential_type: CredentialRotationType, +) -> Result<(), DatabaseError> { + let query = "INSERT INTO device_credential_rotation \ + (device_mac, credential_type, current_version) \ + SELECT $1, $2, COALESCE( \ + (SELECT target_version FROM sitewide_credential_rotation \ + WHERE credential_type = $2), 0) \ + ON CONFLICT (device_mac, credential_type) DO NOTHING"; + sqlx::query(query) + .bind(device_mac) + .bind(credential_type) + .execute(conn) + .await + .map(|_| ()) + .map_err(|e| DatabaseError::query(query, e)) +} + +#[cfg(test)] +mod tests { + use mac_address::MacAddress; + use sqlx::PgPool; + + use super::{CredentialRotationType, record_device_converged}; + + // current_version for a (mac, type) row, or None if no row exists. + async fn version_of(pool: &PgPool, mac: &str, credential_type: &str) -> Option { + let row: Option> = sqlx::query_scalar( + "SELECT current_version FROM device_credential_rotation \ + WHERE device_mac = $1::macaddr \ + AND credential_type = $2::credential_rotation_type", + ) + .bind(mac) + .bind(credential_type) + .fetch_optional(pool) + .await + .unwrap(); + row.flatten() + } + + #[crate::sqlx_test] + async fn records_current_target_and_is_idempotent(pool: PgPool) { + let mac1: MacAddress = "02:00:00:00:00:01".parse().unwrap(); + let mac2: MacAddress = "02:00:00:00:00:02".parse().unwrap(); + let mut conn = pool.acquire().await.unwrap(); + + // The backfill migration seeds the bmc site-wide target at version 0, so + // a device recorded now converges at 0. + record_device_converged(&mut conn, mac1, CredentialRotationType::Bmc) + .await + .unwrap(); + assert_eq!(version_of(&pool, "02:00:00:00:00:01", "bmc").await, Some(0)); + + // Bump the site-wide target. An already-recorded device must NOT be + // clobbered -- the engine owns version transitions, not this hook. + sqlx::query( + "UPDATE sitewide_credential_rotation SET target_version = 3 \ + WHERE credential_type = 'bmc'", + ) + .execute(&pool) + .await + .unwrap(); + record_device_converged(&mut conn, mac1, CredentialRotationType::Bmc) + .await + .unwrap(); + assert_eq!( + version_of(&pool, "02:00:00:00:00:01", "bmc").await, + Some(0), + "existing row must be preserved on re-ingestion" + ); + + // A device first seen after the bump records the current target (3). + record_device_converged(&mut conn, mac2, CredentialRotationType::Bmc) + .await + .unwrap(); + assert_eq!( + version_of(&pool, "02:00:00:00:00:02", "bmc").await, + Some(3), + "a newly ingested device records the current site-wide target" + ); + + // nvos has no site-wide target row (deliberately not backfilled); the + // COALESCE in the writer defaults current_version to 0 rather than failing. + record_device_converged(&mut conn, mac1, CredentialRotationType::Nvos) + .await + .unwrap(); + assert_eq!( + version_of(&pool, "02:00:00:00:00:01", "nvos").await, + Some(0) + ); + } +} diff --git a/crates/api-db/src/credential_rotation_backfill.rs b/crates/api-db/src/credential_rotation_backfill.rs new file mode 100644 index 0000000000..3fa8b907dc --- /dev/null +++ b/crates/api-db/src/credential_rotation_backfill.rs @@ -0,0 +1,265 @@ +//! Tests for the `*_credential_rotation_backfill` data migration. +//! +//! The migration has no Rust counterpart, so this exercises the real SQL file +//! (via `include_str!`, so the test can never silently drift from what ships) +//! against a pre-populated database. +//! +//! The `sqlx_test` harness applies every migration *before* the test body, when +//! the database is empty — so the backfill's device inserts run there as no-ops +//! (only the unconditional site-wide target rows are written). Re-applying the +//! migration here after seeding verifies the row-insertion behavior itself; the +//! statements are idempotent (`ON CONFLICT DO NOTHING`), so the second +//! application is safe and only the now-present devices get rows. + +use sqlx::PgPool; + +const BACKFILL_MIGRATION: &str = + include_str!("../migrations/20260623130000_credential_rotation_backfill.sql"); + +const SEGMENT_ID: &str = "20000000-0000-0000-0000-000000000001"; + +async fn seed_segment(pool: &PgPool) { + sqlx::query( + "INSERT INTO network_segments (id, name, version) VALUES ($1::uuid, 'seg', 'test')", + ) + .bind(SEGMENT_ID) + .execute(pool) + .await + .unwrap(); +} + +// Inserts a machine and its single BMC interface. When `bios_set` is true the +// host's UEFI password is marked as set, so the host-UEFI backfill includes it. +async fn seed_machine_with_bmc(pool: &PgPool, machine_id: &str, bmc_mac: &str, bios_set: bool) { + sqlx::query( + r#"INSERT INTO machines (id, dpf) + VALUES ($1, '{"enabled": true, "used_for_ingestion": false}'::jsonb)"#, + ) + .bind(machine_id) + .execute(pool) + .await + .unwrap(); + + sqlx::query( + r#"INSERT INTO machine_interfaces + (machine_id, segment_id, mac_address, primary_interface, hostname, + association_type, interface_type) + VALUES ($1, $2::uuid, $3::macaddr, false, $4, 'Machine', 'Bmc')"#, + ) + .bind(machine_id) + .bind(SEGMENT_ID) + .bind(bmc_mac) + .bind(format!("{machine_id}-bmc")) + .execute(pool) + .await + .unwrap(); + + if bios_set { + sqlx::query("UPDATE machines SET bios_password_set_time = NOW() WHERE id = $1") + .bind(machine_id) + .execute(pool) + .await + .unwrap(); + } +} + +// Persists a SuperNIC card on `machine_id`, locked or unlocked. +async fn seed_card(pool: &PgPool, machine_id: &str, mac: &str, locked: bool) { + let lockmode = if locked { "Locked" } else { "Unlocked" }; + sqlx::query( + r#"INSERT INTO dpa_interfaces + (machine_id, mac_address, device_type, pci_name, interface_type, card_state) + VALUES ($1, $2::macaddr, 'BlueField3', $2, 'Svpc', + jsonb_build_object('lockmode', $3::text))"#, + ) + .bind(machine_id) + .bind(mac) + .bind(lockmode) + .execute(pool) + .await + .unwrap(); +} + +// Persists a switch (plus the expected_switches row its BMC MAC FK requires), +// optionally soft-deleted. +async fn seed_switch(pool: &PgPool, id: &str, bmc_mac: &str, deleted: bool) { + sqlx::query( + "INSERT INTO expected_switches (serial_number, bmc_mac_address, bmc_username, bmc_password) + VALUES ($1, $2::macaddr, 'admin', 'pw')", + ) + .bind(format!("sn-{id}")) + .bind(bmc_mac) + .execute(pool) + .await + .unwrap(); + + sqlx::query( + "INSERT INTO switches (id, name, config, bmc_mac_address) + VALUES ($1, $1, '{}'::jsonb, $2::macaddr)", + ) + .bind(id) + .bind(bmc_mac) + .execute(pool) + .await + .unwrap(); + + if deleted { + sqlx::query("UPDATE switches SET deleted = NOW() WHERE id = $1") + .bind(id) + .execute(pool) + .await + .unwrap(); + } +} + +// Persists a power shelf with a BMC MAC (plus the expected_power_shelves row +// its FK requires). +async fn seed_power_shelf(pool: &PgPool, id: &str, bmc_mac: &str) { + sqlx::query( + "INSERT INTO expected_power_shelves \ + (serial_number, bmc_mac_address, bmc_username, bmc_password) \ + VALUES ($1, $2::macaddr, 'admin', 'pw')", + ) + .bind(format!("sn-{id}")) + .bind(bmc_mac) + .execute(pool) + .await + .unwrap(); + + sqlx::query( + "INSERT INTO power_shelves (id, name, config, bmc_mac_address) \ + VALUES ($1, $1, '{}'::jsonb, $2::macaddr)", + ) + .bind(id) + .bind(bmc_mac) + .execute(pool) + .await + .unwrap(); +} + +// Persists a power shelf with no BMC MAC (e.g. not yet linked); must be skipped. +async fn seed_power_shelf_without_bmc(pool: &PgPool, id: &str) { + sqlx::query("INSERT INTO power_shelves (id, name, config) VALUES ($1, $1, '{}'::jsonb)") + .bind(id) + .execute(pool) + .await + .unwrap(); +} + +// Returns the device MACs recorded for a credential type, sorted, as text. +async fn device_macs(pool: &PgPool, credential_type: &str) -> Vec { + sqlx::query_scalar( + "SELECT device_mac::text FROM device_credential_rotation \ + WHERE credential_type = $1::credential_rotation_type ORDER BY 1", + ) + .bind(credential_type) + .fetch_all(pool) + .await + .unwrap() +} + +#[crate::sqlx_test] +async fn backfill_records_v0_for_existing_devices(pool: PgPool) { + seed_segment(&pool).await; + + // Two hosts (one with the UEFI password set, one without) and a DPU. + seed_machine_with_bmc(&pool, "fm100hhost1", "02:00:00:00:01:01", true).await; + seed_machine_with_bmc(&pool, "fm100hhost2", "02:00:00:00:02:01", false).await; + seed_machine_with_bmc(&pool, "fm100ddpu1", "02:00:00:00:0d:01", false).await; + + // A locked and an unlocked SuperNIC card on host1. + seed_card(&pool, "fm100hhost1", "0a:00:00:00:00:01", true).await; + seed_card(&pool, "fm100hhost1", "0a:00:00:00:00:02", false).await; + + // A live switch and a soft-deleted switch (the latter must be skipped). + seed_switch(&pool, "switch-live", "04:00:00:00:00:01", false).await; + seed_switch(&pool, "switch-deleted", "04:00:00:00:00:02", true).await; + + // A power shelf with a BMC MAC and one without (the latter must be skipped). + seed_power_shelf(&pool, "ps-live", "06:00:00:00:00:01").await; + seed_power_shelf_without_bmc(&pool, "ps-nomac").await; + + // Apply the real migration against the now-populated database. + sqlx::raw_sql(BACKFILL_MIGRATION) + .execute(&pool) + .await + .unwrap(); + + // Site-wide targets: bmc + the three convergence-defined types, never nvos. + let sitewide: Vec = sqlx::query_scalar( + "SELECT credential_type::text FROM sitewide_credential_rotation ORDER BY 1", + ) + .fetch_all(&pool) + .await + .unwrap(); + assert_eq!( + sitewide, + vec!["bmc", "dpu_uefi", "host_uefi", "lockdown_ikm"], + "site-wide targets must cover bmc + UEFI + lockdown, and exclude nvos" + ); + + // BMC: all three machines + the live switch + the live power shelf. The + // deleted switch and the MAC-less power shelf are excluded. + assert_eq!( + device_macs(&pool, "bmc").await, + vec![ + "02:00:00:00:01:01", + "02:00:00:00:02:01", + "02:00:00:00:0d:01", + "04:00:00:00:00:01", + "06:00:00:00:00:01", + ] + ); + + // Host UEFI: only the host with the UEFI password set (host2 unset, DPU + // excluded by prefix), keyed by its BMC MAC. + assert_eq!( + device_macs(&pool, "host_uefi").await, + vec!["02:00:00:00:01:01"] + ); + + // DPU UEFI: every DPU, keyed by its BMC MAC. + assert_eq!( + device_macs(&pool, "dpu_uefi").await, + vec!["02:00:00:00:0d:01"] + ); + + // Lockdown IKM: only the currently-locked card, keyed by its NIC MAC. + assert_eq!( + device_macs(&pool, "lockdown_ikm").await, + vec!["0a:00:00:00:00:01"] + ); + + // NVOS is never backfilled, and every recorded device is at v0. + let nvos_rows: i64 = sqlx::query_scalar( + "SELECT count(*) FROM device_credential_rotation \ + WHERE credential_type = 'nvos'", + ) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(nvos_rows, 0, "nvos must not be backfilled"); + + let non_v0: i64 = sqlx::query_scalar( + "SELECT count(*) FROM device_credential_rotation WHERE current_version <> 0", + ) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(non_v0, 0, "every backfilled device must be at v0"); + + // Re-applying the migration must be idempotent (no duplicate-key errors, + // no row-count change). + sqlx::raw_sql(BACKFILL_MIGRATION) + .execute(&pool) + .await + .unwrap(); + let total: i64 = sqlx::query_scalar("SELECT count(*) FROM device_credential_rotation") + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!( + total, 8, + "5 bmc + 1 host_uefi + 1 dpu_uefi + 1 lockdown_ikm" + ); +} diff --git a/crates/api-db/src/lib.rs b/crates/api-db/src/lib.rs index 732349d23c..584a670ed0 100644 --- a/crates/api-db/src/lib.rs +++ b/crates/api-db/src/lib.rs @@ -24,6 +24,9 @@ pub mod bmc_metadata; pub mod bmc_redfish_session; pub mod carbide_version; pub mod compute_allocation; +pub mod credential_rotation; +#[cfg(test)] +mod credential_rotation_backfill; pub mod db_read; pub mod desired_firmware; pub mod dhcp_entry; diff --git a/crates/bmc-explorer-cli/src/main.rs b/crates/bmc-explorer-cli/src/main.rs index 7ac625d9b4..0fe07e97fe 100644 --- a/crates/bmc-explorer-cli/src/main.rs +++ b/crates/bmc-explorer-cli/src/main.rs @@ -110,6 +110,8 @@ async fn main() -> Result<(), Box> { credential_provider.clone(), rotate_switch_nvos_credentials, mode, + // Standalone debug tool: no database, so rotation bookkeeping is skipped. + None, ); let ip = args.bmc_ip.parse()?; diff --git a/crates/dpa-manager/src/card_handler/svpc.rs b/crates/dpa-manager/src/card_handler/svpc.rs index 275a794490..a0f90ccd25 100644 --- a/crates/dpa-manager/src/card_handler/svpc.rs +++ b/crates/dpa-manager/src/card_handler/svpc.rs @@ -469,7 +469,7 @@ impl DpaInterfaceStateHandler for SvpcInterfaceHandler { #[allow(clippy::unused_async)] async fn handle_locking( &self, - _monitor: &mut DpaMonitor, + monitor: &mut DpaMonitor, mh: &ManagedHostStateSnapshot, idx: usize, _metrics: &mut DpaMonitorMetrics, @@ -497,11 +497,32 @@ impl DpaInterfaceStateHandler for SvpcInterfaceHandler { }; if cs.lockmode == Some(Locked) { + // The card is now locked on the device under the active lockdown IKM + // (build_lock_command derived the key from the same site-wide target + // version this records). Record lockdown_ikm convergence, keyed by the + // card (NIC) MAC, so the rotation engine tracks this card from the + // moment it is actually locked. The record is committed atomically + // with the state transition below (the monitor reuses this txn to + // persist the new controller state). Idempotent: a card already + // recorded (by the backfill or a prior lock) is left untouched. + let mut txn = monitor + .db_services + .db_pool + .begin() + .await + .map_err(|e| db::AnnotatedSqlxError::new("handle_locking begin txn", e))?; + db::credential_rotation::record_device_converged( + txn.as_mut(), + dpa_interface.mac_address, + db::credential_rotation::CredentialRotationType::LockdownIkm, + ) + .await?; + let new_state = DpaInterfaceControllerState::Assigned; tracing::info!(state = ?new_state, "Dpa Interface state transition"); return Ok(HandlerResult { new_state: Some(new_state), - txn: None, + txn: Some(txn), }); } diff --git a/crates/machine-controller/src/handler.rs b/crates/machine-controller/src/handler.rs index d4ccb28b10..1cb64c2325 100644 --- a/crates/machine-controller/src/handler.rs +++ b/crates/machine-controller/src/handler.rs @@ -4263,6 +4263,19 @@ impl DpuMachineStateHandler { Ok(StateHandlerOutcome::transition(next_state)) } DpuInitState::WaitingForPlatformConfiguration => { + // A known BMC MAC is a hard precondition for setting the DPU UEFI + // password: it keys the dpu_uefi rotation bookkeeping recorded once + // uefi_setup succeeds below, so refuse to drive the device without + // it rather than discovering afterward that we cannot track it. + let dpu_bmc_mac = + dpu_snapshot + .bmc_info + .mac + .ok_or_else(|| StateHandlerError::MissingData { + object_id: dpu_snapshot.id.to_string(), + missing: "bmc_mac", + })?; + let dpu_redfish_client = match ctx .services .create_redfish_client_from_machine(dpu_snapshot) @@ -4384,7 +4397,27 @@ impl DpuMachineStateHandler { let next_state = DpuInitState::PollingBiosSetup .next_state(&state.managed_state, dpu_machine_id)?; - Ok(StateHandlerOutcome::transition(next_state)) + // The DPU's UEFI password is now the site-wide value (just set via + // uefi_setup above): record dpu_uefi convergence so the rotation + // engine tracks this DPU from ingestion onward (mirrors the + // backfill, which keys DPU UEFI by the DPU BMC MAC). The MAC was + // validated as a precondition at the top of this state. Committed + // with the state transition below. + let mut txn = ctx.services.db_pool.begin().await?; + db::credential_rotation::record_device_converged( + &mut txn, + dpu_bmc_mac, + db::credential_rotation::CredentialRotationType::DpuUefi, + ) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre!( + "record dpu_uefi credential rotation convergence failed: {}", + e + )) + })?; + + Ok(StateHandlerOutcome::transition(next_state).with_txn(txn)) } DpuInitState::PollingBiosSetup => { @@ -5300,6 +5333,19 @@ async fn handle_host_uefi_setup( state: &mut ManagedHostStateSnapshot, uefi_setup_info: UefiSetupInfo, ) -> Result, StateHandlerError> { + // A known BMC MAC is a hard precondition for driving UEFI setup: it keys the + // host_uefi rotation bookkeeping recorded once the password job completes, so + // refuse to touch the device if we cannot identify (and later track) it. + let host_bmc_mac = + state + .host_snapshot + .bmc_info + .mac + .ok_or_else(|| StateHandlerError::MissingData { + object_id: state.host_snapshot.id.to_string(), + missing: "bmc_mac", + })?; + let redfish_client = ctx .services .create_redfish_client_from_machine(&state.host_snapshot) @@ -5445,6 +5491,24 @@ async fn handle_host_uefi_setup( )) })?; + // The host's UEFI password is now the site-wide value: record it as + // converged to the current host_uefi target so the rotation engine + // tracks this host from ingestion onward (mirrors the backfill, which + // keys host UEFI by the host BMC MAC). The MAC was validated as a + // precondition at the top of this handler. + db::credential_rotation::record_device_converged( + &mut txn, + host_bmc_mac, + db::credential_rotation::CredentialRotationType::HostUefi, + ) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre!( + "record host_uefi credential rotation convergence failed: {}", + e + )) + })?; + Ok(StateHandlerOutcome::transition(ManagedHostState::HostInit { machine_state: MachineState::WaitingForLockdown { lockdown_info: LockdownInfo { diff --git a/crates/secrets/src/credentials.rs b/crates/secrets/src/credentials.rs index b4f437f185..4d9efacffb 100644 --- a/crates/secrets/src/credentials.rs +++ b/crates/secrets/src/credentials.rs @@ -261,19 +261,6 @@ pub enum BmcCredentialType { }, } -/// Versioned site-wide UEFI "rotate-TO" target a rotation writes and controllers -/// read before copying into a device's per-device UEFI secret. Host and DPU UEFI -/// each get their own target (under the shared `machines/uefi/` prefix); the MAC -/// in the per-device path distinguishes the two, but the site targets are -/// disambiguated by this `host`/`dpu` segment. -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] -pub enum UefiSiteTarget { - /// `machines/uefi/host/site/root/v{N}` - Host { version: u32 }, - /// `machines/uefi/dpu/site/root/v{N}` - Dpu { version: u32 }, -} - #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub enum NicLockdownIkm { /// Site-wide SuperNIC lockdown IKM (input key material), versioned for @@ -328,10 +315,21 @@ pub enum CredentialKey { NicLockdownIkm { credential_type: NicLockdownIkm, }, - /// Versioned site-wide UEFI "rotate-TO" target (`machines/uefi/...`). - /// Admin/rotation-written only; not a loginable per-device credential. - UefiSiteTarget { - target: UefiSiteTarget, + /// Versioned site-wide host UEFI "rotate-TO" target + /// (`machines/all_hosts/site_default/uefi-metadata-items/auth/v{N}`). The + /// unversioned [`CredentialKey::HostUefi`] / [`CredentialType::SiteDefault`] + /// path stays as the current site target (= v0) used at ingestion / + /// set-from-factory; this variant addresses a specific rotation target + /// version. Admin/rotation-written only; not a loginable per-device + /// credential. + HostUefiSiteVersioned { + version: u32, + }, + /// Versioned site-wide DPU UEFI "rotate-TO" target + /// (`machines/all_dpus/site_default/uefi-metadata-items/auth/v{N}`). See + /// [`CredentialKey::HostUefiSiteVersioned`]. + DpuUefiSiteVersioned { + version: u32, }, ExtensionService { service_id: String, @@ -393,8 +391,6 @@ pub enum CredentialPrefix { UfmAuth, DpuUefi, HostUefi, - /// Per-device and versioned site-wide UEFI secrets (`machines/uefi/`). - Uefi, BmcCredentials, NicLockdownIkm, ExtensionService, @@ -418,7 +414,6 @@ impl CredentialPrefix { Self::UfmAuth => "ufm/", Self::DpuUefi => "machines/all_dpus/", Self::HostUefi => "machines/all_hosts/", - Self::Uefi => "machines/uefi/", Self::BmcCredentials => "machines/bmc/", Self::NicLockdownIkm => "machines/nic_lockdown_ikm/", Self::ExtensionService => "machines/extension-services/", @@ -441,7 +436,6 @@ impl CredentialPrefix { Self::UfmAuth, Self::DpuUefi, Self::HostUefi, - Self::Uefi, Self::BmcCredentials, Self::NicLockdownIkm, Self::ExtensionService, @@ -469,7 +463,8 @@ impl CredentialKey { Self::HostUefi { .. } => CredentialPrefix::HostUefi, Self::BmcCredentials { .. } => CredentialPrefix::BmcCredentials, Self::NicLockdownIkm { .. } => CredentialPrefix::NicLockdownIkm, - Self::UefiSiteTarget { .. } => CredentialPrefix::Uefi, + Self::HostUefiSiteVersioned { .. } => CredentialPrefix::HostUefi, + Self::DpuUefiSiteVersioned { .. } => CredentialPrefix::DpuUefi, Self::ExtensionService { .. } => CredentialPrefix::ExtensionService, Self::NmxM { .. } => CredentialPrefix::NmxM, Self::SwitchNvosAdmin { .. } => CredentialPrefix::SwitchNvosAdmin, @@ -553,14 +548,12 @@ impl CredentialKey { Cow::from(format!("machines/nic_lockdown_ikm/site/root/v{version}")) } }, - CredentialKey::UefiSiteTarget { target } => match target { - UefiSiteTarget::Host { version } => { - Cow::from(format!("machines/uefi/host/site/root/v{version}")) - } - UefiSiteTarget::Dpu { version } => { - Cow::from(format!("machines/uefi/dpu/site/root/v{version}")) - } - }, + CredentialKey::HostUefiSiteVersioned { version } => Cow::from(format!( + "machines/all_hosts/site_default/uefi-metadata-items/auth/v{version}" + )), + CredentialKey::DpuUefiSiteVersioned { version } => Cow::from(format!( + "machines/all_dpus/site_default/uefi-metadata-items/auth/v{version}" + )), CredentialKey::ExtensionService { service_id, version, @@ -667,17 +660,21 @@ mod tests { }; assert_eq!(bmc_alias.to_key_str(), "machines/bmc/site/root"); - let host_uefi = CredentialKey::UefiSiteTarget { - target: UefiSiteTarget::Host { version: 0 }, - }; - assert_eq!(host_uefi.to_key_str(), "machines/uefi/host/site/root/v0"); - assert_eq!(host_uefi.prefix(), CredentialPrefix::Uefi); + // Host/DPU UEFI rotation targets are versioned in place on the existing + // site_default path; the unversioned alias (= v0) is unchanged. + let host_uefi = CredentialKey::HostUefiSiteVersioned { version: 0 }; + assert_eq!( + host_uefi.to_key_str(), + "machines/all_hosts/site_default/uefi-metadata-items/auth/v0" + ); + assert_eq!(host_uefi.prefix(), CredentialPrefix::HostUefi); - let dpu_uefi = CredentialKey::UefiSiteTarget { - target: UefiSiteTarget::Dpu { version: 7 }, - }; - assert_eq!(dpu_uefi.to_key_str(), "machines/uefi/dpu/site/root/v7"); - assert_eq!(dpu_uefi.prefix(), CredentialPrefix::Uefi); + let dpu_uefi = CredentialKey::DpuUefiSiteVersioned { version: 7 }; + assert_eq!( + dpu_uefi.to_key_str(), + "machines/all_dpus/site_default/uefi-metadata-items/auth/v7" + ); + assert_eq!(dpu_uefi.prefix(), CredentialPrefix::DpuUefi); let nvos = CredentialKey::SwitchNvosSiteAdmin { version: 2 }; assert_eq!(nvos.to_key_str(), "switch_nvos/site/admin/v2"); @@ -938,20 +935,16 @@ mod tests { Check { scenario: "host uefi site target", input: Row { - key: CredentialKey::UefiSiteTarget { - target: UefiSiteTarget::Host { version: 0 }, - }, - expected_prefix: "machines/uefi/", + key: CredentialKey::HostUefiSiteVersioned { version: 0 }, + expected_prefix: "machines/all_hosts/", }, expect: PathChecks::all_hold(), }, Check { scenario: "dpu uefi site target", input: Row { - key: CredentialKey::UefiSiteTarget { - target: UefiSiteTarget::Dpu { version: 0 }, - }, - expected_prefix: "machines/uefi/", + key: CredentialKey::DpuUefiSiteVersioned { version: 0 }, + expected_prefix: "machines/all_dpus/", }, expect: PathChecks::all_hold(), }, @@ -1087,12 +1080,8 @@ mod tests { CredentialKey::NicLockdownIkm { credential_type: NicLockdownIkm::SiteWide { version: 0 }, }, - CredentialKey::UefiSiteTarget { - target: UefiSiteTarget::Host { version: 0 }, - }, - CredentialKey::UefiSiteTarget { - target: UefiSiteTarget::Dpu { version: 0 }, - }, + CredentialKey::HostUefiSiteVersioned { version: 0 }, + CredentialKey::DpuUefiSiteVersioned { version: 0 }, CredentialKey::ExtensionService { service_id: "s".to_string(), version: "v".to_string(), @@ -1130,6 +1119,6 @@ mod tests { #[test] fn prefix_all_is_complete() { let all = CredentialPrefix::all(); - assert_eq!(all.len(), 17); + assert_eq!(all.len(), 16); } } diff --git a/crates/site-explorer/src/bmc_endpoint_explorer.rs b/crates/site-explorer/src/bmc_endpoint_explorer.rs index 024af6666f..cd1e1f6eae 100644 --- a/crates/site-explorer/src/bmc_endpoint_explorer.rs +++ b/crates/site-explorer/src/bmc_endpoint_explorer.rs @@ -35,6 +35,7 @@ use model::machine::MachineInterfaceSnapshot; use model::site_explorer::{ EndpointExplorationError, EndpointExplorationReport, LockdownStatus, NicMode, }; +use sqlx::PgPool; use super::EndpointExplorer; use super::config::SiteExplorerExploreMode; @@ -51,6 +52,12 @@ pub struct BmcEndpointExplorer { credential_client: CredentialClient, rotate_switch_nvos_credentials: Arc, mode: SiteExplorerExploreMode, + /// Used to record per-device BMC rotation convergence at the moment the + /// device is moved onto the site-wide BMC root (see + /// [`Self::set_bmc_root_credentials`]). `None` only for the standalone + /// `bmc-explorer-cli` debug tool, which runs against an in-memory credential + /// store and no database; in that case the rotation bookkeeping is skipped. + database_connection: Option, } impl BmcEndpointExplorer { @@ -61,6 +68,7 @@ impl BmcEndpointExplorer { credential_manager: Arc, rotate_switch_nvos_credentials: Arc, mode: SiteExplorerExploreMode, + database_connection: Option, ) -> Self { Self { redfish_client: RedfishClient::new(redfish_client_pool, nv_redfish_client_pool), @@ -68,6 +76,7 @@ impl BmcEndpointExplorer { credential_client: CredentialClient::new(credential_manager), rotate_switch_nvos_credentials, mode, + database_connection, } } @@ -114,7 +123,34 @@ impl BmcEndpointExplorer { ) -> Result<(), EndpointExplorationError> { self.credential_client .set_bmc_root_credentials(bmc_mac_address, credentials) + .await?; + + // The device is now on the site-wide BMC root (just changed on the + // hardware, or validated as already-set on reingest) and its per-device + // secret is in Vault. Record bmc convergence at the current site-wide + // target version so the rotation engine tracks every host, DPU, switch, + // and power shelf from the moment NICo owns its BMC password. Idempotent, + // so reexploration of an already-recorded device is a no-op. Skipped only + // by the no-database `bmc-explorer-cli` debug tool. + if let Some(database_connection) = &self.database_connection { + let record_err = |cause: String| EndpointExplorationError::SetCredentials { + key: format!("device_credential_rotation/bmc/{bmc_mac_address}"), + cause, + }; + let mut txn = db::Transaction::begin(database_connection) + .await + .map_err(|e| record_err(e.to_string()))?; + db::credential_rotation::record_device_converged( + &mut txn, + bmc_mac_address, + db::credential_rotation::CredentialRotationType::Bmc, + ) .await + .map_err(|e| record_err(e.to_string()))?; + txn.commit().await.map_err(|e| record_err(e.to_string()))?; + } + + Ok(()) } pub async fn set_bmc_root_password( diff --git a/crates/site-explorer/src/lib.rs b/crates/site-explorer/src/lib.rs index b555056163..af39ccdaa9 100644 --- a/crates/site-explorer/src/lib.rs +++ b/crates/site-explorer/src/lib.rs @@ -107,6 +107,7 @@ pub fn new_bmc_explorer( credential_manager: Arc, rotate_switch_nvos_credentials: Arc, mode: SiteExplorerExploreMode, + database_connection: PgPool, ) -> Arc { BmcEndpointExplorer::new( redfish_client_pool, @@ -115,6 +116,7 @@ pub fn new_bmc_explorer( credential_manager, rotate_switch_nvos_credentials, mode, + Some(database_connection), ) .into() } diff --git a/crates/switch-controller/src/configuring.rs b/crates/switch-controller/src/configuring.rs index 154886f41e..2d90701b1a 100644 --- a/crates/switch-controller/src/configuring.rs +++ b/crates/switch-controller/src/configuring.rs @@ -74,63 +74,96 @@ async fn handle_rotate_os_password( )); } - let mut txn = ctx.services.db_pool.begin().await?; - let expected_switch = - db::expected_switch::find_by_bmc_mac_address(&mut txn, bmc_mac_address).await?; - txn.commit().await?; - - //TODO: This logic should be replaced with the logic of rotate password - let expected_switch = match expected_switch { - Some(es) => es, - None => { - return Ok(StateHandlerOutcome::transition( - SwitchControllerState::Error { - cause: format!("No expected switch found for BMC MAC {}", bmc_mac_address), - }, - )); - } - }; + let outcome = StateHandlerOutcome::transition(SwitchControllerState::Validating { + validating_state: ValidatingState::ValidationComplete, + }); - let (username, password) = match (expected_switch.nvos_username, expected_switch.nvos_password) - { - (Some(username), Some(password)) => (username, password), - _ => { - tracing::info!( - "Switch {:?}: no NVOS credentials in vault or expected switch for BMC MAC {}, skipping", - switch_id, - bmc_mac_address - ); - return Ok(StateHandlerOutcome::transition( - SwitchControllerState::Validating { - validating_state: ValidatingState::ValidationComplete, - }, - )); - } - }; - - let credentials = Credentials::UsernamePassword { username, password }; + // REQ-6 (set NVOS from factory) is not implemented yet, so this is gated off. + // Today NICo does not change the NVOS password from the factory default: it + // only copies the operator-provided credential into Vault (the `else` branch) + // so the rest of NICo can authenticate, and records no rotation convergence + // (the backfill skips `nvos` for the same reason). When REQ-6 lands, flip + // `update_device_password` to `true`: that branch rotates the password on the + // switch, stores the resulting NICo-owned credential, and records `nvos` + // convergence (keyed by BMC MAC) committed atomically with the transition. + let update_device_password = false; + if update_device_password { + let mut txn = ctx.services.db_pool.begin().await?; - ctx.services - .credential_manager - .set_credentials(&key, &credentials) + // TODO(REQ-6): rotate the NVOS password on the switch here (factory -> + // NICo-owned) and store the resulting credential in Vault before recording + // convergence below. Only once the password is actually changed on the + // device has the switch converged to the current nvos target. + db::credential_rotation::record_device_converged( + &mut txn, + bmc_mac_address, + db::credential_rotation::CredentialRotationType::Nvos, + ) .await .map_err(|e| { StateHandlerError::GenericError(eyre::eyre!( - "Switch {:?}: failed to store NVOS credentials in vault: {}", + "Switch {:?}: failed to record nvos rotation convergence: {}", switch_id, e )) })?; - tracing::info!( - "Switch {:?}: stored NVOS admin credentials from expected switch into vault for BMC MAC {}", - switch_id, - bmc_mac_address - ); - - Ok(StateHandlerOutcome::transition( - SwitchControllerState::Validating { - validating_state: ValidatingState::ValidationComplete, - }, - )) + Ok(outcome.with_txn(txn)) + } else { + // Copy the operator-provided NVOS admin credential from the expected + // switch into Vault. This does not touch the switch, so no convergence is + // recorded. + let mut txn = ctx.services.db_pool.begin().await?; + let expected_switch = + db::expected_switch::find_by_bmc_mac_address(&mut txn, bmc_mac_address).await?; + txn.commit().await?; + + let expected_switch = match expected_switch { + Some(es) => es, + None => { + return Ok(StateHandlerOutcome::transition( + SwitchControllerState::Error { + cause: format!("No expected switch found for BMC MAC {}", bmc_mac_address), + }, + )); + } + }; + + let (username, password) = match ( + expected_switch.nvos_username, + expected_switch.nvos_password, + ) { + (Some(username), Some(password)) => (username, password), + _ => { + tracing::info!( + "Switch {:?}: no NVOS credentials in vault or expected switch for BMC MAC {}, skipping", + switch_id, + bmc_mac_address + ); + return Ok(outcome); + } + }; + + let credentials = Credentials::UsernamePassword { username, password }; + + ctx.services + .credential_manager + .set_credentials(&key, &credentials) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre::eyre!( + "Switch {:?}: failed to store NVOS credentials in vault: {}", + switch_id, + e + )) + })?; + + tracing::info!( + "Switch {:?}: stored NVOS admin credentials from expected switch into vault for BMC MAC {}", + switch_id, + bmc_mac_address + ); + + Ok(outcome) + } } From 64f0db885f5caf2eb4c87baa8ec70b513060d201 Mon Sep 17 00:00:00 2001 From: Srikar Date: Wed, 24 Jun 2026 11:25:57 -0700 Subject: [PATCH 2/3] feat: handle credential deletion --- crates/api-core/src/handlers/credential.rs | 20 +++-- crates/api-core/src/handlers/machine.rs | 38 ++++++++- crates/api-db/src/credential_rotation.rs | 94 ++++++++++++++++++--- crates/dpa-manager/src/card_handler/svpc.rs | 1 - 4 files changed, 134 insertions(+), 19 deletions(-) diff --git a/crates/api-core/src/handlers/credential.rs b/crates/api-core/src/handlers/credential.rs index 4fee793c20..b65efadd96 100644 --- a/crates/api-core/src/handlers/credential.rs +++ b/crates/api-core/src/handlers/credential.rs @@ -620,12 +620,20 @@ pub(crate) async fn delete_bmc_root_credentials_by_mac( CarbideError::internal(format!("Error deleting credential for BMC: {e:?} ")) })?; - // TODO(credential-rotation): delete the matching `device_credential_rotation` - // row (device_mac = bmc_mac_address, credential_type = 'bmc') here, alongside - // the Vault secret. Once NICo discards the per-device BMC secret it can no - // longer authenticate or rotate the device, so the convergence marker is - // meaningless and should die with the secret it depends on. Deferred to the - // rotation-engine MR that introduces the live reader/writer for that table. + // Drop the bmc convergence marker alongside the Vault secret it depends on: + // once NICo discards the per-device BMC secret it can no longer authenticate + // or rotate the device, so tracking convergence is meaningless. (The rotation + // engine also joins device_credential_rotation to the live device tables, so + // a row orphaned by device deletion is never acted on -- this just keeps the + // table tidy at the chokepoint where the secret actually goes away.) + let mut txn = api.txn_begin().await?; + db::credential_rotation::delete_device_converged( + &mut txn, + bmc_mac_address, + db::credential_rotation::CredentialRotationType::Bmc, + ) + .await?; + txn.commit().await?; api.bmc_session_manager.flush_mac(bmc_mac_address).await; diff --git a/crates/api-core/src/handlers/machine.rs b/crates/api-core/src/handlers/machine.rs index a666978ae4..403a7f80fb 100644 --- a/crates/api-core/src/handlers/machine.rs +++ b/crates/api-core/src/handlers/machine.rs @@ -516,12 +516,28 @@ pub(crate) async fn admin_force_delete_machine( } if machine.bios_password_set_time.is_some() { - if let Err(e) = api + match api .redfish_pool .clear_host_uefi_password(client.as_ref()) .await { - tracing::warn!(%machine_id, error = %e, "Failed to clear host UEFI password while force deleting machine"); + Ok(_) => { + // The UEFI password was reset on the device, so the host no + // longer carries the site-wide UEFI value: drop the host_uefi + // convergence marker (keyed by the host BMC MAC, mirroring where + // it is recorded when the password is set). Best-effort like the + // clear itself -- the machine row is being deleted anyway, so a + // surviving marker would be neutralized by the rotation engine's + // live-device join regardless. + if let Err(e) = + forget_host_uefi_convergence(api, bmc_mac_address).await + { + tracing::warn!(%machine_id, error = %e, "Cleared host UEFI password but failed to delete its credential-rotation marker"); + } + } + Err(e) => { + tracing::warn!(%machine_id, error = %e, "Failed to clear host UEFI password while force deleting machine"); + } } // TODO (spyda): have libredfish return whether the client needs to reboot the host after clearing the host uefi password @@ -767,6 +783,24 @@ async fn clear_bmc_credentials(api: &Api, machine: &Machine) -> Result<(), Carbi Ok(()) } +/// Deletes the `host_uefi` credential-rotation convergence marker for a host, +/// keyed by its BMC MAC. Called after force-delete resets the host UEFI password +/// on the device, where the host no longer carries the site-wide UEFI value. +async fn forget_host_uefi_convergence( + api: &Api, + bmc_mac_address: mac_address::MacAddress, +) -> Result<(), CarbideError> { + let mut txn = api.txn_begin().await?; + db::credential_rotation::delete_device_converged( + &mut txn, + bmc_mac_address, + db::credential_rotation::CredentialRotationType::HostUefi, + ) + .await?; + txn.commit().await?; + Ok(()) +} + pub async fn get_machine_position_info( api: &Api, request: Request, diff --git a/crates/api-db/src/credential_rotation.rs b/crates/api-db/src/credential_rotation.rs index 058ee2599a..05951c94a9 100644 --- a/crates/api-db/src/credential_rotation.rs +++ b/crates/api-db/src/credential_rotation.rs @@ -40,6 +40,21 @@ //! gated off, because NICo only copies the operator-provided NVOS credential //! into Vault today; it does not change the switch password (REQ-6, //! set-NVOS-from-factory, is not implemented). The gate flips on with REQ-6. +//! +//! Teardown hooks (calling [`delete_device_converged`]) remove a marker when the +//! credential it tracks is torn down, keeping the table honest: +//! +//! * `bmc` -- at `api-core` `delete_bmc_root_credentials_by_mac`, alongside +//! deleting the per-device BMC secret from Vault. Once NICo discards the +//! secret it can no longer authenticate or rotate, so the marker is meaningless. +//! * `host_uefi` -- in the `api-core` force-delete path, right after +//! `clear_host_uefi_password` resets the password on the device: the host no +//! longer carries the site-wide UEFI value, so the marker is false. +//! +//! Markers NICo does *not* tear down (the device keeps the site-wide credential, +//! or NICo keeps the secret) are left to the rotation engine, which must always +//! join `device_credential_rotation` to the live device tables when selecting +//! work so a row orphaned by device deletion is never acted on. use mac_address::MacAddress; use sqlx::PgConnection; @@ -90,15 +105,42 @@ pub async fn record_device_converged( .map_err(|e| DatabaseError::query(query, e)) } +/// Deletes the convergence row for `(device_mac, credential_type)`, if present. +/// +/// Call this when NICo tears down the credential the row tracks -- either by +/// discarding its only copy (the per-device BMC secret deleted from Vault on +/// force-delete / `DeleteCredential`) or by changing it back on the device (the +/// host UEFI password cleared on force-delete). Once the credential the marker +/// depends on is gone, the marker is false and must not linger for the rotation +/// engine to act on. Idempotent: deleting a missing row is a no-op. +pub async fn delete_device_converged( + conn: &mut PgConnection, + device_mac: MacAddress, + credential_type: CredentialRotationType, +) -> Result<(), DatabaseError> { + let query = "DELETE FROM device_credential_rotation \ + WHERE device_mac = $1 AND credential_type = $2"; + sqlx::query(query) + .bind(device_mac) + .bind(credential_type) + .execute(conn) + .await + .map(|_| ()) + .map_err(|e| DatabaseError::query(query, e)) +} + #[cfg(test)] mod tests { use mac_address::MacAddress; - use sqlx::PgPool; + use sqlx::{PgConnection, PgPool}; - use super::{CredentialRotationType, record_device_converged}; + use super::{CredentialRotationType, delete_device_converged, record_device_converged}; - // current_version for a (mac, type) row, or None if no row exists. - async fn version_of(pool: &PgPool, mac: &str, credential_type: &str) -> Option { + // current_version for a (mac, type) row, or None if no row exists. Takes the + // same connection the writers use (rather than the pool) so the whole test + // runs on a single connection -- otherwise holding that connection across a + // second `pool` acquisition trips the txn_held_across_await lint. + async fn version_of(conn: &mut PgConnection, mac: &str, credential_type: &str) -> Option { let row: Option> = sqlx::query_scalar( "SELECT current_version FROM device_credential_rotation \ WHERE device_mac = $1::macaddr \ @@ -106,7 +148,7 @@ mod tests { ) .bind(mac) .bind(credential_type) - .fetch_optional(pool) + .fetch_optional(&mut *conn) .await .unwrap(); row.flatten() @@ -123,7 +165,10 @@ mod tests { record_device_converged(&mut conn, mac1, CredentialRotationType::Bmc) .await .unwrap(); - assert_eq!(version_of(&pool, "02:00:00:00:00:01", "bmc").await, Some(0)); + assert_eq!( + version_of(&mut conn, "02:00:00:00:00:01", "bmc").await, + Some(0) + ); // Bump the site-wide target. An already-recorded device must NOT be // clobbered -- the engine owns version transitions, not this hook. @@ -131,14 +176,14 @@ mod tests { "UPDATE sitewide_credential_rotation SET target_version = 3 \ WHERE credential_type = 'bmc'", ) - .execute(&pool) + .execute(&mut *conn) .await .unwrap(); record_device_converged(&mut conn, mac1, CredentialRotationType::Bmc) .await .unwrap(); assert_eq!( - version_of(&pool, "02:00:00:00:00:01", "bmc").await, + version_of(&mut conn, "02:00:00:00:00:01", "bmc").await, Some(0), "existing row must be preserved on re-ingestion" ); @@ -148,7 +193,7 @@ mod tests { .await .unwrap(); assert_eq!( - version_of(&pool, "02:00:00:00:00:02", "bmc").await, + version_of(&mut conn, "02:00:00:00:00:02", "bmc").await, Some(3), "a newly ingested device records the current site-wide target" ); @@ -159,8 +204,37 @@ mod tests { .await .unwrap(); assert_eq!( - version_of(&pool, "02:00:00:00:00:01", "nvos").await, + version_of(&mut conn, "02:00:00:00:00:01", "nvos").await, Some(0) ); } + + #[crate::sqlx_test] + async fn delete_removes_only_the_targeted_row_and_is_idempotent(pool: PgPool) { + let mac: MacAddress = "02:00:00:00:00:01".parse().unwrap(); + let mut conn = pool.acquire().await.unwrap(); + + record_device_converged(&mut conn, mac, CredentialRotationType::Bmc) + .await + .unwrap(); + record_device_converged(&mut conn, mac, CredentialRotationType::HostUefi) + .await + .unwrap(); + + // Deleting one credential type leaves the device's other markers intact. + delete_device_converged(&mut conn, mac, CredentialRotationType::Bmc) + .await + .unwrap(); + assert_eq!(version_of(&mut conn, "02:00:00:00:00:01", "bmc").await, None); + assert_eq!( + version_of(&mut conn, "02:00:00:00:00:01", "host_uefi").await, + Some(0), + "deleting bmc must not touch the host_uefi marker" + ); + + // Deleting a row that no longer exists is a no-op, not an error. + delete_device_converged(&mut conn, mac, CredentialRotationType::Bmc) + .await + .unwrap(); + } } diff --git a/crates/dpa-manager/src/card_handler/svpc.rs b/crates/dpa-manager/src/card_handler/svpc.rs index a0f90ccd25..7b1fd205dc 100644 --- a/crates/dpa-manager/src/card_handler/svpc.rs +++ b/crates/dpa-manager/src/card_handler/svpc.rs @@ -466,7 +466,6 @@ impl DpaInterfaceStateHandler for SvpcInterfaceHandler { apply_profile(dpa_interface) } - #[allow(clippy::unused_async)] async fn handle_locking( &self, monitor: &mut DpaMonitor, From f64d4af439329c2ec97ff24558875c007b9128e9 Mon Sep 17 00:00:00 2001 From: Srikar Date: Thu, 25 Jun 2026 20:28:50 -0700 Subject: [PATCH 3/3] fix: mr review --- Cargo.lock | 3 + crates/api-core/src/dpa/lockdown.rs | 41 ++- crates/api-core/src/errors.rs | 7 + crates/api-core/src/handlers/dpa.rs | 40 ++- crates/api-core/src/handlers/mlx_admin.rs | 4 +- .../20260623120000_credential_rotation.sql | 21 +- crates/api-db/src/credential_rotation.rs | 285 ++++++++++++++++-- .../test_backfill.rs} | 2 +- crates/api-db/src/lib.rs | 4 +- crates/dpa-manager/Cargo.toml | 5 + crates/dpa-manager/src/card_handler/svpc.rs | 172 ++++++++++- crates/dpa-manager/src/lib.rs | 3 + crates/switch-controller/src/configuring.rs | 7 + 13 files changed, 536 insertions(+), 58 deletions(-) rename crates/api-db/src/{credential_rotation_backfill.rs => credential_rotation/test_backfill.rs} (99%) diff --git a/Cargo.lock b/Cargo.lock index 68993566f6..7120cf9ec5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1716,7 +1716,9 @@ dependencies = [ "carbide-api-db", "carbide-api-model", "carbide-dpa", + "carbide-macros", "carbide-secrets", + "carbide-sqlx-testing", "carbide-utils", "carbide-uuid", "chrono", @@ -1724,6 +1726,7 @@ dependencies = [ "duration-str", "eyre", "http", + "mac_address", "mqttea", "opentelemetry", "rand 0.10.1", diff --git a/crates/api-core/src/dpa/lockdown.rs b/crates/api-core/src/dpa/lockdown.rs index 8d563acf60..d209d347b2 100644 --- a/crates/api-core/src/dpa/lockdown.rs +++ b/crates/api-core/src/dpa/lockdown.rs @@ -149,19 +149,23 @@ fn lockdown_ikm_key(version: u32) -> CredentialKey { // fetch_kdf_secret fetches the IKM for the KDF from the dedicated site-wide // lockdown credential, decoupled from the BMC root so the two can be rotated // independently. +// +// Returns the IKM version it resolved alongside the secret so the caller can +// durably record the exact version a card is locked under, rather than +// re-reading the (mutable) site-wide target later. Today the version is +// `CURRENT_LOCKDOWN_IKM_VERSION`; the rotation engine will own advancing it. async fn fetch_kdf_secret( credential_reader: &dyn CredentialReader, -) -> Result { - let ikm_key = lockdown_ikm_key(CURRENT_LOCKDOWN_IKM_VERSION); +) -> Result<(u32, String), eyre::Report> { + let version = CURRENT_LOCKDOWN_IKM_VERSION; + let ikm_key = lockdown_ikm_key(version); let credentials = credential_reader .get_credentials(&ikm_key) .await? - .ok_or_else(|| { - eyre::eyre!("lockdown IKM v{CURRENT_LOCKDOWN_IKM_VERSION} not found; site not seeded") - })?; + .ok_or_else(|| eyre::eyre!("lockdown IKM v{version} not found; site not seeded"))?; let Credentials::UsernamePassword { password, .. } = credentials; - Ok(password) + Ok((version, password)) } // ensure_lockdown_ikm_seeded idempotently seeds the dedicated site-wide @@ -227,16 +231,32 @@ pub async fn ensure_lockdown_ikm_seeded( } } +// SupernicLockdownKey is a derived lockdown key together with the site-wide +// lockdown IKM version it was derived from. The version travels with the key so +// the lock flow can durably record the exact version the card is locked under. +pub struct SupernicLockdownKey { + // The 16-character hex lockdown key sent to the device. + pub key: String, + // The site-wide lockdown IKM version `key` was derived from. + pub ikm_version: u32, +} + // build_supernic_lockdown_key builds a single lockdown key using // the latest KdfContextVersion. Use this for locking a card. +// +// Returns the derived key together with the IKM version it used (see +// `SupernicLockdownKey`). The unlock flow can ignore the version; the lock flow +// persists it so the recorded convergence version matches what actually locked +// the card. pub async fn build_supernic_lockdown_key( db_reader: &PgPool, dpa_interface_id: DpaInterfaceId, credential_reader: &dyn CredentialReader, -) -> Result { +) -> Result { let ctx = build_kdf_context(db_reader, dpa_interface_id).await?; - let secret = fetch_kdf_secret(credential_reader).await?; - build_lockdown_key(secret.as_bytes(), &ctx, KdfContextVersion::V1) + let (ikm_version, secret) = fetch_kdf_secret(credential_reader).await?; + let key = build_lockdown_key(secret.as_bytes(), &ctx, KdfContextVersion::V1)?; + Ok(SupernicLockdownKey { key, ikm_version }) } #[cfg(test)] @@ -441,7 +461,8 @@ mod tests { .await .unwrap(); - let secret = fetch_kdf_secret(&store).await.unwrap(); + let (version, secret) = fetch_kdf_secret(&store).await.unwrap(); + assert_eq!(version, CURRENT_LOCKDOWN_IKM_VERSION); assert_eq!(secret, "ikm-pass"); } diff --git a/crates/api-core/src/errors.rs b/crates/api-core/src/errors.rs index 4cd1c9c106..40b98ce1d9 100644 --- a/crates/api-core/src/errors.rs +++ b/crates/api-core/src/errors.rs @@ -293,6 +293,13 @@ impl From for CarbideError { DatabaseError::InvalidArgument(e) => InvalidArgument(e), DatabaseError::InvalidConfiguration(e) => InvalidConfiguration(e), DatabaseError::MissingArgument(e) => MissingArgument(e), + // A corrupted/absent site-wide rotation invariant is an internal + // state error, not a client-correctable one. + DatabaseError::MissingSitewideRotationTarget(credential_type) => Internal { + message: format!( + "no site-wide rotation target for credential type: {credential_type:?}" + ), + }, DatabaseError::NetworkParseError(e) => NetworkParseError(e), DatabaseError::NetworkSegmentDelete(e) => NetworkSegmentDelete(e), DatabaseError::NetworkSegmentDuplicateMacAddress(e) => { diff --git a/crates/api-core/src/handlers/dpa.rs b/crates/api-core/src/handlers/dpa.rs index b22c3f1c62..2b27f5cc8f 100644 --- a/crates/api-core/src/handlers/dpa.rs +++ b/crates/api-core/src/handlers/dpa.rs @@ -257,7 +257,7 @@ async fn build_unlock_command( machine_id: MachineId, pci_name: &str, ) -> CarbideResult> { - let key = crate::dpa::lockdown::build_supernic_lockdown_key( + let lockdown = crate::dpa::lockdown::build_supernic_lockdown_key( &api.database_connection, sn.id, &*api.credential_manager, @@ -271,8 +271,10 @@ async fn build_unlock_command( tracing::info!(%machine_id, %pci_name, "Unlocking DPA"); + // The unlock flow does not record convergence, so the derived IKM version is + // not persisted here. Ok(DpaCommand { - op: OpCode::Unlock { key }, + op: OpCode::Unlock { key: lockdown.key }, }) } @@ -414,7 +416,7 @@ async fn build_lock_command( machine_id: MachineId, pci_name: &str, ) -> CarbideResult> { - let key = crate::dpa::lockdown::build_supernic_lockdown_key( + let lockdown = crate::dpa::lockdown::build_supernic_lockdown_key( &api.database_connection, sn.id, &*api.credential_manager, @@ -426,9 +428,37 @@ async fn build_lock_command( )) })?; - tracing::info!(%machine_id, %pci_name, "Locking DPA"); + // Stage the IKM version we are about to lock the card with as the in-flight + // rotation marker (`rotating_to_version`) on the card's lockdown_ikm row + // *before* issuing the lock command. dpa-manager's `handle_locking` promotes + // exactly this value to the convergence version when the card reports Locked + // -- never the (possibly advanced) site-wide target re-read at observation + // time. Staging first means we only ever issue a lock for a version we have + // already recorded our intent to use; if the write fails we surface the error + // and do not lock. The writer is idempotent across the per-cycle + // re-derivation while Locking. + let ikm_version = i32::try_from(lockdown.ikm_version).map_err(|e| CarbideError::Internal { + message: format!( + "lockdown IKM version {} does not fit in i32 for DPA {pci_name}: {e}", + lockdown.ikm_version + ), + })?; + let mut conn = api.database_connection.acquire().await.map_err(|e| { + CarbideError::GenericErrorFromReport(eyre!( + "failed to acquire connection to stage lockdown IKM rotation for DPA {pci_name}: {e}" + )) + })?; + db::credential_rotation::mark_device_rotating_to_version( + &mut conn, + sn.mac_address, + db::credential_rotation::CredentialRotationType::LockdownIkm, + ikm_version, + ) + .await?; + + tracing::info!(%machine_id, %pci_name, ikm_version = lockdown.ikm_version, "Locking DPA"); Ok(DpaCommand { - op: OpCode::Lock { key }, + op: OpCode::Lock { key: lockdown.key }, }) } diff --git a/crates/api-core/src/handlers/mlx_admin.rs b/crates/api-core/src/handlers/mlx_admin.rs index b5760b2129..27fa8351b9 100644 --- a/crates/api-core/src/handlers/mlx_admin.rs +++ b/crates/api-core/src/handlers/mlx_admin.rs @@ -1316,7 +1316,7 @@ async fn get_device_lockdown_key( } })?; - let lockdown_key = crate::dpa::lockdown::build_supernic_lockdown_key( + let lockdown = crate::dpa::lockdown::build_supernic_lockdown_key( &api.database_connection, dpa_interface.id, &*api.credential_manager, @@ -1328,5 +1328,5 @@ async fn get_device_lockdown_key( ), })?; - Ok(lockdown_key) + Ok(lockdown.key) } diff --git a/crates/api-db/migrations/20260623120000_credential_rotation.sql b/crates/api-db/migrations/20260623120000_credential_rotation.sql index 58987fea77..8a773e786c 100644 --- a/crates/api-db/migrations/20260623120000_credential_rotation.sql +++ b/crates/api-db/migrations/20260623120000_credential_rotation.sql @@ -16,7 +16,11 @@ CREATE TABLE sitewide_credential_rotation ( target_version integer NOT NULL, started_at timestamptz NOT NULL DEFAULT now(), -- Free-form initiator/reason metadata; must never contain secrets. - request_meta jsonb NOT NULL DEFAULT '{}'::jsonb + request_meta jsonb NOT NULL DEFAULT '{}'::jsonb, + -- Versions are 0-based (0 = pre-rotation baseline), so the target is + -- non-negative by construction. + CONSTRAINT sitewide_credential_rotation_target_version_non_negative + CHECK (target_version >= 0) ); -- One row per (device_mac, credential_type): per-device convergence state. @@ -38,7 +42,20 @@ CREATE TABLE device_credential_rotation ( -- Redacted last-error string for observability; never contains secrets. rotate_last_error_redacted text, rotate_quarantined_until timestamptz, - PRIMARY KEY (device_mac, credential_type) + PRIMARY KEY (device_mac, credential_type), + -- Version/counter fields are non-negative by construction: versions are + -- 0-based and rotate_attempts counts up from 0. These CHECKs are the only + -- guard at write time -- manual repairs and the backfill data migration + -- bypass the Rust writers -- so they keep the rotation engine from + -- reasoning over impossible state. A NULL current_version ("not yet + -- established") or rotating_to_version ("no rotation in flight") satisfies + -- the CHECK and stays legal. + CONSTRAINT device_credential_rotation_current_version_non_negative + CHECK (current_version >= 0), + CONSTRAINT device_credential_rotation_rotating_to_version_non_negative + CHECK (rotating_to_version >= 0), + CONSTRAINT device_credential_rotation_rotate_attempts_non_negative + CHECK (rotate_attempts >= 0) ); -- Hot path: "which devices for this credential type still need rotation" diff --git a/crates/api-db/src/credential_rotation.rs b/crates/api-db/src/credential_rotation.rs index 05951c94a9..bbe9af473a 100644 --- a/crates/api-db/src/credential_rotation.rs +++ b/crates/api-db/src/credential_rotation.rs @@ -27,11 +27,22 @@ //! machine-controller `DpuInitState::WaitingForPlatformConfiguration` state //! right after `uefi_setup(dpu = true)` succeeds (keyed by the DPU BMC MAC, //! mirroring the backfill). -//! * `lockdown_ikm` -- when a SuperNIC card is confirmed locked, at the -//! dpa-manager `handle_locking` state (`card_state.lockmode == Locked`), keyed -//! by the card (NIC) MAC. Recorded at the current site-wide target, which today -//! is `CURRENT_LOCKDOWN_IKM_VERSION` (0) -- the same version the lock key is -//! derived from. The rotation engine will own advancing that version. +//! * `lockdown_ikm` -- staged as a two-phase rotation keyed by the card (NIC) +//! MAC, so the recorded convergence version is always the one the hardware was +//! actually locked under rather than the (mutable) site-wide target re-read at +//! observation time. When api-core issues the lock command it stamps the IKM +//! version the lock key was derived from as the in-flight marker via +//! [`mark_device_rotating_to_version`] (`rotating_to_version`); when +//! dpa-manager `handle_locking` sees the card report Locked +//! (`card_state.lockmode == Locked`) it promotes that exact value to +//! `current_version` via [`promote_rotating_to_current`]. A card with no staged +//! marker (locked before this flow shipped, already at v0 from the backfill) +//! falls back to [`record_device_converged`] at the site-wide target. Today the +//! locked-with version is `CURRENT_LOCKDOWN_IKM_VERSION` (0); the rotation +//! engine will own advancing the site-wide target, and the staged +//! `rotating_to_version` is exactly the crash-safety marker that keeps a +//! mid-flight advance from mis-recording a card as converged to a version it +//! was never locked under. //! //! Deferred to the work that owns those write paths: //! @@ -78,9 +89,13 @@ pub enum CredentialRotationType { /// /// Call this right after NICo sets the credential on the device (the factory -> /// site-wide step at ingestion). The recorded `current_version` is the -/// credential type's current site-wide `target_version` (0 before any rotation -/// has occurred), so a device ingested during or after a rotation is recorded -/// at the version it actually received rather than a stale 0. +/// credential type's current site-wide `target_version`, so a device ingested +/// during or after a rotation is recorded at the version it actually received. +/// +/// Requires a `sitewide_credential_rotation` row for `credential_type` to +/// already exist; the backfill migration seeds one for every active type. If it +/// is absent this returns [`DatabaseError::MissingSitewideRotationTarget`] +/// rather than guessing a version -- see the body for why guessing is unsafe. /// /// Idempotent: an existing row (re-ingestion, retry, or the backfill migration) /// is left untouched, so this never clobbers a version the rotation engine is @@ -89,22 +104,138 @@ pub async fn record_device_converged( conn: &mut PgConnection, device_mac: MacAddress, credential_type: CredentialRotationType, +) -> Result<(), DatabaseError> { + // Resolve the site-wide target up front and fail loudly if it is missing. + // + // For every credential type wired today (bmc, host_uefi, dpu_uefi, + // lockdown_ikm) the backfill data migration unconditionally seeds a + // `sitewide_credential_rotation` row, so an absent row is never a normal + // condition -- it is a corrupted invariant. The previous COALESCE(..., 0) + // masked that: it recorded `current_version = 0`, which may be the wrong + // convergence version (the device actually received whatever the live + // target was), and the `ON CONFLICT DO NOTHING` below then froze that wrong + // value forever (the engine owns transitions and never clobbers it). Once + // the site-wide row was restored the engine would see `0 < target` and + // drive a spurious rotation of a security credential. Erroring instead + // surfaces the broken state and lets it self-heal once the row exists. + // + // NVOS is deliberately NOT backfilled, and its only caller + // (switch-controller `handle_configuring`) is gated off until REQ-6. When + // that gate flips on, REQ-6 MUST also seed a `sitewide_credential_rotation` + // row for nvos (via the backfill or at runtime) before this is called, or + // it will -- correctly -- fail with `MissingSitewideRotationTarget` instead + // of recording a bogus version. The error makes that ordering + // self-enforcing. + // + // Resolving the target here and recording it is correct only when the + // device is known to carry the *current* site-wide credential -- i.e. NICo + // just set factory -> site-wide. A caller that locked/derived against a + // specific version it captured earlier must instead stage that version with + // [`mark_device_rotating_to_version`] and promote it on confirmation with + // [`promote_rotating_to_current`], so a target advancing between derivation + // and confirmation cannot mis-record the convergence version. + let select = "SELECT target_version FROM sitewide_credential_rotation \ + WHERE credential_type = $1"; + let target_version: i32 = sqlx::query_scalar(select) + .bind(credential_type) + .fetch_optional(&mut *conn) + .await + .map_err(|e| DatabaseError::query(select, e))? + .ok_or(DatabaseError::MissingSitewideRotationTarget( + credential_type, + ))?; + + // Idempotent: an existing row (re-ingestion, retry, or the backfill + // migration) is left untouched, so we never clobber a version the rotation + // engine is tracking -- the engine owns all subsequent transitions. + let insert = "INSERT INTO device_credential_rotation \ + (device_mac, credential_type, current_version) \ + VALUES ($1, $2, $3) \ + ON CONFLICT (device_mac, credential_type) DO NOTHING"; + sqlx::query(insert) + .bind(device_mac) + .bind(credential_type) + .bind(target_version) + .execute(&mut *conn) + .await + .map(|_| ()) + .map_err(|e| DatabaseError::query(insert, e)) +} + +/// Stages an in-flight rotation: records that `device_mac` is being moved to +/// `rotating_to_version` of `credential_type`, without touching `current_version`. +/// +/// This is phase one of a two-phase convergence for credentials NICo derives +/// against a specific version it must remember (the lockdown-IKM lock flow is the +/// motivating case): api-core calls this when it *issues* the lock command, +/// capturing the exact IKM version the key was derived from. dpa-manager then +/// calls [`promote_rotating_to_current`] when the hardware confirms, so the +/// recorded convergence version is the one the card was actually locked under +/// rather than the site-wide target re-read at observation time (which may have +/// advanced in between). +/// +/// Upserts so it works whether or not a row exists yet, and is idempotent across +/// retry scout cycles: the lock command is re-derived from the same version every +/// cycle, so the conditional `DO UPDATE` only writes when the staged value +/// actually changes. `current_version` is left as-is (NULL "not yet established" +/// for a first lock, or the prior converged value for a real rotation). The +/// non-negative CHECK on the column is the final guard against a bad version. +pub async fn mark_device_rotating_to_version( + conn: &mut PgConnection, + device_mac: MacAddress, + credential_type: CredentialRotationType, + rotating_to_version: i32, ) -> Result<(), DatabaseError> { let query = "INSERT INTO device_credential_rotation \ - (device_mac, credential_type, current_version) \ - SELECT $1, $2, COALESCE( \ - (SELECT target_version FROM sitewide_credential_rotation \ - WHERE credential_type = $2), 0) \ - ON CONFLICT (device_mac, credential_type) DO NOTHING"; + (device_mac, credential_type, rotating_to_version) \ + VALUES ($1, $2, $3) \ + ON CONFLICT (device_mac, credential_type) \ + DO UPDATE SET rotating_to_version = EXCLUDED.rotating_to_version \ + WHERE device_credential_rotation.rotating_to_version \ + IS DISTINCT FROM EXCLUDED.rotating_to_version"; sqlx::query(query) .bind(device_mac) .bind(credential_type) - .execute(conn) + .bind(rotating_to_version) + .execute(&mut *conn) .await .map(|_| ()) .map_err(|e| DatabaseError::query(query, e)) } +/// Completes an in-flight rotation: promotes a staged `rotating_to_version` to +/// `current_version` for `(device_mac, credential_type)` and clears the in-flight +/// marker. Returns `true` if a staged rotation was promoted, `false` if there was +/// nothing to promote (no row, or `rotating_to_version` already NULL). +/// +/// Phase two of the flow started by [`mark_device_rotating_to_version`]: called +/// when the hardware confirms the new credential. Because the promoted value is +/// the exact version staged at derivation time, a site-wide target that advanced +/// in between cannot make us record a version the hardware was never on. +/// +/// Idempotent: a second call (e.g. a re-observed lock) finds `rotating_to_version` +/// already cleared and is a no-op, leaving the promoted `current_version` intact. +/// A `false` return lets the caller fall back to [`record_device_converged`] for +/// devices that were converged before this staged flow shipped (no marker). +pub async fn promote_rotating_to_current( + conn: &mut PgConnection, + device_mac: MacAddress, + credential_type: CredentialRotationType, +) -> Result { + let query = "UPDATE device_credential_rotation \ + SET current_version = rotating_to_version, rotating_to_version = NULL \ + WHERE device_mac = $1 AND credential_type = $2 \ + AND rotating_to_version IS NOT NULL"; + let result = sqlx::query(query) + .bind(device_mac) + .bind(credential_type) + .execute(&mut *conn) + .await + .map_err(|e| DatabaseError::query(query, e))?; + + Ok(result.rows_affected() > 0) +} + /// Deletes the convergence row for `(device_mac, credential_type)`, if present. /// /// Call this when NICo tears down the credential the row tracks -- either by @@ -129,12 +260,23 @@ pub async fn delete_device_converged( .map_err(|e| DatabaseError::query(query, e)) } +// Tests for the SQL-only `*_credential_rotation_backfill` data migration. It has +// no Rust counterpart to host an inline `mod tests`, so it lives as a sibling +// child module here (mirroring `machine_interface::test_duplicate_mac`) rather +// than as a standalone top-level module. +#[cfg(test)] +mod test_backfill; + #[cfg(test)] mod tests { use mac_address::MacAddress; use sqlx::{PgConnection, PgPool}; - use super::{CredentialRotationType, delete_device_converged, record_device_converged}; + use super::{ + CredentialRotationType, delete_device_converged, mark_device_rotating_to_version, + promote_rotating_to_current, record_device_converged, + }; + use crate::DatabaseError; // current_version for a (mac, type) row, or None if no row exists. Takes the // same connection the writers use (rather than the pool) so the whole test @@ -154,6 +296,26 @@ mod tests { row.flatten() } + // rotating_to_version for a (mac, type) row, or None if no row exists or no + // rotation is staged. + async fn rotating_version_of( + conn: &mut PgConnection, + mac: &str, + credential_type: &str, + ) -> Option { + let row: Option> = sqlx::query_scalar( + "SELECT rotating_to_version FROM device_credential_rotation \ + WHERE device_mac = $1::macaddr \ + AND credential_type = $2::credential_rotation_type", + ) + .bind(mac) + .bind(credential_type) + .fetch_optional(&mut *conn) + .await + .unwrap(); + row.flatten() + } + #[crate::sqlx_test] async fn records_current_target_and_is_idempotent(pool: PgPool) { let mac1: MacAddress = "02:00:00:00:00:01".parse().unwrap(); @@ -198,14 +360,92 @@ mod tests { "a newly ingested device records the current site-wide target" ); - // nvos has no site-wide target row (deliberately not backfilled); the - // COALESCE in the writer defaults current_version to 0 rather than failing. - record_device_converged(&mut conn, mac1, CredentialRotationType::Nvos) + // nvos has no site-wide target row (deliberately not backfilled, and its + // only caller is gated off until REQ-6). Recording convergence for a + // type with no site-wide target is a corrupted invariant, so the writer + // fails loudly instead of guessing a version -- and writes nothing. + let err = record_device_converged(&mut conn, mac1, CredentialRotationType::Nvos) .await - .unwrap(); + .expect_err("nvos has no site-wide target row, so recording must fail"); + assert!( + matches!( + err, + DatabaseError::MissingSitewideRotationTarget(CredentialRotationType::Nvos) + ), + "expected MissingSitewideRotationTarget for nvos, got: {err:?}" + ); assert_eq!( version_of(&mut conn, "02:00:00:00:00:01", "nvos").await, - Some(0) + None, + "a failed record must not write a row" + ); + } + + #[crate::sqlx_test] + async fn stages_and_promotes_rotation_ignoring_sitewide_target(pool: PgPool) { + let mac: MacAddress = "02:00:00:00:00:0a".parse().unwrap(); + let mut conn = pool.acquire().await.unwrap(); + + // Advance the site-wide lockdown target so it differs from the version we + // stage. Promotion must land exactly the staged version (the one the card + // was locked under), never the live target -- this is the TOCTOU the + // two-phase lock flow guards against. + sqlx::query( + "UPDATE sitewide_credential_rotation SET target_version = 5 \ + WHERE credential_type = 'lockdown_ikm'", + ) + .execute(&mut *conn) + .await + .unwrap(); + + // Phase one (issue): stage the in-flight rotation. current_version stays + // NULL ("not yet established") until the hardware confirms. + mark_device_rotating_to_version(&mut conn, mac, CredentialRotationType::LockdownIkm, 2) + .await + .unwrap(); + assert_eq!( + rotating_version_of(&mut conn, "02:00:00:00:00:0a", "lockdown_ikm").await, + Some(2), + "issue must stage the derived version as the in-flight marker" + ); + assert_eq!( + version_of(&mut conn, "02:00:00:00:00:0a", "lockdown_ikm").await, + None, + "current_version must not advance until the lock is confirmed" + ); + + // Phase two (confirm): promote the staged version to current and clear + // the in-flight marker. + let promoted = + promote_rotating_to_current(&mut conn, mac, CredentialRotationType::LockdownIkm) + .await + .unwrap(); + assert!(promoted, "a staged rotation must report as promoted"); + assert_eq!( + version_of(&mut conn, "02:00:00:00:00:0a", "lockdown_ikm").await, + Some(2), + "must promote the staged version (2), not the site-wide target (5)" + ); + assert_eq!( + rotating_version_of(&mut conn, "02:00:00:00:00:0a", "lockdown_ikm").await, + None, + "the in-flight marker must be cleared on promotion" + ); + + // Idempotent: a re-observed lock finds nothing staged, so it is a no-op + // that leaves the promoted version intact. + let promoted_again = + promote_rotating_to_current(&mut conn, mac, CredentialRotationType::LockdownIkm) + .await + .unwrap(); + assert!( + !promoted_again, + "a second promotion with nothing staged must report no-op" + ); + assert_eq!( + version_of(&mut conn, "02:00:00:00:00:0a", "lockdown_ikm").await, + Some(2), + "current_version must be preserved when there is nothing to promote" ); } @@ -225,7 +465,10 @@ mod tests { delete_device_converged(&mut conn, mac, CredentialRotationType::Bmc) .await .unwrap(); - assert_eq!(version_of(&mut conn, "02:00:00:00:00:01", "bmc").await, None); + assert_eq!( + version_of(&mut conn, "02:00:00:00:00:01", "bmc").await, + None + ); assert_eq!( version_of(&mut conn, "02:00:00:00:00:01", "host_uefi").await, Some(0), diff --git a/crates/api-db/src/credential_rotation_backfill.rs b/crates/api-db/src/credential_rotation/test_backfill.rs similarity index 99% rename from crates/api-db/src/credential_rotation_backfill.rs rename to crates/api-db/src/credential_rotation/test_backfill.rs index 3fa8b907dc..b601e9a46e 100644 --- a/crates/api-db/src/credential_rotation_backfill.rs +++ b/crates/api-db/src/credential_rotation/test_backfill.rs @@ -14,7 +14,7 @@ use sqlx::PgPool; const BACKFILL_MIGRATION: &str = - include_str!("../migrations/20260623130000_credential_rotation_backfill.sql"); + include_str!("../../migrations/20260623130000_credential_rotation_backfill.sql"); const SEGMENT_ID: &str = "20000000-0000-0000-0000-000000000001"; diff --git a/crates/api-db/src/lib.rs b/crates/api-db/src/lib.rs index 584a670ed0..79e15142f3 100644 --- a/crates/api-db/src/lib.rs +++ b/crates/api-db/src/lib.rs @@ -25,8 +25,6 @@ pub mod bmc_redfish_session; pub mod carbide_version; pub mod compute_allocation; pub mod credential_rotation; -#[cfg(test)] -mod credential_rotation_backfill; pub mod db_read; pub mod desired_firmware; pub mod dhcp_entry; @@ -397,6 +395,8 @@ pub enum DatabaseError { MaxOneInterfaceAssociation, #[error("Fast-path allocation failed and can be retried")] TryAgain, + #[error("No site-wide rotation target for credential type: {0:?}")] + MissingSitewideRotationTarget(crate::credential_rotation::CredentialRotationType), } impl DatabaseError { diff --git a/crates/dpa-manager/Cargo.toml b/crates/dpa-manager/Cargo.toml index 461ca4c313..f0d4df5d8e 100644 --- a/crates/dpa-manager/Cargo.toml +++ b/crates/dpa-manager/Cargo.toml @@ -28,6 +28,7 @@ chrono = { workspace = true } duration-str = { workspace = true } eyre = { workspace = true } http = { workspace = true } +mac_address = { workspace = true } mqttea = { path = "../mqttea" } opentelemetry = { workspace = true } serde = { workspace = true } @@ -40,5 +41,9 @@ tokio-util = { workspace = true } tracing = { workspace = true } uuid = { features = ["v4", "serde"], workspace = true } +[dev-dependencies] +carbide-macros = { path = "../macros" } +carbide-sqlx-testing = { path = "../sqlx-testing" } + [lints] workspace = true diff --git a/crates/dpa-manager/src/card_handler/svpc.rs b/crates/dpa-manager/src/card_handler/svpc.rs index 7b1fd205dc..cc31256b1e 100644 --- a/crates/dpa-manager/src/card_handler/svpc.rs +++ b/crates/dpa-manager/src/card_handler/svpc.rs @@ -19,15 +19,17 @@ use std::sync::Arc; use async_trait::async_trait; use carbide_dpa::DpaInfo; +use carbide_uuid::dpa_interface::DpaInterfaceId; use carbide_uuid::spx::{NULL_SPX_PARTITION_ID, SpxPartitionId}; use chrono::TimeDelta; use db::{self, ObjectColumnFilter}; +use mac_address::MacAddress; use model::dpa_interface::DpaLockMode::{Locked, Unlocked}; use model::dpa_interface::{DpaInterface, DpaInterfaceControllerState}; use model::instance::snapshot::InstanceSnapshot; use model::machine::{Machine, ManagedHostStateSnapshot}; use mqttea::client::MqtteaClient; -use sqlx::PgTransaction; +use sqlx::{PgConnection, PgTransaction}; use super::DpaInterfaceStateHandler; use crate::errors::{DpaManagerError, DpaManagerResult}; @@ -496,26 +498,29 @@ impl DpaInterfaceStateHandler for SvpcInterfaceHandler { }; if cs.lockmode == Some(Locked) { - // The card is now locked on the device under the active lockdown IKM - // (build_lock_command derived the key from the same site-wide target - // version this records). Record lockdown_ikm convergence, keyed by the - // card (NIC) MAC, so the rotation engine tracks this card from the - // moment it is actually locked. The record is committed atomically - // with the state transition below (the monitor reuses this txn to - // persist the new controller state). Idempotent: a card already - // recorded (by the backfill or a prior lock) is left untouched. + // The card is now locked on the device. Complete its lockdown_ikm + // convergence, keyed by the card (NIC) MAC, so the rotation engine + // tracks this card from the moment it is actually locked. The record + // is committed atomically with the state transition below (the + // monitor reuses this txn to persist the new controller state). + // + // api-core staged the *exact* IKM version the lock key was derived + // from as the in-flight `rotating_to_version` when it issued the lock + // command; we promote that value to `current_version` here. We must + // not re-read the site-wide target instead: it can advance between + // issuing the lock and observing it, which would record the card as + // converged to a newer version than the IKM it is actually locked + // under. A card with nothing staged (locked before this flow shipped, + // already covered by the backfill at v0) falls back to the site-wide + // target; that path is idempotent and warns. let mut txn = monitor .db_services .db_pool .begin() .await .map_err(|e| db::AnnotatedSqlxError::new("handle_locking begin txn", e))?; - db::credential_rotation::record_device_converged( - txn.as_mut(), - dpa_interface.mac_address, - db::credential_rotation::CredentialRotationType::LockdownIkm, - ) - .await?; + record_lock_convergence(txn.as_mut(), dpa_interface.id, dpa_interface.mac_address) + .await?; let new_state = DpaInterfaceControllerState::Assigned; tracing::info!(state = ?new_state, "Dpa Interface state transition"); @@ -616,3 +621,140 @@ fn apply_profile(state: &DpaInterface) -> DpaManagerResult { txn: None, }) } + +/// Completes `lockdown_ikm` convergence for a card observed locked, on the +/// caller's `conn` (`handle_locking` passes its open transaction so the record +/// commits atomically with the state transition). +/// +/// api-core staged the exact IKM version the lock key was derived from as the +/// in-flight `rotating_to_version` when it issued the lock command, so: +/// +/// * if a rotation is staged, promote it to `current_version` verbatim. We +/// must NOT re-read the site-wide target: it can advance between issuing the +/// lock and observing it, which would record the card as converged to a +/// newer version than the IKM it is actually locked under. +/// * if nothing is staged (a card locked before this flow shipped, already +/// covered by the backfill at v0), fall back to the site-wide target and +/// warn; that path is idempotent. +async fn record_lock_convergence( + conn: &mut PgConnection, + dpa_interface_id: DpaInterfaceId, + mac_address: MacAddress, +) -> DpaManagerResult<()> { + let promoted = db::credential_rotation::promote_rotating_to_current( + conn, + mac_address, + db::credential_rotation::CredentialRotationType::LockdownIkm, + ) + .await?; + + if !promoted { + tracing::warn!( + %dpa_interface_id, + %mac_address, + "card locked without a staged lockdown IKM rotation; \ + recording convergence at the site-wide target" + ); + db::credential_rotation::record_device_converged( + conn, + mac_address, + db::credential_rotation::CredentialRotationType::LockdownIkm, + ) + .await?; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use mac_address::MacAddress; + use sqlx::PgPool; + + use super::record_lock_convergence; + + // current_version for the lockdown_ikm row of `mac`, or None if no row exists. + async fn lockdown_version_of(pool: &PgPool, mac: &str) -> Option { + let row: Option> = sqlx::query_scalar( + "SELECT current_version FROM device_credential_rotation \ + WHERE device_mac = $1::macaddr AND credential_type = 'lockdown_ikm'", + ) + .bind(mac) + .fetch_optional(pool) + .await + .unwrap(); + row.flatten() + } + + // rotating_to_version for the lockdown_ikm row of `mac`, or None. + async fn lockdown_rotating_version_of(pool: &PgPool, mac: &str) -> Option { + let row: Option> = sqlx::query_scalar( + "SELECT rotating_to_version FROM device_credential_rotation \ + WHERE device_mac = $1::macaddr AND credential_type = 'lockdown_ikm'", + ) + .bind(mac) + .fetch_optional(pool) + .await + .unwrap(); + row.flatten() + } + + // Promote-path: a staged in-flight version is promoted verbatim, never + // re-derived from the (mutable) site-wide target -- this is the TOCTOU the + // two-phase lock flow guards against. + #[crate::sqlx_test] + async fn promotes_staged_version_not_sitewide_target(pool: PgPool) { + let id = carbide_uuid::dpa_interface::DpaInterfaceId::new(); + let mac: MacAddress = "02:00:00:00:00:11".parse().unwrap(); + + // api-core staged the lock at version 2; advance the site-wide target so + // it differs: the promote-path must ignore it. + sqlx::query( + "UPDATE sitewide_credential_rotation SET target_version = 7 \ + WHERE credential_type = 'lockdown_ikm'", + ) + .execute(&pool) + .await + .unwrap(); + let mut conn = pool.acquire().await.unwrap(); + db::credential_rotation::mark_device_rotating_to_version( + &mut conn, + mac, + db::credential_rotation::CredentialRotationType::LockdownIkm, + 2, + ) + .await + .unwrap(); + + record_lock_convergence(&mut conn, id, mac).await.unwrap(); + drop(conn); + + assert_eq!( + lockdown_version_of(&pool, "02:00:00:00:00:11").await, + Some(2), + "must promote the staged version, not the site-wide target (7)" + ); + assert_eq!( + lockdown_rotating_version_of(&pool, "02:00:00:00:00:11").await, + None, + "the in-flight marker must be cleared on promotion" + ); + } + + // Fallback (warn) path: nothing staged falls back to the site-wide target + // (seeded at 0 by the backfill migration). + #[crate::sqlx_test] + async fn falls_back_to_sitewide_target_when_nothing_staged(pool: PgPool) { + let id = carbide_uuid::dpa_interface::DpaInterfaceId::new(); + let mac: MacAddress = "02:00:00:00:00:12".parse().unwrap(); + + let mut conn = pool.acquire().await.unwrap(); + record_lock_convergence(&mut conn, id, mac).await.unwrap(); + drop(conn); + + assert_eq!( + lockdown_version_of(&pool, "02:00:00:00:00:12").await, + Some(0), + "nothing staged must fall back to the site-wide target (0)" + ); + } +} diff --git a/crates/dpa-manager/src/lib.rs b/crates/dpa-manager/src/lib.rs index e671060d11..30f9b353bb 100644 --- a/crates/dpa-manager/src/lib.rs +++ b/crates/dpa-manager/src/lib.rs @@ -44,6 +44,9 @@ pub mod config; pub mod errors; mod metrics; +#[cfg(test)] +pub(crate) use carbide_macros::sqlx_test; + pub struct DpaMonitor { pub(crate) db_services: DbServices, pub(crate) dpa_info: Arc, diff --git a/crates/switch-controller/src/configuring.rs b/crates/switch-controller/src/configuring.rs index 2d90701b1a..7f671108b1 100644 --- a/crates/switch-controller/src/configuring.rs +++ b/crates/switch-controller/src/configuring.rs @@ -86,6 +86,13 @@ async fn handle_rotate_os_password( // `update_device_password` to `true`: that branch rotates the password on the // switch, stores the resulting NICo-owned credential, and records `nvos` // convergence (keyed by BMC MAC) committed atomically with the transition. + // + // IMPORTANT: before flipping this on, REQ-6 must also seed a + // `sitewide_credential_rotation` row for `nvos` (via the backfill migration + // or at runtime). `record_device_converged` now requires the site-wide + // target to exist and returns `MissingSitewideRotationTarget` otherwise -- + // unlike the other credential types, nvos has no backfilled row today, so + // ungating without seeding it would make every switch fail here. let update_device_password = false; if update_device_password { let mut txn = ctx.services.db_pool.begin().await?;