Skip to content

Commit ccf58cf

Browse files
committed
fix: mr review
1 parent 59613a8 commit ccf58cf

13 files changed

Lines changed: 536 additions & 58 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/api-core/src/dpa/lockdown.rs

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -149,19 +149,23 @@ fn lockdown_ikm_key(version: u32) -> CredentialKey {
149149
// fetch_kdf_secret fetches the IKM for the KDF from the dedicated site-wide
150150
// lockdown credential, decoupled from the BMC root so the two can be rotated
151151
// independently.
152+
//
153+
// Returns the IKM version it resolved alongside the secret so the caller can
154+
// durably record the exact version a card is locked under, rather than
155+
// re-reading the (mutable) site-wide target later. Today the version is
156+
// `CURRENT_LOCKDOWN_IKM_VERSION`; the rotation engine will own advancing it.
152157
async fn fetch_kdf_secret(
153158
credential_reader: &dyn CredentialReader,
154-
) -> Result<String, eyre::Report> {
155-
let ikm_key = lockdown_ikm_key(CURRENT_LOCKDOWN_IKM_VERSION);
159+
) -> Result<(u32, String), eyre::Report> {
160+
let version = CURRENT_LOCKDOWN_IKM_VERSION;
161+
let ikm_key = lockdown_ikm_key(version);
156162
let credentials = credential_reader
157163
.get_credentials(&ikm_key)
158164
.await?
159-
.ok_or_else(|| {
160-
eyre::eyre!("lockdown IKM v{CURRENT_LOCKDOWN_IKM_VERSION} not found; site not seeded")
161-
})?;
165+
.ok_or_else(|| eyre::eyre!("lockdown IKM v{version} not found; site not seeded"))?;
162166
let Credentials::UsernamePassword { password, .. } = credentials;
163167

164-
Ok(password)
168+
Ok((version, password))
165169
}
166170

167171
// ensure_lockdown_ikm_seeded idempotently seeds the dedicated site-wide
@@ -227,16 +231,32 @@ pub async fn ensure_lockdown_ikm_seeded(
227231
}
228232
}
229233

234+
// SupernicLockdownKey is a derived lockdown key together with the site-wide
235+
// lockdown IKM version it was derived from. The version travels with the key so
236+
// the lock flow can durably record the exact version the card is locked under.
237+
pub struct SupernicLockdownKey {
238+
// The 16-character hex lockdown key sent to the device.
239+
pub key: String,
240+
// The site-wide lockdown IKM version `key` was derived from.
241+
pub ikm_version: u32,
242+
}
243+
230244
// build_supernic_lockdown_key builds a single lockdown key using
231245
// the latest KdfContextVersion. Use this for locking a card.
246+
//
247+
// Returns the derived key together with the IKM version it used (see
248+
// `SupernicLockdownKey`). The unlock flow can ignore the version; the lock flow
249+
// persists it so the recorded convergence version matches what actually locked
250+
// the card.
232251
pub async fn build_supernic_lockdown_key(
233252
db_reader: &PgPool,
234253
dpa_interface_id: DpaInterfaceId,
235254
credential_reader: &dyn CredentialReader,
236-
) -> Result<String, eyre::Report> {
255+
) -> Result<SupernicLockdownKey, eyre::Report> {
237256
let ctx = build_kdf_context(db_reader, dpa_interface_id).await?;
238-
let secret = fetch_kdf_secret(credential_reader).await?;
239-
build_lockdown_key(secret.as_bytes(), &ctx, KdfContextVersion::V1)
257+
let (ikm_version, secret) = fetch_kdf_secret(credential_reader).await?;
258+
let key = build_lockdown_key(secret.as_bytes(), &ctx, KdfContextVersion::V1)?;
259+
Ok(SupernicLockdownKey { key, ikm_version })
240260
}
241261

242262
#[cfg(test)]
@@ -441,7 +461,8 @@ mod tests {
441461
.await
442462
.unwrap();
443463

444-
let secret = fetch_kdf_secret(&store).await.unwrap();
464+
let (version, secret) = fetch_kdf_secret(&store).await.unwrap();
465+
assert_eq!(version, CURRENT_LOCKDOWN_IKM_VERSION);
445466
assert_eq!(secret, "ikm-pass");
446467
}
447468

crates/api-core/src/errors.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,13 @@ impl From<DatabaseError> for CarbideError {
293293
DatabaseError::InvalidArgument(e) => InvalidArgument(e),
294294
DatabaseError::InvalidConfiguration(e) => InvalidConfiguration(e),
295295
DatabaseError::MissingArgument(e) => MissingArgument(e),
296+
// A corrupted/absent site-wide rotation invariant is an internal
297+
// state error, not a client-correctable one.
298+
DatabaseError::MissingSitewideRotationTarget(credential_type) => Internal {
299+
message: format!(
300+
"no site-wide rotation target for credential type: {credential_type:?}"
301+
),
302+
},
296303
DatabaseError::NetworkParseError(e) => NetworkParseError(e),
297304
DatabaseError::NetworkSegmentDelete(e) => NetworkSegmentDelete(e),
298305
DatabaseError::NetworkSegmentDuplicateMacAddress(e) => {

crates/api-core/src/handlers/dpa.rs

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ async fn build_unlock_command(
257257
machine_id: MachineId,
258258
pci_name: &str,
259259
) -> CarbideResult<DpaCommand<'static>> {
260-
let key = crate::dpa::lockdown::build_supernic_lockdown_key(
260+
let lockdown = crate::dpa::lockdown::build_supernic_lockdown_key(
261261
&api.database_connection,
262262
sn.id,
263263
&*api.credential_manager,
@@ -271,8 +271,10 @@ async fn build_unlock_command(
271271

272272
tracing::info!(%machine_id, %pci_name, "Unlocking DPA");
273273

274+
// The unlock flow does not record convergence, so the derived IKM version is
275+
// not persisted here.
274276
Ok(DpaCommand {
275-
op: OpCode::Unlock { key },
277+
op: OpCode::Unlock { key: lockdown.key },
276278
})
277279
}
278280

@@ -414,7 +416,7 @@ async fn build_lock_command(
414416
machine_id: MachineId,
415417
pci_name: &str,
416418
) -> CarbideResult<DpaCommand<'static>> {
417-
let key = crate::dpa::lockdown::build_supernic_lockdown_key(
419+
let lockdown = crate::dpa::lockdown::build_supernic_lockdown_key(
418420
&api.database_connection,
419421
sn.id,
420422
&*api.credential_manager,
@@ -426,9 +428,37 @@ async fn build_lock_command(
426428
))
427429
})?;
428430

429-
tracing::info!(%machine_id, %pci_name, "Locking DPA");
431+
// Stage the IKM version we are about to lock the card with as the in-flight
432+
// rotation marker (`rotating_to_version`) on the card's lockdown_ikm row
433+
// *before* issuing the lock command. dpa-manager's `handle_locking` promotes
434+
// exactly this value to the convergence version when the card reports Locked
435+
// -- never the (possibly advanced) site-wide target re-read at observation
436+
// time. Staging first means we only ever issue a lock for a version we have
437+
// already recorded our intent to use; if the write fails we surface the error
438+
// and do not lock. The writer is idempotent across the per-cycle
439+
// re-derivation while Locking.
440+
let ikm_version = i32::try_from(lockdown.ikm_version).map_err(|e| CarbideError::Internal {
441+
message: format!(
442+
"lockdown IKM version {} does not fit in i32 for DPA {pci_name}: {e}",
443+
lockdown.ikm_version
444+
),
445+
})?;
446+
let mut conn = api.database_connection.acquire().await.map_err(|e| {
447+
CarbideError::GenericErrorFromReport(eyre!(
448+
"failed to acquire connection to stage lockdown IKM rotation for DPA {pci_name}: {e}"
449+
))
450+
})?;
451+
db::credential_rotation::mark_device_rotating_to_version(
452+
&mut conn,
453+
sn.mac_address,
454+
db::credential_rotation::CredentialRotationType::LockdownIkm,
455+
ikm_version,
456+
)
457+
.await?;
458+
459+
tracing::info!(%machine_id, %pci_name, ikm_version = lockdown.ikm_version, "Locking DPA");
430460
Ok(DpaCommand {
431-
op: OpCode::Lock { key },
461+
op: OpCode::Lock { key: lockdown.key },
432462
})
433463
}
434464

crates/api-core/src/handlers/mlx_admin.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,7 @@ async fn get_device_lockdown_key(
13161316
}
13171317
})?;
13181318

1319-
let lockdown_key = crate::dpa::lockdown::build_supernic_lockdown_key(
1319+
let lockdown = crate::dpa::lockdown::build_supernic_lockdown_key(
13201320
&api.database_connection,
13211321
dpa_interface.id,
13221322
&*api.credential_manager,
@@ -1328,5 +1328,5 @@ async fn get_device_lockdown_key(
13281328
),
13291329
})?;
13301330

1331-
Ok(lockdown_key)
1331+
Ok(lockdown.key)
13321332
}

crates/api-db/migrations/20260623120000_credential_rotation.sql

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ CREATE TABLE sitewide_credential_rotation (
1616
target_version integer NOT NULL,
1717
started_at timestamptz NOT NULL DEFAULT now(),
1818
-- Free-form initiator/reason metadata; must never contain secrets.
19-
request_meta jsonb NOT NULL DEFAULT '{}'::jsonb
19+
request_meta jsonb NOT NULL DEFAULT '{}'::jsonb,
20+
-- Versions are 0-based (0 = pre-rotation baseline), so the target is
21+
-- non-negative by construction.
22+
CONSTRAINT sitewide_credential_rotation_target_version_non_negative
23+
CHECK (target_version >= 0)
2024
);
2125

2226
-- One row per (device_mac, credential_type): per-device convergence state.
@@ -38,7 +42,20 @@ CREATE TABLE device_credential_rotation (
3842
-- Redacted last-error string for observability; never contains secrets.
3943
rotate_last_error_redacted text,
4044
rotate_quarantined_until timestamptz,
41-
PRIMARY KEY (device_mac, credential_type)
45+
PRIMARY KEY (device_mac, credential_type),
46+
-- Version/counter fields are non-negative by construction: versions are
47+
-- 0-based and rotate_attempts counts up from 0. These CHECKs are the only
48+
-- guard at write time -- manual repairs and the backfill data migration
49+
-- bypass the Rust writers -- so they keep the rotation engine from
50+
-- reasoning over impossible state. A NULL current_version ("not yet
51+
-- established") or rotating_to_version ("no rotation in flight") satisfies
52+
-- the CHECK and stays legal.
53+
CONSTRAINT device_credential_rotation_current_version_non_negative
54+
CHECK (current_version >= 0),
55+
CONSTRAINT device_credential_rotation_rotating_to_version_non_negative
56+
CHECK (rotating_to_version >= 0),
57+
CONSTRAINT device_credential_rotation_rotate_attempts_non_negative
58+
CHECK (rotate_attempts >= 0)
4259
);
4360

4461
-- Hot path: "which devices for this credential type still need rotation"

0 commit comments

Comments
 (0)