Skip to content

gdma: save/restore functionality #1240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 213 additions & 6 deletions vm/devices/net/mana_driver/src/gdma_driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ use crate::queues::Eq;
use crate::queues::Wq;
use crate::resources::Resource;
use crate::resources::ResourceArena;
use crate::save_restore::DoorbellSavedState;
use crate::save_restore::GdmaDriverSavedState;
use crate::save_restore::InterruptSavedState;
use crate::save_restore::SavedMemoryState;
use anyhow::Context;
use futures::FutureExt;
use gdma_defs::Cqe;
Expand Down Expand Up @@ -118,6 +122,13 @@ impl<T: DeviceRegisterIo + Inspect> Doorbell for Bar0<T> {
safe_intrinsics::store_fence();
self.mem.write_u64(offset as usize, value);
}

/// Captures the doorbell state needed to recreate this doorbell later.
///
/// Records the supplied doorbell id together with the value reported by
/// `self.page_count()`.
///
/// # Panics
///
/// Panics if `doorbell_id` is `None`.
fn save(&self, doorbell_id: Option<u64>) -> DoorbellSavedState {
    DoorbellSavedState {
        // State the invariant explicitly so a violation is diagnosable.
        doorbell_id: doorbell_id.expect("doorbell id is required to save a Bar0 doorbell"),
        page_count: self.page_count(),
    }
}
}

#[derive(Inspect)]
Expand Down Expand Up @@ -148,6 +159,8 @@ pub struct GdmaDriver<T: DeviceBacking> {
hwc_warning_time_in_ms: u32,
hwc_timeout_in_ms: u32,
hwc_failure: bool,
db_id: u32,
saving: bool,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: suggest calling this something else like state_saved or something like that

}

const EQ_PAGE: usize = 0;
Expand All @@ -163,9 +176,17 @@ const RWQE_SIZE: u32 = 32;

impl<T: DeviceBacking> Drop for GdmaDriver<T> {
fn drop(&mut self) {
tracing::debug!(?self.saving, ?self.hwc_failure, "dropping gdma driver");

// Don't destroy anything if we're saving its state for restoration.
if self.saving {
return;
}

if self.hwc_failure {
return;
}

let data = self
.bar0
.mem
Expand Down Expand Up @@ -230,7 +251,12 @@ impl<T: DeviceBacking> GdmaDriver<T> {
self.bar0.clone() as _
}

pub async fn new(driver: &impl Driver, mut device: T, num_vps: u32) -> anyhow::Result<Self> {
pub async fn new(
driver: &impl Driver,
mut device: T,
num_vps: u32,
dma_buffer: Option<MemoryBlock>,
) -> anyhow::Result<Self> {
let bar0_mapping = device.map_bar(0)?;
let bar0_len = bar0_mapping.len();
if bar0_len < size_of::<RegMap>() {
Expand Down Expand Up @@ -280,11 +306,14 @@ impl<T: DeviceBacking> GdmaDriver<T> {
);
}

let dma_client = device.dma_client();

let dma_buffer = dma_client
.allocate_dma_buffer(NUM_PAGES * PAGE_SIZE)
.context("failed to allocate DMA buffer")?;
let dma_buffer = if let Some(dma_buffer) = dma_buffer {
dma_buffer
} else {
let dma_client = device.dma_client();
dma_client
.allocate_dma_buffer(NUM_PAGES * PAGE_SIZE)
.context("failed to allocate DMA buffer")?
};

let pages = dma_buffer.pfns();

Expand Down Expand Up @@ -475,6 +504,8 @@ impl<T: DeviceBacking> GdmaDriver<T> {
hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS,
hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS,
hwc_failure: false,
saving: false,
db_id,
};

this.push_rqe();
Expand All @@ -499,6 +530,182 @@ impl<T: DeviceBacking> GdmaDriver<T> {
Ok(this)
}

/// Consumes the driver and captures the state needed to rebuild it.
///
/// `saving` is set before the state is collected, so the `Drop` impl returns
/// early when `self` goes out of scope at the end of this call.
///
/// # Errors
///
/// Returns an error if the DMA buffer reports no page frame numbers.
#[allow(dead_code)]
pub async fn save(mut self) -> anyhow::Result<GdmaDriverSavedState> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what should happen when hwc_failure = true?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When there is a HWC failure, we have couple of options. We can save the state with the HWC failure and on restore, let the recovery process deal with it as it would have usually. Or, we can decide to not save the state and in that case, the restore will be readd of the VF and that might recover the HWC failure. I think we want to go with the former here as that is aligned with the idea that a save/restore should be transparent to the natural flow. The latter seems like a workaround.

    // Resolve the base PFN before marking the driver as saving, so that a
    // failure here leaves `saving` unset and `Drop` takes its normal path.
    let base_pfn = self
        .dma_buffer
        .pfns()
        .first()
        .copied()
        .context("DMA buffer has no pages to save")?;

    self.saving = true;

    let doorbell = self.bar0.save(Some(u64::from(self.db_id)));

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let interrupt_config = self.interrupts.iter().enumerate().filter_map(|(index, interrupt)| { ... }.collect();

    // Record one entry per populated interrupt slot; the slot index is stored
    // as both the MSI-X index and the CPU.
    let interrupt_config = self
        .interrupts
        .iter()
        .enumerate()
        .filter_map(|(index, interrupt)| {
            interrupt.as_ref().map(|_| InterruptSavedState {
                msix_index: index as u32,
                cpu: index as u32,
            })
        })
        .collect();

    Ok(GdmaDriverSavedState {
        mem: SavedMemoryState {
            base_pfn,
            len: self.dma_buffer.len(),
        },
        eq: self.eq.save(),
        cq: self.cq.save(),
        rq: self.rq.save(),
        sq: self.sq.save(),
        db_id: doorbell.doorbell_id,
        gpa_mkey: self.gpa_mkey,
        pdid: self._pdid,
        cq_armed: self.cq_armed,
        eq_armed: self.eq_armed,
        hwc_subscribed: self.hwc_subscribed,
        eq_id_msix: self.eq_id_msix.clone(),
        hwc_activity_id: self.hwc_activity_id,
        num_msix: self.num_msix,
        min_queue_avail: self.min_queue_avail,
        link_toggle: self.link_toggle.clone(),
        interrupt_config,
    })
}

/// Rebuilds a driver from `saved_state` on top of `device`, using
/// `dma_buffer` for the EQ, CQ, RQ and SQ pages.
///
/// Re-reads and validates the BAR0 register map, restores each queue from its
/// saved state, remaps the saved interrupts, and then re-subscribes to HWC
/// and re-arms the EQ/CQ if they were active when the state was saved.
///
/// # Errors
///
/// Returns an error if BAR0 cannot be mapped or fails validation, if the
/// saved doorbell id does not fit in a `u32`, if a saved MSI-X index is
/// outside `0..num_msix`, or if restoring a queue, creating a doorbell page,
/// or mapping an interrupt fails.
#[allow(dead_code)]
pub async fn restore(
    saved_state: GdmaDriverSavedState,
    mut device: T,
    dma_buffer: MemoryBlock,
) -> anyhow::Result<Self> {
    tracing::info!("restoring gdma driver");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a lot of this code at the beginning should be moved into a helper so new() and restore() can both use it


    let bar0_mapping = device.map_bar(0)?;
    let bar0_len = bar0_mapping.len();
    if bar0_len < size_of::<RegMap>() {
        anyhow::bail!("bar0 ({} bytes) too small for reg map", bar0_mapping.len());
    }

    let mut map = RegMap::new_zeroed();
    for i in 0..size_of_val(&map) / 4 {
        let v = bar0_mapping.read_u32(i * 4);
        // Unmapped device memory will return -1 on reads, so check the first 32
        // bits for this condition to get a clear error message early.
        if i == 0 && v == !0 {
            anyhow::bail!("bar0 read returned -1, device is not present");
        }
        map.as_mut_bytes()[i * 4..(i + 1) * 4].copy_from_slice(&v.to_ne_bytes());
    }

    tracing::debug!(?map, "register map on restore");

    // Log on unknown major version numbers. This is not necessarily an
    // error, so continue.
    if map.major_version_number != 0 && map.major_version_number != 1 {
        tracing::warn!(
            major = map.major_version_number,
            minor = map.minor_version_number,
            micro = map.micro_version_number,
            "unrecognized major version"
        );
    }

    if map.vf_gdma_sriov_shared_sz != 32 {
        anyhow::bail!(
            "unexpected shared memory size: {}",
            map.vf_gdma_sriov_shared_sz
        );
    }

    if (bar0_len as u64).saturating_sub(map.vf_gdma_sriov_shared_reg_start)
        < map.vf_gdma_sriov_shared_sz as u64
    {
        anyhow::bail!(
            "bar0 ({} bytes) too small for shared memory at {}",
            bar0_mapping.len(),
            map.vf_gdma_sriov_shared_reg_start
        );
    }

    let doorbell_shift = map.vf_db_page_sz.trailing_zeros();
    let bar0 = Arc::new(Bar0 {
        mem: bar0_mapping,
        map,
        doorbell_shift,
    });

    // Narrow the saved doorbell id once, with a check, instead of silently
    // truncating it at every use.
    let db_id = u32::try_from(saved_state.db_id).with_context(|| {
        format!(
            "saved doorbell id {} does not fit in u32",
            saved_state.db_id
        )
    })?;

    let eq = Eq::restore(
        dma_buffer.subblock(0, PAGE_SIZE),
        saved_state.eq,
        DoorbellPage::new(bar0.clone(), db_id)?,
    )?;

    let cq = Cq::restore(
        dma_buffer.subblock(CQ_PAGE * PAGE_SIZE, PAGE_SIZE),
        saved_state.cq,
        DoorbellPage::new(bar0.clone(), db_id)?,
    )?;

    let rq = Wq::restore_rq(
        dma_buffer.subblock(RQ_PAGE * PAGE_SIZE, PAGE_SIZE),
        saved_state.rq,
        DoorbellPage::new(bar0.clone(), db_id)?,
    )?;

    let sq = Wq::restore_sq(
        dma_buffer.subblock(SQ_PAGE * PAGE_SIZE, PAGE_SIZE),
        saved_state.sq,
        DoorbellPage::new(bar0.clone(), db_id)?,
    )?;

    let mut interrupts = vec![None; saved_state.num_msix as usize];
    for int_state in &saved_state.interrupt_config {
        // Reject a saved index outside the table rather than panicking on it.
        let slot = interrupts
            .get_mut(int_state.msix_index as usize)
            .with_context(|| {
                format!(
                    "saved msix index {} out of range (num_msix {})",
                    int_state.msix_index, saved_state.num_msix
                )
            })?;
        *slot = Some(device.map_interrupt(int_state.msix_index, int_state.cpu)?);
    }

    let mut this = Self {
        device: Some(device),
        bar0,
        dma_buffer,
        interrupts,
        eq,
        cq,
        rq,
        sq,
        test_events: 0,
        eq_armed: saved_state.eq_armed,
        cq_armed: saved_state.cq_armed,
        gpa_mkey: saved_state.gpa_mkey,
        _pdid: saved_state.pdid,
        eq_id_msix: saved_state.eq_id_msix,
        num_msix: saved_state.num_msix,
        min_queue_avail: saved_state.min_queue_avail,
        hwc_activity_id: saved_state.hwc_activity_id,
        link_toggle: saved_state.link_toggle,
        hwc_subscribed: saved_state.hwc_subscribed,
        hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS,
        hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS,
        // Not read from the saved state here; a restored driver starts with
        // the failure flag cleared.
        hwc_failure: false,
        saving: false,
        db_id,
    };

    if saved_state.hwc_subscribed {
        this.hwc_subscribe();
    }

    if saved_state.eq_armed {
        this.eq.arm();
    }

    if saved_state.cq_armed {
        this.cq.arm();
    }

    Ok(this)
}

async fn report_hwc_timeout(
&mut self,
last_cmd_failed: bool,
Expand Down
1 change: 1 addition & 0 deletions vm/devices/net/mana_driver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ mod gdma_driver;
pub mod mana;
pub mod queues;
mod resources;
pub mod save_restore;
#[cfg(test)]
mod tests;
2 changes: 1 addition & 1 deletion vm/devices/net/mana_driver/src/mana.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl<T: DeviceBacking> ManaDevice<T> {
num_vps: u32,
max_queues_per_vport: u16,
) -> anyhow::Result<Self> {
let mut gdma = GdmaDriver::new(driver, device, num_vps).await?;
let mut gdma = GdmaDriver::new(driver, device, num_vps, None).await?;
gdma.test_eq().await?;

gdma.verify_vf_driver_version().await?;
Expand Down
Loading