From 07e9bf861e92c58cc85cc4409065bc3ba2f20321 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:01 +0200 Subject: [PATCH 01/11] Add IoMemory trait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing `GuestMemory` trait is insufficient for representing virtual memory, as it does not allow specifying the required access permissions. Its focus on all guest memory implementations consisting of a relatively small number of regions is also unsuited for paged virtual memory with a potentially very lage set of non-continuous mappings. The new `IoMemory` trait in contrast provides only a small number of methods that keep the implementing type’s internal structure more opaque, and every access needs to be accompanied by the required permissions. Signed-off-by: Hanna Czenczek --- src/io_memory.rs | 210 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + 2 files changed, 213 insertions(+) create mode 100644 src/io_memory.rs diff --git a/src/io_memory.rs b/src/io_memory.rs new file mode 100644 index 00000000..76498e59 --- /dev/null +++ b/src/io_memory.rs @@ -0,0 +1,210 @@ +// Copyright (C) 2025 Red Hat. All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Provides a trait for virtual I/O memory. +//! +//! This trait is more stripped down than `GuestMemory` because the fragmented nature of virtual +//! memory does not allow a direct translation to long continuous regions. +//! +//! In addition, any access to virtual memory must be annotated with the intended access mode (i.e. +//! reading and/or writing). + +use crate::guest_memory::Result; +use crate::{bitmap, GuestAddress, GuestMemory, MemoryRegionAddress, VolatileSlice}; + +/// Permissions for accessing virtual memory. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[repr(u8)] +pub enum Permissions { + /// No permissions + No = 0b00, + /// Read-only + Read = 0b01, + /// Write-only + Write = 0b10, + /// Allow both reading and writing + ReadWrite = 0b11, +} + +impl Permissions { + /// Convert the numerical representation into the enum. + /// + /// # Panics + /// + /// Panics if `raw` is not a valid representation of any `Permissions` variant. + fn from_repr(raw: u8) -> Self { + use Permissions::*; + + match raw { + value if value == No as u8 => No, + value if value == Read as u8 => Read, + value if value == Write as u8 => Write, + value if value == ReadWrite as u8 => ReadWrite, + _ => panic!("{raw:x} is not a valid raw Permissions value"), + } + } + + /// Check whether the permissions `self` allow the given `access`. + pub fn allow(&self, access: Self) -> bool { + *self & access == access + } +} + +impl std::ops::BitOr for Permissions { + type Output = Permissions; + + /// Return the union of `self` and `rhs`. + fn bitor(self, rhs: Permissions) -> Self::Output { + Self::from_repr(self as u8 | rhs as u8) + } +} + +impl std::ops::BitAnd for Permissions { + type Output = Permissions; + + /// Return the intersection of `self` and `rhs`. + fn bitand(self, rhs: Permissions) -> Self::Output { + Self::from_repr(self as u8 & rhs as u8) + } +} + +/// Represents virtual I/O memory. +/// +/// `IoMemory` is generally backed by some “physical” `GuestMemory`, which then consists for +/// `GuestMemoryRegion` objects. 
However, the mapping from I/O virtual addresses (IOVAs) to +/// physical addresses may be arbitrarily fragmented. Translation is done via an IOMMU. +/// +/// Note in contrast to `GuestMemory`: +/// - Any IOVA range may consist of arbitrarily many underlying ranges in physical memory. +/// - Accessing an IOVA requires passing the intended access mode, and the IOMMU will check whether +/// the given access mode is permitted for the given IOVA. +/// - The translation result for a given IOVA may change over time (i.e. the physical address +/// associated with an IOVA may change). +pub trait IoMemory { + /// Underlying `GuestMemory` type. + type PhysicalMemory: GuestMemory + ?Sized; + + /// Return `true` if `addr..(addr + count)` is accessible with `access`. + fn range_accessible(&self, addr: GuestAddress, count: usize, access: Permissions) -> bool; + + /// Invokes callback `f` to handle data in the address range `[addr, addr + count)`, with + /// permissions `access`. + /// + /// The address range `[addr, addr + count)` may span more than a single page in virtual + /// memory, and more than one [`GuestMemoryRegion`](trait.GuestMemoryRegion.html) object, or + /// even have holes or non-accessible regions in it. So `f` is invoked for each + /// [`GuestMemoryRegion`](trait.GuestMemoryRegion.html) object and each non-continuous page + /// involved, and then this function returns: + /// - the error code returned by the callback 'f' + /// - the size of the already handled data when encountering the first hole + /// - the size of the already handled data when the whole range has been handled + /// + /// The parameters to `f` are, in order: + /// - Offset inside of the whole range (i.e. `addr` corresponds to offset `0`), + /// - Length of the current chunk in bytes, + /// - Relative address inside the [`GuestMemoryRegion`], + /// - The underlying [`GuestMemoryRegion`]. + /// + /// `f` should return the number of bytes it handled. That number may be less than the length + /// passed to it, in which case it will be called again for the chunk following immediately + /// after that returned length. If `f` returns 0, processing will be stopped. + fn try_access( + &self, + count: usize, + addr: GuestAddress, + access: Permissions, + f: F, + ) -> Result + where + F: FnMut( + usize, + usize, + MemoryRegionAddress, + &::R, + ) -> Result; + + /// Returns a [`VolatileSlice`](struct.VolatileSlice.html) of `count` bytes starting at + /// `addr`. + /// + /// Note that because of the fragmented nature of virtual memory, it can easily happen that the + /// range `[addr, addr + count)` is not backed by a continuous region in our own virtual + /// memory, which will make generating the slice impossible. + /// + /// The iterator’s items are wrapped in [`Result`], i.e. there may be errors reported on + /// individual items. If there is no such error, the cumulative length of all items will be + /// equal to `count`. Any error will end iteration immediately, i.e. there are no items past + /// the first error. + /// + /// If `count` is 0, an empty iterator will be returned. + fn get_slices<'a>( + &'a self, + addr: GuestAddress, + count: usize, + access: Permissions, + ) -> Result>>>>; + + /// If this virtual memory is just a plain `GuestMemory` object underneath without an IOMMU + /// translation layer in between, return that `GuestMemory` object. + fn physical_memory(&self) -> Option<&Self::PhysicalMemory> { + None + } +} + +/// Allow accessing every [`GuestMemory`] via [`IoMemory`]. 
+/// +/// [`IoMemory`] is a generalization of [`GuestMemory`]: Every object implementing the former is a +/// subset of an object implementing the latter (there always is an underlying [`GuestMemory`]), +/// with an opaque internal mapping on top, e.g. provided by an IOMMU. +/// +/// Every [`GuestMemory`] is therefore trivially also an [`IoMemory`], assuming a complete identity +/// mapping (which we must assume, so that accessing such objects via either trait will yield the +/// same result): Basically, all [`IoMemory`] methods are implemented as trivial wrappers around +/// the same [`GuestMemory`] methods (if available), discarding the `access` parameter. +impl IoMemory for M { + type PhysicalMemory = M; + + fn range_accessible(&self, addr: GuestAddress, count: usize, _access: Permissions) -> bool { + if let Ok(done) = ::try_access(self, count, addr, |_, len, _, _| Ok(len)) + { + done == count + } else { + false + } + } + + fn try_access( + &self, + count: usize, + addr: GuestAddress, + _access: Permissions, + f: F, + ) -> Result + where + F: FnMut( + usize, + usize, + MemoryRegionAddress, + &::R, + ) -> Result, + { + ::try_access(self, count, addr, f) + } + + fn get_slices<'a>( + &'a self, + addr: GuestAddress, + count: usize, + _access: Permissions, + ) -> Result>>>> + { + Ok(::get_slices(self, addr, count)) + } + + fn physical_memory(&self) -> Option<&Self::PhysicalMemory> { + Some(self) + } +} diff --git a/src/lib.rs b/src/lib.rs index 2f87f4c8..679ae7da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -58,6 +58,9 @@ pub use region::{ pub mod io; pub use io::{ReadVolatile, WriteVolatile}; +pub mod io_memory; +pub use io_memory::{IoMemory, Permissions}; + #[cfg(feature = "backend-mmap")] pub mod mmap; From 57b1543d1d9ecfdffb207b91675a6f0c368f876d Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:04 +0200 Subject: [PATCH 02/11] Implement Bytes on IoMemory instead of GuestMemory Rust only allows us to give one trait the blanket implementations for `Bytes`. We want `IoMemory` to be our primary external interface becaue it has users specify the access permissions they need, and because we can (and do) provide a blanket `IoMemory` implementation for all `GuestMemory` types. Also, while `IoMemory` (as the more general trait) only has a restricted interface when compared to `GuestMemory`, this interface is enough to implement `Bytes`; notably, accesses to `IoMemory` require specifying the access mode, which is naturally trivial for `Bytes` methods like `read()` or `write()`. Signed-off-by: Hanna Czenczek --- src/guest_memory.rs | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/guest_memory.rs b/src/guest_memory.rs index a18b42ce..85b6406a 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -56,7 +56,7 @@ use crate::bitmap::MS; use crate::bytes::{AtomicAccess, Bytes}; use crate::io::{ReadVolatile, WriteVolatile}; use crate::volatile_memory::{self, VolatileSlice}; -use crate::GuestMemoryRegion; +use crate::{GuestMemoryRegion, IoMemory, Permissions}; /// Errors associated with handling guest memory accesses. #[allow(missing_docs)] @@ -552,13 +552,19 @@ impl<'a, M: GuestMemory + ?Sized> Iterator for GuestMemorySliceIterator<'a, M> { /// returning `None`, ensuring that it will only return `None` from that point on. impl FusedIterator for GuestMemorySliceIterator<'_, M> {} -impl Bytes for T { +/// Allow accessing [`IoMemory`] (and [`GuestMemory`]) objects via [`Bytes`]. 
+/// +/// Thanks to the [blanket implementation of `IoMemory` for all `GuestMemory` +/// types](../io_memory/trait.IoMemory.html#impl-IoMemory-for-M), this blanket implementation +/// extends to all [`GuestMemory`] types. +impl Bytes for T { type E = Error; fn write(&self, buf: &[u8], addr: GuestAddress) -> Result { self.try_access( buf.len(), addr, + Permissions::Write, |offset, count, caddr, region| -> Result { region.write(&buf[offset..(offset + count)], caddr) }, @@ -569,6 +575,7 @@ impl Bytes for T { self.try_access( buf.len(), addr, + Permissions::Read, |offset, count, caddr, region| -> Result { region.read(&mut buf[offset..(offset + count)], caddr) }, @@ -636,9 +643,12 @@ impl Bytes for T { where F: ReadVolatile, { - self.try_access(count, addr, |_, len, caddr, region| -> Result { - region.read_volatile_from(caddr, src, len) - }) + self.try_access( + count, + addr, + Permissions::Write, + |_, len, caddr, region| -> Result { region.read_volatile_from(caddr, src, len) }, + ) } fn read_exact_volatile_from( @@ -664,11 +674,16 @@ impl Bytes for T { where F: WriteVolatile, { - self.try_access(count, addr, |_, len, caddr, region| -> Result { - // For a non-RAM region, reading could have side effects, so we - // must use write_all(). - region.write_all_volatile_to(caddr, dst, len).map(|()| len) - }) + self.try_access( + count, + addr, + Permissions::Read, + |_, len, caddr, region| -> Result { + // For a non-RAM region, reading could have side effects, so we + // must use write_all(). + region.write_all_volatile_to(caddr, dst, len).map(|()| len) + }, + ) } fn write_all_volatile_to(&self, addr: GuestAddress, dst: &mut F, count: usize) -> Result<()> @@ -688,7 +703,7 @@ impl Bytes for T { fn store(&self, val: O, addr: GuestAddress, order: Ordering) -> Result<()> { // No need to check past the first iterator item: It either has the size of `O`, then there // can be no further items; or it does not, and then `VolatileSlice::store()` will fail. - self.get_slices(addr, size_of::()) + self.get_slices(addr, size_of::(), Permissions::Write)? .next() .unwrap()? // count > 0 never produces an empty iterator .store(val, 0, order) @@ -698,7 +713,7 @@ impl Bytes for T { fn load(&self, addr: GuestAddress, order: Ordering) -> Result { // No need to check past the first iterator item: It either has the size of `O`, then there // can be no further items; or it does not, and then `VolatileSlice::store()` will fail. - self.get_slices(addr, size_of::()) + self.get_slices(addr, size_of::(), Permissions::Read)? .next() .unwrap()? // count > 0 never produces an empty iterator .load(0, order) From 5e3ed69c24281921644e5b8222bf07edfb4cd94e Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Fri, 15 Aug 2025 16:21:49 +0200 Subject: [PATCH 03/11] Implement GuestAddressSpace for IoMemory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want a trait like `GuestAddressSpace` for `IoMemory`, but just duplicating it into an `IoAddressSpace` trait is not so easy: We could rename the current `GuestAddressSpace` to `IoAddressSpace` and require `M: IoMemory` (instead of `M: GuestMemory`), and then define `GuestAddressSpace` as: ```rust pub trait GuestAddressSpace: IoAddressSpace {} impl GuestAddressSpace for AS where AS::M: GuestMemory, {} ``` But doing just this would break all existing `GuestAddressSpace` users, as they’d now need to import `IoAddressSpace` to use `memory()`. 
(Re-)Adding `GuestAddressSpace::memory()` as ```rust fn memory(&self) -> ::T { IoAddressSpace::memory(self) } ``` also doesn’t (just) work, as it gets the compiler confused which `memory()` to use (between `GuestAddressSpace` and `IoAddressSpace`), so the `IoAddressSpace::memory()` method would need to be called differently. However, I would find that a bit silly, and it would also then later require changes if the user wants to switch from `GuestMemory` to `IoMemory`. Instead just changing the `GuestAddressSpace::M: GuestMemory` requirement to `M: IoMemory` seems easier: - All callers that just use the `Bytes` interface remain completely unchanged, - It does break users that actually need the `GuestMemory` interface, but from what I have seen, that is only the case for `vhost::vhost_kern`. There, we can simply require that `::M: GuestMemory`. Signed-off-by: Hanna Czenczek --- src/guest_memory.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/guest_memory.rs b/src/guest_memory.rs index 85b6406a..c9f20b21 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -223,7 +223,7 @@ impl FileOffset { /// ``` pub trait GuestAddressSpace: Clone { /// The type that will be used to access guest memory. - type M: GuestMemory; + type M: IoMemory; /// A type that provides access to the memory. type T: Clone + Deref; @@ -234,7 +234,7 @@ pub trait GuestAddressSpace: Clone { fn memory(&self) -> Self::T; } -impl GuestAddressSpace for &M { +impl GuestAddressSpace for &M { type M = M; type T = Self; @@ -243,7 +243,7 @@ impl GuestAddressSpace for &M { } } -impl GuestAddressSpace for Rc { +impl GuestAddressSpace for Rc { type M = M; type T = Self; @@ -252,7 +252,7 @@ impl GuestAddressSpace for Rc { } } -impl GuestAddressSpace for Arc { +impl GuestAddressSpace for Arc { type M = M; type T = Self; From 88f9b1006e41fc267fa4b483c1d2af85105e09fb Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Fri, 15 Aug 2025 16:48:31 +0200 Subject: [PATCH 04/11] Allow any IoMemory for GuestMemoryAtomic This simply makes `GuestMemoryAtomic` more general. (However, this change requires the preceding commit that relaxed the `GuestAddressSpace::M` requirement from `GuestMemory` to `IoMemory`.) Signed-off-by: Hanna Czenczek --- src/atomic.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/atomic.rs b/src/atomic.rs index 87a2c1e3..be8c5cf7 100644 --- a/src/atomic.rs +++ b/src/atomic.rs @@ -2,7 +2,7 @@ // Copyright (C) 2020 Red Hat, Inc. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -//! A wrapper over an `ArcSwap` struct to support RCU-style mutability. +//! A wrapper over an `ArcSwap` struct to support RCU-style mutability. //! //! With the `backend-atomic` feature enabled, simply replacing `GuestMemoryMmap` //! with `GuestMemoryAtomic` will enable support for mutable memory maps. @@ -15,17 +15,17 @@ use arc_swap::{ArcSwap, Guard}; use std::ops::Deref; use std::sync::{Arc, LockResult, Mutex, MutexGuard, PoisonError}; -use crate::{GuestAddressSpace, GuestMemory}; +use crate::{GuestAddressSpace, IoMemory}; /// A fast implementation of a mutable collection of memory regions. 
/// /// This implementation uses `ArcSwap` to provide RCU-like snapshotting of the memory map: -/// every update of the memory map creates a completely new `GuestMemory` object, and +/// every update of the memory map creates a completely new `IoMemory` object, and /// readers will not be blocked because the copies they retrieved will be collected once /// no one can access them anymore. Under the assumption that updates to the memory map /// are rare, this allows a very efficient implementation of the `memory()` method. #[derive(Debug)] -pub struct GuestMemoryAtomic { +pub struct GuestMemoryAtomic { // GuestAddressSpace, which we want to implement, is basically a drop-in // replacement for &M. Therefore, we need to pass to devices the `GuestMemoryAtomic` // rather than a reference to it. To obtain this effect we wrap the actual fields @@ -34,9 +34,9 @@ pub struct GuestMemoryAtomic { inner: Arc<(ArcSwap, Mutex<()>)>, } -impl From> for GuestMemoryAtomic { +impl From> for GuestMemoryAtomic { /// create a new `GuestMemoryAtomic` object whose initial contents come from - /// the `map` reference counted `GuestMemory`. + /// the `map` reference counted `IoMemory`. fn from(map: Arc) -> Self { let inner = (ArcSwap::new(map), Mutex::new(())); GuestMemoryAtomic { @@ -45,9 +45,9 @@ impl From> for GuestMemoryAtomic { } } -impl GuestMemoryAtomic { +impl GuestMemoryAtomic { /// create a new `GuestMemoryAtomic` object whose initial contents come from - /// the `map` `GuestMemory`. + /// the `map` `IoMemory`. pub fn new(map: M) -> Self { Arc::new(map).into() } @@ -75,7 +75,7 @@ impl GuestMemoryAtomic { } } -impl Clone for GuestMemoryAtomic { +impl Clone for GuestMemoryAtomic { fn clone(&self) -> Self { Self { inner: self.inner.clone(), @@ -83,7 +83,7 @@ impl Clone for GuestMemoryAtomic { } } -impl GuestAddressSpace for GuestMemoryAtomic { +impl GuestAddressSpace for GuestMemoryAtomic { type T = GuestMemoryLoadGuard; type M = M; @@ -94,14 +94,14 @@ impl GuestAddressSpace for GuestMemoryAtomic { /// A guard that provides temporary access to a `GuestMemoryAtomic`. This /// object is returned from the `memory()` method. It dereference to -/// a snapshot of the `GuestMemory`, so it can be used transparently to +/// a snapshot of the `IoMemory`, so it can be used transparently to /// access memory. #[derive(Debug)] -pub struct GuestMemoryLoadGuard { +pub struct GuestMemoryLoadGuard { guard: Guard>, } -impl GuestMemoryLoadGuard { +impl GuestMemoryLoadGuard { /// Make a clone of the held pointer and returns it. This is more /// expensive than just using the snapshot, but it allows to hold on /// to the snapshot outside the scope of the guard. It also allows @@ -112,7 +112,7 @@ impl GuestMemoryLoadGuard { } } -impl Clone for GuestMemoryLoadGuard { +impl Clone for GuestMemoryLoadGuard { fn clone(&self) -> Self { GuestMemoryLoadGuard { guard: Guard::from_inner(Arc::clone(&*self.guard)), @@ -120,7 +120,7 @@ impl Clone for GuestMemoryLoadGuard { } } -impl Deref for GuestMemoryLoadGuard { +impl Deref for GuestMemoryLoadGuard { type Target = M; fn deref(&self) -> &Self::Target { @@ -133,12 +133,12 @@ impl Deref for GuestMemoryLoadGuard { /// possibly after updating the memory map represented by the /// `GuestMemoryAtomic` that created the guard. 
#[derive(Debug)] -pub struct GuestMemoryExclusiveGuard<'a, M: GuestMemory> { +pub struct GuestMemoryExclusiveGuard<'a, M: IoMemory> { parent: &'a GuestMemoryAtomic, _guard: MutexGuard<'a, ()>, } -impl GuestMemoryExclusiveGuard<'_, M> { +impl GuestMemoryExclusiveGuard<'_, M> { /// Replace the memory map in the `GuestMemoryAtomic` that created the guard /// with the new memory map, `map`. The lock is then dropped since this /// method consumes the guard. From 388d96435c7574abf41f529bab7fe1788276faff Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:07 +0200 Subject: [PATCH 05/11] Add Iommu trait and Iotlb struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Iommu trait defines an interface for translating virtual addresses into addresses in an underlying address space. It is supposed to do so by internally keeping an instance of the Iotlb type, updating it with mappings whenever necessary (e.g. when actively invalidated or when there’s an access failure) from some internal data source (e.g. for a vhost-user IOMMU, the data comes from the vhost-user front-end by requesting an update). In a later commit, we are going to provide an implementation of `IoMemory` that can use an `Iommu` to provide an I/O virtual address space. Note that while I/O virtual memory in practice will be organized in pages, the vhost-user specification makes no mention of a specific page size or how to obtain it. Therefore, we cannot really assume any page size and have to use plain ranges with byte granularity as mappings instead. Signed-off-by: Hanna Czenczek --- Cargo.toml | 2 + src/atomic.rs | 7 +- src/guest_memory.rs | 6 + src/iommu.rs | 332 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 5 + 5 files changed, 349 insertions(+), 3 deletions(-) create mode 100644 src/iommu.rs diff --git a/Cargo.toml b/Cargo.toml index ace634a6..75750ac2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ default = ["rawfd"] backend-bitmap = ["dep:libc"] backend-mmap = ["dep:libc", "dep:winapi"] backend-atomic = ["arc-swap"] +iommu = ["dep:rangemap"] rawfd = ["dep:libc"] xen = ["backend-mmap", "bitflags", "vmm-sys-util"] @@ -23,6 +24,7 @@ xen = ["backend-mmap", "bitflags", "vmm-sys-util"] libc = { version = "0.2.39", optional = true } arc-swap = { version = "1.0.0", optional = true } bitflags = { version = "2.4.0", optional = true } +rangemap = { version = "1.5.1", optional = true } thiserror = "2.0.16" vmm-sys-util = { version = ">=0.12.1, <=0.15.0", optional = true } diff --git a/src/atomic.rs b/src/atomic.rs index be8c5cf7..4496d68a 100644 --- a/src/atomic.rs +++ b/src/atomic.rs @@ -151,7 +151,7 @@ impl GuestMemoryExclusiveGuard<'_, M> { mod tests { use super::*; use crate::region::tests::{new_guest_memory_collection_from_regions, Collection, MockRegion}; - use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize}; + use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, IoMemory}; type GuestMemoryMmapAtomic = GuestMemoryAtomic; @@ -165,7 +165,8 @@ mod tests { let mut iterated_regions = Vec::new(); let gmm = new_guest_memory_collection_from_regions(®ions).unwrap(); let gm = GuestMemoryMmapAtomic::new(gmm); - let mem = gm.memory(); + let vmem = gm.memory(); + let mem = vmem.physical_memory().unwrap(); for region in mem.iter() { assert_eq!(region.len(), region_size as GuestUsize); @@ -184,7 +185,7 @@ mod tests { .map(|x| (x.0, x.1)) .eq(iterated_regions.iter().copied())); - let mem2 = mem.into_inner(); + let mem2 = 
vmem.into_inner(); for region in mem2.iter() { assert_eq!(region.len(), region_size as GuestUsize); } diff --git a/src/guest_memory.rs b/src/guest_memory.rs index c9f20b21..8ce52a4a 100644 --- a/src/guest_memory.rs +++ b/src/guest_memory.rs @@ -55,6 +55,8 @@ use crate::address::{Address, AddressValue}; use crate::bitmap::MS; use crate::bytes::{AtomicAccess, Bytes}; use crate::io::{ReadVolatile, WriteVolatile}; +#[cfg(feature = "iommu")] +use crate::iommu::Error as IommuError; use crate::volatile_memory::{self, VolatileSlice}; use crate::{GuestMemoryRegion, IoMemory, Permissions}; @@ -85,6 +87,10 @@ pub enum Error { /// The address to be read by `try_access` is outside the address range. #[error("The address to be read by `try_access` is outside the address range")] GuestAddressOverflow, + #[cfg(feature = "iommu")] + /// IOMMU translation error + #[error("IOMMU failed to translate guest address: {0}")] + IommuError(IommuError), } impl From for Error { diff --git a/src/iommu.rs b/src/iommu.rs new file mode 100644 index 00000000..cc16ea33 --- /dev/null +++ b/src/iommu.rs @@ -0,0 +1,332 @@ +// Copyright (C) 2025 Red Hat. All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE-BSD-3-Clause file. +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Provide an interface for IOMMUs enabling I/O virtual address (IOVA) translation. +//! +//! All IOMMUs consist of an IOTLB ([`Iotlb`]), which is backed by a data source that can deliver +//! all mappings. For example, for vhost-user, that data source is the vhost-user front-end; i.e. +//! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies +//! the desired mapping. + +use crate::{GuestAddress, Permissions}; +use rangemap::RangeMap; +use std::cmp; +use std::fmt::Debug; +use std::num::Wrapping; +use std::ops::{Deref, Range}; + +/// Errors associated with IOMMU address translation. +#[derive(Debug, thiserror::Error)] +pub enum Error { + /// Lookup cannot be resolved. + #[error( + "Cannot translate I/O virtual address range {:#x}+{}: {reason}", + iova_range.base.0, + iova_range.length, + )] + CannotResolve { + /// IOVA range that could not be resolved + iova_range: IovaRange, + /// Some human-readable specifics about the reason + reason: String, + }, + + /// Wanted to translate an IOVA range into a single slice, but the range is fragmented. + #[error( + "Expected {:#x}+{} to be a continuous I/O virtual address range, but only {continuous_length} bytes are", + iova_range.base.0, + iova_range.length, + )] + Fragmented { + /// Full IOVA range that was to be translated + iova_range: IovaRange, + /// Length of the continuous head (i.e. the first fragment) + continuous_length: usize, + }, + + /// IOMMU is not configured correctly, and so cannot translate addresses. + #[error("IOMMU not configured correctly, cannot operate: {reason}")] + IommuMisconfigured { + /// Some human-readable specifics about the misconfiguration + reason: String, + }, +} + +/// An IOMMU, allowing translation of I/O virtual addresses (IOVAs). +/// +/// Generally, `Iommu` implementaions consist of an [`Iotlb`], which is supposed to be consulted +/// first for lookup requests. All misses and access failures then should be resolved by looking +/// up the affected ranges in the actual IOMMU (which has all current mappings) and putting the +/// results back into the IOTLB. 
A subsequent lookup in the IOTLB should result in a full +/// translation, which can then be returned. +pub trait Iommu: Debug + Send + Sync { + /// `Deref` type associated with the type that internally wraps the `Iotlb`. + /// + /// For example, the `Iommu` may keep the `Iotlb` wrapped in an `RwLock`, making this type + /// `RwLockReadGuard<'a, Iotlb>`. + /// + /// We need this specific type instead of a plain reference so that [`IotlbIterator`] can + /// actually own the reference and prolong its lifetime. + type IotlbGuard<'a>: Deref + 'a + where + Self: 'a; + + /// Translate the given range for the given access into the underlying address space. + /// + /// Any translation request is supposed to be fully served by an internal [`Iotlb`] instance. + /// Any misses or access failures should result in a lookup in the full IOMMU structures, + /// filling the IOTLB with the results, and then repeating the lookup in there. + fn translate( + &self, + iova: GuestAddress, + length: usize, + access: Permissions, + ) -> Result>, Error>; +} + +/// Mapping target in an IOMMU/IOTLB. +/// +/// This is the data to which each entry in an IOMMU/IOTLB maps. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct IommuMapping { + /// Difference between the mapped and the IOVA address, i.e. what to add to an IOVA address to + /// get the mapped adrress. + /// + /// We cannot store the more obvious mapped base address for this range because that would + /// allow rangemap to wrongfully merge consecutive map entries if they are a duplicate mapping + /// (which does happen). Storing the difference ensures that entries are only merged when they + /// are indeed consecutive. + /// + /// Note that we make no granularity restrictions (i.e. do not operate on a unit like pages), + /// so the source and target address may have arbitrary alignment. That is why both fields + /// here need to be separate and we cannot merge the two bits that are `permissions` with this + /// base address into a single `u64` field. + target_source_diff: Wrapping, + /// Allowed access for the mapped range + permissions: Permissions, +} + +/// Provides an IOTLB. +/// +/// The IOTLB caches IOMMU mappings. It must be preemptively updated whenever mappings are +/// restricted or removed; in contrast, adding mappings or making them more permissive does not +/// require preemptive updates, as subsequent accesses that violate the previous (more restrictive) +/// permissions will trigger TLB misses or access failures, which is then supposed to result in an +/// update from the outer [`Iommu`] object that performs the translation. +#[derive(Debug, Default)] +pub struct Iotlb { + /// Mappings of which we know. + /// + /// Note that the vhost(-user) specification makes no mention of a specific page size, even + /// though in practice the IOVA address space will be organized in terms of pages. However, we + /// cannot really rely on that (or any specific page size; it could be 4k, the guest page size, + /// or the host page size), so we need to be able to handle continuous ranges of any + /// granularity. + tlb: RangeMap, +} + +/// Iterates over a range of valid IOTLB mappings that together constitute a continuous range in +/// I/O virtual address space. +/// +/// Returned by [`Iotlb::lookup()`] and [`Iommu::translate()`] in case translation was successful +/// (i.e. the whole requested range is mapped and permits the given access). 
+#[derive(Clone, Debug)] +pub struct IotlbIterator> { + /// IOTLB that provides these mapings + iotlb: D, + /// I/O virtual address range left to iterate over + range: Range, + /// Requested access permissions + access: Permissions, +} + +/// Representation of an IOVA memory range (i.e. in the I/O virtual address space). +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct IovaRange { + /// IOVA base address + pub base: GuestAddress, + /// Length (in bytes) of this range + pub length: usize, +} + +/// Representation of a mapped memory range in the underlying address space. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MappedRange { + /// Base address in the underlying address space + pub base: GuestAddress, + /// Length (in bytes) of this mapping + pub length: usize, +} + +/// Lists the subranges in I/O virtual address space that turned out to not be accessible when +/// trying to access an IOVA range. +#[derive(Clone, Debug)] +pub struct IotlbFails { + /// Subranges not mapped at all + pub misses: Vec, + /// Subranges that are mapped, but do not allow the requested access mode + pub access_fails: Vec, +} + +impl IommuMapping { + /// Create a new mapping. + fn new(source_base: u64, target_base: u64, permissions: Permissions) -> Self { + IommuMapping { + target_source_diff: Wrapping(target_base) - Wrapping(source_base), + permissions, + } + } + + /// Map the given source address (IOVA) to its corresponding target address. + fn map(&self, iova: u64) -> u64 { + (Wrapping(iova) + self.target_source_diff).0 + } + + /// Return the permissions for this mapping. + fn permissions(&self) -> Permissions { + self.permissions + } +} + +impl Iotlb { + /// Create a new empty instance. + pub fn new() -> Self { + Default::default() + } + + /// Change the mapping of the given IOVA range. + pub fn set_mapping( + &mut self, + iova: GuestAddress, + map_to: GuestAddress, + length: usize, + perm: Permissions, + ) -> Result<(), Error> { + // Soft TODO: We may want to evict old entries here once the TLB grows to a certain size, + // but that will require LRU book-keeping. However, this is left for the future, because: + // - this TLB is not implemented in hardware, so we do not really have strong entry count + // constraints, and + // - it seems like at least Linux guests invalidate mappings often, automatically limiting + // our entry count. + + let mapping = IommuMapping::new(iova.0, map_to.0, perm); + self.tlb.insert(iova.0..(iova.0 + length as u64), mapping); + + Ok(()) + } + + /// Remove any mapping in the given IOVA range. + pub fn invalidate_mapping(&mut self, iova: GuestAddress, length: usize) { + self.tlb.remove(iova.0..(iova.0 + length as u64)); + } + + /// Remove all mappings. + pub fn invalidate_all(&mut self) { + self.tlb.clear(); + } + + /// Perform a lookup for the given range and the given `access` mode. + /// + /// If the whole range is mapped and accessible, return an iterator over all mappings. + /// + /// If any part of the range is not mapped or does not permit the given access mode, return an + /// `Err(_)` that contains a list of all such subranges. 
+ pub fn lookup>( + this: D, + iova: GuestAddress, + length: usize, + access: Permissions, + ) -> Result, IotlbFails> { + let full_range = iova.0..(iova.0 + length as u64); + + let has_misses = this.tlb.gaps(&full_range).any(|_| true); + let has_access_fails = this + .tlb + .overlapping(full_range.clone()) + .any(|(_, mapping)| !mapping.permissions().allow(access)); + + if has_misses || has_access_fails { + let misses = this + .tlb + .gaps(&full_range) + .map(|range| { + // Gaps are always cut down to the range given to `gaps()` + debug_assert!(range.start >= full_range.start && range.end <= full_range.end); + range.try_into().unwrap() + }) + .collect::>(); + + let access_fails = this + .tlb + .overlapping(full_range.clone()) + .filter(|(_, mapping)| !mapping.permissions().allow(access)) + .map(|(range, _)| { + let start = cmp::max(range.start, full_range.start); + let end = cmp::min(range.end, full_range.end); + (start..end).try_into().unwrap() + }) + .collect::>(); + + return Err(IotlbFails { + misses, + access_fails, + }); + } + + Ok(IotlbIterator { + iotlb: this, + range: full_range, + access, + }) + } +} + +impl> Iterator for IotlbIterator { + /// Addresses in the underlying address space + type Item = MappedRange; + + fn next(&mut self) -> Option { + // Note that we can expect the whole IOVA range to be mapped with the right access flags. + // The `IotlbIterator` is created by `Iotlb::lookup()` only if the whole range is mapped + // accessibly; we have a permanent reference to `Iotlb`, so the range cannot be invalidated + // in the meantime. + // Another note: It is tempting to have `IotlbIterator` wrap around the + // `rangemap::Overlapping` iterator, but that just takes a (lifetimed) reference to the + // map, not an owned reference (like RwLockReadGuard), which we want to use; so using that + // would probably require self-referential structs. + + if self.range.is_empty() { + return None; + } + + let (range, mapping) = self.iotlb.tlb.get_key_value(&self.range.start).unwrap(); + + assert!(mapping.permissions().allow(self.access)); + + let mapping_iova_start = self.range.start; + let mapping_iova_end = cmp::min(self.range.end, range.end); + let mapping_len = mapping_iova_end - mapping_iova_start; + + self.range.start = mapping_iova_end; + + Some(MappedRange { + base: GuestAddress(mapping.map(mapping_iova_start)), + length: mapping_len.try_into().unwrap(), + }) + } +} + +impl TryFrom> for IovaRange { + type Error = >::Error; + + fn try_from(range: Range) -> Result { + Ok(IovaRange { + base: GuestAddress(range.start), + length: (range.end - range.start).try_into()?, + }) + } +} diff --git a/src/lib.rs b/src/lib.rs index 679ae7da..327cfcf8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,6 +61,11 @@ pub use io::{ReadVolatile, WriteVolatile}; pub mod io_memory; pub use io_memory::{IoMemory, Permissions}; +#[cfg(feature = "iommu")] +pub mod iommu; +#[cfg(feature = "iommu")] +pub use iommu::{Iommu, Iotlb}; + #[cfg(feature = "backend-mmap")] pub mod mmap; From 19b5495acbe76b5377dba894c21139ce1a913748 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 12 May 2025 17:10:10 +0200 Subject: [PATCH 06/11] Add IommuMemory This `IoMemory` type provides an I/O virtual address space by adding an IOMMU translation layer to an underlying `GuestMemory` object. 
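For orientation, a minimal sketch of how the `Iotlb` introduced in the previous commit is driven (and what `IommuMemory` relies on internally). The addresses are made up, and the example assumes the crate's usual `vm_memory` paths with the `iommu` feature enabled:

```rust
use vm_memory::{GuestAddress, Iotlb, Permissions};

fn main() {
    let mut tlb = Iotlb::new();

    // Map the IOVA range 0x1000..0x3000 to 0x8000..0xa000 in the underlying
    // address space, allowing both reads and writes.
    tlb.set_mapping(
        GuestAddress(0x1000),
        GuestAddress(0x8000),
        0x2000,
        Permissions::ReadWrite,
    )
    .unwrap();

    // `lookup()` accepts anything that derefs to `Iotlb` (a plain reference,
    // an RwLock read guard, ...) and yields the mappings backing the range.
    let mappings = Iotlb::lookup(&tlb, GuestAddress(0x1800), 0x100, Permissions::Read)
        .expect("range is mapped and readable");
    for mapping in mappings {
        println!("maps to {:#x}+{:#x}", mapping.base.0, mapping.length);
    }
}
```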
Signed-off-by: Hanna Czenczek --- src/iommu.rs | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 2 +- 2 files changed, 270 insertions(+), 2 deletions(-) diff --git a/src/iommu.rs b/src/iommu.rs index cc16ea33..34f3444d 100644 --- a/src/iommu.rs +++ b/src/iommu.rs @@ -12,12 +12,18 @@ //! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies //! the desired mapping. -use crate::{GuestAddress, Permissions}; +use crate::guest_memory::{ + Error as GuestMemoryError, GuestMemorySliceIterator, Result as GuestMemoryResult, +}; +use crate::{ + bitmap, GuestAddress, GuestMemory, IoMemory, MemoryRegionAddress, Permissions, VolatileSlice, +}; use rangemap::RangeMap; use std::cmp; use std::fmt::Debug; use std::num::Wrapping; use std::ops::{Deref, Range}; +use std::sync::Arc; /// Errors associated with IOMMU address translation. #[derive(Debug, thiserror::Error)] @@ -172,6 +178,22 @@ pub struct IotlbFails { pub access_fails: Vec, } +/// [`IoMemory`] type that consists of an underlying [`GuestMemory`] object plus an [`Iommu`]. +/// +/// The underlying [`GuestMemory`] is basically the physical memory, and the [`Iommu`] translates +/// the I/O virtual address space that `IommuMemory` provides into that underlying physical address +/// space. +#[derive(Debug, Default)] +pub struct IommuMemory { + /// Physical memory + inner: M, + /// IOMMU to translate IOVAs into physical addresses + iommu: Arc, + /// Whether the IOMMU is even to be used or not; disabling it makes this a pass-through to + /// `inner`. + use_iommu: bool, +} + impl IommuMapping { /// Create a new mapping. fn new(source_base: u64, target_base: u64, permissions: Permissions) -> Self { @@ -330,3 +352,249 @@ impl TryFrom> for IovaRange { }) } } + +impl IommuMemory { + /// Create a new `IommuMemory` instance. + pub fn new(inner: M, iommu: I, use_iommu: bool) -> Self { + IommuMemory { + inner, + iommu: Arc::new(iommu), + use_iommu, + } + } + + /// Create a new version of `self` with the underlying physical memory replaced. + /// + /// Note that the inner `Arc` reference to the IOMMU is cloned, i.e. both the existing and the + /// new `IommuMemory` object will share an IOMMU instance. (The `use_iommu` flag however is + /// copied, so is independent between the two instances.) + pub fn inner_replaced(&self, inner: M) -> Self { + IommuMemory { + inner, + iommu: Arc::clone(&self.iommu), + use_iommu: self.use_iommu, + } + } + + /// Enable or disable the IOMMU. + /// + /// Disabling the IOMMU switches to pass-through mode, where every access is done directly on + /// the underlying physical memory. + pub fn set_iommu_enabled(&mut self, enabled: bool) { + self.use_iommu = enabled; + } + + /// Return a reference to the IOMMU. + pub fn iommu(&self) -> &Arc { + &self.iommu + } + + /// Return a reference to the inner physical memory object. 
+ pub fn inner(&self) -> &M { + &self.inner + } +} + +impl Clone for IommuMemory { + fn clone(&self) -> Self { + IommuMemory { + inner: self.inner.clone(), + iommu: Arc::clone(&self.iommu), + use_iommu: self.use_iommu, + } + } +} + +impl IoMemory for IommuMemory { + type PhysicalMemory = M; + + fn range_accessible(&self, addr: GuestAddress, count: usize, access: Permissions) -> bool { + if !self.use_iommu { + return self.inner.range_accessible(addr, count, access); + } + + let Ok(mut translated_iter) = self.iommu.translate(addr, count, access) else { + return false; + }; + + translated_iter.all(|translated| { + self.inner + .range_accessible(translated.base, translated.length, access) + }) + } + + fn try_access( + &self, + count: usize, + addr: GuestAddress, + access: Permissions, + mut f: F, + ) -> GuestMemoryResult + where + F: FnMut( + usize, + usize, + MemoryRegionAddress, + &::R, + ) -> GuestMemoryResult, + { + if !self.use_iommu { + return self.inner.try_access(count, addr, f); + } + + let translated = self + .iommu + .translate(addr, count, access) + .map_err(GuestMemoryError::IommuError)?; + + let mut total = 0; + for mapping in translated { + let handled = self.inner.try_access( + mapping.length, + mapping.base, + |inner_offset, count, in_region_addr, region| { + f(total + inner_offset, count, in_region_addr, region) + }, + )?; + + if handled == 0 { + break; + } else if handled > count { + return Err(GuestMemoryError::CallbackOutOfRange); + } + + total += handled; + // `GuestMemory::try_access()` only returns a short count when no more data needs to be + // processed, so we can stop here + if handled < mapping.length { + break; + } + } + + Ok(total) + } + + fn get_slices<'a>( + &'a self, + addr: GuestAddress, + count: usize, + access: Permissions, + ) -> GuestMemoryResult< + impl Iterator>>>, + > { + if self.use_iommu { + IommuMemorySliceIterator::virt(self, addr, count, access) + .map_err(GuestMemoryError::IommuError) + } else { + Ok(IommuMemorySliceIterator::phys(self, addr, count)) + } + } + + fn physical_memory(&self) -> Option<&Self::PhysicalMemory> { + if self.use_iommu { + None + } else { + Some(&self.inner) + } + } +} + +/// Iterates over [`VolatileSlice`]s that together form an area in an `IommuMemory`. +/// +/// Returned by [`IommuMemory::get_slices()`] +#[derive(Debug)] +pub struct IommuMemorySliceIterator<'a, M: GuestMemory, I: Iommu + 'a> { + /// Underlying physical memory (i.e. not the `IommuMemory`) + phys_mem: &'a M, + /// IOMMU translation result (i.e. remaining physical regions to visit) + translation: Option>>, + /// Iterator in the currently visited physical region + current_translated_iter: Option>, +} + +impl<'a, M: GuestMemory, I: Iommu> IommuMemorySliceIterator<'a, M, I> { + /// Create an iterator over the physical region `[addr, addr + count)`. + /// + /// “Physical” means that the IOMMU is not used to translate this address range. The resulting + /// iterator is effectively the same as would be returned by [`GuestMemory::get_slices()`] on + /// the underlying physical memory for the given address range. + fn phys(mem: &'a IommuMemory, addr: GuestAddress, count: usize) -> Self { + IommuMemorySliceIterator { + phys_mem: &mem.inner, + translation: None, + current_translated_iter: Some(mem.inner.get_slices(addr, count)), + } + } + + /// Create an iterator over the IOVA region `[addr, addr + count)`. + /// + /// This address range is translated using the IOMMU, and the resulting mappings are then + /// separately visited via [`GuestMemory::get_slices()`]. 
+ fn virt( + mem: &'a IommuMemory, + addr: GuestAddress, + count: usize, + access: Permissions, + ) -> Result { + let translation = mem.iommu.translate(addr, count, access)?; + Ok(IommuMemorySliceIterator { + phys_mem: &mem.inner, + translation: Some(translation), + current_translated_iter: None, + }) + } + + /// Helper function for [`::next()`]. + /// + /// Get the next slice and update the internal state. If there is an element left in + /// `self.current_translated_iter`, return that; otherwise, move to the next mapping left in + /// `self.translation` until there are no more mappings left. + /// + /// If both fields are `None`, always return `None`. + /// + /// # Safety + /// + /// This function never resets `self.current_translated_iter` or `self.translation` to `None`, + /// particularly not in case of error; calling this function with these fields not reset after + /// an error is ill-defined, so the caller must check the return value, and in case of an + /// error, reset these fields to `None`. + /// + /// (This is why this function exists, so this reset can happen in a single central location.) + unsafe fn do_next( + &mut self, + ) -> Option>>> { + loop { + if let Some(item) = self + .current_translated_iter + .as_mut() + .and_then(|iter| iter.next()) + { + return Some(item); + } + + let next_mapping = self.translation.as_mut()?.next()?; + self.current_translated_iter = Some( + self.phys_mem + .get_slices(next_mapping.base, next_mapping.length), + ); + } + } +} + +impl<'a, M: GuestMemory, I: Iommu> Iterator for IommuMemorySliceIterator<'a, M, I> { + type Item = GuestMemoryResult>>; + + fn next(&mut self) -> Option { + // SAFETY: + // We reset `current_translated_iter` and `translation` to `None` in case of error + match unsafe { self.do_next() } { + Some(Ok(slice)) => Some(Ok(slice)), + other => { + // On error (or end), clear both so iteration remains stopped + self.current_translated_iter.take(); + self.translation.take(); + other + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 327cfcf8..09314c4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,7 +64,7 @@ pub use io_memory::{IoMemory, Permissions}; #[cfg(feature = "iommu")] pub mod iommu; #[cfg(feature = "iommu")] -pub use iommu::{Iommu, Iotlb}; +pub use iommu::{Iommu, IommuMemory, Iotlb}; #[cfg(feature = "backend-mmap")] pub mod mmap; From 664dd48e1ea5fc944bc3d6ba10f150913ce17d3a Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Tue, 13 May 2025 12:01:41 +0200 Subject: [PATCH 07/11] mmap: Wrap MmapRegion in Arc<> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vhost-user-backend crate will need to be able to modify all existing memory regions to use the VMM user address instead of the guest physical address once the IOMMU feature is switched on, and vice versa. To do so, it needs to be able to modify regions’ base address. Because `GuestMemoryMmap` stores regions wrapped in an `Arc<_>`, we cannot mutate them after they have been put into the `GuestMemoryMmap` object; and `MmapRegion` itself is by its nature not clonable. So to modify the regions’ base addresses, we need some way to create a new `GuestRegionMmap` referencing the same `MmapRegion` as another one, but with a different base address. 
We can do that by having `GuestRegionMmap` wrap its `MmapRegion` in an `Arc`, and adding a method to return a reference to that `Arc`, and a method to construct a `GuestRegionMmap` object from such a cloned `Arc.` Signed-off-by: Hanna Czenczek --- src/mmap/mod.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/mmap/mod.rs b/src/mmap/mod.rs index 1ba59f54..3a13a9df 100644 --- a/src/mmap/mod.rs +++ b/src/mmap/mod.rs @@ -15,6 +15,7 @@ use std::borrow::Borrow; use std::ops::Deref; use std::result; +use std::sync::Arc; use crate::address::Address; use crate::bitmap::{Bitmap, BS}; @@ -54,7 +55,7 @@ pub use windows::MmapRegion; /// in the virtual address space of the calling process. #[derive(Debug)] pub struct GuestRegionMmap { - mapping: MmapRegion, + mapping: Arc>, guest_base: GuestAddress, } @@ -62,7 +63,7 @@ impl Deref for GuestRegionMmap { type Target = MmapRegion; fn deref(&self) -> &MmapRegion { - &self.mapping + self.mapping.as_ref() } } @@ -71,6 +72,11 @@ impl GuestRegionMmap { /// /// Returns `None` if `guest_base` + `mapping.len()` would overflow. pub fn new(mapping: MmapRegion, guest_base: GuestAddress) -> Option { + Self::with_arc(Arc::new(mapping), guest_base) + } + + /// Same as [`Self::new()`], but takes an `Arc`-wrapped `mapping`. + pub fn with_arc(mapping: Arc>, guest_base: GuestAddress) -> Option { guest_base .0 .checked_add(mapping.size() as u64) @@ -79,6 +85,16 @@ impl GuestRegionMmap { guest_base, }) } + + /// Return a reference to the inner `Arc` (as opposed to + /// [`.deref()`](Self::deref()), which does not reference the `Arc`). + /// + /// The returned reference can be cloned to construct a new `GuestRegionMmap` with a different + /// base address (e.g. when switching between memory address spaces based on the guest physical + /// address vs. the VMM userspace virtual address). + pub fn get_mapping(&self) -> &Arc> { + &self.mapping + } } #[cfg(not(feature = "xen"))] From ce6388c470d89f4902abf6392cf9ece7e79b86a7 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Tue, 29 Jul 2025 12:28:31 +0200 Subject: [PATCH 08/11] IoMemory: Add IOVA-space bitmap Without an IOMMU, we have direct access to guest physical addresses (GPAs). In order to track our writes to guest memory (during migration), we log them into dirty bitmaps, and a page's bit index is its GPA divided by the page size. With an IOMMU, however, we no longer know the GPA, instead we operate on I/O virtual addresses (IOVAs) and VMM user-space addresses (VUAs). Here, the dirty bitmap bit index is the IOVA divided by the page size. `IoMemory` types contain an internal "physical" memory type that operates on these VUAs (`IoMemory::PhysicalMemory). Any bitmap functionality that this internal type may already have (e.g. `GuestMemoryMmap` does) cannot be used for dirty bitmap tracking with an IOMMU because they would use the VUA, but we need to use the IOVA, and this information is not available on that lower layer. Therefore, `IoMemory` itself needs to support bitmaps separately from its inner `PhysicalMemory`, which will be used when the IOMMU is in use. Add an associated `IoMemory::Bitmap` type and add a bitmap object to `IommuMemory`. Ensure that writes to memory dirty that bitmap appropriately: - In `try_access()`, if write access was requested, dirty the handled region of the bitmap after the access is done. - In `get_slice()`, replace the `VolatileSlice`'s bitmap (which comes from the inner `PhysicalMemory`) by the correct slice of our IOVA bitmap before returning it. 
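As a rough sketch of the logging rule above: the helper below is hypothetical (not part of this patch), but the `Permissions::has_write()` check and the `Bitmap::mark_dirty()` call mirror what `IommuMemory::try_access()` does after each handled chunk:

```rust
use vm_memory::bitmap::Bitmap;
use vm_memory::{GuestAddress, Permissions};

/// Hypothetical helper: log a completed `len`-byte access at `iova` into an
/// IOVA-indexed dirty bitmap.
fn log_iova_access<B: Bitmap>(bitmap: &B, iova: GuestAddress, len: usize, access: Permissions) {
    if access.has_write() {
        // The bit offset is derived from the IOVA, not from the VMM user
        // address of the backing mapping -- which is exactly why the bitmap
        // has to live in `IommuMemory` rather than in the inner memory.
        bitmap.mark_dirty(iova.0 as usize, len);
    }
}
```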
Signed-off-by: Hanna Czenczek --- src/io_memory.rs | 16 ++++-- src/iommu.rs | 117 +++++++++++++++++++++++++++++++++++++---- src/volatile_memory.rs | 11 ++++ 3 files changed, 129 insertions(+), 15 deletions(-) diff --git a/src/io_memory.rs b/src/io_memory.rs index 76498e59..7ba5236d 100644 --- a/src/io_memory.rs +++ b/src/io_memory.rs @@ -13,8 +13,9 @@ //! In addition, any access to virtual memory must be annotated with the intended access mode (i.e. //! reading and/or writing). +use crate::bitmap::{self, Bitmap}; use crate::guest_memory::Result; -use crate::{bitmap, GuestAddress, GuestMemory, MemoryRegionAddress, VolatileSlice}; +use crate::{GuestAddress, GuestMemory, GuestMemoryRegion, MemoryRegionAddress, VolatileSlice}; /// Permissions for accessing virtual memory. #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -52,6 +53,11 @@ impl Permissions { pub fn allow(&self, access: Self) -> bool { *self & access == access } + + /// Check whether the permissions `self` include write access. + pub fn has_write(&self) -> bool { + *self & Permissions::Write == Permissions::Write + } } impl std::ops::BitOr for Permissions { @@ -87,6 +93,8 @@ impl std::ops::BitAnd for Permissions { pub trait IoMemory { /// Underlying `GuestMemory` type. type PhysicalMemory: GuestMemory + ?Sized; + /// Dirty bitmap type for tracking writes to the IOVA address space. + type Bitmap: Bitmap; /// Return `true` if `addr..(addr + count)` is accessible with `access`. fn range_accessible(&self, addr: GuestAddress, count: usize, access: Permissions) -> bool; @@ -145,7 +153,7 @@ pub trait IoMemory { addr: GuestAddress, count: usize, access: Permissions, - ) -> Result>>>>; + ) -> Result>>>>; /// If this virtual memory is just a plain `GuestMemory` object underneath without an IOMMU /// translation layer in between, return that `GuestMemory` object. @@ -166,6 +174,7 @@ pub trait IoMemory { /// the same [`GuestMemory`] methods (if available), discarding the `access` parameter. impl IoMemory for M { type PhysicalMemory = M; + type Bitmap = ::B; fn range_accessible(&self, addr: GuestAddress, count: usize, _access: Permissions) -> bool { if let Ok(done) = ::try_access(self, count, addr, |_, len, _, _| Ok(len)) @@ -199,8 +208,7 @@ impl IoMemory for M { addr: GuestAddress, count: usize, _access: Permissions, - ) -> Result>>>> - { + ) -> Result>>>> { Ok(::get_slices(self, addr, count)) } diff --git a/src/iommu.rs b/src/iommu.rs index 34f3444d..1f4fe12c 100644 --- a/src/iommu.rs +++ b/src/iommu.rs @@ -12,15 +12,17 @@ //! IOTLB misses require sending a notification to the front-end and awaiting a reply that supplies //! the desired mapping. +use crate::bitmap::{self, Bitmap}; use crate::guest_memory::{ Error as GuestMemoryError, GuestMemorySliceIterator, Result as GuestMemoryResult, }; use crate::{ - bitmap, GuestAddress, GuestMemory, IoMemory, MemoryRegionAddress, Permissions, VolatileSlice, + Address, GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, IoMemory, + MemoryRegionAddress, Permissions, VolatileSlice, }; use rangemap::RangeMap; use std::cmp; -use std::fmt::Debug; +use std::fmt::{self, Debug}; use std::num::Wrapping; use std::ops::{Deref, Range}; use std::sync::Arc; @@ -183,7 +185,15 @@ pub struct IotlbFails { /// The underlying [`GuestMemory`] is basically the physical memory, and the [`Iommu`] translates /// the I/O virtual address space that `IommuMemory` provides into that underlying physical address /// space. 
-#[derive(Debug, Default)] +/// +/// Note on memory write tracking (“logging”): +/// - When the IOMMU is disabled ([`IommuMemory::set_iommu_enabled()`]), writes to memory are +/// tracked by the underlying [`GuestMemory`] in its bitmap(s). +/// - When it is enabled, they are instead tracked in the [`IommuMemory`]’s dirty bitmap; the +/// offset in the bitmap is calculated from the write’s IOVA. +/// +/// Therefore, this type should only be used when this is the desired behavior (IOVA-based memory +/// write logging when IOMMU is used). pub struct IommuMemory { /// Physical memory inner: M, @@ -192,6 +202,8 @@ pub struct IommuMemory { /// Whether the IOMMU is even to be used or not; disabling it makes this a pass-through to /// `inner`. use_iommu: bool, + /// Dirty bitmap to use for IOVA accesses + bitmap: Arc<::B>, } impl IommuMapping { @@ -355,27 +367,36 @@ impl TryFrom> for IovaRange { impl IommuMemory { /// Create a new `IommuMemory` instance. - pub fn new(inner: M, iommu: I, use_iommu: bool) -> Self { + pub fn new(inner: M, iommu: I, use_iommu: bool, bitmap: ::Bitmap) -> Self { IommuMemory { inner, iommu: Arc::new(iommu), use_iommu, + bitmap: Arc::new(bitmap), } } /// Create a new version of `self` with the underlying physical memory replaced. /// - /// Note that the inner `Arc` reference to the IOMMU is cloned, i.e. both the existing and the - /// new `IommuMemory` object will share an IOMMU instance. (The `use_iommu` flag however is - /// copied, so is independent between the two instances.) + /// Note that the inner `Arc` references to the IOMMU and bitmap are cloned, i.e. both the + /// existing and the new `IommuMemory` object will share the IOMMU and bitmap instances. (The + /// `use_iommu` flag however is copied, so is independent between the two instances.) pub fn inner_replaced(&self, inner: M) -> Self { IommuMemory { inner, iommu: Arc::clone(&self.iommu), use_iommu: self.use_iommu, + bitmap: Arc::clone(&self.bitmap), } } + /// Return a reference to the IOVA address space's dirty bitmap. + /// + /// This bitmap tracks write accesses done while the IOMMU is enabled. + pub fn bitmap(&self) -> &Arc<::Bitmap> { + &self.bitmap + } + /// Enable or disable the IOMMU. 
/// /// Disabling the IOMMU switches to pass-through mode, where every access is done directly on @@ -401,12 +422,42 @@ impl Clone for IommuMemory { inner: self.inner.clone(), iommu: Arc::clone(&self.iommu), use_iommu: self.use_iommu, + bitmap: Arc::clone(&self.bitmap), + } + } +} + +impl Debug for IommuMemory +where + ::B: Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("IommuMemory") + .field("inner", &self.inner) + .field("iommu", &self.iommu) + .field("use_iommu", &self.use_iommu) + .field("bitmap", &self.bitmap) + .finish() + } +} + +impl Default for IommuMemory +where + ::B: Default, +{ + fn default() -> Self { + IommuMemory { + inner: Default::default(), + iommu: Default::default(), + use_iommu: Default::default(), + bitmap: Default::default(), } } } impl IoMemory for IommuMemory { type PhysicalMemory = M; + type Bitmap = ::B; fn range_accessible(&self, addr: GuestAddress, count: usize, access: Permissions) -> bool { if !self.use_iommu { @@ -459,7 +510,13 @@ impl IoMemory for IommuMemory { if handled == 0 { break; - } else if handled > count { + } + + if access.has_write() { + self.bitmap.mark_dirty(addr.0 as usize + total, handled); + } + + if handled > count { return Err(GuestMemoryError::CallbackOutOfRange); } @@ -480,7 +537,7 @@ impl IoMemory for IommuMemory { count: usize, access: Permissions, ) -> GuestMemoryResult< - impl Iterator>>>, + impl Iterator>>>, > { if self.use_iommu { IommuMemorySliceIterator::virt(self, addr, count, access) @@ -502,8 +559,11 @@ impl IoMemory for IommuMemory { /// Iterates over [`VolatileSlice`]s that together form an area in an `IommuMemory`. /// /// Returned by [`IommuMemory::get_slices()`] -#[derive(Debug)] pub struct IommuMemorySliceIterator<'a, M: GuestMemory, I: Iommu + 'a> { + /// Current IOVA (needed to access the right slice of the IOVA space dirty bitmap) + iova: GuestAddress, + /// IOVA space dirty bitmap + bitmap: Option<&'a ::B>, /// Underlying physical memory (i.e. not the `IommuMemory`) phys_mem: &'a M, /// IOMMU translation result (i.e. remaining physical regions to visit) @@ -520,6 +580,8 @@ impl<'a, M: GuestMemory, I: Iommu> IommuMemorySliceIterator<'a, M, I> { /// the underlying physical memory for the given address range. 
fn phys(mem: &'a IommuMemory, addr: GuestAddress, count: usize) -> Self { IommuMemorySliceIterator { + iova: addr, + bitmap: None, phys_mem: &mem.inner, translation: None, current_translated_iter: Some(mem.inner.get_slices(addr, count)), @@ -538,6 +600,8 @@ impl<'a, M: GuestMemory, I: Iommu> IommuMemorySliceIterator<'a, M, I> { ) -> Result { let translation = mem.iommu.translate(addr, count, access)?; Ok(IommuMemorySliceIterator { + iova: addr, + bitmap: Some(mem.bitmap.as_ref()), phys_mem: &mem.inner, translation: Some(translation), current_translated_iter: None, @@ -569,7 +633,22 @@ impl<'a, M: GuestMemory, I: Iommu> IommuMemorySliceIterator<'a, M, I> { .as_mut() .and_then(|iter| iter.next()) { - return Some(item); + let mut item = match item { + Ok(item) => item, + Err(err) => return Some(Err(err)), + }; + + if let Some(bitmap) = self.bitmap.as_ref() { + let bitmap_slice = bitmap.slice_at(self.iova.0 as usize); + item = item.replace_bitmap(bitmap_slice); + } + + self.iova = match self.iova.overflowing_add(item.len() as GuestUsize) { + (x @ GuestAddress(0), _) | (x, false) => x, + (_, true) => return Some(Err(GuestMemoryError::GuestAddressOverflow)), + }; + + return Some(Ok(item)); } let next_mapping = self.translation.as_mut()?.next()?; @@ -598,3 +677,19 @@ impl<'a, M: GuestMemory, I: Iommu> Iterator for IommuMemorySliceIterator<'a, M, } } } + +impl<'a, M: GuestMemory + Debug, I: Iommu> Debug for IommuMemorySliceIterator<'a, M, I> +where + I::IotlbGuard<'a>: Debug, + ::B: Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("IommuMemorySliceIterator") + .field("iova", &self.iova) + .field("bitmap", &self.bitmap) + .field("phys_mem", &self.phys_mem) + .field("translation", &self.translation) + .field("current_translated_iter", &self.current_translated_iter) + .finish() + } +} diff --git a/src/volatile_memory.rs b/src/volatile_memory.rs index f242fce1..47d60932 100644 --- a/src/volatile_memory.rs +++ b/src/volatile_memory.rs @@ -435,6 +435,17 @@ impl<'a, B: BitmapSlice> VolatileSlice<'a, B> { } } + /// Replaces the bitmap in `self` by `new_bitmap`. + #[cfg(feature = "iommu")] + pub(crate) fn replace_bitmap(self, new_bitmap: NB) -> VolatileSlice<'a, NB> { + VolatileSlice { + addr: self.addr, + size: self.size, + bitmap: new_bitmap, + mmap: self.mmap, + } + } + /// Returns a guard for the pointer to the underlying memory. pub fn ptr_guard(&self) -> PtrGuard { PtrGuard::read(self.mmap, self.addr, self.len()) From f11ec802ac8067f085c012120d63e3c37d9f3d2f Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Wed, 9 Jul 2025 14:50:35 +0200 Subject: [PATCH 09/11] Add tests for IOMMU functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit also adds the iommu feature to the coverage_config feature list. (I left the aarch64 coverage value unchanged; I cannot find out how to get the current value on my system, and it isn’t include in CI.) 
Signed-off-by: Hanna Czenczek --- coverage_config_aarch64.json | 2 +- coverage_config_x86_64.json | 4 +- src/io_memory.rs | 60 +++ src/iommu.rs | 888 +++++++++++++++++++++++++++++++++++ src/mmap/mod.rs | 25 + 5 files changed, 976 insertions(+), 3 deletions(-) diff --git a/coverage_config_aarch64.json b/coverage_config_aarch64.json index 4aeb3711..7332c6c7 100644 --- a/coverage_config_aarch64.json +++ b/coverage_config_aarch64.json @@ -1,5 +1,5 @@ { "coverage_score": 85.2, "exclude_path": "mmap/windows.rs", - "crate_features": "backend-mmap,backend-atomic,backend-bitmap" + "crate_features": "backend-mmap,backend-atomic,backend-bitmap,iommu" } diff --git a/coverage_config_x86_64.json b/coverage_config_x86_64.json index 13f2dfd7..cc82ba81 100644 --- a/coverage_config_x86_64.json +++ b/coverage_config_x86_64.json @@ -1,5 +1,5 @@ { - "coverage_score": 91.78, + "coverage_score": 92.48, "exclude_path": "mmap_windows.rs", - "crate_features": "backend-mmap,backend-atomic,backend-bitmap" + "crate_features": "backend-mmap,backend-atomic,backend-bitmap,iommu" } diff --git a/src/io_memory.rs b/src/io_memory.rs index 7ba5236d..e07e3b60 100644 --- a/src/io_memory.rs +++ b/src/io_memory.rs @@ -216,3 +216,63 @@ impl IoMemory for M { Some(self) } } + +#[cfg(test)] +mod tests { + use super::Permissions; + + // Note that `IoMemory` is tested primarily in src/iommu.rs via `IommuMemory`. + + /// Test `Permissions & Permissions`. + #[test] + fn test_perm_and() { + use Permissions::*; + + for p in [No, Read, Write, ReadWrite] { + assert_eq!(p & p, p); + } + for p1 in [No, Read, Write, ReadWrite] { + for p2 in [No, Read, Write, ReadWrite] { + assert_eq!(p1 & p2, p2 & p1); + } + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(No & p, No); + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(ReadWrite & p, p); + } + assert_eq!(Read & Write, No); + } + + /// Test `Permissions | Permissions`. + #[test] + fn test_perm_or() { + use Permissions::*; + + for p in [No, Read, Write, ReadWrite] { + assert_eq!(p | p, p); + } + for p1 in [No, Read, Write, ReadWrite] { + for p2 in [No, Read, Write, ReadWrite] { + assert_eq!(p1 | p2, p2 | p1); + } + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(No | p, p); + } + for p in [No, Read, Write, ReadWrite] { + assert_eq!(ReadWrite | p, ReadWrite); + } + assert_eq!(Read | Write, ReadWrite); + } + + /// Test `Permissions::has_write()`. 
+ #[test] + fn test_perm_has_write() { + assert!(!Permissions::No.has_write()); + assert!(!Permissions::Read.has_write()); + assert!(Permissions::Write.has_write()); + assert!(Permissions::ReadWrite.has_write()); + } +} diff --git a/src/iommu.rs b/src/iommu.rs index 1f4fe12c..1bcf4473 100644 --- a/src/iommu.rs +++ b/src/iommu.rs @@ -693,3 +693,891 @@ where .finish() } } + +#[cfg(test)] +mod tests { + use super::{Error, IotlbIterator, IovaRange, MappedRange}; + use crate::bitmap::{AtomicBitmap, NewBitmap}; + use crate::{ + Address, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap, GuestMemoryRegion, + GuestMemoryResult, IoMemory, Iommu, IommuMemory, Iotlb, Permissions, + }; + use std::fmt::Debug; + use std::num::NonZeroUsize; + use std::ops::Deref; + use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; + use std::sync::{RwLock, RwLockReadGuard}; + + #[derive(Debug)] + struct SimpleIommu { + iotlb: RwLock, + /// Records the last fail event's base IOVA + fail_base: AtomicU64, + /// Records the last fail event's length + fail_len: AtomicUsize, + /// Records whether the last fail event was a miss + fail_was_miss: AtomicBool, + /// What base physical address to map to on the next fail event (0 means return error) + next_map_to: AtomicU64, + } + + impl SimpleIommu { + fn new() -> Self { + SimpleIommu { + iotlb: Iotlb::new().into(), + fail_base: 0.into(), + fail_len: 0.into(), + fail_was_miss: false.into(), + next_map_to: 0.into(), + } + } + + fn expect_mapping_request(&self, to_phys: GuestAddress) { + // Clear failed range info so it can be tested after the request + self.fail_base.store(0, Ordering::Relaxed); + self.fail_len.store(0, Ordering::Relaxed); + self.next_map_to.store(to_phys.0, Ordering::Relaxed); + } + + fn verify_mapping_request(&self, virt: GuestAddress, len: usize, was_miss: bool) { + assert_eq!(self.fail_base.load(Ordering::Relaxed), virt.0); + assert_eq!(self.fail_len.load(Ordering::Relaxed), len); + assert_eq!(self.fail_was_miss.load(Ordering::Relaxed), was_miss); + } + } + + impl Iommu for SimpleIommu { + type IotlbGuard<'a> = RwLockReadGuard<'a, Iotlb>; + + fn translate( + &self, + iova: GuestAddress, + length: usize, + access: Permissions, + ) -> Result>, Error> { + loop { + let mut fails = + match Iotlb::lookup(self.iotlb.read().unwrap(), iova, length, access) { + Ok(success) => return Ok(success), + Err(fails) => fails, + }; + let miss = !fails.misses.is_empty(); + let fail = fails + .misses + .pop() + .or_else(|| fails.access_fails.pop()) + .expect("No failure reported, even though a failure happened"); + self.fail_base.store(fail.base.0, Ordering::Relaxed); + self.fail_len.store(fail.length, Ordering::Relaxed); + self.fail_was_miss.store(miss, Ordering::Relaxed); + + if !fails.misses.is_empty() || !fails.access_fails.is_empty() { + return Err(Error::CannotResolve { + iova_range: IovaRange { base: iova, length }, + reason: "This IOMMU can only handle one failure per access".into(), + }); + } + + let map_to = self.next_map_to.swap(0, Ordering::Relaxed); + if map_to == 0 { + return Err(Error::CannotResolve { + iova_range: IovaRange { + base: fail.base, + length: fail.length, + }, + reason: "No mapping provided for failed range".into(), + }); + } + + self.iotlb.write().unwrap().set_mapping( + fail.base, + GuestAddress(map_to), + fail.length, + access, + )?; + } + } + } + + /// Verify that `iova`+`length` is mapped to `expected`. 
+ fn verify_hit( + iotlb: impl Deref + Debug, + iova: GuestAddress, + length: usize, + permissions: Permissions, + expected: impl IntoIterator, + ) { + let mut iter = Iotlb::lookup(iotlb, iova, length, permissions) + .inspect_err(|err| panic!("Unexpected lookup error {err:?}")) + .unwrap(); + + for e in expected { + assert_eq!(iter.next(), Some(e)); + } + assert_eq!(iter.next(), None); + } + + /// Verify that trying to look up `iova`+`length` results in misses at `expected_misses` and + /// access failures (permission-related) at `expected_access_fails`. + fn verify_fail( + iotlb: impl Deref + Debug, + iova: GuestAddress, + length: usize, + permissions: Permissions, + expected_misses: impl IntoIterator, + expected_access_fails: impl IntoIterator, + ) { + let fails = Iotlb::lookup(iotlb, iova, length, permissions) + .inspect(|hits| panic!("Expected error on lookup, found {hits:?}")) + .unwrap_err(); + + let mut miss_iter = fails.misses.into_iter(); + for e in expected_misses { + assert_eq!(miss_iter.next(), Some(e)); + } + assert_eq!(miss_iter.next(), None); + + let mut accf_iter = fails.access_fails.into_iter(); + for e in expected_access_fails { + assert_eq!(accf_iter.next(), Some(e)); + } + assert_eq!(accf_iter.next(), None); + } + + /// Enter adjacent IOTLB entries and verify they are merged into a single one. + #[test] + fn test_iotlb_merge() -> Result<(), Error> { + const IOVA: GuestAddress = GuestAddress(42); + const PHYS: GuestAddress = GuestAddress(87); + const LEN_1: usize = 123; + const LEN_2: usize = 234; + + let mut iotlb = Iotlb::new(); + iotlb.set_mapping(IOVA, PHYS, LEN_1, Permissions::ReadWrite)?; + iotlb.set_mapping( + GuestAddress(IOVA.0 + LEN_1 as u64), + GuestAddress(PHYS.0 + LEN_1 as u64), + LEN_2, + Permissions::ReadWrite, + )?; + + verify_hit( + &iotlb, + IOVA, + LEN_1 + LEN_2, + Permissions::ReadWrite, + [MappedRange { + base: PHYS, + length: LEN_1 + LEN_2, + }], + ); + + // Also check just a partial range + verify_hit( + &iotlb, + GuestAddress(IOVA.0 + LEN_1 as u64 - 1), + 2, + Permissions::ReadWrite, + [MappedRange { + base: GuestAddress(PHYS.0 + LEN_1 as u64 - 1), + length: 2, + }], + ); + + Ok(()) + } + + /// Test that adjacent IOTLB entries that map to the same physical address are not merged into + /// a single entry. 
+ #[test] + fn test_iotlb_nomerge_same_phys() -> Result<(), Error> { + const IOVA: GuestAddress = GuestAddress(42); + const PHYS: GuestAddress = GuestAddress(87); + const LEN_1: usize = 123; + const LEN_2: usize = 234; + + let mut iotlb = Iotlb::new(); + iotlb.set_mapping(IOVA, PHYS, LEN_1, Permissions::ReadWrite)?; + iotlb.set_mapping( + GuestAddress(IOVA.0 + LEN_1 as u64), + PHYS, + LEN_2, + Permissions::ReadWrite, + )?; + + verify_hit( + &iotlb, + IOVA, + LEN_1 + LEN_2, + Permissions::ReadWrite, + [ + MappedRange { + base: PHYS, + length: LEN_1, + }, + MappedRange { + base: PHYS, + length: LEN_2, + }, + ], + ); + + Ok(()) + } + + /// Test permission handling + #[test] + fn test_iotlb_perms() -> Result<(), Error> { + const IOVA_R: GuestAddress = GuestAddress(42); + const PHYS_R: GuestAddress = GuestAddress(87); + const LEN_R: usize = 123; + const IOVA_W: GuestAddress = GuestAddress(IOVA_R.0 + LEN_R as u64); + const PHYS_W: GuestAddress = GuestAddress(PHYS_R.0 + LEN_R as u64); + const LEN_W: usize = 234; + const IOVA_FULL: GuestAddress = IOVA_R; + const LEN_FULL: usize = LEN_R + LEN_W; + + let mut iotlb = Iotlb::new(); + iotlb.set_mapping(IOVA_R, PHYS_R, LEN_R, Permissions::Read)?; + iotlb.set_mapping(IOVA_W, PHYS_W, LEN_W, Permissions::Write)?; + + // Test 1: Access whole range as R+W, should completely fail + verify_fail( + &iotlb, + IOVA_FULL, + LEN_FULL, + Permissions::ReadWrite, + [], + [ + IovaRange { + base: IOVA_R, + length: LEN_R, + }, + IovaRange { + base: IOVA_W, + length: LEN_W, + }, + ], + ); + + // Test 2: Access whole range as R-only, should fail on second part + verify_fail( + &iotlb, + IOVA_FULL, + LEN_FULL, + Permissions::Read, + [], + [IovaRange { + base: IOVA_W, + length: LEN_W, + }], + ); + + // Test 3: Access whole range W-only, should fail on second part + verify_fail( + &iotlb, + IOVA_FULL, + LEN_FULL, + Permissions::Write, + [], + [IovaRange { + base: IOVA_R, + length: LEN_R, + }], + ); + + // Test 4: Access whole range w/o perms, should succeed + verify_hit( + &iotlb, + IOVA_FULL, + LEN_FULL, + Permissions::No, + [ + MappedRange { + base: PHYS_R, + length: LEN_R, + }, + MappedRange { + base: PHYS_W, + length: LEN_W, + }, + ], + ); + + // Test 5: Access R range as R, should succeed + verify_hit( + &iotlb, + IOVA_R, + LEN_R, + Permissions::Read, + [MappedRange { + base: PHYS_R, + length: LEN_R, + }], + ); + + // Test 6: Access W range as W, should succeed + verify_hit( + &iotlb, + IOVA_W, + LEN_W, + Permissions::Write, + [MappedRange { + base: PHYS_W, + length: LEN_W, + }], + ); + + Ok(()) + } + + /// Test IOTLB invalidation + #[test] + fn test_iotlb_invalidation() -> Result<(), Error> { + const IOVA: GuestAddress = GuestAddress(42); + const PHYS: GuestAddress = GuestAddress(87); + const LEN: usize = 123; + const INVAL_OFS: usize = LEN / 2; + const INVAL_LEN: usize = 3; + const IOVA_AT_INVAL: GuestAddress = GuestAddress(IOVA.0 + INVAL_OFS as u64); + const PHYS_AT_INVAL: GuestAddress = GuestAddress(PHYS.0 + INVAL_OFS as u64); + const IOVA_POST_INVAL: GuestAddress = GuestAddress(IOVA_AT_INVAL.0 + INVAL_LEN as u64); + const PHYS_POST_INVAL: GuestAddress = GuestAddress(PHYS_AT_INVAL.0 + INVAL_LEN as u64); + const POST_INVAL_LEN: usize = LEN - INVAL_OFS - INVAL_LEN; + + let mut iotlb = Iotlb::new(); + iotlb.set_mapping(IOVA, PHYS, LEN, Permissions::ReadWrite)?; + verify_hit( + &iotlb, + IOVA, + LEN, + Permissions::ReadWrite, + [MappedRange { + base: PHYS, + length: LEN, + }], + ); + + // Invalidate something in the middle; expect mapping at the start, then miss, then 
further + // mapping + iotlb.invalidate_mapping(IOVA_AT_INVAL, INVAL_LEN); + verify_hit( + &iotlb, + IOVA, + INVAL_OFS, + Permissions::ReadWrite, + [MappedRange { + base: PHYS, + length: INVAL_OFS, + }], + ); + verify_fail( + &iotlb, + IOVA, + LEN, + Permissions::ReadWrite, + [IovaRange { + base: IOVA_AT_INVAL, + length: INVAL_LEN, + }], + [], + ); + verify_hit( + &iotlb, + IOVA_POST_INVAL, + POST_INVAL_LEN, + Permissions::ReadWrite, + [MappedRange { + base: PHYS_POST_INVAL, + length: POST_INVAL_LEN, + }], + ); + + // And invalidate everything; expect full miss + iotlb.invalidate_all(); + verify_fail( + &iotlb, + IOVA, + LEN, + Permissions::ReadWrite, + [IovaRange { + base: IOVA, + length: LEN, + }], + [], + ); + + Ok(()) + } + + /// Create `IommuMemory` backed by multiple physical regions, all mapped into a single virtual + /// region (if `virt_start`/`virt_perm` are given). + /// + /// Memory is filled with incrementing (overflowing) bytes, starting with value `value_offset`. + #[cfg(feature = "backend-mmap")] + fn create_virt_memory( + virt_mapping: Option<(GuestAddress, Permissions)>, + value_offset: u8, + phys_regions: impl IntoIterator, + bitmap: B, + ) -> IommuMemory, SimpleIommu> { + let phys_ranges = phys_regions + .into_iter() + .map(|range| (range.base, range.length)) + .collect::>(); + let phys_mem = GuestMemoryMmap::::from_ranges(&phys_ranges).unwrap(); + + let mut byte_val = value_offset; + for (base, len) in &phys_ranges { + let mut slices = phys_mem + .get_slices(*base, *len, Permissions::Write) + .inspect_err(|err| panic!("Failed to access memory: {err}")) + .unwrap(); + let slice = slices + .next() + .unwrap() + .inspect_err(|err| panic!("Failed to access memory: {err}")) + .unwrap(); + assert!(slices.next().is_none(), "Expected single slice"); + + for i in 0..*len { + slice.write(&[byte_val], i).unwrap(); + byte_val = byte_val.wrapping_add(1); + } + } + + let mem = IommuMemory::new(phys_mem, SimpleIommu::new(), true, bitmap); + + // IOMMU is in use, this will be `None` + assert!(mem.physical_memory().is_none()); + + if let Some((mut virt, perm)) = virt_mapping { + for (base, len) in phys_ranges { + let mut iotlb = mem.iommu().iotlb.write().unwrap(); + iotlb.set_mapping(virt, base, len, perm).unwrap(); + virt = GuestAddress(virt.0 + len as u64); + } + } + + mem + } + + /// Verify the byte contents at `start`+`len`. Assume the initial byte value to be + /// `value_offset`. + /// + /// Each byte is expected to be incremented over the last (as created by + /// `create_virt_memory()`). + /// + /// Return an error if mapping fails, but just panic if there is a content mismatch. 
+ #[cfg(feature = "backend-mmap")] + fn check_virt_mem_content( + mem: &impl IoMemory, + start: GuestAddress, + len: usize, + value_offset: u8, + ) -> GuestMemoryResult<()> { + // First try the `try_access()` interface + let mut ref_value = value_offset; + let processed_len = mem.try_access( + len, + start, + Permissions::Read, + |ofs, count, in_region_addr, region| -> GuestMemoryResult { + assert_eq!(ofs as u8, ref_value.wrapping_sub(value_offset)); + for i in 0..count { + let addr = in_region_addr.checked_add(i as u64).unwrap(); + let val = region.load::(addr, Ordering::Relaxed)?; + assert_eq!(val, ref_value); + ref_value = ref_value.wrapping_add(1); + } + Ok(count) + }, + )?; + assert_eq!(processed_len, len); + + // Next try the `get_slices()` interface + ref_value = value_offset; + for slice in mem.get_slices(start, len, Permissions::Read).unwrap() { + let slice = slice.unwrap(); + + let count = slice.len(); + let mut data = vec![0u8; count]; + slice.read(&mut data, 0).unwrap(); + for val in data { + assert_eq!(val, ref_value); + ref_value = ref_value.wrapping_add(1); + } + } + + Ok(()) + } + + #[cfg(feature = "backend-mmap")] + fn verify_virt_mem_content( + m: &impl IoMemory, + start: GuestAddress, + len: usize, + value_offset: u8, + ) { + check_virt_mem_content(m, start, len, value_offset).unwrap(); + } + + /// Verify that trying to read from `start`+`len` fails (because of `CannotResolve`). + /// + /// The reported failed-to-map range is checked to be `fail_start`+`fail_len`. `fail_start` + /// defaults to `start`, `fail_len` defaults to the remaining length of the whole mapping + /// starting at `fail_start` (i.e. `start + len - fail_start`). + #[cfg(feature = "backend-mmap")] + fn verify_virt_mem_error( + m: &impl IoMemory, + start: GuestAddress, + len: usize, + fail_start: Option, + fail_len: Option, + ) { + let fail_start = fail_start.unwrap_or(start); + let fail_len = fail_len.unwrap_or(len - (fail_start.0 - start.0) as usize); + let err = check_virt_mem_content(m, start, len, 0).unwrap_err(); + let GuestMemoryError::IommuError(Error::CannotResolve { + iova_range: failed_range, + reason: _, + }) = err + else { + panic!("Unexpected error: {err:?}"); + }; + assert_eq!( + failed_range, + IovaRange { + base: fail_start, + length: fail_len, + } + ); + } + + /// Test `IommuMemory`, with pre-filled mappings. 
+ #[cfg(feature = "backend-mmap")] + #[test] + fn test_iommu_memory_pre_mapped() { + const PHYS_START_1: GuestAddress = GuestAddress(0x4000); + const PHYS_START_2: GuestAddress = GuestAddress(0x8000); + const PHYS_LEN: usize = 128; + const VIRT_START: GuestAddress = GuestAddress(0x2a000); + const VIRT_LEN: usize = PHYS_LEN * 2; + const VIRT_POST_MAP: GuestAddress = GuestAddress(VIRT_START.0 + VIRT_LEN as u64); + + let mem = create_virt_memory( + Some((VIRT_START, Permissions::Read)), + 0, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + (), + ); + + assert!(mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::No)); + assert!(mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Read)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Write)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::ReadWrite)); + assert!(!mem.range_accessible(GuestAddress(VIRT_START.0 - 1), 1, Permissions::No)); + assert!(!mem.range_accessible(VIRT_POST_MAP, 1, Permissions::No)); + + verify_virt_mem_content(&mem, VIRT_START, VIRT_LEN, 0); + verify_virt_mem_error(&mem, GuestAddress(VIRT_START.0 - 1), 1, None, None); + verify_virt_mem_error(&mem, VIRT_POST_MAP, 1, None, None); + verify_virt_mem_error(&mem, VIRT_START, VIRT_LEN + 1, Some(VIRT_POST_MAP), None); + } + + /// Test `IommuMemory`, with mappings created through the IOMMU on the fly. + #[cfg(feature = "backend-mmap")] + #[test] + fn test_iommu_memory_live_mapped() { + const PHYS_START_1: GuestAddress = GuestAddress(0x4000); + const PHYS_START_2: GuestAddress = GuestAddress(0x8000); + const PHYS_LEN: usize = 128; + const VIRT_START: GuestAddress = GuestAddress(0x2a000); + const VIRT_START_1: GuestAddress = VIRT_START; + const VIRT_START_2: GuestAddress = GuestAddress(VIRT_START.0 + PHYS_LEN as u64); + const VIRT_LEN: usize = PHYS_LEN * 2; + const VIRT_POST_MAP: GuestAddress = GuestAddress(VIRT_START.0 + VIRT_LEN as u64); + + let mem = create_virt_memory( + None, + 0, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + (), + ); + + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::No)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Read)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::Write)); + assert!(!mem.range_accessible(VIRT_START, VIRT_LEN, Permissions::ReadWrite)); + assert!(!mem.range_accessible(GuestAddress(VIRT_START.0 - 1), 1, Permissions::No)); + assert!(!mem.range_accessible(VIRT_POST_MAP, 1, Permissions::No)); + + verify_virt_mem_error(&mem, VIRT_START, VIRT_LEN, None, None); + verify_virt_mem_error(&mem, GuestAddress(VIRT_START.0 - 1), 1, None, None); + verify_virt_mem_error(&mem, VIRT_POST_MAP, 1, None, None); + verify_virt_mem_error(&mem, VIRT_START, VIRT_LEN + 1, None, None); + + let iommu = mem.iommu(); + + // Can only map one region at a time (with `SimpleIommu`), so only access `PHYS_LEN` first, + // not `VIRT_LEN` + iommu.expect_mapping_request(PHYS_START_1); + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN, 0); + iommu.verify_mapping_request(VIRT_START_1, PHYS_LEN, true); + + iommu.expect_mapping_request(PHYS_START_2); + verify_virt_mem_content(&mem, VIRT_START, VIRT_LEN, 0); + iommu.verify_mapping_request(VIRT_START_2, PHYS_LEN, true); + + // Also check invalid access failure + iommu + .iotlb + .write() + .unwrap() + .set_mapping(VIRT_START_1, PHYS_START_1, PHYS_LEN, 
Permissions::Write) + .unwrap(); + + iommu.expect_mapping_request(PHYS_START_1); + verify_virt_mem_content(&mem, VIRT_START, VIRT_LEN, 0); + iommu.verify_mapping_request(VIRT_START_1, PHYS_LEN, false); + } + + /// Test replacing the physical memory of an `IommuMemory`. + #[cfg(feature = "backend-mmap")] + #[test] + fn test_mem_replace() { + const PHYS_START_1: GuestAddress = GuestAddress(0x4000); + const PHYS_START_2: GuestAddress = GuestAddress(0x8000); + const PHYS_LEN: usize = 128; + const VIRT_START: GuestAddress = GuestAddress(0x2a000); + + // Note only one physical region. `mem2` will have two, to see that this pattern + // (`inner_replaced()`) can be used to e.g. extend physical memory. + let mem = create_virt_memory( + Some((VIRT_START, Permissions::Read)), + 0, + [MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }], + (), + ); + + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN, 0); + verify_virt_mem_error( + &mem, + VIRT_START, + PHYS_LEN * 2, + Some(GuestAddress(VIRT_START.0 + PHYS_LEN as u64)), + None, + ); + + let mut mem2 = create_virt_memory( + Some((VIRT_START, Permissions::Read)), + 42, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + (), + ); + + verify_virt_mem_content(&mem2, VIRT_START, PHYS_LEN * 2, 42); + + // Clone `mem` before replacing its physical memory, to see that works + let mem_cloned = mem.clone(); + + // Use `mem2`'s physical memory for `mem` + mem2.set_iommu_enabled(false); + let pmem2 = mem2.physical_memory().unwrap(); + assert!(std::ptr::eq(pmem2, mem2.inner())); + let mem = mem.inner_replaced(pmem2.clone()); + + // The physical memory has been replaced, but `mem` still uses its old IOMMU, so the + // mapping for everything past VIRT_START + PHYS_LEN does not yet exist. + mem.iommu().expect_mapping_request(PHYS_START_2); + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN * 2, 42); + mem.iommu().verify_mapping_request( + GuestAddress(VIRT_START.0 + PHYS_LEN as u64), + PHYS_LEN, + true, + ); + + // Verify `mem`'s clone still is the same (though it does use the same IOMMU) + verify_virt_mem_content(&mem_cloned, VIRT_START, PHYS_LEN, 0); + // See, it's the same IOMMU (i.e. it has a mapping PHYS_START_2): + verify_hit( + mem_cloned.iommu().iotlb.read().unwrap(), + VIRT_START, + PHYS_LEN * 2, + Permissions::Read, + [ + MappedRange { + base: PHYS_START_1, + length: PHYS_LEN, + }, + MappedRange { + base: PHYS_START_2, + length: PHYS_LEN, + }, + ], + ); + // (But we cannot access that mapping because `mem_cloned`'s physical memory does not + // contain that physical range.) + } + + /// In `mem`'s dirty bitmap, verify that the given `clean` addresses are clean, and the `dirty` + /// addresses are dirty. Auto-clear the dirty addresses checked. + /// + /// Cannot import `GuestMemory` in this module, as that would interfere with `IoMemory` for + /// methods that have the same name between the two. 
+ #[cfg(all(feature = "backend-bitmap", feature = "backend-mmap"))] + fn verify_mem_bitmap< + M: crate::GuestMemory, + R: GuestMemoryRegion, + I: Iommu, + >( + mem: &IommuMemory, + clean: impl IntoIterator, + dirty: impl IntoIterator, + ) { + let bitmap = mem.bitmap(); + for addr in clean { + if bitmap.is_addr_set(addr) { + panic!("Expected addr {addr:#x} to be clean, but is dirty"); + } + } + for addr in dirty { + if !bitmap.is_addr_set(addr) { + panic!("Expected addr {addr:#x} to be dirty, but is clean"); + } + bitmap.reset_addr_range(addr, 1); + } + } + + #[cfg(all(feature = "backend-bitmap", feature = "backend-mmap"))] + #[test] + fn test_dirty_bitmap() { + const PAGE_SIZE: usize = 4096; + const PHYS_START: GuestAddress = GuestAddress(0x4000); + const PHYS_LEN: usize = PAGE_SIZE * 2; + const PHYS_PAGE_0: usize = PHYS_START.0 as usize; + const PHYS_PAGE_1: usize = PHYS_START.0 as usize + PAGE_SIZE; + const VIRT_START: GuestAddress = GuestAddress(0x2a000); + const VIRT_PAGE_0: usize = VIRT_START.0 as usize; + const VIRT_PAGE_1: usize = VIRT_START.0 as usize + PAGE_SIZE; + + let bitmap = AtomicBitmap::new( + VIRT_START.0 as usize + PHYS_LEN, + NonZeroUsize::new(PAGE_SIZE).unwrap(), + ); + + let mem = create_virt_memory( + Some((VIRT_START, Permissions::ReadWrite)), + 0, + [MappedRange { + base: PHYS_START, + length: PHYS_LEN, + }], + bitmap, + ); + + // Check bitmap is cleared before everything -- through the whole test, the physical ranges + // should remain clean as the bitmap is only supposed to track IOVAs + verify_mem_bitmap( + &mem, + [PHYS_PAGE_0, PHYS_PAGE_1, VIRT_PAGE_0, VIRT_PAGE_1], + [], + ); + + // Just to be sure, check that PHYS_PAGE_0 and PHYS_PAGE_1 technically can be dirtied, + // though, or testing them would not be really useful + mem.bitmap().set_addr_range(PHYS_PAGE_0, 2 * PAGE_SIZE); + verify_mem_bitmap(&mem, [VIRT_PAGE_0, VIRT_PAGE_1], [PHYS_PAGE_0, PHYS_PAGE_1]); + + // Just read from memory, should not dirty bitmap + verify_virt_mem_content(&mem, VIRT_START, PHYS_LEN, 0); + verify_mem_bitmap( + &mem, + [PHYS_PAGE_0, PHYS_PAGE_1, VIRT_PAGE_0, VIRT_PAGE_1], + [], + ); + + // Verify that writing to a writeable slice causes dirtying, i.e. 
that the `VolatileSlice` + // returned here correctly dirties the bitmap when written to + let mut slices = mem + .get_slices(VIRT_START, PHYS_LEN, Permissions::Write) + .inspect_err(|err| panic!("Failed to access memory: {err}")) + .unwrap(); + let slice = slices + .next() + .unwrap() + .inspect_err(|err| panic!("Failed to access memory: {err}")) + .unwrap(); + assert!(slices.next().is_none(), "Expected single slice"); + + verify_mem_bitmap( + &mem, + [PHYS_PAGE_0, PHYS_PAGE_1, VIRT_PAGE_0, VIRT_PAGE_1], + [], + ); + + slice + .store(42, 0, Ordering::Relaxed) + .inspect_err(|err| panic!("Writing to memory failed: {err}")) + .unwrap(); + verify_mem_bitmap(&mem, [PHYS_PAGE_0, PHYS_PAGE_1, VIRT_PAGE_1], [VIRT_PAGE_0]); + + slice + .store(23, PAGE_SIZE, Ordering::Relaxed) + .inspect_err(|err| panic!("Writing to memory failed: {err}")) + .unwrap(); + verify_mem_bitmap(&mem, [PHYS_PAGE_0, PHYS_PAGE_1, VIRT_PAGE_0], [VIRT_PAGE_1]); + + // Verify that `try_access()` dirties the bitmap when reporting write access + mem.try_access(PHYS_LEN, VIRT_START, Permissions::Write, |ofs, _, _, _| { + // only report one page to be written + if ofs == 0 { + Ok(PAGE_SIZE) + } else { + assert_eq!(ofs, PAGE_SIZE); + Ok(0) + } + }) + .inspect_err(|err| panic!("Simulated write to memory failed: {err}")) + .unwrap(); + verify_mem_bitmap(&mem, [PHYS_PAGE_0, PHYS_PAGE_1, VIRT_PAGE_1], [VIRT_PAGE_0]); + + // And test simulated writing to the whole area + mem.try_access(PHYS_LEN, VIRT_START, Permissions::Write, |_, _, _, _| { + Ok(PHYS_LEN) + }) + .inspect_err(|err| panic!("Simulated write to memory failed: {err}")) + .unwrap(); + verify_mem_bitmap(&mem, [PHYS_PAGE_0, PHYS_PAGE_1], [VIRT_PAGE_0, VIRT_PAGE_1]); + } +} diff --git a/src/mmap/mod.rs b/src/mmap/mod.rs index 3a13a9df..47099dab 100644 --- a/src/mmap/mod.rs +++ b/src/mmap/mod.rs @@ -709,4 +709,29 @@ mod tests { .unwrap() }); } + + #[test] + fn test_change_region_addr() { + let addr1 = GuestAddress(0x1000); + let addr2 = GuestAddress(0x2000); + let gm = GuestMemoryMmap::from_ranges(&[(addr1, 0x1000)]).unwrap(); + + assert!(gm.find_region(addr1).is_some()); + assert!(gm.find_region(addr2).is_none()); + + let (gm, region) = gm.remove_region(addr1, 0x1000).unwrap(); + + assert!(gm.find_region(addr1).is_none()); + assert!(gm.find_region(addr2).is_none()); + + // Note that the `region` returned by `remove_region` is an `Arc<_>`, so users generally + // cannot mutate it (change its base address). In this test, we can (we could unwrap the + // `Arc<_>`), but our users generally cannot, hence why this interface exists. + let region = GuestRegionMmap::with_arc(Arc::clone(region.get_mapping()), addr2).unwrap(); + + let gm = gm.insert_region(Arc::new(region)).unwrap(); + + assert!(gm.find_region(addr1).is_none()); + assert!(gm.find_region(addr2).is_some()); + } } From 77d8294e20273fe1e8c20e32b002196b06e7ba76 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Fri, 30 May 2025 13:04:50 +0200 Subject: [PATCH 10/11] DESIGN: Document I/O virtual memory Document in DESIGN.md how I/O virtual memory is handled. Signed-off-by: Hanna Czenczek --- DESIGN.md | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/DESIGN.md b/DESIGN.md index 5915f50e..c3098d85 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -2,8 +2,8 @@ ## Objectives -- Provide a set of traits for accessing and configuring the physical memory of - a virtual machine. 
+- Provide a set of traits for accessing and configuring the physical and/or + I/O virtual memory of a virtual machine. - Provide a clean abstraction of the VM memory such that rust-vmm components can use it without depending on the implementation details specific to different VMMs. @@ -122,6 +122,29 @@ let buf = &mut [0u8; 5]; let result = guest_memory_mmap.write(buf, addr); ``` +### I/O Virtual Address Space + +When using an IOMMU, there no longer is direct access to the guest (physical) +address space, but instead only to I/O virtual address space. In this case: + +- `IoMemory` replaces `GuestMemory`: It requires specifying the required access + permissions (which are relevant for virtual memory). It also removes + interfaces that imply a mostly linear memory layout, because virtual memory is + fragmented into many pages instead of few (large) memory regions. + - Any `IoMemory` still has a `GuestMemory` inside as the underlying address + space, but if an IOMMU is used, that will generally not be guest physical + address space. With vhost-user, for example, it will be the VMM’s user + address space instead. + - `IommuMemory` as our only actually IOMMU-supporting `IoMemory` + implementation uses an `Iommu` object to translate I/O virtual addresses + (IOVAs) into VMM user addresses (VUAs), which are then passed to the inner + `GuestMemory` implementation (like `GuestMemoryMmap`). +- `GuestAddress` (for compatibility) refers to an address in any of these + address spaces: + - Guest physical addresses (GPAs) when no IOMMU is used, + - I/O virtual addresses (IOVAs), + - VMM user addresses (VUAs). + ### Utilities and Helpers The following utilities and helper traits/macros are imported from the @@ -143,7 +166,8 @@ with minor changes: - `Address` inherits `AddressValue` - `GuestMemoryRegion` inherits `Bytes`. The `Bytes` trait must be implemented. -- `GuestMemory` has a generic implementation of `Bytes`. +- `GuestMemory` has a generic implementation of `IoMemory` +- `IoMemory` has a generic implementation of `Bytes`. **Types**: From c2eda2f8a5c51f88af53b8f4eff480eebf60750e Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Fri, 30 May 2025 16:49:59 +0200 Subject: [PATCH 11/11] CHANGELOG: Add I/O virtual memory entry Signed-off-by: Hanna Czenczek --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13cae5d8..d19d68ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - \[[#311](https://github.com/rust-vmm/vm-memory/pull/311)\] Allow compiling without the ReadVolatile and WriteVolatile implementations - \[[#312](https://github.com/rust-vmm/vm-memory/pull/312)\] `GuestRegionContainer`, a generic container of `GuestMemoryRegion`s, generalizing `GuestMemoryMmap` (which is now a type alias for `GuestRegionContainer`). +- \[[#327](https://github.com/rust-vmm/vm-memory/pull/327)\] I/O virtual memory support via `IoMemory`, `IommuMemory`, and `Iommu`/`Iotlb` - \[[#338](https://github.com/rust-vmm/vm-memory/pull/338)\] Make `GuestMemoryAtomic` always implement `Clone`. - \[[#338](https://github.com/rust-vmm/vm-memory/pull/338)\] Make `GuestAddressSpace` a subtrait of `Clone`. - \[[#339](https://github.com/rust-vmm/vm-memory/pull/339)\] Add `GuestMemory::get_slices()`
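A minimal consumer-side sketch of the interface added by this series (the `read_u32_le` helper and its exact bounds are illustrative assumptions, not code from the patches): every access names its intended permissions, and fragmented virtual ranges are handled by iterating over the returned slices.

```rust
use vm_memory::{GuestAddress, GuestMemoryError, IoMemory, Permissions};

/// Read four little-endian bytes from an I/O virtual address.
///
/// Works for any `IoMemory`: a plain `GuestMemory` acts as a pass-through,
/// while an `IommuMemory` first translates the IOVA range through its IOMMU
/// and rejects the access if the mappings do not allow reading.
fn read_u32_le(mem: &impl IoMemory, iova: GuestAddress) -> Result<u32, GuestMemoryError> {
    let mut buf = [0u8; 4];
    let mut filled = 0;

    // Virtual memory may be arbitrarily fragmented, so the four bytes can be
    // spread over several physical chunks; `get_slices()` yields one
    // `VolatileSlice` per contiguous chunk.
    for slice in mem.get_slices(iova, buf.len(), Permissions::Read)? {
        let slice = slice?;
        filled += slice.copy_to(&mut buf[filled..]);
    }

    // On success, the slices together cover the whole requested range.
    debug_assert_eq!(filled, buf.len());
    Ok(u32::from_le_bytes(buf))
}
```

The same pattern applies to writes by passing `Permissions::Write`; with an `IommuMemory`, such writes are then tracked in the IOVA-based dirty bitmap introduced earlier in the series.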