16 changes: 14 additions & 2 deletions src/alloc/alloc_bytes.rs
@@ -12,8 +12,12 @@ use crate::helpers::ToU64 as _;

#[derive(Clone, Debug)]
pub enum MiriAllocParams {
/// Allocation was created by calling `alloc::alloc()`.
Global,
/// Allocation came from the isolated allocator.
Isolated(Rc<RefCell<IsolatedAlloc>>),
/// Page mapped elsewhere that we don't try to deallocate.
Forged(usize),
}
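
The `Forged` variant stores only a raw address, so turning it back into a usable pointer relies on Rust's exposed-provenance APIs. Below is a minimal standalone sketch of that round trip (illustrative only, not Miri code):

```rust
fn main() {
    let mut value: u8 = 0;
    // Expose the pointer's provenance and keep only the raw address, much like
    // a page address reported by the tracer for a foreign `mmap`.
    let addr: usize = (&mut value as *mut u8).expose_provenance();
    // Reconstruct a pointer from the bare address; this is the same call the
    // `Forged(addr)` arms below use instead of allocating fresh memory.
    let ptr: *mut u8 = std::ptr::with_exposed_provenance_mut(addr);
    unsafe { *ptr = 42 };
    assert_eq!(value, 42);
}
```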

/// Allocation bytes that explicitly handle the layout of the data they're storing.
@@ -27,8 +31,7 @@ pub struct MiriAllocBytes {
/// * If `self.layout.size() == 0`, then `self.ptr` was allocated with the equivalent layout with size 1.
/// * Otherwise, `self.ptr` points to memory allocated with `self.layout`.
ptr: *mut u8,
/// Whether this instance of `MiriAllocBytes` had its allocation created by calling `alloc::alloc()`
/// (`Global`) or the discrete allocator (`Isolated`)
/// Metadata on where this allocation came from and therefore how to deallocate it.
params: MiriAllocParams,
}

@@ -56,6 +59,13 @@ impl Drop for MiriAllocBytes {
MiriAllocParams::Global => alloc::dealloc(self.ptr, alloc_layout),
MiriAllocParams::Isolated(alloc) =>
alloc.borrow_mut().dealloc(self.ptr, alloc_layout),
// We can't nicely support mapping a page on one side of the FFI
// boundary and freeing on the other, so just do nothing on an attempt
// to free.
//
// FIXME: Should emit an unsupported diagnostic when `libc::munmap()`
// is manually called on memory backed by forged bytes.
MiriAllocParams::Forged(_) => (),
}
}
}
@@ -121,6 +131,7 @@ impl AllocBytes for MiriAllocBytes {
match params {
MiriAllocParams::Global => alloc::alloc(layout),
MiriAllocParams::Isolated(alloc) => alloc.borrow_mut().alloc(layout),
MiriAllocParams::Forged(addr) => std::ptr::with_exposed_provenance_mut(*addr),
}
};
let alloc_bytes = MiriAllocBytes::alloc_with(size.to_u64(), align, params, alloc_fn)
@@ -141,6 +152,7 @@ impl AllocBytes for MiriAllocBytes {
match params {
MiriAllocParams::Global => alloc::alloc_zeroed(layout),
MiriAllocParams::Isolated(alloc) => alloc.borrow_mut().alloc_zeroed(layout),
MiriAllocParams::Forged(addr) => std::ptr::with_exposed_provenance_mut(*addr),
}
};
MiriAllocBytes::alloc_with(size, align, params, alloc_fn).ok()
31 changes: 29 additions & 2 deletions src/alloc/isolated_alloc.rs
@@ -26,6 +26,9 @@ pub struct IsolatedAlloc {
/// Pointers to multiple-page-sized allocations. These must also be page-aligned,
/// with their size stored as the second element of the vector.
huge_ptrs: Vec<(NonNull<u8>, usize)>,
/// Addresses of pages that we don't actually manage, but which were allocated
/// by foreign code and where we need to track accesses.
forged_pages: Vec<NonNull<u8>>,
/// The host (not emulated) page size.
page_size: usize,
}
@@ -37,6 +40,7 @@ impl IsolatedAlloc {
page_ptrs: Vec::new(),
huge_ptrs: Vec::new(),
page_infos: Vec::new(),
forged_pages: Vec::new(),
// SAFETY: `sysconf(_SC_PAGESIZE)` is always safe to call at runtime
// See https://www.man7.org/linux/man-pages/man3/sysconf.3.html
page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() },
@@ -308,8 +312,31 @@ impl IsolatedAlloc {
/// Returns a list of page ranges managed by the allocator, given in terms of pointers
/// and size (in bytes).
pub fn pages(&self) -> impl Iterator<Item = (NonNull<u8>, usize)> {
let pages = self.page_ptrs.iter().map(|&p| (p, self.page_size));
pages.chain(self.huge_ptrs.iter().copied())
let with_pg_sz = |&p| (p, self.page_size);
let pages = self.page_ptrs.iter().map(with_pg_sz);
pages.chain(self.huge_ptrs.iter().copied()).chain(self.forged_pages.iter().map(with_pg_sz))
}

/// Makes the allocator also return this page address when `pages` is called.
pub fn forge_page(&mut self, addr: usize) {
assert!(addr.is_multiple_of(self.page_size), "Address is not page-aligned");
assert!(
!self.forged_pages.iter().any(|a| a.addr().get() == addr),
"Page already contained"
);
self.forged_pages.push(NonNull::new(std::ptr::with_exposed_provenance_mut(addr)).unwrap());
}

/// Deletes an entry from the list of forged pages.
pub fn remove_forged(&mut self, addr: usize) {
assert!(addr.is_multiple_of(self.page_size), "Address is not page-aligned");
let (index, _) = self
.forged_pages
.iter()
.enumerate()
.find(|(_, p_addr)| addr == p_addr.addr().get())
.expect("Page not contained");
self.forged_pages.remove(index);
}
}

168 changes: 114 additions & 54 deletions src/shims/native_lib/mod.rs
@@ -26,11 +26,27 @@ use self::ffi::OwnedArg;
use crate::*;

/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer.
/// by the tracer. Events are ordered sequentially by the real time they occurred.
pub type MemEvents = Vec<SingleEvent>;

/// Singular event occurring in an FFI call.
#[derive(Serialize, Deserialize, Debug)]
pub struct MemEvents {
/// A list of memory accesses that occurred, in the order they occurred in.
pub acc_events: Vec<AccessEvent>,
pub enum SingleEvent {
Acc(AccessEvent),
Map(MapEvent),
}

/// A single page in the address space being modified. Addresses must always be a
/// multiple of the system page size, and the event is assumed to span from the
/// address to `addr + page_size`.
///
/// TODO: Support pages that are not (just) RW.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum MapEvent {
/// A page was mapped with this base address.
Mmap(usize),
/// The page at this address was unmapped.
Munmap(usize),
}

/// A single memory access.
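
For intuition about the shape of a trace, here is a hedged, self-contained sketch of an event stream mixing mapping and access events. The `AccessEvent` below is a simplified stand-in; the real type carries an address range plus, for writes, a certainty flag:

```rust
#[derive(Debug)]
enum MapEvent {
    /// A page was mapped with this base address.
    Mmap(usize),
    /// The page at this address was unmapped.
    Munmap(usize),
}

/// Simplified stand-in for the real access type.
#[derive(Debug)]
#[allow(dead_code)]
enum AccessEvent {
    Read(std::ops::Range<usize>),
    /// The bool records whether the write certainly happened.
    Write(std::ops::Range<usize>, bool),
}

#[derive(Debug)]
enum SingleEvent {
    Acc(AccessEvent),
    Map(MapEvent),
}

fn main() {
    let page = 0x7f00_0000_0000_usize;
    // Events are replayed in the order they occurred: foreign code maps a
    // page, writes eight bytes into it, then unmaps it again.
    let events: Vec<SingleEvent> = vec![
        SingleEvent::Map(MapEvent::Mmap(page)),
        SingleEvent::Acc(AccessEvent::Write(page..page + 8, true)),
        SingleEvent::Map(MapEvent::Munmap(page)),
    ];
    for event in &events {
        println!("{event:?}");
    }
}
```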
@@ -209,58 +225,68 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
}

/// Applies the `events` to Miri's internal state. The event vector must be
/// ordered sequentially by when the accesses happened, and the sizes are
/// assumed to be exact.
fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
/// ordered sequentially by when they occurred.
fn tracing_apply(&mut self, events: MemEvents) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
for evt in events {
match evt {
SingleEvent::Acc(acc) => this.tracing_apply_access(acc)?,
SingleEvent::Map(map) => this.tracing_apply_mapping(map)?,
}
}
interp_ok(())
}

/// Applies the possible effects of a single memory access. Sizes are assumed
/// to be exact.
fn tracing_apply_access(&mut self, acc: AccessEvent) -> InterpResult<'tcx> {
let this = self.eval_context_mut();

for evt in events.acc_events {
let evt_rg = evt.get_range();
// LLVM at least permits vectorising accesses to adjacent allocations,
// so we cannot assume 1 access = 1 allocation. :(
let mut rg = evt_rg.addr..evt_rg.end();
while let Some(curr) = rg.next() {
let Some(alloc_id) =
this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
else {
throw_ub_format!("Foreign code did an out-of-bounds access!")
};
let alloc = this.get_alloc_raw(alloc_id)?;
// The logical and physical address of the allocation coincide, so we can use
// this instead of `addr_from_alloc_id`.
let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

// Determine the range inside the allocation that this access covers. This range is
// in terms of offsets from the start of `alloc`. The start of the overlap range
// will be `curr`; the end will be the minimum of the end of the allocation and the
// end of the access' range.
let overlap = curr.strict_sub(alloc_addr)
..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
// Skip forward however many bytes of the access are contained in the current
// allocation, subtracting 1 since the overlap range includes the current addr
// that was already popped off of the range.
rg.advance_by(overlap.len().strict_sub(1)).unwrap();

match evt {
AccessEvent::Read(_) => {
// If a provenance was read by the foreign code, expose it.
for prov in alloc.provenance().get_range(this, overlap.into()) {
this.expose_provenance(prov)?;
}
let acc_rg = acc.get_range();
// LLVM at least permits vectorising accesses to adjacent allocations,
// so we cannot assume 1 access = 1 allocation. :(
let mut rg = acc_rg.addr..acc_rg.end();
while let Some(curr) = rg.next() {
let Some(alloc_id) =
this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
else {
throw_ub_format!("Foreign code did an out-of-bounds access!")
};
let alloc = this.get_alloc_raw(alloc_id)?;
// The logical and physical address of the allocation coincide, so we can use
// this instead of `addr_from_alloc_id`.
let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

// Determine the range inside the allocation that this access covers. This range is
// in terms of offsets from the start of `alloc`. The start of the overlap range
// will be `curr`; the end will be the minimum of the end of the allocation and the
// end of the access' range.
let overlap = curr.strict_sub(alloc_addr)
..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
// Skip forward however many bytes of the access are contained in the current
// allocation, subtracting 1 since the overlap range includes the current addr
// that was already popped off of the range.
rg.advance_by(overlap.len().strict_sub(1)).unwrap();

match acc {
AccessEvent::Read(_) => {
// If a provenance was read by the foreign code, expose it.
for prov in alloc.provenance().get_range(this, overlap.into()) {
this.expose_provenance(prov)?;
}
AccessEvent::Write(_, certain) => {
// Sometimes we aren't certain if a write happened, in which case we
// only initialise that data if the allocation is mutable.
if certain || alloc.mutability.is_mut() {
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
alloc.process_native_write(
&cx.tcx,
Some(AllocRange {
start: Size::from_bytes(overlap.start),
size: Size::from_bytes(overlap.len()),
}),
)
}
}
AccessEvent::Write(_, certain) => {
// Sometimes we aren't certain if a write happened, in which case we
// only initialise that data if the allocation is mutable.
if certain || alloc.mutability.is_mut() {
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
alloc.process_native_write(
&cx.tcx,
Some(AllocRange {
start: Size::from_bytes(overlap.start),
size: Size::from_bytes(overlap.len()),
}),
)
}
}
}
@@ -269,6 +295,40 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
interp_ok(())
}
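
To make the access-splitting logic above concrete, here is a self-contained sketch of walking one access range across adjacent allocations. The `Alloc` type and the addresses are made up for illustration; the real code additionally exposes provenance on reads and checks write certainty:

```rust
use std::ops::Range;

struct Alloc {
    addr: usize,
    len: usize,
}

/// Split `access` into (allocation index, overlap) pairs, where each overlap is
/// a range of offsets into that allocation: find the allocation containing the
/// current address, record the overlap, then skip past it.
fn split_access(allocs: &[Alloc], access: Range<usize>) -> Vec<(usize, Range<usize>)> {
    let mut out = Vec::new();
    let mut curr = access.start;
    while curr < access.end {
        let (idx, alloc) = allocs
            .iter()
            .enumerate()
            .find(|(_, a)| (a.addr..a.addr + a.len).contains(&curr))
            .expect("foreign code did an out-of-bounds access");
        // The overlap, expressed as offsets from the allocation's start.
        let overlap = (curr - alloc.addr)..(access.end - alloc.addr).min(alloc.len);
        curr += overlap.len();
        out.push((idx, overlap));
    }
    out
}

fn main() {
    // Two adjacent 16-byte allocations; an 8-byte access straddles the boundary.
    let allocs = [Alloc { addr: 0x1000, len: 16 }, Alloc { addr: 0x1010, len: 16 }];
    let parts = split_access(&allocs, 0x100c..0x1014);
    assert_eq!(parts, vec![(0, 12..16), (1, 0..4)]);
}
```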

/// Forges an allocation corresponding to a page mapping.
fn tracing_apply_mapping(&mut self, map: MapEvent) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let kind = MemoryKind::Machine(MiriMemoryKind::Mmap);

match map {
MapEvent::Mmap(addr) => {
let page_size = this.machine.page_size;
// Pretend an allocation was created at this address, and register
// it with the machine's allocator so it can track it.
let forged = Allocation::new(
Size::from_bytes(page_size),
rustc_abi::Align::from_bytes(page_size).unwrap(),
AllocInit::Zero,
crate::alloc::MiriAllocParams::Forged(addr),
);
let ptr = this.insert_allocation(forged, kind)?;
this.expose_provenance(ptr.provenance)?;
// Also make sure accesses on this page are intercepted.
this.machine.allocator.as_mut().unwrap().borrow_mut().forge_page(addr);
}
MapEvent::Munmap(addr) => {
let ptr = this.ptr_from_addr_cast(addr.to_u64())?;
// This will call `munmap` on already-unmapped memory; that's fine,
// since we intentionally ignore the returned error from `munmap`
// to allow this without more invasive changes.
this.deallocate_ptr(ptr, None, kind)?;
this.machine.allocator.as_mut().unwrap().borrow_mut().remove_forged(addr);
}
}

interp_ok(())
}
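
For context, a minimal sketch (using the `libc` crate, purely for illustration) of the kind of foreign behaviour this handles: a native function that maps an anonymous page, writes to it, and unmaps it again, which the tracer would report as an `Mmap`, a `Write` access, and a `Munmap`:

```rust
use std::ptr;

fn native_style_page_roundtrip(page_size: usize) {
    unsafe {
        // Map one anonymous read-write page, as foreign code might.
        let page = libc::mmap(
            ptr::null_mut(),
            page_size,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(page, libc::MAP_FAILED);
        // Write into the freshly mapped page...
        *(page as *mut u8) = 1;
        // ...then hand it back to the kernel.
        assert_eq!(libc::munmap(page, page_size), 0);
    }
}

fn main() {
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as usize;
    native_style_page_roundtrip(page_size);
}
```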

/// Extract the value from the result of reading an operand from the machine
/// and convert it to an `OwnedArg`.
fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
@@ -492,7 +552,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;

if tracing {
this.tracing_apply_accesses(maybe_memevents.unwrap())?;
this.tracing_apply(maybe_memevents.unwrap())?;
}

this.write_immediate(*ret, dest)?;
18 changes: 15 additions & 3 deletions src/shims/native_lib/trace/child.rs
@@ -55,6 +55,17 @@ impl Supervisor {
Ok(())
}

unsafe fn protect_pages_ignore_errs(
pages: impl Iterator<Item = (NonNull<u8>, usize)>,
prot: mman::ProtFlags,
) {
for (pg, sz) in pages {
unsafe {
let _ = mman::mprotect(pg.cast(), sz, prot);
};
}
}
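
A small standalone sketch (using `libc` directly, only for illustration) of why the errors must be ignored: on Linux, `mprotect` fails with `ENOMEM` when part of the range is no longer mapped, which is exactly the state foreign code can leave a forged page in:

```rust
fn main() {
    unsafe {
        let page_size = libc::sysconf(libc::_SC_PAGESIZE) as usize;
        // Map one page and immediately unmap it, simulating foreign code that
        // tears down a page we still have bookkeeping for.
        let page = libc::mmap(
            std::ptr::null_mut(),
            page_size,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        );
        assert_ne!(page, libc::MAP_FAILED);
        assert_eq!(libc::munmap(page, page_size), 0);
        // Re-protecting the now-unmapped page fails (expected: -1 with ENOMEM);
        // the supervisor ignores this instead of unwrapping.
        let ret = libc::mprotect(page, page_size, libc::PROT_READ | libc::PROT_WRITE);
        println!("mprotect returned {ret}: {}", std::io::Error::last_os_error());
    }
}
```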

/// Performs an arbitrary FFI call, enabling tracing from the supervisor.
/// As this locks the supervisor via a mutex, no other threads may enter FFI
/// until this function returns.
@@ -113,11 +124,12 @@

// SAFETY: We set memory back to normal, so this is safe.
unsafe {
Self::protect_pages(
// Use the error-ignoring variant here, since it's possible that
// foreign code may have unmapped a page of ours.
Self::protect_pages_ignore_errs(
alloc.pages(),
mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE,
)
.unwrap();
);
}

// Signal the supervisor that we are done. Will block until the supervisor continues us.