Commit 189de73
native-lib/trace: Intercept mmap and munmap
1 parent 85aa6cd commit 189de73

9 files changed: +361 -107 lines changed

src/alloc/alloc_bytes.rs
Lines changed: 14 additions & 2 deletions

@@ -12,8 +12,12 @@ use crate::helpers::ToU64 as _;
 
 #[derive(Clone, Debug)]
 pub enum MiriAllocParams {
+    /// Allocation was created by calling `alloc::alloc()`.
     Global,
+    /// Allocation came from the isolated allocator.
     Isolated(Rc<RefCell<IsolatedAlloc>>),
+    /// Page mapped elsewhere that we don't try to deallocate.
+    Forged(usize),
 }
 
 /// Allocation bytes that explicitly handle the layout of the data they're storing.
@@ -27,8 +31,7 @@ pub struct MiriAllocBytes {
     /// * If `self.layout.size() == 0`, then `self.ptr` was allocated with the equivalent layout with size 1.
     /// * Otherwise, `self.ptr` points to memory allocated with `self.layout`.
     ptr: *mut u8,
-    /// Whether this instance of `MiriAllocBytes` had its allocation created by calling `alloc::alloc()`
-    /// (`Global`) or the discrete allocator (`Isolated`)
+    /// Metadata on where this allocation came from and therefore how to deallocate it.
     params: MiriAllocParams,
 }
 
@@ -56,6 +59,13 @@ impl Drop for MiriAllocBytes {
             MiriAllocParams::Global => alloc::dealloc(self.ptr, alloc_layout),
             MiriAllocParams::Isolated(alloc) =>
                 alloc.borrow_mut().dealloc(self.ptr, alloc_layout),
+            // We can't nicely support mapping a page on one side of the FFI
+            // bound and freeing on the other, so just do nothing on an attempt
+            // to free.
+            //
+            // FIXME: Should emit an unsupported diagnostic when `libc::munmap()`
+            // is manually called on memory backed by forged bytes.
+            MiriAllocParams::Forged(_) => (),
         }
     }
 }
@@ -121,6 +131,7 @@ impl AllocBytes for MiriAllocBytes {
             match params {
                 MiriAllocParams::Global => alloc::alloc(layout),
                 MiriAllocParams::Isolated(alloc) => alloc.borrow_mut().alloc(layout),
+                MiriAllocParams::Forged(addr) => std::ptr::with_exposed_provenance_mut(*addr),
             }
         };
         let alloc_bytes = MiriAllocBytes::alloc_with(size.to_u64(), align, params, alloc_fn)
@@ -141,6 +152,7 @@ impl AllocBytes for MiriAllocBytes {
             match params {
                 MiriAllocParams::Global => alloc::alloc_zeroed(layout),
                 MiriAllocParams::Isolated(alloc) => alloc.borrow_mut().alloc_zeroed(layout),
+                MiriAllocParams::Forged(addr) => std::ptr::with_exposed_provenance_mut(*addr),
             }
         };
         MiriAllocBytes::alloc_with(size, align, params, alloc_fn).ok()
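The `Forged` arms above lean on exposed provenance: the traced address arrives as a bare `usize`, and `with_exposed_provenance_mut` reconstitutes a usable pointer from it. A minimal standalone sketch of that round-trip in plain std Rust (not Miri code):

    fn main() {
        let mut x: [u8; 4] = [1, 2, 3, 4];
        // Keep only the raw address, exposing the allocation's provenance;
        // this mirrors how a traced `mmap` hands Miri a bare `usize`.
        let addr: usize = x.as_mut_ptr().expose_provenance();
        // Later, forge a pointer back from the exposed address, as the
        // `MiriAllocParams::Forged(addr)` arms above do.
        let p: *mut u8 = std::ptr::with_exposed_provenance_mut(addr);
        unsafe { *p = 42 };
        assert_eq!(x[0], 42);
    }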

src/alloc/isolated_alloc.rs
Lines changed: 29 additions & 2 deletions

@@ -26,6 +26,9 @@ pub struct IsolatedAlloc {
     /// Pointers to multiple-page-sized allocations. These must also be page-aligned,
     /// with their size stored as the second element of the vector.
     huge_ptrs: Vec<(NonNull<u8>, usize)>,
+    /// Addresses of pages that we don't actually manage, but which were allocated
+    /// by foreign code and where we need to track accesses.
+    forged_pages: Vec<NonNull<u8>>,
     /// The host (not emulated) page size.
     page_size: usize,
 }
@@ -37,6 +40,7 @@ impl IsolatedAlloc {
             page_ptrs: Vec::new(),
             huge_ptrs: Vec::new(),
             page_infos: Vec::new(),
+            forged_pages: Vec::new(),
             // SAFETY: `sysconf(_SC_PAGESIZE)` is always safe to call at runtime
             // See https://www.man7.org/linux/man-pages/man3/sysconf.3.html
             page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() },
@@ -308,8 +312,31 @@ impl IsolatedAlloc {
     /// Returns a list of page ranges managed by the allocator, given in terms of pointers
     /// and size (in bytes).
     pub fn pages(&self) -> impl Iterator<Item = (NonNull<u8>, usize)> {
-        let pages = self.page_ptrs.iter().map(|&p| (p, self.page_size));
-        pages.chain(self.huge_ptrs.iter().copied())
+        let with_pg_sz = |&p| (p, self.page_size);
+        let pages = self.page_ptrs.iter().map(with_pg_sz);
+        pages.chain(self.huge_ptrs.iter().copied()).chain(self.forged_pages.iter().map(with_pg_sz))
+    }
+
+    /// Makes the allocator also return this page address when `pages` is called.
+    pub fn forge_page(&mut self, addr: usize) {
+        assert!(addr.is_multiple_of(self.page_size), "Address is not page-aligned");
+        assert!(
+            !self.forged_pages.iter().any(|a| a.addr().get() == addr),
+            "Page already contained"
+        );
+        self.forged_pages.push(NonNull::new(std::ptr::with_exposed_provenance_mut(addr)).unwrap());
+    }
+
+    /// Deletes an entry from the list of forged pages.
+    pub fn remove_forged(&mut self, addr: usize) {
+        assert!(addr.is_multiple_of(self.page_size), "Address is not page-aligned");
+        let (index, _) = self
+            .forged_pages
+            .iter()
+            .enumerate()
+            .find(|(_, p_addr)| addr == p_addr.addr().get())
+            .expect("Page not contained");
+        self.forged_pages.remove(index);
     }
 }
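For illustration, a self-contained sketch of the `pages()` chaining above: owned single pages and forged pages share the one page size, while huge allocations carry their own. Plain `usize` addresses and made-up values stand in for the real `NonNull` pointers and allocator state.

    fn main() {
        let page_size = 4096usize;
        // Hypothetical allocator state: two owned pages, one 4-page huge
        // allocation, and one page forged from a traced foreign mmap.
        let page_ptrs = vec![0x1000usize, 0x3000];
        let huge_ptrs = vec![(0x10000usize, 4 * page_size)];
        let forged_pages = vec![0x8000usize];

        let with_pg_sz = |&p: &usize| (p, page_size);
        let pages = page_ptrs
            .iter()
            .map(with_pg_sz)
            .chain(huge_ptrs.iter().copied())
            .chain(forged_pages.iter().map(with_pg_sz));
        for (addr, sz) in pages {
            println!("{addr:#x}: {sz} bytes");
        }
    }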

src/shims/native_lib/mod.rs
Lines changed: 114 additions & 54 deletions

@@ -26,11 +26,27 @@ use self::ffi::OwnedArg;
 use crate::*;
 
 /// The final results of an FFI trace, containing every relevant event detected
-/// by the tracer.
+/// by the tracer. Events are ordered sequentially by the real time they occurred.
+pub type MemEvents = Vec<SingleEvent>;
+
+/// Singular event occurring in an FFI call.
 #[derive(Serialize, Deserialize, Debug)]
-pub struct MemEvents {
-    /// An list of memory accesses that occurred, in the order they occurred in.
-    pub acc_events: Vec<AccessEvent>,
+pub enum SingleEvent {
+    Acc(AccessEvent),
+    Map(MapEvent),
+}
+
+/// A single page in the address space being modified. Addresses must always be a
+/// multiple of the system page size, and the event is assumed to span from the
+/// address to `addr + page_size`.
+///
+/// TODO: Support pages that are not (just) RW.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub enum MapEvent {
+    /// A page was mapped with this base address.
+    Mmap(usize),
+    /// The page at this address was unmapped.
+    Munmap(usize),
 }
 
 /// A single memory access.
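For a feel of the new shape, here is a self-contained mock of such an event stream. The enums are simplified stand-ins: the real `AccessEvent` carries an address-range payload, reduced here to an illustrative `(addr, len)` pair.

    #[derive(Debug)]
    enum AccessEvent {
        Read((usize, usize)),        // simplified: (address, length)
        Write((usize, usize), bool), // bool: whether the write certainly happened
    }
    #[derive(Debug)]
    enum MapEvent {
        Mmap(usize),
        Munmap(usize),
    }
    #[derive(Debug)]
    enum SingleEvent {
        Acc(AccessEvent),
        Map(MapEvent),
    }
    type MemEvents = Vec<SingleEvent>;

    fn main() {
        // One plausible trace: foreign code reads an existing allocation,
        // maps a page, writes into it, then unmaps it again.
        let events: MemEvents = vec![
            SingleEvent::Acc(AccessEvent::Read((0x1000, 8))),
            SingleEvent::Map(MapEvent::Mmap(0x7000_0000)),
            SingleEvent::Acc(AccessEvent::Write((0x7000_0010, 8), true)),
            SingleEvent::Map(MapEvent::Munmap(0x7000_0000)),
        ];
        for e in &events {
            println!("{e:?}");
        }
    }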
@@ -209,58 +225,68 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
     }
 
     /// Applies the `events` to Miri's internal state. The event vector must be
-    /// ordered sequentially by when the accesses happened, and the sizes are
-    /// assumed to be exact.
-    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
+    /// ordered sequentially by when they occurred.
+    fn tracing_apply(&mut self, events: MemEvents) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        for evt in events {
+            match evt {
+                SingleEvent::Acc(acc) => this.tracing_apply_access(acc)?,
+                SingleEvent::Map(map) => this.tracing_apply_mapping(map)?,
+            }
+        }
+        interp_ok(())
+    }
+
+    /// Applies the possible effects of a single memory access. Sizes are assumed
+    /// to be exact.
+    fn tracing_apply_access(&mut self, acc: AccessEvent) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
 
-        for evt in events.acc_events {
-            let evt_rg = evt.get_range();
-            // LLVM at least permits vectorising accesses to adjacent allocations,
-            // so we cannot assume 1 access = 1 allocation. :(
-            let mut rg = evt_rg.addr..evt_rg.end();
-            while let Some(curr) = rg.next() {
-                let Some(alloc_id) =
-                    this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
-                else {
-                    throw_ub_format!("Foreign code did an out-of-bounds access!")
-                };
-                let alloc = this.get_alloc_raw(alloc_id)?;
-                // The logical and physical address of the allocation coincide, so we can use
-                // this instead of `addr_from_alloc_id`.
-                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
-
-                // Determine the range inside the allocation that this access covers. This range is
-                // in terms of offsets from the start of `alloc`. The start of the overlap range
-                // will be `curr`; the end will be the minimum of the end of the allocation and the
-                // end of the access' range.
-                let overlap = curr.strict_sub(alloc_addr)
-                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
-                // Skip forward however many bytes of the access are contained in the current
-                // allocation, subtracting 1 since the overlap range includes the current addr
-                // that was already popped off of the range.
-                rg.advance_by(overlap.len().strict_sub(1)).unwrap();
-
-                match evt {
-                    AccessEvent::Read(_) => {
-                        // If a provenance was read by the foreign code, expose it.
-                        for prov in alloc.provenance().get_range(this, overlap.into()) {
-                            this.expose_provenance(prov)?;
-                        }
+        let acc_rg = acc.get_range();
+        // LLVM at least permits vectorising accesses to adjacent allocations,
+        // so we cannot assume 1 access = 1 allocation. :(
+        let mut rg = acc_rg.addr..acc_rg.end();
+        while let Some(curr) = rg.next() {
+            let Some(alloc_id) =
+                this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
+            else {
+                throw_ub_format!("Foreign code did an out-of-bounds access!")
+            };
+            let alloc = this.get_alloc_raw(alloc_id)?;
+            // The logical and physical address of the allocation coincide, so we can use
+            // this instead of `addr_from_alloc_id`.
+            let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
+
+            // Determine the range inside the allocation that this access covers. This range is
+            // in terms of offsets from the start of `alloc`. The start of the overlap range
+            // will be `curr`; the end will be the minimum of the end of the allocation and the
+            // end of the access' range.
+            let overlap = curr.strict_sub(alloc_addr)
+                ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
+            // Skip forward however many bytes of the access are contained in the current
+            // allocation, subtracting 1 since the overlap range includes the current addr
+            // that was already popped off of the range.
+            rg.advance_by(overlap.len().strict_sub(1)).unwrap();
+
+            match acc {
+                AccessEvent::Read(_) => {
+                    // If a provenance was read by the foreign code, expose it.
+                    for prov in alloc.provenance().get_range(this, overlap.into()) {
+                        this.expose_provenance(prov)?;
                     }
-                    AccessEvent::Write(_, certain) => {
-                        // Sometimes we aren't certain if a write happened, in which case we
-                        // only initialise that data if the allocation is mutable.
-                        if certain || alloc.mutability.is_mut() {
-                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
-                            alloc.process_native_write(
-                                &cx.tcx,
-                                Some(AllocRange {
-                                    start: Size::from_bytes(overlap.start),
-                                    size: Size::from_bytes(overlap.len()),
-                                }),
-                            )
-                        }
+                }
+                AccessEvent::Write(_, certain) => {
+                    // Sometimes we aren't certain if a write happened, in which case we
+                    // only initialise that data if the allocation is mutable.
+                    if certain || alloc.mutability.is_mut() {
+                        let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
+                        alloc.process_native_write(
+                            &cx.tcx,
+                            Some(AllocRange {
+                                start: Size::from_bytes(overlap.start),
+                                size: Size::from_bytes(overlap.len()),
+                            }),
+                        )
                     }
                 }
             }
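A self-contained sketch of the splitting logic above: one traced access may span several adjacent allocations, so it is walked from its start address and chopped into per-allocation overlap ranges. The `allocs` slice of hypothetical `(base address, length)` pairs stands in for Miri's real allocation lookup.

    fn split_access(
        mut rg: std::ops::Range<usize>,
        allocs: &[(usize, usize)],
    ) -> Vec<(usize, std::ops::Range<usize>)> {
        let mut out = Vec::new();
        while !rg.is_empty() {
            // Find the allocation containing the current address, as
            // `alloc_id_from_addr` does; no match would be UB in Miri.
            let (idx, &(base, len)) = allocs
                .iter()
                .enumerate()
                .find(|&(_, &(base, len))| (base..base + len).contains(&rg.start))
                .expect("foreign code did an out-of-bounds access");
            // The overlap, in offsets from the allocation's base.
            let end = rg.end.min(base + len);
            out.push((idx, rg.start - base..end - base));
            rg.start = end;
        }
        out
    }

    fn main() {
        // Two adjacent 8-byte allocations; a 12-byte access starting at 0x104
        // covers the tail of the first and the head of the second.
        let allocs = [(0x100, 8), (0x108, 8)];
        assert_eq!(split_access(0x104..0x110, &allocs), vec![(0, 4..8), (1, 0..8)]);
        println!("split ok");
    }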
@@ -269,6 +295,40 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
         interp_ok(())
     }
 
+    /// Forges an allocation corresponding to a page mapping.
+    fn tracing_apply_mapping(&mut self, map: MapEvent) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        let kind = MemoryKind::Machine(MiriMemoryKind::Mmap);
+
+        match map {
+            MapEvent::Mmap(addr) => {
+                let page_size = this.machine.page_size;
+                // Pretend an allocation was created at this address, and register
+                // it with the machine's allocator so it can track it.
+                let forged = Allocation::new(
+                    Size::from_bytes(page_size),
+                    rustc_abi::Align::from_bytes(page_size).unwrap(),
+                    AllocInit::Zero,
+                    crate::alloc::MiriAllocParams::Forged(addr),
+                );
+                let ptr = this.insert_allocation(forged, kind)?;
+                this.expose_provenance(ptr.provenance)?;
+                // Also make sure accesses on this page are intercepted.
+                this.machine.allocator.as_mut().unwrap().borrow_mut().forge_page(addr);
+            }
+            MapEvent::Munmap(addr) => {
+                let ptr = this.ptr_from_addr_cast(addr.to_u64())?;
+                // This will call `munmap` on already-unmapped memory; that's fine,
+                // since we intentionally ignore the returned error from `munmap`
+                // to allow this without more invasive changes.
+                this.deallocate_ptr(ptr, None, kind)?;
+                this.machine.allocator.as_mut().unwrap().borrow_mut().remove_forged(addr);
+            }
+        }
+
+        interp_ok(())
+    }
+
     /// Extract the value from the result of reading an operand from the machine
     /// and convert it to a `OwnedArg`.
     fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
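For context, a minimal host-side mapping of the kind `tracing_apply_mapping` now models, written against the `libc` crate. The Linux-flavoured flag set is illustrative, not what any particular test uses.

    fn main() {
        unsafe {
            let page = libc::sysconf(libc::_SC_PAGESIZE) as usize;
            let p = libc::mmap(
                std::ptr::null_mut(),
                page,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            );
            assert_ne!(p, libc::MAP_FAILED);
            // A `MapEvent::Mmap(p as usize)` would be recorded here, and Miri
            // would forge a page-sized allocation at this address...
            *(p as *mut u8) = 1;
            // ...and a `MapEvent::Munmap(p as usize)` here, deallocating it.
            assert_eq!(libc::munmap(p, page), 0);
        }
    }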
@@ -492,7 +552,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;
 
         if tracing {
-            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
+            this.tracing_apply(maybe_memevents.unwrap())?;
         }
 
         this.write_immediate(*ret, dest)?;

src/shims/native_lib/trace/child.rs
Lines changed: 15 additions & 3 deletions

@@ -55,6 +55,17 @@ impl Supervisor {
         Ok(())
     }
 
+    unsafe fn protect_pages_ignore_errs(
+        pages: impl Iterator<Item = (NonNull<u8>, usize)>,
+        prot: mman::ProtFlags,
+    ) {
+        for (pg, sz) in pages {
+            unsafe {
+                let _ = mman::mprotect(pg.cast(), sz, prot);
+            };
+        }
+    }
+
     /// Performs an arbitrary FFI call, enabling tracing from the supervisor.
     /// As this locks the supervisor via a mutex, no other threads may enter FFI
     /// until this function returns.
@@ -113,11 +124,12 @@ impl Supervisor {
 
         // SAFETY: We set memory back to normal, so this is safe.
         unsafe {
-            Self::protect_pages(
+            // Use the error-ignoring variant here, since it's possible that
+            // foreign code may have unmapped a page of ours.
+            Self::protect_pages_ignore_errs(
                 alloc.pages(),
                 mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE,
-            )
-            .unwrap();
+            );
         }
 
         // Signal the supervisor that we are done. Will block until the supervisor continues us.
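A standalone analogue of that error-ignoring pattern, assuming the `NonNull`-based `nix::sys::mman` API of recent `nix` releases (0.27+); `mmap_anonymous` here only exists to give `mprotect` something to act on.

    use std::num::NonZeroUsize;

    use nix::sys::mman;

    fn main() -> nix::Result<()> {
        // Map one anonymous RW page so there is something to protect.
        let len = NonZeroUsize::new(4096).unwrap(); // assume 4 KiB pages
        let page = unsafe {
            mman::mmap_anonymous(
                None,
                len,
                mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE,
                mman::MapFlags::MAP_PRIVATE,
            )?
        };
        // Revoke access, as the supervisor does around a traced FFI call...
        unsafe { mman::mprotect(page, len.get(), mman::ProtFlags::PROT_NONE)? };
        // ...then restore it, discarding any error exactly as the new helper
        // does: if foreign code already unmapped the page, mprotect fails
        // with ENOMEM, and that is fine.
        let _ = unsafe {
            mman::mprotect(
                page,
                len.get(),
                mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE,
            )
        };
        unsafe { mman::munmap(page, len.get())? };
        Ok(())
    }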
