Skip to content

Commit 11bbb52

Browse files
committed
Auto merge of #83214 - cjgillot:dep-map, r=michaelwoerister
Mmap the incremental data instead of reading it. Instead of reading the full incremental state using `fs::read`, we memory-map it using a private, read-only, file-backed map. This allows the system to reclaim any memory we are not using, while ensuring we are not polluted by outside modifications to the file. Suggested in #83036 (comment) by `@bjorn3`.
2 parents 1698e3c + bcefd48 commit 11bbb52

File tree

5 files changed

+123
-88
lines changed

5 files changed

+123
-88
lines changed

compiler/rustc_incremental/src/persist/file_format.rs

+71-7
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
use std::env;
1313
use std::fs;
1414
use std::io::{self, Read};
15-
use std::path::Path;
15+
use std::path::{Path, PathBuf};
1616

17+
use rustc_data_structures::memmap::Mmap;
1718
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
1819
use rustc_serialize::Encoder;
20+
use rustc_session::Session;
1921

2022
/// The first few bytes of files generated by incremental compilation.
2123
const FILE_MAGIC: &[u8] = b"RSIC";
@@ -28,7 +30,7 @@ const HEADER_FORMAT_VERSION: u16 = 0;
2830
/// the Git commit hash.
2931
const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");
3032

31-
pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
33+
pub(crate) fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
3234
stream.emit_raw_bytes(FILE_MAGIC)?;
3335
stream.emit_raw_bytes(&[
3436
(HEADER_FORMAT_VERSION >> 0) as u8,
@@ -41,6 +43,61 @@ pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileE
4143
stream.emit_raw_bytes(rustc_version.as_bytes())
4244
}
4345

46+
pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
47+
where
48+
F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
49+
{
50+
debug!("save: storing data in {}", path_buf.display());
51+
52+
// Delete the old file, if any.
53+
// Note: It's important that we actually delete the old file and not just
54+
// truncate and overwrite it, since it might be a shared hard-link, the
55+
// underlying data of which we don't want to modify.
56+
//
57+
// We have to ensure we have dropped the memory maps to this file
58+
// before performing this removal.
59+
match fs::remove_file(&path_buf) {
60+
Ok(()) => {
61+
debug!("save: remove old file");
62+
}
63+
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
64+
Err(err) => {
65+
sess.err(&format!(
66+
"unable to delete old {} at `{}`: {}",
67+
name,
68+
path_buf.display(),
69+
err
70+
));
71+
return;
72+
}
73+
}
74+
75+
let mut encoder = match FileEncoder::new(&path_buf) {
76+
Ok(encoder) => encoder,
77+
Err(err) => {
78+
sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
79+
return;
80+
}
81+
};
82+
83+
if let Err(err) = write_file_header(&mut encoder, sess.is_nightly_build()) {
84+
sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
85+
return;
86+
}
87+
88+
if let Err(err) = encode(&mut encoder) {
89+
sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
90+
return;
91+
}
92+
93+
if let Err(err) = encoder.flush() {
94+
sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
95+
return;
96+
}
97+
98+
debug!("save: data written to disk successfully");
99+
}
100+
44101
/// Reads the contents of a file with a file header as defined in this module.
45102
///
46103
/// - Returns `Ok(Some((data, pos)))` if the file existed and was generated by a
@@ -54,14 +111,21 @@ pub fn read_file(
54111
report_incremental_info: bool,
55112
path: &Path,
56113
nightly_build: bool,
57-
) -> io::Result<Option<(Vec<u8>, usize)>> {
58-
let data = match fs::read(path) {
59-
Ok(data) => data,
114+
) -> io::Result<Option<(Mmap, usize)>> {
115+
let file = match fs::File::open(path) {
116+
Ok(file) => file,
60117
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
61118
Err(err) => return Err(err),
62119
};
120+
// SAFETY: This process must not modify nor remove the backing file while the memory map lives.
121+
// For the dep-graph and the work product index, the map is dropped as soon as the decoding is done.
122+
// For the query result cache, the memory map is dropped in save_dep_graph before calling
123+
// save_in and trying to remove the backing file.
124+
//
125+
// There is no way to prevent another process from modifying this file.
126+
let mmap = unsafe { Mmap::map(file) }?;
63127

64-
let mut file = io::Cursor::new(data);
128+
let mut file = io::Cursor::new(&*mmap);
65129

66130
// Check FILE_MAGIC
67131
{
@@ -103,7 +167,7 @@ pub fn read_file(
103167
}
104168

105169
let post_header_start_pos = file.position() as usize;
106-
Ok(Some((file.into_inner(), post_header_start_pos)))
170+
Ok(Some((mmap, post_header_start_pos)))
107171
}
108172

109173
fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) {

compiler/rustc_incremental/src/persist/load.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Code to save/load the dep-graph from files.
22
33
use rustc_data_structures::fx::FxHashMap;
4+
use rustc_data_structures::memmap::Mmap;
45
use rustc_middle::dep_graph::{SerializedDepGraph, WorkProduct, WorkProductId};
56
use rustc_middle::ty::OnDiskCache;
67
use rustc_serialize::opaque::Decoder;
@@ -48,7 +49,7 @@ fn load_data(
4849
report_incremental_info: bool,
4950
path: &Path,
5051
nightly_build: bool,
51-
) -> LoadResult<(Vec<u8>, usize)> {
52+
) -> LoadResult<(Mmap, usize)> {
5253
match file_format::read_file(report_incremental_info, path, nightly_build) {
5354
Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos },
5455
Ok(None) => {

compiler/rustc_incremental/src/persist/save.rs

+11-56
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
66
use rustc_serialize::Encodable as RustcEncodable;
77
use rustc_session::Session;
88
use std::fs;
9-
use std::io;
10-
use std::path::PathBuf;
119

1210
use super::data::*;
1311
use super::dirty_clean;
@@ -44,7 +42,14 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
4442
join(
4543
move || {
4644
sess.time("incr_comp_persist_result_cache", || {
47-
save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e));
45+
// Drop the memory map so that we can remove the file and write to it.
46+
if let Some(odc) = &tcx.on_disk_cache {
47+
odc.drop_serialized_data(tcx);
48+
}
49+
50+
file_format::save_in(sess, query_cache_path, "query cache", |e| {
51+
encode_query_cache(tcx, e)
52+
});
4853
});
4954
},
5055
move || {
@@ -86,7 +91,9 @@ pub fn save_work_product_index(
8691
debug!("save_work_product_index()");
8792
dep_graph.assert_ignored();
8893
let path = work_products_path(sess);
89-
save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e));
94+
file_format::save_in(sess, path, "work product index", |e| {
95+
encode_work_product_index(&new_work_products, e)
96+
});
9097

9198
// We also need to clean out old work-products, as not all of them are
9299
// deleted during invalidation. Some object files don't change their
@@ -113,58 +120,6 @@ pub fn save_work_product_index(
113120
});
114121
}
115122

116-
pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
117-
where
118-
F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
119-
{
120-
debug!("save: storing data in {}", path_buf.display());
121-
122-
// Delete the old file, if any.
123-
// Note: It's important that we actually delete the old file and not just
124-
// truncate and overwrite it, since it might be a shared hard-link, the
125-
// underlying data of which we don't want to modify
126-
match fs::remove_file(&path_buf) {
127-
Ok(()) => {
128-
debug!("save: remove old file");
129-
}
130-
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
131-
Err(err) => {
132-
sess.err(&format!(
133-
"unable to delete old {} at `{}`: {}",
134-
name,
135-
path_buf.display(),
136-
err
137-
));
138-
return;
139-
}
140-
}
141-
142-
let mut encoder = match FileEncoder::new(&path_buf) {
143-
Ok(encoder) => encoder,
144-
Err(err) => {
145-
sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
146-
return;
147-
}
148-
};
149-
150-
if let Err(err) = file_format::write_file_header(&mut encoder, sess.is_nightly_build()) {
151-
sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
152-
return;
153-
}
154-
155-
if let Err(err) = encode(&mut encoder) {
156-
sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
157-
return;
158-
}
159-
160-
if let Err(err) = encoder.flush() {
161-
sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
162-
return;
163-
}
164-
165-
debug!("save: data written to disk successfully");
166-
}
167-
168123
fn encode_work_product_index(
169124
work_products: &FxHashMap<WorkProductId, WorkProduct>,
170125
encoder: &mut FileEncoder,

compiler/rustc_middle/src/ty/context.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use crate::ty::{
2727
use rustc_ast as ast;
2828
use rustc_attr as attr;
2929
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
30+
use rustc_data_structures::memmap::Mmap;
3031
use rustc_data_structures::profiling::SelfProfilerRef;
3132
use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap};
3233
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
@@ -71,7 +72,7 @@ use std::sync::Arc;
7172

7273
pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
7374
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
74-
fn new(sess: &'tcx Session, data: Vec<u8>, start_pos: usize) -> Self
75+
fn new(sess: &'tcx Session, data: Mmap, start_pos: usize) -> Self
7576
where
7677
Self: Sized;
7778

@@ -100,6 +101,8 @@ pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
100101
fn register_reused_dep_node(&self, tcx: TyCtxt<'tcx>, dep_node: &DepNode);
101102
fn store_foreign_def_id_hash(&self, def_id: DefId, hash: DefPathHash);
102103

104+
fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>);
105+
103106
fn serialize(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult;
104107
}
105108

compiler/rustc_query_impl/src/on_disk_cache.rs

+35-23
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::QueryCtxt;
22
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
3-
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell};
3+
use rustc_data_structures::memmap::Mmap;
4+
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell, RwLock};
45
use rustc_data_structures::unhash::UnhashMap;
56
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, LOCAL_CRATE};
67
use rustc_hir::definitions::DefPathHash;
@@ -42,7 +43,7 @@ const TAG_EXPN_DATA: u8 = 1;
4243
/// any side effects that have been emitted during a query.
4344
pub struct OnDiskCache<'sess> {
4445
// The complete cache data in serialized form.
45-
serialized_data: Vec<u8>,
46+
serialized_data: RwLock<Option<Mmap>>,
4647

4748
// Collects all `QuerySideEffects` created during the current compilation
4849
// session.
@@ -182,7 +183,8 @@ impl EncodedSourceFileId {
182183
}
183184

184185
impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
185-
fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
186+
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
187+
fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
186188
debug_assert!(sess.opts.incremental.is_some());
187189

188190
// Wrap in a scope so we can borrow `data`.
@@ -204,7 +206,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
204206
};
205207

206208
Self {
207-
serialized_data: data,
209+
serialized_data: RwLock::new(Some(data)),
208210
file_index_to_stable_id: footer.file_index_to_stable_id,
209211
file_index_to_file: Default::default(),
210212
cnum_map: OnceCell::new(),
@@ -225,7 +227,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
225227

226228
fn new_empty(source_map: &'sess SourceMap) -> Self {
227229
Self {
228-
serialized_data: Vec::new(),
230+
serialized_data: RwLock::new(None),
229231
file_index_to_stable_id: Default::default(),
230232
file_index_to_file: Default::default(),
231233
cnum_map: OnceCell::new(),
@@ -244,7 +246,31 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
244246
}
245247
}
246248

247-
fn serialize(&self, tcx: TyCtxt<'sess>, encoder: &mut FileEncoder) -> FileEncodeResult {
249+
/// Execute all cache promotions and release the serialized backing Mmap.
250+
///
251+
/// Cache promotions require invoking queries, which needs to read the serialized data.
252+
/// In order to serialize the new on-disk cache, the former on-disk cache file needs to be
253+
/// deleted, hence we won't be able to refer to its memmapped data.
254+
fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>) {
255+
// Register any dep nodes that we reused from the previous session,
256+
// but didn't `DepNode::construct` in this session. This ensures
257+
// that their `DefPathHash` to `RawDefId` mappings are registered
258+
// in 'latest_foreign_def_path_hashes' if necessary, since that
259+
// normally happens in `DepNode::construct`.
260+
tcx.dep_graph.register_reused_dep_nodes(tcx);
261+
262+
// Load everything into memory so we can write it out to the on-disk
263+
// cache. The vast majority of cacheable query results should already
264+
// be in memory, so this should be a cheap operation.
265+
// Do this *before* we clone 'latest_foreign_def_path_hashes', since
266+
// loading existing queries may cause us to create new DepNodes, which
267+
// may in turn end up invoking `store_foreign_def_id_hash`
268+
tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));
269+
270+
*self.serialized_data.write() = None;
271+
}
272+
273+
fn serialize<'tcx>(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult {
248274
// Serializing the `DepGraph` should not modify it.
249275
tcx.dep_graph.with_ignore(|| {
250276
// Allocate `SourceFileIndex`es.
@@ -266,21 +292,6 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
266292
(file_to_file_index, file_index_to_stable_id)
267293
};
268294

269-
// Register any dep nodes that we reused from the previous session,
270-
// but didn't `DepNode::construct` in this session. This ensures
271-
// that their `DefPathHash` to `RawDefId` mappings are registered
272-
// in 'latest_foreign_def_path_hashes' if necessary, since that
273-
// normally happens in `DepNode::construct`.
274-
tcx.dep_graph.register_reused_dep_nodes(tcx);
275-
276-
// Load everything into memory so we can write it out to the on-disk
277-
// cache. The vast majority of cacheable query results should already
278-
// be in memory, so this should be a cheap operation.
279-
// Do this *before* we clone 'latest_foreign_def_path_hashes', since
280-
// loading existing queries may cause us to create new DepNodes, which
281-
// may in turn end up invoking `store_foreign_def_id_hash`
282-
tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));
283-
284295
let latest_foreign_def_path_hashes = self.latest_foreign_def_path_hashes.lock().clone();
285296
let hygiene_encode_context = HygieneEncodeContext::default();
286297

@@ -564,7 +575,7 @@ impl<'sess> OnDiskCache<'sess> {
564575
})
565576
}
566577

567-
fn with_decoder<'a, 'tcx, T, F: FnOnce(&mut CacheDecoder<'sess, 'tcx>) -> T>(
578+
fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T>(
568579
&'sess self,
569580
tcx: TyCtxt<'tcx>,
570581
pos: AbsoluteBytePos,
@@ -575,9 +586,10 @@ impl<'sess> OnDiskCache<'sess> {
575586
{
576587
let cnum_map = self.cnum_map.get_or_init(|| Self::compute_cnum_map(tcx));
577588

589+
let serialized_data = self.serialized_data.read();
578590
let mut decoder = CacheDecoder {
579591
tcx,
580-
opaque: opaque::Decoder::new(&self.serialized_data[..], pos.to_usize()),
592+
opaque: opaque::Decoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
581593
source_map: self.source_map,
582594
cnum_map,
583595
file_index_to_file: &self.file_index_to_file,

0 commit comments

Comments
 (0)