Skip to content

Commit 017cca2

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the SVH is preserved, to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 8656e1b commit 017cca2

File tree

8 files changed

+107
-28
lines changed

8 files changed

+107
-28
lines changed

Diff for: compiler/rustc_codegen_ssa/src/back/link.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ fn link_rlib<'a>(
298298
let (metadata, metadata_position) = create_wrapper_file(
299299
sess,
300300
".rmeta".to_string(),
301-
codegen_results.metadata.raw_data(),
301+
codegen_results.metadata.maybe_reference(),
302302
);
303303
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
304304
match metadata_position {

Diff for: compiler/rustc_codegen_ssa/src/back/metadata.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,8 @@ pub fn create_compressed_metadata_file(
578578
symbol_name: &str,
579579
) -> Vec<u8> {
580580
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
581-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
582-
packed_metadata.extend(metadata.raw_data());
581+
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
582+
packed_metadata.extend(metadata.maybe_reference());
583583

584584
let Some(mut file) = create_object_file(sess) else {
585585
if sess.target.is_like_wasm {

Diff for: compiler/rustc_interface/src/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,7 @@ fn test_unstable_options_tracking_hash() {
854854
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
855855
tracked!(small_data_threshold, Some(16));
856856
tracked!(split_lto_unit, Some(true));
857+
tracked!(split_metadata, true);
857858
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
858859
tracked!(stack_protector, StackProtector::All);
859860
tracked!(teach, true);

Diff for: compiler/rustc_metadata/src/fs.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::{fs, io};
33

44
use rustc_data_structures::temp_dir::MaybeTempDir;
55
use rustc_middle::ty::TyCtxt;
6-
use rustc_session::config::{OutFileName, OutputType};
6+
use rustc_session::config::{CrateType, OutFileName, OutputType};
77
use rustc_session::output::filename_for_metadata;
88
use rustc_session::{MetadataKind, Session};
99
use tempfile::Builder as TempFileBuilder;
@@ -50,7 +50,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_reference_filename = if tcx.sess.opts.unstable_opts.split_metadata
55+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
56+
{
57+
Some(metadata_tmpdir.as_ref().join("ref.rmeta"))
58+
} else {
59+
None
60+
};
5461

5562
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5663
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +67,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6067
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6168
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6269
});
70+
if let Some(metadata_reference_filename) = &metadata_reference_filename {
71+
std::fs::File::create(metadata_reference_filename).unwrap_or_else(|err| {
72+
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
73+
});
74+
}
6375
}
6476
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
77+
encode_metadata(tcx, &metadata_filename, metadata_reference_filename.as_deref())
6678
}
6779
};
6880

@@ -100,9 +112,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100112

101113
// Load metadata back to memory: codegen may need to include it in object files.
102114
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
115+
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
116+
.unwrap_or_else(|err| {
117+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
118+
});
106119

107120
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108121

Diff for: compiler/rustc_metadata/src/locator.rs

+5
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,11 @@ impl<'a> CrateLocator<'a> {
580580
) {
581581
Ok(blob) => {
582582
if let Some(h) = self.crate_matches(&blob, &lib) {
583+
if blob.get_header().is_reference {
584+
if slot.is_none() {
585+
todo!("return error");
586+
}
587+
}
583588
(h, blob)
584589
} else {
585590
info!("metadata mismatch");

Diff for: compiler/rustc_metadata/src/rmeta/encoder.rs

+73-19
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
701701
triple: tcx.sess.opts.target_triple.clone(),
702702
hash: tcx.crate_hash(LOCAL_CRATE),
703703
is_proc_macro_crate: proc_macro_data.is_some(),
704+
is_reference: false,
704705
},
705706
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
706707
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2226,42 +2227,64 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
22262227
// generated regardless of trailing bytes that end up in it.
22272228

22282229
pub struct EncodedMetadata {
2229-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2230-
mmap: Option<Mmap>,
2230+
// The declaration order matters because `full_mmap` should be dropped
2231+
// before `_temp_dir`.
2232+
full_mmap: Option<Mmap>,
2233+
reference: Option<Vec<u8>>,
22312234
// We need to carry MaybeTempDir to avoid deleting the temporary
22322235
// directory while accessing the Mmap.
22332236
_temp_dir: Option<MaybeTempDir>,
22342237
}
22352238

22362239
impl EncodedMetadata {
22372240
#[inline]
2238-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2241+
pub fn from_path(
2242+
path: PathBuf,
2243+
reference_path: Option<PathBuf>,
2244+
temp_dir: Option<MaybeTempDir>,
2245+
) -> std::io::Result<Self> {
22392246
let file = std::fs::File::open(&path)?;
22402247
let file_metadata = file.metadata()?;
22412248
if file_metadata.len() == 0 {
2242-
return Ok(Self { mmap: None, _temp_dir: None });
2249+
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
22432250
}
2244-
let mmap = unsafe { Some(Mmap::map(file)?) };
2245-
Ok(Self { mmap, _temp_dir: temp_dir })
2251+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2252+
2253+
let reference = if let Some(reference_path) = reference_path {
2254+
Some(std::fs::read(reference_path)?)
2255+
} else {
2256+
None
2257+
};
2258+
2259+
Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
22462260
}
22472261

22482262
#[inline]
2249-
pub fn raw_data(&self) -> &[u8] {
2250-
self.mmap.as_deref().unwrap_or_default()
2263+
pub fn full(&self) -> &[u8] {
2264+
&self.full_mmap.as_deref().unwrap_or_default()
2265+
}
2266+
2267+
#[inline]
2268+
pub fn maybe_reference(&self) -> &[u8] {
2269+
self.reference.as_deref().unwrap_or(self.full())
22512270
}
22522271
}
22532272

22542273
impl<S: Encoder> Encodable<S> for EncodedMetadata {
22552274
fn encode(&self, s: &mut S) {
2256-
let slice = self.raw_data();
2275+
self.reference.encode(s);
2276+
2277+
let slice = self.full();
22572278
slice.encode(s)
22582279
}
22592280
}
22602281

22612282
impl<D: Decoder> Decodable<D> for EncodedMetadata {
22622283
fn decode(d: &mut D) -> Self {
2284+
let reference = <Option<Vec<u8>>>::decode(d);
2285+
22632286
let len = d.read_usize();
2264-
let mmap = if len > 0 {
2287+
let full_mmap = if len > 0 {
22652288
let mut mmap = MmapMut::map_anon(len).unwrap();
22662289
for _ in 0..len {
22672290
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
@@ -2272,11 +2295,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
22722295
None
22732296
};
22742297

2275-
Self { mmap, _temp_dir: None }
2298+
Self { full_mmap, reference, _temp_dir: None }
22762299
}
22772300
}
22782301

2279-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2302+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) {
22802303
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
22812304

22822305
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2290,6 +2313,42 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22902313
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
22912314
}
22922315

2316+
with_encode_metadata_header(tcx, path, |ecx| {
2317+
// Encode all the entries and extra information in the crate,
2318+
// culminating in the `CrateRoot` which points to all of it.
2319+
let root = ecx.encode_crate_root();
2320+
2321+
// Flush buffer to ensure backing file has the correct size.
2322+
ecx.opaque.flush();
2323+
// Record metadata size for self-profiling
2324+
tcx.prof.artifact_size(
2325+
"crate_metadata",
2326+
"crate_metadata",
2327+
ecx.opaque.file().metadata().unwrap().len(),
2328+
);
2329+
2330+
root.position.get()
2331+
});
2332+
2333+
if let Some(ref_path) = ref_path {
2334+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2335+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2336+
name: tcx.crate_name(LOCAL_CRATE),
2337+
triple: tcx.sess.opts.target_triple.clone(),
2338+
hash: tcx.crate_hash(LOCAL_CRATE),
2339+
is_proc_macro_crate: false,
2340+
is_reference: true,
2341+
});
2342+
header.position.get()
2343+
});
2344+
}
2345+
}
2346+
2347+
fn with_encode_metadata_header(
2348+
tcx: TyCtxt<'_>,
2349+
path: &Path,
2350+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2351+
) {
22932352
let mut encoder = opaque::FileEncoder::new(path)
22942353
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22952354
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2324,9 +2383,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23242383
// Encode the rustc version string in a predictable location.
23252384
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
23262385

2327-
// Encode all the entries and extra information in the crate,
2328-
// culminating in the `CrateRoot` which points to all of it.
2329-
let root = ecx.encode_crate_root();
2386+
let root_position = f(&mut ecx);
23302387

23312388
// Make sure we report any errors from writing to the file.
23322389
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2336,12 +2393,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23362393
}
23372394

23382395
let file = ecx.opaque.file();
2339-
if let Err(err) = encode_root_position(file, root.position.get()) {
2396+
if let Err(err) = encode_root_position(file, root_position) {
23402397
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
23412398
}
2342-
2343-
// Record metadata size for self-profiling
2344-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
23452399
}
23462400

23472401
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

Diff for: compiler/rustc_metadata/src/rmeta/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ pub(crate) struct CrateHeader {
220220
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
221221
/// time ProcMacroData changes.
222222
pub(crate) is_proc_macro_crate: bool,
223+
/// Whether this header is a reference to a separate rmeta file.
224+
///
225+
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
226+
pub(crate) is_reference: bool,
223227
}
224228

225229
/// Serialized `.rmeta` data for a crate.

Diff for: compiler/rustc_session/src/options.rs

+2
Original file line numberDiff line numberDiff line change
@@ -2128,6 +2128,8 @@ written to standard error output)"),
21282128
by the linker"),
21292129
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
21302130
"enable LTO unit splitting (default: no)"),
2131+
split_metadata: bool = (false, parse_bool, [TRACKED],
2132+
"split metadata out of libraries into .rmeta files"),
21312133
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
21322134
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
21332135
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]

0 commit comments

Comments
 (0)