Skip to content

Commit 5e86595

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the svh is preserved, to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 7347f8e commit 5e86595

File tree

8 files changed

+97
-27
lines changed

8 files changed

+97
-27
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ fn link_rlib<'a>(
311311
let (metadata, metadata_position) = create_wrapper_file(
312312
sess,
313313
".rmeta".to_string(),
314-
codegen_results.metadata.raw_data(),
314+
codegen_results.metadata.maybe_reference(),
315315
);
316316
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
317317
match metadata_position {

compiler/rustc_codegen_ssa/src/back/metadata.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,8 +527,8 @@ pub fn create_compressed_metadata_file(
527527
symbol_name: &str,
528528
) -> Vec<u8> {
529529
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
530-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
531-
packed_metadata.extend(metadata.raw_data());
530+
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
531+
packed_metadata.extend(metadata.maybe_reference());
532532

533533
let Some(mut file) = create_object_file(sess) else {
534534
if sess.target.is_like_wasm {

compiler/rustc_interface/src/tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,7 @@ fn test_unstable_options_tracking_hash() {
844844
tracked!(show_span, Some(String::from("abc")));
845845
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
846846
tracked!(split_lto_unit, Some(true));
847+
tracked!(split_metadata, true);
847848
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
848849
tracked!(stack_protector, StackProtector::All);
849850
tracked!(teach, true);

compiler/rustc_metadata/src/fs.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_reference_filename = metadata_tmpdir.as_ref().join("ref.rmeta");
5455

5556
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5657
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +61,12 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6061
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6162
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6263
});
64+
std::fs::File::create(&metadata_reference_filename).unwrap_or_else(|err| {
65+
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
66+
});
6367
}
6468
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
69+
encode_metadata(tcx, &metadata_filename, &metadata_reference_filename)
6670
}
6771
};
6872

@@ -100,9 +104,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100104

101105
// Load metadata back to memory: codegen may need to include it in object files.
102106
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
107+
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
108+
.unwrap_or_else(|err| {
109+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
110+
});
106111

107112
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108113

compiler/rustc_metadata/src/locator.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,11 @@ impl<'a> CrateLocator<'a> {
575575
) {
576576
Ok(blob) => {
577577
if let Some(h) = self.crate_matches(&blob, &lib) {
578+
if blob.get_header().is_reference {
579+
if slot.is_none() {
580+
todo!("return error");
581+
}
582+
}
578583
(h, blob)
579584
} else {
580585
info!("metadata mismatch");

compiler/rustc_metadata/src/rmeta/encoder.rs

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
700700
triple: tcx.sess.opts.target_triple.clone(),
701701
hash: tcx.crate_hash(LOCAL_CRATE),
702702
is_proc_macro_crate: proc_macro_data.is_some(),
703+
is_reference: false,
703704
},
704705
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
705706
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2162,42 +2163,61 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
21622163
// generated regardless of trailing bytes that end up in it.
21632164

21642165
pub struct EncodedMetadata {
2165-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2166-
mmap: Option<Mmap>,
2166+
// The declaration order matters because `full_mmap` should be dropped
2167+
// before `_temp_dir`.
2168+
full_mmap: Option<Mmap>,
2169+
reference: Option<Vec<u8>>,
21672170
// We need to carry MaybeTempDir to avoid deleting the temporary
21682171
// directory while accessing the Mmap.
21692172
_temp_dir: Option<MaybeTempDir>,
21702173
}
21712174

21722175
impl EncodedMetadata {
21732176
#[inline]
2174-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2177+
pub fn from_path(
2178+
path: PathBuf,
2179+
reference_path: PathBuf,
2180+
temp_dir: Option<MaybeTempDir>,
2181+
) -> std::io::Result<Self> {
21752182
let file = std::fs::File::open(&path)?;
21762183
let file_metadata = file.metadata()?;
21772184
if file_metadata.len() == 0 {
2178-
return Ok(Self { mmap: None, _temp_dir: None });
2185+
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
21792186
}
2180-
let mmap = unsafe { Some(Mmap::map(file)?) };
2181-
Ok(Self { mmap, _temp_dir: temp_dir })
2187+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2188+
2189+
let reference = std::fs::read(reference_path)?;
2190+
let reference = if reference.is_empty() { None } else { Some(reference) };
2191+
2192+
Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
2193+
}
2194+
2195+
#[inline]
2196+
pub fn full(&self) -> &[u8] {
2197+
&self.full_mmap.as_deref().unwrap_or_default()
21822198
}
21832199

21842200
#[inline]
2185-
pub fn raw_data(&self) -> &[u8] {
2186-
self.mmap.as_deref().unwrap_or_default()
2201+
pub fn maybe_reference(&self) -> &[u8] {
2202+
self.reference.as_deref().unwrap_or(self.full())
21872203
}
21882204
}
21892205

21902206
impl<S: Encoder> Encodable<S> for EncodedMetadata {
21912207
fn encode(&self, s: &mut S) {
2192-
let slice = self.raw_data();
2208+
self.reference.encode(s);
2209+
2210+
let slice = self.full();
21932211
slice.encode(s)
21942212
}
21952213
}
21962214

21972215
impl<D: Decoder> Decodable<D> for EncodedMetadata {
21982216
fn decode(d: &mut D) -> Self {
2217+
let reference = <Option<Vec<u8>>>::decode(d);
2218+
21992219
let len = d.read_usize();
2200-
let mmap = if len > 0 {
2220+
let full_mmap = if len > 0 {
22012221
let mut mmap = MmapMut::map_anon(len).unwrap();
22022222
for _ in 0..len {
22032223
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
@@ -2208,11 +2228,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
22082228
None
22092229
};
22102230

2211-
Self { mmap, _temp_dir: None }
2231+
Self { full_mmap, reference, _temp_dir: None }
22122232
}
22132233
}
22142234

2215-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2235+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: &Path) {
22162236
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
22172237

22182238
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2226,6 +2246,44 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22262246
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
22272247
}
22282248

2249+
with_encode_metadata_header(tcx, path, |ecx| {
2250+
// Encode all the entries and extra information in the crate,
2251+
// culminating in the `CrateRoot` which points to all of it.
2252+
let root = ecx.encode_crate_root();
2253+
2254+
// Flush buffer to ensure backing file has the correct size.
2255+
ecx.opaque.flush();
2256+
// Record metadata size for self-profiling
2257+
tcx.prof.artifact_size(
2258+
"crate_metadata",
2259+
"crate_metadata",
2260+
ecx.opaque.file().metadata().unwrap().len(),
2261+
);
2262+
2263+
root.position.get()
2264+
});
2265+
2266+
if tcx.sess.opts.unstable_opts.split_metadata
2267+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
2268+
{
2269+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2270+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2271+
name: tcx.crate_name(LOCAL_CRATE),
2272+
triple: tcx.sess.opts.target_triple.clone(),
2273+
hash: tcx.crate_hash(LOCAL_CRATE),
2274+
is_proc_macro_crate: false,
2275+
is_reference: true,
2276+
});
2277+
header.position.get()
2278+
});
2279+
}
2280+
}
2281+
2282+
pub fn with_encode_metadata_header(
2283+
tcx: TyCtxt<'_>,
2284+
path: &Path,
2285+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2286+
) {
22292287
let mut encoder = opaque::FileEncoder::new(path)
22302288
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22312289
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2260,9 +2318,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22602318
// Encode the rustc version string in a predictable location.
22612319
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
22622320

2263-
// Encode all the entries and extra information in the crate,
2264-
// culminating in the `CrateRoot` which points to all of it.
2265-
let root = ecx.encode_crate_root();
2321+
let root_position = f(&mut ecx);
22662322

22672323
// Make sure we report any errors from writing to the file.
22682324
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2272,12 +2328,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22722328
}
22732329

22742330
let file = ecx.opaque.file();
2275-
if let Err(err) = encode_root_position(file, root.position.get()) {
2331+
if let Err(err) = encode_root_position(file, root_position) {
22762332
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
22772333
}
2278-
2279-
// Record metadata size for self-profiling
2280-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
22812334
}
22822335

22832336
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

compiler/rustc_metadata/src/rmeta/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ pub(crate) struct CrateHeader {
221221
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
222222
/// time ProcMacroData changes.
223223
pub(crate) is_proc_macro_crate: bool,
224+
/// Whether this header is a reference to a separate rmeta file.
225+
///
226+
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
227+
pub(crate) is_reference: bool,
224228
}
225229

226230
/// Serialized `.rmeta` data for a crate.

compiler/rustc_session/src/options.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2014,6 +2014,8 @@ written to standard error output)"),
20142014
by the linker"),
20152015
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
20162016
"enable LTO unit splitting (default: no)"),
2017+
split_metadata: bool = (false, parse_bool, [TRACKED],
2018+
"split metadata out of libraries into .rmeta files"),
20172019
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
20182020
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
20192021
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]

0 commit comments

Comments
 (0)