Skip to content

Introduce -Zsplit-metadata option #120855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_codegen_cranelift/build_system/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ impl<'a> TestRunner<'a> {
cmd.arg(&self.target_compiler.triple);
cmd.arg("-Cpanic=abort");
cmd.arg("--check-cfg=cfg(jit)");
cmd.arg("--emit=metadata,link");
cmd.args(args);
cmd
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ fn link_rlib<'a>(
let (metadata, metadata_position) = create_wrapper_file(
sess,
".rmeta".to_string(),
codegen_results.metadata.raw_data(),
codegen_results.metadata.maybe_reference(),
);
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
match metadata_position {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_ssa/src/back/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -578,8 +578,8 @@ pub fn create_compressed_metadata_file(
symbol_name: &str,
) -> Vec<u8> {
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.raw_data());
packed_metadata.write_all(&(metadata.maybe_reference().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.maybe_reference());

let Some(mut file) = create_object_file(sess) else {
if sess.target.is_like_wasm {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,7 @@ fn test_unstable_options_tracking_hash() {
tracked!(simulate_remapped_rust_src_base, Some(PathBuf::from("/rustc/abc")));
tracked!(small_data_threshold, Some(16));
tracked!(split_lto_unit, Some(true));
tracked!(split_metadata, true);
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
tracked!(stack_protector, StackProtector::All);
tracked!(teach, true);
Expand Down
25 changes: 19 additions & 6 deletions compiler/rustc_metadata/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{fs, io};

use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::{OutFileName, OutputType};
use rustc_session::config::{CrateType, OutFileName, OutputType};
use rustc_session::output::filename_for_metadata;
use rustc_session::{MetadataKind, Session};
use tempfile::Builder as TempFileBuilder;
Expand Down Expand Up @@ -50,7 +50,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
let metadata_reference_filename = if tcx.sess.opts.unstable_opts.split_metadata
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
{
Some(metadata_tmpdir.as_ref().join("ref.rmeta"))
} else {
None
};

// Always create a file at `metadata_filename`, even if we have nothing to write to it.
// This simplifies the creation of the output `out_filename` when requested.
Expand All @@ -60,9 +67,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
if let Some(metadata_reference_filename) = &metadata_reference_filename {
std::fs::File::create(metadata_reference_filename).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
});
}
}
MetadataKind::Uncompressed | MetadataKind::Compressed => {
encode_metadata(tcx, &metadata_filename);
encode_metadata(tcx, &metadata_filename, metadata_reference_filename.as_deref())
}
};

Expand Down Expand Up @@ -100,9 +112,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {

// Load metadata back to memory: codegen may need to include it in object files.
let metadata =
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});
EncodedMetadata::from_path(metadata_filename, metadata_reference_filename, metadata_tmpdir)
.unwrap_or_else(|err| {
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
});

let need_metadata_module = metadata_kind == MetadataKind::Compressed;

Expand Down
62 changes: 35 additions & 27 deletions compiler/rustc_metadata/src/locator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,12 @@ impl<'a> CrateLocator<'a> {
continue;
}
}
*slot = Some((hash, metadata, lib.clone()));

if !metadata.get_header().is_reference {
// FIXME nicer error when only an rlib or dylib with is_reference is found
// and no .rmeta?
*slot = Some((hash, metadata, lib.clone()));
}
ret = Some((lib, kind));
}

Expand Down Expand Up @@ -728,37 +733,40 @@ impl<'a> CrateLocator<'a> {
let Some(file) = loc_orig.file_name().and_then(|s| s.to_str()) else {
return Err(CrateError::ExternLocationNotFile(self.crate_name, loc_orig.clone()));
};
// FnMut cannot return reference to captured value, so references
// must be taken outside the closure.
let rlibs = &mut rlibs;
let rmetas = &mut rmetas;
let dylibs = &mut dylibs;
let type_via_filename = (|| {
if file.starts_with("lib") {
if file.ends_with(".rlib") {
return Some(rlibs);
}
if file.ends_with(".rmeta") {
return Some(rmetas);
if file.starts_with("lib") {
if file.ends_with(".rlib") {
if let Ok(rmeta_path) = try_canonicalize(
loc_orig
.parent()
.unwrap()
.join(file.strip_suffix(".rlib").unwrap().to_owned() + ".rmeta"),
) {
rmetas.insert(rmeta_path, PathKind::ExternFlag);
}
rlibs.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
let dll_prefix = self.target.dll_prefix.as_ref();
let dll_suffix = self.target.dll_suffix.as_ref();
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
return Some(dylibs);
}
None
})();
match type_via_filename {
Some(type_via_filename) => {
type_via_filename.insert(loc_canon.clone(), PathKind::ExternFlag);
if file.ends_with(".rmeta") {
rmetas.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
None => {
self.crate_rejections
.via_filename
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
}
let dll_prefix = self.target.dll_prefix.as_ref();
let dll_suffix = self.target.dll_suffix.as_ref();
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
if let Ok(rmeta_path) = try_canonicalize(loc_orig.parent().unwrap().join(
"lib".to_owned()
+ file.strip_prefix(dll_prefix).unwrap().strip_suffix(dll_suffix).unwrap()
+ ".rmeta",
)) {
rmetas.insert(rmeta_path, PathKind::ExternFlag);
}
dylibs.insert(loc_canon.clone(), PathKind::ExternFlag);
continue;
}
self.crate_rejections
.via_filename
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
}

// Extract the dylib/rlib/rmeta triple.
Expand Down
92 changes: 73 additions & 19 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: proc_macro_data.is_some(),
is_reference: false,
},
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
Expand Down Expand Up @@ -2226,42 +2227,64 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
// generated regardless of trailing bytes that end up in it.

pub struct EncodedMetadata {
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
mmap: Option<Mmap>,
// The declaration order matters because `full_mmap` should be dropped
// before `_temp_dir`.
full_mmap: Option<Mmap>,
reference: Option<Vec<u8>>,
// We need to carry MaybeTempDir to avoid deleting the temporary
// directory while accessing the Mmap.
_temp_dir: Option<MaybeTempDir>,
}

impl EncodedMetadata {
#[inline]
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
pub fn from_path(
path: PathBuf,
reference_path: Option<PathBuf>,
temp_dir: Option<MaybeTempDir>,
) -> std::io::Result<Self> {
let file = std::fs::File::open(&path)?;
let file_metadata = file.metadata()?;
if file_metadata.len() == 0 {
return Ok(Self { mmap: None, _temp_dir: None });
return Ok(Self { full_mmap: None, reference: None, _temp_dir: None });
}
let mmap = unsafe { Some(Mmap::map(file)?) };
Ok(Self { mmap, _temp_dir: temp_dir })
let full_mmap = unsafe { Some(Mmap::map(file)?) };

let reference = if let Some(reference_path) = reference_path {
Some(std::fs::read(reference_path)?)
} else {
None
};

Ok(Self { full_mmap, reference, _temp_dir: temp_dir })
}

#[inline]
pub fn raw_data(&self) -> &[u8] {
self.mmap.as_deref().unwrap_or_default()
pub fn full(&self) -> &[u8] {
&self.full_mmap.as_deref().unwrap_or_default()
}

#[inline]
pub fn maybe_reference(&self) -> &[u8] {
self.reference.as_deref().unwrap_or(self.full())
}
}

impl<S: Encoder> Encodable<S> for EncodedMetadata {
fn encode(&self, s: &mut S) {
let slice = self.raw_data();
self.reference.encode(s);

let slice = self.full();
slice.encode(s)
}
}

impl<D: Decoder> Decodable<D> for EncodedMetadata {
fn decode(d: &mut D) -> Self {
let reference = <Option<Vec<u8>>>::decode(d);

let len = d.read_usize();
let mmap = if len > 0 {
let full_mmap = if len > 0 {
let mut mmap = MmapMut::map_anon(len).unwrap();
for _ in 0..len {
(&mut mmap[..]).write_all(&[d.read_u8()]).unwrap();
Expand All @@ -2272,11 +2295,11 @@ impl<D: Decoder> Decodable<D> for EncodedMetadata {
None
};

Self { mmap, _temp_dir: None }
Self { full_mmap, reference, _temp_dir: None }
}
}

pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) {
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");

// Since encoding metadata is not in a query, and nothing is cached,
Expand All @@ -2290,6 +2313,42 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
}

with_encode_metadata_header(tcx, path, |ecx| {
// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();

// Flush buffer to ensure backing file has the correct size.
ecx.opaque.flush();
// Record metadata size for self-profiling
tcx.prof.artifact_size(
"crate_metadata",
"crate_metadata",
ecx.opaque.file().metadata().unwrap().len(),
);

root.position.get()
});

if let Some(ref_path) = ref_path {
with_encode_metadata_header(tcx, ref_path, |ecx| {
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
name: tcx.crate_name(LOCAL_CRATE),
triple: tcx.sess.opts.target_triple.clone(),
hash: tcx.crate_hash(LOCAL_CRATE),
is_proc_macro_crate: false,
is_reference: true,
});
header.position.get()
});
}
}

fn with_encode_metadata_header(
tcx: TyCtxt<'_>,
path: &Path,
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
) {
let mut encoder = opaque::FileEncoder::new(path)
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
encoder.emit_raw_bytes(METADATA_HEADER);
Expand Down Expand Up @@ -2324,9 +2383,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
// Encode the rustc version string in a predictable location.
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);

// Encode all the entries and extra information in the crate,
// culminating in the `CrateRoot` which points to all of it.
let root = ecx.encode_crate_root();
let root_position = f(&mut ecx);

// Make sure we report any errors from writing to the file.
// If we forget this, compilation can succeed with an incomplete rmeta file,
Expand All @@ -2336,12 +2393,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
}

let file = ecx.opaque.file();
if let Err(err) = encode_root_position(file, root.position.get()) {
if let Err(err) = encode_root_position(file, root_position) {
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
}

// Record metadata size for self-profiling
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
}

fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ pub(crate) struct CrateHeader {
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
/// time ProcMacroData changes.
pub(crate) is_proc_macro_crate: bool,
/// Whether this header is a reference to a separate rmeta file.
///
/// This is used inside rlibs and dylibs when using `-Zsplit-metadata`.
pub(crate) is_reference: bool,
}

/// Serialized `.rmeta` data for a crate.
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2128,6 +2128,8 @@ written to standard error output)"),
by the linker"),
split_lto_unit: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable LTO unit splitting (default: no)"),
split_metadata: bool = (false, parse_bool, [TRACKED],
"split metadata out of libraries into .rmeta files"),
src_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_src_file_hash, [TRACKED],
"hash algorithm of source files in debug info (`md5`, `sha1`, or `sha256`)"),
#[rustc_lint_opt_deny_field_access("use `Session::stack_protector` instead of this field")]
Expand Down
6 changes: 6 additions & 0 deletions src/bootstrap/src/bin/rustc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ fn main() {
}
}

if orig_args.iter().any(|arg| arg == "-Zsplit-metadata")
&& orig_args.windows(2).any(|args| args[0] == "--crate-type" && args[1] == "dylib")
{
cmd.arg("--emit").arg("metadata");
}

// Print backtrace in case of ICE
if env::var("RUSTC_BACKTRACE_ON_ICE").is_ok() && env::var("RUST_BACKTRACE").is_err() {
cmd.env("RUST_BACKTRACE", "1");
Expand Down
Loading
Loading