Skip to content

Commit de5832f

Browse files
committed
Auto merge of #14751 - weihanglo:depinfo-format, r=epage
fix: track version in fingerprint dep-info files ### What does this PR try to resolve? Encodes the version information into Cargo's fingerprint dep-info files, so that when the format encoding changes in the future, Cargo understands that a dep-info file is outdated and doesn't bother parsing it. Since there was no version info encoded in the old format (call it v0), to be compatible with older cargoes, this PR works around it with a horrible hack. It is explained in the doc comment of `EncodedDepInfo`. ### How should we test and review this PR? This PR also adds a static file which is a v0 depinfo, to ensure newer cargoes give up parsing it gracefully, without allocation errors. See the allocation failures in this CI job result: https://github.com/weihanglo/cargo/actions/runs/11601065840/job/32302943597 There is no harm in keeping the v0 test file, but it is also fine to remove it from git. ### Additional information Fixes #14712
2 parents 5f9257e + 634450e commit de5832f

File tree

2 files changed

+153
-5
lines changed

2 files changed

+153
-5
lines changed

crates/cargo-test-support/src/lib.rs

+6
Original file line numberDiff line numberDiff line change
@@ -1597,6 +1597,12 @@ pub fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path,
15971597
assert!(files.next().is_none(), "expected only 1 dep-info file");
15981598
let dep_info = fs::read(&info_path).unwrap();
15991599
let dep_info = &mut &dep_info[..];
1600+
1601+
// Consume the magic marker and version. Here they don't really matter.
1602+
read_usize(dep_info);
1603+
read_u8(dep_info);
1604+
read_u8(dep_info);
1605+
16001606
let deps = (0..read_usize(dep_info))
16011607
.map(|_| {
16021608
let ty = read_u8(dep_info);

src/cargo/core/compiler/fingerprint/dep_info.rs

+147-5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ use cargo_util::Sha256;
2222
use crate::CargoResult;
2323
use crate::CARGO_ENV;
2424

25+
/// The current format version of [`EncodedDepInfo`].
26+
const CURRENT_ENCODED_DEP_INFO_VERSION: u8 = 1;
27+
2528
/// The representation of the `.d` dep-info file generated by rustc
2629
#[derive(Default)]
2730
pub struct RustcDepInfo {
@@ -61,20 +64,36 @@ pub enum DepInfoPathType {
6164
/// Currently the format looks like:
6265
///
6366
/// ```text
64-
/// +------------+------------+---------------+---------------+
65-
/// | # of files | file paths | # of env vars | env var pairs |
66-
/// +------------+------------+---------------+---------------+
67+
/// +--------+---------+------------+------------+---------------+---------------+
68+
/// | marker | version | # of files | file paths | # of env vars | env var pairs |
69+
/// +--------+---------+------------+------------+---------------+---------------+
6770
/// ```
6871
///
6972
/// Each field represents
7073
///
74+
/// * _Marker_ --- A magic marker to ensure that older Cargoes, which only
75+
/// recognize format v0 (prior to checksum support in [`f4ca7390`]), do not
76+
/// proceed with parsing newer formats. Since [`EncodedDepInfo`] is merely
77+
/// an optimization, and to avoid adding complexity, Cargo recognizes only
78+
/// one version, namely [`CURRENT_ENCODED_DEP_INFO_VERSION`].
79+
/// The current layout looks like this:
80+
/// ```text
81+
/// +----------------------------+
82+
/// | [0x01 0x00 0x00 0x00 0xff] |
83+
/// +----------------------------+
84+
/// ```
85+
/// These bytes will be interpreted as "one file tracked and an invalid
86+
/// [`DepInfoPathType`] variant with 255" by older Cargoes, causing them to
87+
/// stop parsing. This could prevent problematic parsing as noted in
88+
/// rust-lang/cargo#14712.
89+
/// * _Version_ --- The current format version.
7190
/// * _Number of files/envs_ --- A `u32` representing the number of things.
7291
/// * _File paths_ --- Zero or more paths of files the dep-info file depends on.
7392
/// Each path is encoded as the following:
7493
///
7594
/// ```text
7695
/// +-----------+-------------+------------+---------------+-----------+-------+
77-
/// | Path type | len of path | path bytes | cksum exists? | file size | cksum |
96+
/// | path type | len of path | path bytes | cksum exists? | file size | cksum |
7897
/// +-----------+-------------+------------+---------------+-----------+-------+
7998
/// ```
8099
/// * _Env var pairs_ --- Zero or more env vars the dep-info file depends on.
@@ -84,7 +103,9 @@ pub enum DepInfoPathType {
84103
/// | len of key | key bytes | value exists? | len of value | value bytes |
85104
/// +------------+-----------+---------------+--------------+-------------+
86105
/// ```
87-
#[derive(Default)]
106+
///
107+
/// [`f4ca7390`]: https://github.com/rust-lang/cargo/commit/f4ca739073185ea5e1148ff100bb4a06d3bf721d
108+
#[derive(Default, Debug, PartialEq, Eq)]
88109
pub struct EncodedDepInfo {
89110
pub files: Vec<(DepInfoPathType, PathBuf, Option<(u64, String)>)>,
90111
pub env: Vec<(String, Option<String>)>,
@@ -93,6 +114,12 @@ pub struct EncodedDepInfo {
93114
impl EncodedDepInfo {
94115
pub fn parse(mut bytes: &[u8]) -> Option<EncodedDepInfo> {
95116
let bytes = &mut bytes;
117+
read_magic_marker(bytes)?;
118+
let version = read_u8(bytes)?;
119+
if version != CURRENT_ENCODED_DEP_INFO_VERSION {
120+
return None;
121+
}
122+
96123
let nfiles = read_usize(bytes)?;
97124
let mut files = Vec::with_capacity(nfiles);
98125
for _ in 0..nfiles {
@@ -129,6 +156,18 @@ impl EncodedDepInfo {
129156
}
130157
return Some(EncodedDepInfo { files, env });
131158

159+
/// See [`EncodedDepInfo`] for why a magic marker exists.
160+
fn read_magic_marker(bytes: &mut &[u8]) -> Option<()> {
161+
let _size = read_usize(bytes)?;
162+
let path_type = read_u8(bytes)?;
163+
if path_type != u8::MAX {
164+
// Old depinfo. Give up parsing it.
165+
None
166+
} else {
167+
Some(())
168+
}
169+
}
170+
132171
fn read_usize(bytes: &mut &[u8]) -> Option<usize> {
133172
let ret = bytes.get(..4)?;
134173
*bytes = &bytes[4..];
@@ -162,6 +201,10 @@ impl EncodedDepInfo {
162201
pub fn serialize(&self) -> CargoResult<Vec<u8>> {
163202
let mut ret = Vec::new();
164203
let dst = &mut ret;
204+
205+
write_magic_marker(dst);
206+
dst.push(CURRENT_ENCODED_DEP_INFO_VERSION);
207+
165208
write_usize(dst, self.files.len());
166209
for (ty, file, checksum_info) in self.files.iter() {
167210
match ty {
@@ -189,6 +232,14 @@ impl EncodedDepInfo {
189232
}
190233
return Ok(ret);
191234

235+
/// See [`EncodedDepInfo`] for why a magic marker exists.
236+
///
237+
/// This relies on the assumption that there is always at least one file.
238+
fn write_magic_marker(dst: &mut Vec<u8>) {
239+
write_usize(dst, 1);
240+
dst.push(u8::MAX);
241+
}
242+
192243
fn write_bytes(dst: &mut Vec<u8>, val: impl AsRef<[u8]>) {
193244
let val = val.as_ref();
194245
write_usize(dst, val.len());
@@ -614,3 +665,94 @@ pub enum InvalidChecksum {
614665
#[error("expected a string with format \"algorithm=hex_checksum\"")]
615666
InvalidFormat,
616667
}
668+
669+
#[cfg(test)]
670+
mod encoded_dep_info {
671+
use super::*;
672+
673+
#[track_caller]
674+
fn gen_test(checksum: bool) {
675+
let checksum = checksum.then_some((768, "c01efc669f09508b55eced32d3c88702578a7c3e".into()));
676+
let lib_rs = (
677+
DepInfoPathType::TargetRootRelative,
678+
PathBuf::from("src/lib.rs"),
679+
checksum.clone(),
680+
);
681+
682+
let depinfo = EncodedDepInfo {
683+
files: vec![lib_rs.clone()],
684+
env: Vec::new(),
685+
};
686+
let data = depinfo.serialize().unwrap();
687+
assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo);
688+
689+
let mod_rs = (
690+
DepInfoPathType::TargetRootRelative,
691+
PathBuf::from("src/mod.rs"),
692+
checksum.clone(),
693+
);
694+
let depinfo = EncodedDepInfo {
695+
files: vec![lib_rs.clone(), mod_rs.clone()],
696+
env: Vec::new(),
697+
};
698+
let data = depinfo.serialize().unwrap();
699+
assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo);
700+
701+
let depinfo = EncodedDepInfo {
702+
files: vec![lib_rs, mod_rs],
703+
env: vec![
704+
("Gimli".into(), Some("Legolas".into())),
705+
("Beren".into(), Some("Lúthien".into())),
706+
],
707+
};
708+
let data = depinfo.serialize().unwrap();
709+
assert_eq!(EncodedDepInfo::parse(&data).unwrap(), depinfo);
710+
}
711+
712+
#[test]
713+
fn round_trip() {
714+
gen_test(false);
715+
}
716+
717+
#[test]
718+
fn round_trip_with_checksums() {
719+
gen_test(true);
720+
}
721+
722+
#[test]
723+
fn path_type_is_u8_max() {
724+
#[rustfmt::skip]
725+
let data = [
726+
0x01, 0x00, 0x00, 0x00, 0xff, // magic marker
727+
CURRENT_ENCODED_DEP_INFO_VERSION, // version
728+
0x01, 0x00, 0x00, 0x00, // # of files
729+
0x00, // path type
730+
0x04, 0x00, 0x00, 0x00, // len of path
731+
0x72, 0x75, 0x73, 0x74, // path bytes ("rust")
732+
0x00, // cksum exists?
733+
0x00, 0x00, 0x00, 0x00, // # of env vars
734+
];
735+
// The current cargo doesn't recognize the magic marker.
736+
assert_eq!(
737+
EncodedDepInfo::parse(&data).unwrap(),
738+
EncodedDepInfo {
739+
files: vec![(DepInfoPathType::PackageRootRelative, "rust".into(), None)],
740+
env: Vec::new(),
741+
}
742+
);
743+
}
744+
745+
#[test]
746+
fn parse_v0_fingerprint_dep_info() {
747+
#[rustfmt::skip]
748+
let data = [
749+
0x01, 0x00, 0x00, 0x00, // # of files
750+
0x00, // path type
751+
0x04, 0x00, 0x00, 0x00, // len of path
752+
0x72, 0x75, 0x73, 0x74, // path bytes: "rust"
753+
0x00, 0x00, 0x00, 0x00, // # of env vars
754+
];
755+
// Cargo can't recognize v0 after `-Zchecksum-freshness` was added.
756+
assert!(EncodedDepInfo::parse(&data).is_none());
757+
}
758+
}

0 commit comments

Comments
 (0)