Skip to content

Commit abbb33e

Browse files
committed
kernel_cmdline: Add canonicalized() methods
This seems generally useful. Mostly, I could see always canonicalizing the cmdline generated by bootc, both to remove a source of potential drift/nondeterminism and to better enable reproducible builds. Signed-off-by: John Eckersberg <[email protected]>
1 parent 7e81f9c commit abbb33e

File tree

2 files changed

+175
-0
lines changed

2 files changed

+175
-0
lines changed

crates/kernel_cmdline/src/bytes.rs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,38 @@ impl<'a> Cmdline<'a> {
340340
removed
341341
}
342342

343+
/// Returns the canonicalized version of the `Cmdline`.
344+
///
345+
/// This:
346+
///
347+
/// 1. Sorts the parameter list
348+
/// 2. Canonicalizes each `Parameter`
349+
/// 3. Joins each parameter together with a single space ' '
350+
///
351+
/// # Examples
352+
///
353+
/// ```
354+
/// use bootc_kernel_cmdline::bytes::Cmdline;
355+
///
356+
/// let cmdline = Cmdline::from(b"z a=\"b c\"");
357+
/// assert_eq!(&cmdline.canonicalized(), b"\"a=b c\" z");
358+
/// ```
359+
pub fn canonicalized(&self) -> Vec<u8> {
360+
let mut params = self.iter().collect::<Vec<_>>();
361+
params.sort();
362+
363+
let mut res = Vec::new();
364+
365+
for (i, p) in params.iter().enumerate() {
366+
if i > 0 {
367+
res.push(b' ');
368+
}
369+
res.extend(p.canonicalized());
370+
}
371+
372+
res
373+
}
374+
343375
#[cfg(test)]
344376
pub(crate) fn is_owned(&self) -> bool {
345377
matches!(self.0, Cow::Owned(_))
@@ -426,6 +458,21 @@ impl ParameterKey<'_> {
426458
.iter()
427459
.map(|&c: &u8| if c == b'-' { b'_' } else { c })
428460
}
461+
462+
/// Returns the canonicalized version of the key. This replaces
463+
/// all dashes '-' with underscores '_'.
464+
///
465+
/// # Example
466+
///
467+
/// ```
468+
/// use bootc_kernel_cmdline::bytes::ParameterKey;
469+
///
470+
/// assert_eq!(&ParameterKey::from("key-with-dashes").canonicalized(),
471+
/// "key_with_dashes".as_bytes());
472+
/// ```
473+
pub fn canonicalized(&self) -> Vec<u8> {
474+
self.iter().collect()
475+
}
429476
}
430477

431478
impl PartialEq for ParameterKey<'_> {
@@ -528,6 +575,57 @@ impl<'a> Parameter<'a> {
528575
pub fn value(&self) -> Option<&'a [u8]> {
529576
self.value
530577
}
578+
579+
/// Returns the canonical representation of the parameter.
580+
///
581+
/// The canonical representation:
582+
///
583+
/// 1. Will use the canonicalized form of the key via
584+
/// `ParameterKey::canonicalized`
585+
///
586+
/// 2. Will be "externally" quoted if either the key or
587+
/// (optional) value contains ascii whitespace.
588+
///
589+
/// 3. Unnecessary quoting will be removed.
590+
///
591+
/// # Examples
592+
///
593+
/// ```
594+
/// use bootc_kernel_cmdline::bytes::Parameter;
595+
///
596+
/// // key is canonicalized
597+
/// assert_eq!(Parameter::parse("a-dashed-key").unwrap().canonicalized(),
598+
/// "a_dashed_key".as_bytes());
599+
///
600+
/// // quotes are externally added if needed
601+
/// assert_eq!(Parameter::parse("foo=\"has some spaces\"").unwrap().canonicalized(),
602+
/// "\"foo=has some spaces\"".as_bytes());
603+
///
604+
/// // unnecessary quotes are removed
605+
/// assert_eq!(Parameter::parse("foo=\"bar\"").unwrap().canonicalized(),
606+
/// "foo=bar".as_bytes());
607+
/// ```
608+
pub fn canonicalized(&self) -> Vec<u8> {
609+
let spaces = self.key.iter().any(|b| b.is_ascii_whitespace())
610+
|| self
611+
.value
612+
.map_or(false, |val| val.iter().any(|b| b.is_ascii_whitespace()));
613+
614+
let mut res = if spaces { vec![b'"'] } else { vec![] };
615+
616+
res.extend(self.key.iter());
617+
618+
if let Some(val) = self.value {
619+
res.push(b'=');
620+
res.extend(val);
621+
}
622+
623+
if spaces {
624+
res.push(b'"');
625+
}
626+
627+
res
628+
}
531629
}
532630

533631
impl PartialEq for Parameter<'_> {

crates/kernel_cmdline/src/utf8.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,29 @@ impl<'a> Cmdline<'a> {
203203
self.0.remove_exact(&param.0)
204204
}
205205

206+
/// Returns the canonicalized version of the `Cmdline`.
207+
///
208+
/// This:
209+
///
210+
/// 1. Sorts the parameter list
211+
/// 2. Canonicalizes each `Parameter`
212+
/// 3. Joins each parameter together with a single space ' '
213+
///
214+
/// # Examples
215+
///
216+
/// ```
217+
/// use bootc_kernel_cmdline::utf8::Cmdline;
218+
///
219+
/// let cmdline = Cmdline::from("z a=\"b c\"");
220+
/// assert_eq!(&cmdline.canonicalized(), "\"a=b c\" z");
221+
/// ```
222+
pub fn canonicalized(&self) -> String {
223+
self.0
224+
.canonicalized()
225+
.try_into()
226+
.expect("We only construct the underlying bytes from valid UTF-8")
227+
}
228+
206229
#[cfg(test)]
207230
pub(crate) fn is_owned(&self) -> bool {
208231
self.0.is_owned()
@@ -298,6 +321,24 @@ impl<'a> ParameterKey<'a> {
298321
fn from_bytes(input: bytes::ParameterKey<'a>) -> Self {
299322
Self(input)
300323
}
324+
325+
/// Returns the canonicalized version of the key. This replaces
326+
/// all dashes '-' with underscores '_'.
327+
///
328+
/// # Example
329+
///
330+
/// ```
331+
/// use bootc_kernel_cmdline::utf8::ParameterKey;
332+
///
333+
/// assert_eq!(ParameterKey::from("key-with-dashes").canonicalized(),
334+
/// "key_with_dashes".to_string());
335+
/// ```
336+
pub fn canonicalized(&self) -> String {
337+
self.0
338+
.canonicalized()
339+
.try_into()
340+
.expect("We only construct the underlying bytes from valid UTF-8")
341+
}
301342
}
302343

303344
impl<'a, T: AsRef<str> + ?Sized> From<&'a T> for ParameterKey<'a> {
@@ -358,6 +399,42 @@ impl<'a> Parameter<'a> {
358399
str::from_utf8(p).expect("We only construct the underlying bytes from valid UTF-8")
359400
})
360401
}
402+
403+
/// Returns the canonical representation of the parameter.
404+
///
405+
/// The canonical representation:
406+
///
407+
/// 1. Will use the canonicalized form of the key via
408+
/// `ParameterKey::canonicalized`
409+
///
410+
/// 2. Will be "externally" quoted if either the key or
411+
/// (optional) value contains ascii whitespace.
412+
///
413+
/// 3. Unnecessary quoting will be removed.
414+
///
415+
/// # Examples
416+
///
417+
/// ```
418+
/// use bootc_kernel_cmdline::utf8::Parameter;
419+
///
420+
/// // key is canonicalized
421+
/// assert_eq!(Parameter::parse("a-dashed-key").unwrap().canonicalized(),
422+
/// "a_dashed_key".to_string());
423+
///
424+
/// // quotes are externally added if needed
425+
/// assert_eq!(Parameter::parse("foo=\"has some spaces\"").unwrap().canonicalized(),
426+
/// "\"foo=has some spaces\"".to_string());
427+
///
428+
/// // unnecessary quotes are removed
429+
/// assert_eq!(Parameter::parse("foo=\"bar\"").unwrap().canonicalized(),
430+
/// "foo=bar".to_string());
431+
/// ```
432+
pub fn canonicalized(&self) -> String {
433+
self.0
434+
.canonicalized()
435+
.try_into()
436+
.expect("We only construct the underlying bytes from valid UTF-8")
437+
}
361438
}
362439

363440
impl<'a> TryFrom<bytes::Parameter<'a>> for Parameter<'a> {

0 commit comments

Comments
 (0)