chore(nix-compat): bump to nom 8.x

flokli · flokli · commit e3acb05c9cd5 · 2025-03-04T08:17:05.000Z
See https://github.com/rust-bakery/nom/blob/72dd5818b70f16a691b6f016d34774f2cfc7c0c7/CHANGELOG.md for the nom changelog.

Most notably, parsers now have to be invoked through an explicit `.parse()` call: `combinator(arg)(input)` -> `combinator(arg).parse(input)`.

The `tuple` combinator is also no longer needed, as the parser functionality is implemented on tuples directly.

This also refactors the string / byte field parsing parts, to make them more concise.

Change-Id: I9e8a3cedd07d6705be391898eb6a486fb8164069
Reviewed-on: https://cl.tvl.fyi/c/depot/+/13193
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
Reviewed-by: Brian Olsen <me@griff.name>
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.nix b/Cargo.nix
@@ -7513,20 +7513,6 @@ rec {
         };
         resolvedDefaultFeatures = [ "default" "rev-mappings" ];
       };
-      "minimal-lexical" = rec {
-        crateName = "minimal-lexical";
-        version = "0.2.1";
-        edition = "2018";
-        sha256 = "16ppc5g84aijpri4jzv14rvcnslvlpphbszc7zzp6vfkddf4qdb8";
-        libName = "minimal_lexical";
-        authors = [
-          "Alex Huszagh <ahuszagh@gmail.com>"
-        ];
-        features = {
-          "default" = [ "std" ];
-        };
-        resolvedDefaultFeatures = [ "std" ];
-      };
       "miniz_oxide" = rec {
         crateName = "miniz_oxide";
         version = "0.8.2";
@@ -8528,9 +8514,9 @@ rec {
       };
       "nom" = rec {
         crateName = "nom";
-        version = "7.1.3";
-        edition = "2018";
-        sha256 = "0jha9901wxam390jcf5pfa0qqfrgh8li787jx2ip0yk5b8y9hwyj";
+        version = "8.0.0";
+        edition = "2021";
+        sha256 = "01cl5xng9d0gxf26h39m0l8lprgpa00fcc75ps1yzgbib1vn35yz";
         authors = [
           "contact@geoffroycouprie.com"
         ];
@@ -8540,15 +8526,10 @@ rec {
             packageId = "memchr";
             usesDefaultFeatures = false;
           }
-          {
-            name = "minimal-lexical";
-            packageId = "minimal-lexical";
-            usesDefaultFeatures = false;
-          }
         ];
         features = {
           "default" = [ "std" ];
-          "std" = [ "alloc" "memchr/std" "minimal-lexical/std" ];
+          "std" = [ "alloc" "memchr/std" ];
         };
         resolvedDefaultFeatures = [ "alloc" "default" "std" ];
       };
diff --git a/Cargo.toml b/Cargo.toml
@@ -85,7 +85,7 @@ md-5 = "0.10.6"
 mimalloc = "0.1.43"
 nix = "0.27.1"
 nohash-hasher = "0.2.0"
-nom = "7.1.3"
+nom = "8.0"
 num-traits = "0.2.19"
 object_store = "0.10.2"
 opentelemetry = "0.28.0"
diff --git a/nix-compat/src/aterm/parser.rs b/nix-compat/src/aterm/parser.rs
@@ -4,12 +4,12 @@
 //! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
 use bstr::BString;
 use nom::branch::alt;
-use nom::bytes::complete::{escaped_transform, is_not, tag};
+use nom::bytes::complete::{escaped_transform, is_not};
 use nom::character::complete::char as nomchar;
-use nom::combinator::{map, value};
+use nom::combinator::{map_res, opt, value};
 use nom::multi::separated_list0;
 use nom::sequence::delimited;
-use nom::IResult;
+use nom::{IResult, Parser};
 
 /// Parse a bstr and undo any escaping (which is why this needs to allocate).
 // FUTUREWORK: have a version for fields that are known to not need escaping
@@ -32,48 +32,37 @@ fn parse_escaped_bytes(i: &[u8]) -> IResult<&[u8], BString> {
 /// Parse a field in double quotes, undo any escaping, and return the unquoted
 /// and decoded `Vec<u8>`.
 pub(crate) fn parse_bytes_field(i: &[u8]) -> IResult<&[u8], BString> {
-    // inside double quotes…
     delimited(
         nomchar('\"'),
-        // There is
-        alt((
-            // …either is a bstr after unescaping
-            parse_escaped_bytes,
-            // …or an empty string.
-            map(tag(b""), |_| BString::default()),
-        )),
+        opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
         nomchar('\"'),
-    )(i)
+    )
+    .parse(i)
 }
 
 /// Parse a field in double quotes, undo any escaping, and return the unquoted
 /// and decoded [String], if it's valid UTF-8.
 /// Or fail parsing if the bytes are no valid UTF-8.
 pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> {
-    // inside double quotes…
     delimited(
         nomchar('\"'),
-        // There is
-        alt((
-            // either is a String after unescaping
-            nom::combinator::map_opt(parse_escaped_bytes, |escaped_bytes| {
-                String::from_utf8(escaped_bytes.into()).ok()
-            }),
-            // or an empty string.
-            map(tag(b""), |_| "".to_string()),
-        )),
+        map_res(
+            opt(parse_escaped_bytes).map(|opt_bstr| opt_bstr.unwrap_or_default()),
+            |bstr| String::from_utf8(bstr.to_vec()),
+        ),
         nomchar('\"'),
-    )(i)
+    )
+    .parse(i)
 }
 
 /// Parse a list of string fields (enclosed in brackets)
 pub(crate) fn parse_string_list(i: &[u8]) -> IResult<&[u8], Vec<String>> {
-    // inside brackets
     delimited(
         nomchar('['),
         separated_list0(nomchar(','), parse_string_field),
         nomchar(']'),
-    )(i)
+    )
+    .parse(i)
 }
 
 #[cfg(test)]
diff --git a/nix-compat/src/derivation/parser.rs b/nix-compat/src/derivation/parser.rs
@@ -7,7 +7,8 @@ use nom::bytes::complete::tag;
 use nom::character::complete::char as nomchar;
 use nom::combinator::{all_consuming, map_res};
 use nom::multi::{separated_list0, separated_list1};
-use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
+use nom::sequence::{delimited, preceded, separated_pair, terminated};
+use nom::Parser;
 use std::collections::{btree_map, BTreeMap, BTreeSet};
 use thiserror;
 
@@ -27,7 +28,7 @@ pub enum Error<I> {
 }
 
 pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> {
-    match all_consuming(parse_derivation)(i) {
+    match all_consuming(parse_derivation).parse(i) {
         Ok((rest, derivation)) => {
             // this shouldn't happen, as all_consuming shouldn't return.
             debug_assert!(rest.is_empty());
@@ -68,13 +69,14 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
         nomchar('('),
         map_res(
             |i| {
-                tuple((
+                (
                     terminated(aterm::parse_string_field, nomchar(',')),
                     terminated(aterm::parse_string_field, nomchar(',')),
                     terminated(aterm::parse_string_field, nomchar(',')),
                     aterm::parse_bytes_field,
-                ))(i)
-                .map_err(into_nomerror)
+                )
+                    .parse(i)
+                    .map_err(into_nomerror)
             },
             |(output_name, output_path, algo_and_mode, encoded_digest)| {
                 // convert these 4 fields into an [Output].
@@ -114,7 +116,8 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
             },
         ),
         nomchar(')'),
-    )(i)
+    )
+    .parse(i)
 }
 
 /// Parse multiple outputs in ATerm. This is a list of things acccepted by
@@ -127,7 +130,8 @@ fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> {
         nomchar('['),
         separated_list1(tag(","), parse_output),
         nomchar(']'),
-    )(i);
+    )
+    .parse(i);
 
     match res {
         Ok((rst, outputs_lst)) => {
@@ -228,22 +232,34 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
             nomchar('('),
             // tuple requires all errors to be of the same type, so we need to be a
             // bit verbose here wrapping generic IResult into [NomATermResult].
-            tuple((
+            (
                 // parse outputs
                 terminated(parse_outputs, nomchar(',')),
                 // // parse input derivations
                 terminated(parse_input_derivations, nomchar(',')),
                 // // parse input sources
                 terminated(parse_input_sources, nomchar(',')),
                 // // parse system
-                |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror),
+                |i| {
+                    terminated(aterm::parse_string_field, nomchar(','))
+                        .parse(i)
+                        .map_err(into_nomerror)
+                },
                 // // parse builder
-                |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror),
+                |i| {
+                    terminated(aterm::parse_string_field, nomchar(','))
+                        .parse(i)
+                        .map_err(into_nomerror)
+                },
                 // // parse arguments
-                |i| terminated(aterm::parse_string_list, nomchar(','))(i).map_err(into_nomerror),
+                |i| {
+                    terminated(aterm::parse_string_list, nomchar(','))
+                        .parse(i)
+                        .map_err(into_nomerror)
+                },
                 // parse environment
                 parse_kv(aterm::parse_bytes_field),
-            )),
+            ),
             nomchar(')'),
         )
         .map(
@@ -267,7 +283,8 @@ pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
                 }
             },
         ),
-    )(i)
+    )
+    .parse(i)
 }
 
 /// Parse a list of key/value pairs into a BTreeMap.
@@ -298,7 +315,7 @@ where
                     ),
                     nomchar(')'),
                 ),
-            )(ii).map_err(into_nomerror);
+            ).parse(ii).map_err(into_nomerror);
 
             match res {
                 Ok((rest, pairs)) => {
@@ -322,7 +339,7 @@ where
             }
         },
         nomchar(']'),
-    )(i)
+    ).parse(i)
 }
 
 #[cfg(test)]