Skip to content

Commit 8691870

Browse files
authoredJul 23, 2024··
fix(stdlib)!: parse_logfmt performs escaping (#777) (#924)
* fix(stdlib): parse_logfmt performs escaping (#777) * label changelog fragment as breaking * explaining comment for Some(_) -> c branch
1 parent 8487e77 commit 8691870

File tree

3 files changed

+127
-22
lines changed

3 files changed

+127
-22
lines changed
 

‎changelog.d/777.breaking.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
`parse_logfmt` now processes 3 escape sequences when parsing: `\n`, `\"` and `\\`. This means that for example, `\n` in the input will be replaced with an actual newline character in parsed keys or values.

‎src/stdlib/encode_key_value.rs

+57-1
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,66 @@ impl FunctionExpression for EncodeKeyValueFn {
168168

169169
#[cfg(test)]
170170
mod tests {
171-
use crate::{btreemap, value};
171+
use std::collections::BTreeMap;
172+
173+
use crate::{
174+
btreemap,
175+
stdlib::parse_key_value::{parse_key_value, Whitespace},
176+
value,
177+
};
172178

173179
use super::*;
174180

181+
#[test]
182+
fn test_encode_decode_cycle() {
183+
let before: Value = {
184+
let mut map = Value::from(BTreeMap::default());
185+
map.insert("key", r#"this has a " quote"#);
186+
map
187+
};
188+
189+
let after = parse_key_value(
190+
encode_key_value(None, before.clone(), "=".into(), " ".into(), true.into())
191+
.expect("valid key value before"),
192+
"=".into(),
193+
" ".into(),
194+
true.into(),
195+
Whitespace::Lenient,
196+
)
197+
.expect("valid key value after");
198+
199+
assert_eq!(before, after);
200+
}
201+
202+
#[test]
203+
fn test_decode_encode_cycle() {
204+
let before: Value = r#"key="this has a \" quote""#.into();
205+
206+
let after = encode_key_value(
207+
Some(Value::Array(vec![
208+
"key".into(),
209+
"has".into(),
210+
"a".into(),
211+
r#"""#.into(),
212+
"quote".into(),
213+
])),
214+
parse_key_value(
215+
before.clone(),
216+
"=".into(),
217+
" ".into(),
218+
true.into(),
219+
Whitespace::Lenient,
220+
)
221+
.expect("valid key value before"),
222+
"=".into(),
223+
" ".into(),
224+
true.into(),
225+
)
226+
.expect("valid key value after");
227+
228+
assert_eq!(before, after);
229+
}
230+
175231
test_function![
176232
encode_key_value => EncodeKeyValue;
177233

‎src/stdlib/parse_key_value.rs

+69-21
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ use nom::{
1212
IResult,
1313
};
1414
use std::{
15+
borrow::Cow,
1516
collections::{btree_map::Entry, BTreeMap},
16-
str::FromStr,
17+
iter::Peekable,
18+
str::{Chars, FromStr},
1719
};
1820

1921
pub(crate) fn parse_key_value(
@@ -346,7 +348,7 @@ fn parse_key_value_<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
346348
parse_value(field_delimiter),
347349
))(input),
348350
},
349-
|(field, sep, value): (&str, Vec<&str>, Value)| {
351+
|(field, sep, value): (Cow<'_, str>, Vec<&str>, Value)| {
350352
(
351353
field.to_string().into(),
352354
if sep.len() == 1 { value } else { value!(true) },
@@ -356,6 +358,45 @@ fn parse_key_value_<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
356358
}
357359
}
358360

361+
fn escape_str(s: &str) -> Cow<'_, str> {
362+
if s.contains('\\') {
363+
let mut out = String::new();
364+
let mut chars = s.chars().peekable();
365+
366+
while let Some(c) = chars.next() {
367+
out.push(escape_char(c, &mut chars))
368+
}
369+
Cow::Owned(out)
370+
} else {
371+
Cow::Borrowed(s)
372+
}
373+
}
374+
375+
fn escape_char(c: char, rest: &mut Peekable<Chars>) -> char {
376+
if c == '\\' {
377+
match rest.peek() {
378+
Some('n') => {
379+
let _ = rest.next();
380+
'\n'
381+
}
382+
Some('\\') => {
383+
let _ = rest.next();
384+
'\\'
385+
}
386+
Some('"') => {
387+
let _ = rest.next();
388+
'\"'
389+
}
390+
// ignore escape sequences not added by encode_key_value and return the backslash untouched
391+
Some(_) => c,
392+
// trailing escape char is a little odd... Might need to error here!
393+
None => c,
394+
}
395+
} else {
396+
c
397+
}
398+
}
399+
359400
/// Parses a string delimited by the given character.
360401
/// Can be escaped using `\`.
361402
/// The terminator indicates the character that should follow the delimited field.
@@ -367,7 +408,7 @@ fn parse_key_value_<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
367408
fn parse_delimited<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
368409
delimiter: char,
369410
field_terminator: &'a str,
370-
) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, E> {
411+
) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>, E> {
371412
move |input| {
372413
terminated(
373414
delimited(
@@ -379,7 +420,8 @@ fn parse_delimited<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
379420
// match literally any character, there are no invalid escape sequences
380421
take(1usize),
381422
)),
382-
|inner| inner.unwrap_or(""),
423+
// process the escape sequences that we encode
424+
|inner| inner.map_or(Cow::Borrowed(""), escape_str),
383425
),
384426
char(delimiter),
385427
),
@@ -395,8 +437,12 @@ fn parse_delimited<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
395437
/// just take the rest of the string.
396438
fn parse_undelimited<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
397439
field_delimiter: &'a str,
398-
) -> impl Fn(&'a str) -> IResult<&'a str, &'a str, E> {
399-
move |input| map(alt((take_until(field_delimiter), rest)), str::trim)(input)
440+
) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>, E> {
441+
move |input| {
442+
map(alt((take_until(field_delimiter), rest)), |s: &'_ str| {
443+
Cow::Borrowed(s.trim())
444+
})(input)
445+
}
400446
}
401447

402448
/// Parses the value.
@@ -421,14 +467,16 @@ fn parse_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
421467
}
422468
}
423469

470+
type ParseKeyIResult<'a, E> = IResult<&'a str, Cow<'a, str>, E>;
471+
424472
/// Parses the key.
425473
/// Overall parsing strategies are the same as `parse_value`, but we don't need to convert the result to a `Value`.
426474
/// Standalone key are handled here so a quoted standalone key that contains a delimiter will be dealt with correctly.
427475
fn parse_key<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
428476
key_value_delimiter: &'a str,
429477
field_delimiter: &'a str,
430478
standalone_key: bool,
431-
) -> Box<dyn Fn(&'a str) -> IResult<&'a str, &'a str, E> + 'a> {
479+
) -> Box<dyn Fn(&'a str) -> ParseKeyIResult<'a, E> + 'a> {
432480
if standalone_key {
433481
Box::new(move |input| {
434482
verify(
@@ -630,37 +678,37 @@ mod test {
630678
fn test_parse_key() {
631679
// delimited
632680
assert_eq!(
633-
Ok(("", "noog")),
681+
Ok(("", Cow::Borrowed("noog"))),
634682
parse_key::<VerboseError<&str>>("=", " ", false)(r#""noog""#)
635683
);
636684

637685
// undelimited
638686
assert_eq!(
639-
Ok(("", "noog")),
687+
Ok(("", Cow::Borrowed("noog"))),
640688
parse_key::<VerboseError<&str>>("=", " ", false)("noog")
641689
);
642690

643691
// delimited with escaped char (1)
644692
assert_eq!(
645-
Ok(("=baz", r#"foo \" bar"#)),
693+
Ok(("=baz", Cow::Borrowed(r#"foo " bar"#))),
646694
parse_key::<VerboseError<&str>>("=", " ", false)(r#""foo \" bar"=baz"#)
647695
);
648696

649697
// delimited with escaped char (2)
650698
assert_eq!(
651-
Ok(("=baz", r#"foo \\ \" \ bar"#)),
699+
Ok(("=baz", Cow::Borrowed(r#"foo \ " \ bar"#))),
652700
parse_key::<VerboseError<&str>>("=", " ", false)(r#""foo \\ \" \ bar"=baz"#)
653701
);
654702

655703
// delimited with escaped char (3)
656704
assert_eq!(
657-
Ok(("=baz", r"foo \ bar")),
705+
Ok(("=baz", Cow::Borrowed(r"foo \ bar"))),
658706
parse_key::<VerboseError<&str>>("=", " ", false)(r#""foo \ bar"=baz"#)
659707
);
660708

661709
// Standalone key
662710
assert_eq!(
663-
Ok((" bar=baz", "foo")),
711+
Ok((" bar=baz", Cow::Borrowed("foo"))),
664712
parse_key::<VerboseError<&str>>("=", " ", true)("foo bar=baz")
665713
);
666714

@@ -703,7 +751,7 @@ mod test {
703751
#[test]
704752
fn test_parse_delimited_with_single_quotes() {
705753
assert_eq!(
706-
Ok(("", "test")),
754+
Ok(("", Cow::Borrowed("test"))),
707755
parse_delimited::<VerboseError<&str>>('\'', " ")("'test'")
708756
);
709757
}
@@ -747,15 +795,15 @@ mod test {
747795
#[test]
748796
fn test_parse_delimited_with_internal_delimiters() {
749797
assert_eq!(
750-
Ok(("", "noog nonk")),
798+
Ok(("", Cow::Borrowed("noog nonk"))),
751799
parse_delimited::<VerboseError<&str>>('"', " ")(r#""noog nonk""#)
752800
);
753801
}
754802

755803
#[test]
756804
fn test_parse_undelimited_with_quotes() {
757805
assert_eq!(
758-
Ok(("", r#""noog" nonk"#)),
806+
Ok(("", Cow::Borrowed(r#""noog" nonk"#))),
759807
parse_undelimited::<VerboseError<&str>>(":")(r#""noog" nonk"#)
760808
);
761809
}
@@ -829,7 +877,7 @@ mod test {
829877
value: r#""zork one" : "zoog\"zink\"zork" nonk : nink"#,
830878
key_value_delimiter: ":",
831879
],
832-
want: Ok(value!({"zork one": r#"zoog\"zink\"zork"#,
880+
want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
833881
nonk: "nink"})),
834882
tdef: type_def(),
835883
}
@@ -840,7 +888,7 @@ mod test {
840888
key_value_delimiter: ":",
841889
field_delimiter: ",",
842890
],
843-
want: Ok(value!({"zork one": r#"zoog\"zink\"zork"#,
891+
want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
844892
nonk: "nink"})),
845893
tdef: type_def(),
846894
}
@@ -851,7 +899,7 @@ mod test {
851899
key_value_delimiter: ":",
852900
field_delimiter: ",",
853901
],
854-
want: Ok(value!({"zork one": r#"zoog\"zink\"zork"#,
902+
want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
855903
nonk: "nink"})),
856904
tdef: type_def(),
857905
}
@@ -862,7 +910,7 @@ mod test {
862910
key_value_delimiter: "--",
863911
field_delimiter: "||",
864912
],
865-
want: Ok(value!({"zork one": r#"zoog\"zink\"zork"#,
913+
want: Ok(value!({"zork one": r#"zoog"zink"zork"#,
866914
nonk: "nink"})),
867915
tdef: type_def(),
868916
}
@@ -968,7 +1016,7 @@ mod test {
9681016
field_delimiter: " ",
9691017
],
9701018
want: Ok(value!({
971-
"field": "quote -> \\\" <-",
1019+
"field": "quote -> \" <-",
9721020
"level": "info",
9731021
})),
9741022
tdef: type_def(),

0 commit comments

Comments
 (0)
Please sign in to comment.