Skip to content

Commit 52b9763

Browse files
tniessen and tmpfs authored
Allow redundant CHARSET=UTF-8 parameter (#17)
* Allow redundant CHARSET=UTF-8 parameter Fixes: #16 Co-authored-by: muji <[email protected]> * Update README --------- Co-authored-by: muji <[email protected]>
1 parent 8845d16 commit 52b9763

File tree

5 files changed

+61
-2
lines changed

5 files changed

+61
-2
lines changed

Diff for: README.md

+2
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22

33
Fast and correct vCard parser based on [RFC6350](https://www.rfc-editor.org/rfc/rfc6350); see the [API documentation](https://docs.rs/vcard4/latest/vcard4/) for more information.
44

5+
For interoperability with older software, the parser will accept input with a `CHARSET` parameter that has a value of `UTF-8`; any other encoding value for `CHARSET` will generate an error. However, this parameter is not part of [RFC6350](https://www.rfc-editor.org/rfc/rfc6350) and is therefore not included in the string output for a vCard.
6+
57
License is MIT or Apache-2.0.

Diff for: src/error.rs

+4
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,8 @@ pub enum Error {
184184
/// Error generated during lexing.
185185
#[error(transparent)]
186186
LexError(#[from] LexError),
187+
188+
/// Error generated when a CHARSET other than UTF-8 is specified.
189+
#[error("CHARSET='{0}' is invalid, expected UTF-8")]
190+
CharsetParameter(String),
187191
}

Diff for: src/name.rs

+3
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ pub(crate) const SORT_AS: &str = "SORT-AS";
5858
// NOTE: we use GEO from the property names
5959
// NOTE: we use TZ from the property names
6060
pub(crate) const LABEL: &str = "LABEL";
61+
// RFC 6350 removed the CHARSET parameter because it requires UTF-8, but some
62+
// implementations still emit CHARSET=UTF-8. This is the only value we allow.
63+
pub(crate) const CHARSET: &str = "CHARSET";
6164

6265
// Apple uses this for embedded photos
6366
pub(crate) const ENCODING: &str = "ENCODING";

Diff for: src/parser.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pub(crate) enum Token {
4646
#[token("\"")]
4747
DoubleQuote,
4848

49-
#[regex("(?i:LANGUAGE|VALUE|PREF|ALTID|PID|TYPE|MEDIATYPE|CALSCALE|SORT-AS|LABEL|ENCODING)")]
49+
#[regex("(?i:LANGUAGE|VALUE|PREF|ALTID|PID|TYPE|MEDIATYPE|CALSCALE|SORT-AS|CHARSET|LABEL|ENCODING)")]
5050
ParameterKey,
5151

5252
#[token("=")]
@@ -372,6 +372,13 @@ impl<'s> VcardParser<'s> {
372372
}
373373
}
374374
}
375+
CHARSET => {
376+
// Ignore CHARSET=UTF-8 for compatibility with software that
377+
// unnecessarily (and in spite of RFC 6350) adds this parameter.
378+
if value != "UTF-8" {
379+
return Err(Error::CharsetParameter(value));
380+
}
381+
}
375382
LABEL => {
376383
if property_upper_name != ADR {
377384
return Err(Error::InvalidLabel(

Diff for: tests/parameters.rs

+44-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use vcard4::{
99
Pid, RelatedType, TelephoneType, TimeZoneParameter, TypeParameter,
1010
ValueType,
1111
},
12-
parse,
12+
parse, Error,
1313
};
1414

1515
use test_helpers::{assert_language, assert_media_type, assert_round_trip};
@@ -327,3 +327,46 @@ END:VCARD"#;
327327

328328
Ok(())
329329
}
330+
331+
#[test]
332+
fn param_charset() -> Result<()> {
333+
let input = r#"BEGIN:VCARD
334+
VERSION:4.0
335+
FN;CHARSET=UTF-8:Jane Doe
336+
ORG:Some Organization
337+
TITLE;CHARSET=UTF-8:External employee
338+
KIND:individual
339+
N;CHARSET=UTF-8:Doe;Jane;;;
340+
END:VCARD"#;
341+
342+
let expected = r#"BEGIN:VCARD
343+
VERSION:4.0
344+
KIND:individual
345+
FN:Jane Doe
346+
N:Doe;Jane;;;
347+
TITLE:External employee
348+
ORG:Some Organization
349+
END:VCARD
350+
"#
351+
.replace('\n', "\r\n");
352+
353+
let mut vcards = parse(input)?;
354+
assert_eq!(1, vcards.len());
355+
let card = vcards.remove(0);
356+
let prop = card.formatted_name.get(0).unwrap();
357+
assert_eq!("Jane Doe", prop.value);
358+
assert_eq!(expected, card.to_string());
359+
360+
let input = r#"BEGIN:VCARD
361+
VERSION:4.0
362+
FN;CHARSET=ISO-8859-1:Jane Doe
363+
N;CHARSET=UTF-8:Doe;Jane;;;
364+
END:VCARD"#;
365+
let err = parse(input).expect_err("Non-UTF-8 CHARSET should fail");
366+
assert!(
367+
matches!(&err, Error::CharsetParameter(x) if x == "ISO-8859-1"),
368+
"Unexpected error: {err:?}"
369+
);
370+
371+
Ok(())
372+
}

0 commit comments

Comments
 (0)