Skip to content

Commit 54159d0

Browse files
committed
Ensure, that map keys serialized as valid xml names
1 parent 0336dcb commit 54159d0

File tree

6 files changed

+488
-8
lines changed

6 files changed

+488
-8
lines changed

Changelog.md

+6
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,17 @@
1414

1515
### Bug Fixes
1616

17+
- [#468]: Ensure that serialization of map keys always produces valid XML names.
18+
In particular, that means that maps with numeric and numeric-like keys (for
19+
example, `"42"`) can no longer be serialized because an [XML name] cannot start
20+
with a digit.
21+
1722
### Misc Changes
1823

1924
- [#468]: Content of `DeError::Unsupported` changed from `&'static str` to `Cow<'static, str>`
2025

2126
[#468]: https://github.com/tafia/quick-xml/pull/468
27+
[XML name]: https://www.w3.org/TR/REC-xml/#NT-Name
2228

2329
## 0.24.0 -- 2022-08-28
2430

src/errors.rs

+16
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,15 @@ pub mod serialize {
188188
/// An attempt to deserialize to a type, that is not supported by the XML
189189
/// store at current position, for example, attempt to deserialize `struct`
190190
/// from attribute or attempt to deserialize binary data.
191+
///
192+
/// Serialized type cannot be represented in an XML due to violation of the
193+
/// XML rules in the final XML document. For example, attempt to serialize
194+
/// a `HashMap<{integer}, ...>` would cause this error because [XML name]
195+
/// cannot start with a digit or a hyphen (minus sign). The same result
196+
/// would occur if the map key is a complex type that is not serialized as
197+
/// a primitive type (i.e. string, char, bool, unit struct or unit variant).
198+
///
199+
/// [XML name]: https://www.w3.org/TR/REC-xml/#sec-common-syn
191200
Unsupported(Cow<'static, str>),
192201
/// Too many events were skipped while deserializing a sequence, event limit
193202
/// exceeded. The limit was provided as an argument
@@ -294,4 +303,11 @@ pub mod serialize {
294303
Self::InvalidFloat(e)
295304
}
296305
}
306+
307+
impl From<fmt::Error> for DeError {
308+
#[inline]
309+
fn from(e: fmt::Error) -> Self {
310+
Self::Custom(e.to_string())
311+
}
312+
}
297313
}

src/se/key.rs

+353
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
use crate::errors::serialize::DeError;
2+
use serde::ser::{Impossible, Serialize, Serializer};
3+
use serde::serde_if_integer128;
4+
use std::fmt::Write;
5+
6+
/// Checks whether `ch` is allowed as the first character of an XML 1.1 name.
///
/// Almost all characters can form a name. Citation from <https://www.w3.org/TR/xml11/#sec-xml11>:
///
/// > The overall philosophy of names has changed since XML 1.0. Whereas XML 1.0
/// > provided a rigid definition of names, wherein everything that was not permitted
/// > was forbidden, XML 1.1 names are designed so that everything that is not
/// > forbidden (for a specific reason) is permitted. Since Unicode will continue
/// > to grow past version 4.0, further changes to XML can be avoided by allowing
/// > almost any character, including those not yet assigned, in names.
///
/// The accepted characters are the ones listed in the `NameStartChar` production:
/// <https://www.w3.org/TR/xml11/#NT-NameStartChar>
const fn is_xml11_name_start_char(ch: char) -> bool {
    matches!(
        ch,
        ':' | 'A'..='Z'
            | '_'
            | 'a'..='z'
            | '\u{00C0}'..='\u{00D6}'
            | '\u{00D8}'..='\u{00F6}'
            | '\u{00F8}'..='\u{02FF}'
            | '\u{0370}'..='\u{037D}'
            | '\u{037F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
37+
/// <https://www.w3.org/TR/REC-xml/#NT-NameChar>
38+
const fn is_xml11_name_char(ch: char) -> bool {
39+
match ch {
40+
'-' | '.' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => {
41+
true
42+
}
43+
_ => is_xml11_name_start_char(ch),
44+
}
45+
}
46+
47+
////////////////////////////////////////////////////////////////////////////////////////////////////
48+
49+
/// Serializer that accepts only plain values, so that the text it produces
/// can serve as the name of an XML tag or attribute.
///
/// Names containing characters that are [not allowed] in XML names are
/// rejected.
///
/// [not allowed]: https://www.w3.org/TR/REC-xml/#sec-common-syn
pub struct XmlNameSerializer<W>
where
    W: Write,
{
    /// Destination that receives the serialized name
    pub writer: W,
}
60+
61+
impl<W: Write> XmlNameSerializer<W> {
62+
//TODO: customization point - allow user to decide if he want to reject or encode the name
63+
fn write_str(&mut self, value: &str) -> Result<(), DeError> {
64+
match value.chars().next() {
65+
Some(ch) if !is_xml11_name_start_char(ch) => Err(DeError::Unsupported(
66+
format!(
67+
"character `{}` is not allowed at the start of an XML name",
68+
ch
69+
)
70+
.into(),
71+
)),
72+
_ => match value.matches(|ch| !is_xml11_name_char(ch)).next() {
73+
Some(s) => Err(DeError::Unsupported(
74+
format!("character `{}` is not allowed in an XML name", s).into(),
75+
)),
76+
None => Ok(self.writer.write_str(value)?),
77+
},
78+
}
79+
}
80+
}
81+
82+
impl<W: Write> Serializer for XmlNameSerializer<W> {
83+
type Ok = W;
84+
type Error = DeError;
85+
86+
type SerializeSeq = Impossible<Self::Ok, Self::Error>;
87+
type SerializeTuple = Impossible<Self::Ok, Self::Error>;
88+
type SerializeTupleStruct = Impossible<Self::Ok, Self::Error>;
89+
type SerializeTupleVariant = Impossible<Self::Ok, Self::Error>;
90+
type SerializeMap = Impossible<Self::Ok, Self::Error>;
91+
type SerializeStruct = Impossible<Self::Ok, Self::Error>;
92+
type SerializeStructVariant = Impossible<Self::Ok, Self::Error>;
93+
94+
write_primitive!();
95+
96+
fn serialize_str(mut self, value: &str) -> Result<Self::Ok, Self::Error> {
97+
self.write_str(value)?;
98+
Ok(self.writer)
99+
}
100+
101+
/// We cannot store anything, so the absence of a unit and presence of it
102+
/// does not differ, so serialization of unit returns `Err(Unsupported)`
103+
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
104+
Err(DeError::Unsupported(
105+
"unit type `()` cannot be serialized as an XML tag name".into(),
106+
))
107+
}
108+
109+
/// We cannot store both a variant discriminant and a variant value,
110+
/// so serialization of enum newtype variant returns `Err(Unsupported)`
111+
fn serialize_newtype_variant<T: ?Sized + Serialize>(
112+
self,
113+
name: &'static str,
114+
_variant_index: u32,
115+
variant: &'static str,
116+
_value: &T,
117+
) -> Result<Self::Ok, DeError> {
118+
Err(DeError::Unsupported(
119+
format!(
120+
"enum newtype variant `{}::{}` cannot be serialized as an XML tag name",
121+
name, variant
122+
)
123+
.into(),
124+
))
125+
}
126+
127+
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
128+
Err(DeError::Unsupported(
129+
"sequence cannot be serialized as an XML tag name".into(),
130+
))
131+
}
132+
133+
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
134+
Err(DeError::Unsupported(
135+
"tuple cannot be serialized as an XML tag name".into(),
136+
))
137+
}
138+
139+
fn serialize_tuple_struct(
140+
self,
141+
name: &'static str,
142+
_len: usize,
143+
) -> Result<Self::SerializeTupleStruct, Self::Error> {
144+
Err(DeError::Unsupported(
145+
format!(
146+
"tuple struct `{}` cannot be serialized as an XML tag name",
147+
name
148+
)
149+
.into(),
150+
))
151+
}
152+
153+
fn serialize_tuple_variant(
154+
self,
155+
name: &'static str,
156+
_variant_index: u32,
157+
variant: &'static str,
158+
_len: usize,
159+
) -> Result<Self::SerializeTupleVariant, Self::Error> {
160+
Err(DeError::Unsupported(
161+
format!(
162+
"enum tuple variant `{}::{}` cannot be serialized as an XML tag name",
163+
name, variant
164+
)
165+
.into(),
166+
))
167+
}
168+
169+
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
170+
Err(DeError::Unsupported(
171+
"map cannot be serialized as an XML tag name".into(),
172+
))
173+
}
174+
175+
fn serialize_struct(
176+
self,
177+
name: &'static str,
178+
_len: usize,
179+
) -> Result<Self::SerializeStruct, Self::Error> {
180+
Err(DeError::Unsupported(
181+
format!("struct `{}` cannot be serialized as an XML tag name", name).into(),
182+
))
183+
}
184+
185+
fn serialize_struct_variant(
186+
self,
187+
name: &'static str,
188+
_variant_index: u32,
189+
variant: &'static str,
190+
_len: usize,
191+
) -> Result<Self::SerializeStructVariant, Self::Error> {
192+
Err(DeError::Unsupported(
193+
format!(
194+
"enum struct variant `{}::{}` cannot be serialized as an XML tag name",
195+
name, variant
196+
)
197+
.into(),
198+
))
199+
}
200+
}
201+
202+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;
    use serde::Serialize;
    use std::collections::BTreeMap;

    // Fixture types covering the kinds of values a map key may serialize as.

    #[derive(Debug, Serialize, PartialEq)]
    struct Unit;

    /// Unit struct whose serialized name is not a valid XML name
    #[derive(Debug, Serialize, PartialEq)]
    #[serde(rename = "<\"&'>")]
    struct UnitEscaped;

    #[derive(Debug, Serialize, PartialEq)]
    struct Newtype(bool);

    #[derive(Debug, Serialize, PartialEq)]
    struct Tuple(&'static str, usize);

    #[derive(Debug, Serialize, PartialEq)]
    struct Struct {
        key: &'static str,
        val: usize,
    }

    #[derive(Debug, Serialize, PartialEq)]
    enum Enum {
        Unit,
        /// Unit variant whose serialized name is not a valid XML name
        #[serde(rename = "<\"&'>")]
        UnitEscaped,
        Newtype(bool),
        Tuple(&'static str, usize),
        Struct {
            key: &'static str,
            val: usize,
        },
    }

    /// Generates a test `$name` asserting that `$data` serializes
    /// successfully and that the returned writer contains `$expected`
    macro_rules! serialize_as {
        ($name:ident: $data:expr => $expected:literal) => {
            #[test]
            fn $name() {
                let written = $data
                    .serialize(XmlNameSerializer {
                        writer: String::new(),
                    })
                    .unwrap();
                assert_eq!(written, $expected);
            }
        };
    }

    /// Generates a test `$name` asserting that serializing `$data` fails
    /// with the error `$kind` carrying `$reason`, and that nothing was
    /// written to the buffer
    macro_rules! err {
        ($name:ident: $data:expr => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let mut buffer = String::new();
                let failure = $data
                    .serialize(XmlNameSerializer {
                        writer: &mut buffer,
                    })
                    .unwrap_err();

                match failure {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    other => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $reason,
                        other
                    ),
                }
                assert_eq!(buffer, "");
            }
        };
    }

    // Booleans
    serialize_as!(false_: false => "false");
    serialize_as!(true_: true => "true");

    // Signed integers
    err!(i8_: -42i8 => Unsupported("character `-` is not allowed at the start of an XML name"));
    err!(i16_: -4200i16 => Unsupported("character `-` is not allowed at the start of an XML name"));
    err!(i32_: -42000000i32 => Unsupported("character `-` is not allowed at the start of an XML name"));
    err!(i64_: -42000000000000i64 => Unsupported("character `-` is not allowed at the start of an XML name"));
    err!(isize_: -42000000000000isize => Unsupported("character `-` is not allowed at the start of an XML name"));

    // Unsigned integers
    err!(u8_: 42u8 => Unsupported("character `4` is not allowed at the start of an XML name"));
    err!(u16_: 4200u16 => Unsupported("character `4` is not allowed at the start of an XML name"));
    err!(u32_: 42000000u32 => Unsupported("character `4` is not allowed at the start of an XML name"));
    err!(u64_: 42000000000000u64 => Unsupported("character `4` is not allowed at the start of an XML name"));
    err!(usize_: 42000000000000usize => Unsupported("character `4` is not allowed at the start of an XML name"));

    serde_if_integer128! {
        err!(i128_: -420000000000000000000000000000i128 => Unsupported("character `-` is not allowed at the start of an XML name"));
        err!(u128_: 420000000000000000000000000000u128 => Unsupported("character `4` is not allowed at the start of an XML name"));
    }

    // Floats
    err!(f32_: 4.2f32 => Unsupported("character `4` is not allowed at the start of an XML name"));
    err!(f64_: 4.2f64 => Unsupported("character `4` is not allowed at the start of an XML name"));

    // Characters
    serialize_as!(char_non_escaped: 'h' => "h");
    err!(char_lt: '<' => Unsupported("character `<` is not allowed at the start of an XML name"));
    err!(char_gt: '>' => Unsupported("character `>` is not allowed at the start of an XML name"));
    err!(char_amp: '&' => Unsupported("character `&` is not allowed at the start of an XML name"));
    err!(char_apos: '\'' => Unsupported("character `'` is not allowed at the start of an XML name"));
    err!(char_quot: '"' => Unsupported("character `\"` is not allowed at the start of an XML name"));

    // Strings
    serialize_as!(str_valid_name: "valid-name" => "valid-name");
    err!(str_space: "string with spaces" => Unsupported("character ` ` is not allowed in an XML name"));
    err!(str_lt: "string<" => Unsupported("character `<` is not allowed in an XML name"));
    err!(str_gt: "string>" => Unsupported("character `>` is not allowed in an XML name"));
    err!(str_amp: "string&" => Unsupported("character `&` is not allowed in an XML name"));
    err!(str_apos: "string'" => Unsupported("character `'` is not allowed in an XML name"));
    err!(str_quot: "string\"" => Unsupported("character `\"` is not allowed in an XML name"));

    // Bytes
    err!(bytes: Bytes(b"<\"escaped & bytes'>")
        => Unsupported("`serialize_bytes` not supported yet"));

    // Options
    serialize_as!(option_none: Option::<&str>::None => "");
    serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string");

    // Unit and unit-like types
    err!(unit: ()
        => Unsupported("unit type `()` cannot be serialized as an XML tag name"));
    serialize_as!(unit_struct: Unit => "Unit");
    err!(unit_struct_escaped: UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name"));

    serialize_as!(enum_unit: Enum::Unit => "Unit");
    err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name"));

    // Newtypes
    serialize_as!(newtype: Newtype(true) => "true");
    err!(enum_newtype: Enum::Newtype(false)
        => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an XML tag name"));

    // Sequences and tuples
    err!(seq: vec![1, 2, 3]
        => Unsupported("sequence cannot be serialized as an XML tag name"));
    err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize)
        => Unsupported("tuple cannot be serialized as an XML tag name"));
    err!(tuple_struct: Tuple("first", 42)
        => Unsupported("tuple struct `Tuple` cannot be serialized as an XML tag name"));
    err!(enum_tuple: Enum::Tuple("first", 42)
        => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an XML tag name"));

    // Maps and structs
    err!(map: BTreeMap::from([("_1", 2), ("_3", 4)])
        => Unsupported("map cannot be serialized as an XML tag name"));
    err!(struct_: Struct { key: "answer", val: 42 }
        => Unsupported("struct `Struct` cannot be serialized as an XML tag name"));
    err!(enum_struct: Enum::Struct { key: "answer", val: 42 }
        => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an XML tag name"));
}

0 commit comments

Comments
 (0)