Skip to content

Commit f0b3420

Browse files
authored
Merge pull request #520 from Mingun/merge-text-nodes
Merge consequent text and CDATA events into one string
2 parents cfda567 + ac1ad0c commit f0b3420

File tree

8 files changed

+1381
-273
lines changed

8 files changed

+1381
-273
lines changed

Changelog.md

+8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
enums from textual content
1717
- [#556]: `to_writer` and `to_string` now accept `?Sized` types
1818
- [#556]: Add new `to_writer_with_root` and `to_string_with_root` helper functions
19+
- [#520]: Add methods `BytesText::inplace_trim_start` and `BytesText::inplace_trim_end`
20+
to trim leading and trailing spaces from text events
1921

2022
### Bug Fixes
2123

@@ -25,12 +27,18 @@
2527
sequence type (for example, `Vec` or tuple)
2628
- [#540]: Fix a compilation error (probably a rustc bug) in some circumstances.
2729
`Serializer::new` and `Serializer::with_root` now accept only references to `Write`r.
30+
- [#520]: Merge consecutive (delimited only by comments and processing instructions)
31+
texts and CDATA when deserializing using the serde deserializer. `DeEvent::Text` and
32+
`DeEvent::CData` events were replaced by `DeEvent::Text` with merged content.
33+
The same behavior for the `Reader` is not implemented (yet?) and should be
34+
implemented manually
2835

2936
### Misc Changes
3037

3138
[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
3239
[#490]: https://github.com/tafia/quick-xml/pull/490
3340
[#510]: https://github.com/tafia/quick-xml/issues/510
41+
[#520]: https://github.com/tafia/quick-xml/pull/520
3442
[#537]: https://github.com/tafia/quick-xml/issues/537
3543
[#540]: https://github.com/tafia/quick-xml/issues/540
3644
[#541]: https://github.com/tafia/quick-xml/pull/541

src/de/map.rs

+25-48
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,14 @@ where
239239
// We shouldn't have both `$value` and `$text` fields in the same
240240
// struct, so if we have `$value` field, then we should deserialize
241241
// text content to `$value`
242-
DeEvent::Text(_) | DeEvent::CData(_) if self.has_value_field => {
242+
DeEvent::Text(_) if self.has_value_field => {
243243
self.source = ValueSource::Content;
244244
// Deserialize `key` from special attribute name which means
245245
// that value should be taken from the text content of the
246246
// XML node
247247
seed.deserialize(VALUE_KEY.into_deserializer()).map(Some)
248248
}
249-
DeEvent::Text(_) | DeEvent::CData(_) => {
249+
DeEvent::Text(_) => {
250250
self.source = ValueSource::Text;
251251
// Deserialize `key` from special attribute name which means
252252
// that value should be taken from the text content of the
@@ -307,19 +307,11 @@ where
307307
// </any-tag>
308308
// The whole map represented by an `<any-tag>` element, the map key
309309
// is implicit and equals to the `TEXT_KEY` constant, and the value
310-
// is a `Text` or a `CData` event (the value deserializer will see one
311-
// of that events)
310+
// is a `Text` event (the value deserializer will see that event)
312311
// This case is checked by "xml_schema_lists::element" tests in tests/serde-de.rs
313312
ValueSource::Text => match self.de.next()? {
314-
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(
315-
// Comment to prevent auto-formatting
316-
e.decode(true)?,
317-
)),
318-
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(
319-
// Comment to prevent auto-formatting
320-
e.decode()?,
321-
)),
322-
// SAFETY: We set `Text` only when we seen `Text` or `CData`
313+
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)),
314+
// SAFETY: We set `Text` only when we have seen `Text`
323315
_ => unreachable!(),
324316
},
325317
// This arm processes the following XML shape:
@@ -431,7 +423,7 @@ where
431423
///
432424
/// The whole map represented by an `<any-tag>` element, the map key is
433425
/// implicit and equals to the [`VALUE_KEY`] constant, and the value is
434-
/// a [`Text`], a [`CData`], or a [`Start`] event (the value deserializer
426+
/// a [`Text`] or a [`Start`] event (the value deserializer
435427
/// will see one of those events). In the first case the value of this
436428
/// field does not matter (because we already see the textual event and there
437429
/// are no reasons to look "inside" something), but in the last case the primitives
@@ -452,7 +444,6 @@ where
452444
/// as accepting "text content", which is what `$text` currently means.
453445
///
454446
/// [`Text`]: DeEvent::Text
455-
/// [`CData`]: DeEvent::CData
456447
/// [`Start`]: DeEvent::Start
457448
allow_start: bool,
458449
}
@@ -464,11 +455,11 @@ where
464455
/// Returns the next string as the concatenated content of consecutive [`Text`] and
465456
/// [`CData`] events, used inside [`deserialize_primitives!()`].
466457
///
467-
/// [`Text`]: DeEvent::Text
468-
/// [`CData`]: DeEvent::CData
458+
/// [`Text`]: crate::events::Event::Text
459+
/// [`CData`]: crate::events::Event::CData
469460
#[inline]
470-
fn read_string(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
471-
self.map.de.read_string_impl(unescape, self.allow_start)
461+
fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
462+
self.map.de.read_string_impl(self.allow_start)
472463
}
473464
}
474465

@@ -631,8 +622,8 @@ impl<'de> TagFilter<'de> {
631622
/// Depending on [`Self::filter`], only some of those possible constructs would be
632623
/// an element.
633624
///
634-
/// [`Text`]: DeEvent::Text
635-
/// [`CData`]: DeEvent::CData
625+
/// [`Text`]: crate::events::Event::Text
626+
/// [`CData`]: crate::events::Event::CData
636627
struct MapValueSeqAccess<'de, 'a, 'm, R>
637628
where
638629
R: XmlRead<'de>,
@@ -697,7 +688,7 @@ where
697688
// opened tag `self.map.start`
698689
DeEvent::Eof => Err(DeError::UnexpectedEof),
699690

700-
// Start(tag), Text, CData
691+
// Start(tag), Text
701692
_ => seed
702693
.deserialize(SeqItemDeserializer { map: self.map })
703694
.map(Some),
@@ -725,11 +716,11 @@ where
725716
/// Returns the next string as the concatenated content of consecutive [`Text`] and
726717
/// [`CData`] events, used inside [`deserialize_primitives!()`].
727718
///
728-
/// [`Text`]: DeEvent::Text
729-
/// [`CData`]: DeEvent::CData
719+
/// [`Text`]: crate::events::Event::Text
720+
/// [`CData`]: crate::events::Event::CData
730721
#[inline]
731-
fn read_string(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
732-
self.map.de.read_string_impl(unescape, true)
722+
fn read_string(&mut self) -> Result<Cow<'de, str>, DeError> {
723+
self.map.de.read_string_impl(true)
733724
}
734725
}
735726

@@ -781,31 +772,17 @@ where
781772
V: Visitor<'de>,
782773
{
783774
match self.map.de.next()? {
784-
DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content(
785-
// Comment to prevent auto-formatting
786-
e.decode(true)?,
787-
)
788-
.deserialize_seq(visitor),
789-
DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content(
790-
// Comment to prevent auto-formatting
791-
e.decode()?,
792-
)
793-
.deserialize_seq(visitor),
775+
DeEvent::Text(e) => {
776+
SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
777+
}
794778
// This is a sequence element. We cannot treat it as another flatten
795779
// sequence if type will require `deserialize_seq`. We instead forward
796780
// it to `xs:simpleType` implementation
797781
DeEvent::Start(e) => {
798782
let value = match self.map.de.next()? {
799-
DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content(
800-
// Comment to prevent auto-formatting
801-
e.decode(true)?,
802-
)
803-
.deserialize_seq(visitor),
804-
DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content(
805-
// Comment to prevent auto-formatting
806-
e.decode()?,
807-
)
808-
.deserialize_seq(visitor),
783+
DeEvent::Text(e) => {
784+
SimpleTypeDeserializer::from_text_content(e).deserialize_seq(visitor)
785+
}
809786
e => Err(DeError::Unsupported(
810787
format!("unsupported event {:?}", e).into(),
811788
)),
@@ -814,8 +791,8 @@ where
814791
self.map.de.read_to_end(e.name())?;
815792
value
816793
}
817-
// SAFETY: we use that deserializer only when Start(element), Text,
818-
// or CData event Start(tag), Text, CData was peeked already
794+
// SAFETY: we use that deserializer only when Start(element) or Text
795+
// event was peeked already
819796
_ => unreachable!(),
820797
}
821798
}

0 commit comments

Comments
 (0)