Skip to content

Commit c0d6284

Browse files
committed
Remove quick_xml::encoding::Decoder and Reader::decoder()
As quick-xml will pre-decode everything, it is unnecessary.
1 parent dd64262 commit c0d6284

12 files changed

+34
-111
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ async-tokio = ["tokio"]
9191
## let mut buf = Vec::new();
9292
## let mut unsupported = false;
9393
## loop {
94-
## if !reader.decoder().encoding().is_ascii_compatible() {
94+
## if !reader.encoding().is_ascii_compatible() {
9595
## unsupported = true;
9696
## break;
9797
## }

Changelog.md

+3
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,9 @@
242242

243243
### Misc Changes
244244

245+
- [#441]: `Reader::decoder()` removed as it is no longer necessary (`Reader` already
246+
decodes everything for you). `Reader::encoding()` is provided to make the current
247+
encoding accessible as it was before.
245248
- [#481]: Removed the uses of `const fn` added in version 0.24 in favor of a lower minimum
246249
supported Rust version (1.46.0). Minimum supported Rust version is now verified in the CI.
247250
- [#489]: Reduced the size of the package uploaded into the crates.io by excluding

src/de/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1931,7 +1931,7 @@ pub use crate::errors::serialize::DeError;
19311931
pub use resolver::{EntityResolver, NoEntityResolver};
19321932

19331933
use crate::{
1934-
encoding::{Decoder, Utf8BytesReader},
1934+
encoding::Utf8BytesReader,
19351935
errors::Error,
19361936
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
19371937
name::QName,

src/encoding.rs

-68
Original file line numberDiff line numberDiff line change
@@ -75,74 +75,6 @@ impl<R: io::Read> io::BufRead for Utf8BytesReader<R> {
7575
}
7676
}
7777

78-
/// Decoder of byte slices into strings.
79-
///
80-
/// If feature `encoding` is enabled, this encoding taken from the `"encoding"`
81-
/// XML declaration or assumes UTF-8, if XML has no <?xml ?> declaration, encoding
82-
/// key is not defined or contains unknown encoding.
83-
///
84-
/// The library supports any UTF-8 compatible encodings that crate `encoding_rs`
85-
/// is supported. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
86-
///
87-
/// If feature `encoding` is disabled, the decoder is always UTF-8 decoder:
88-
/// any XML declarations are ignored.
89-
///
90-
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
91-
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
92-
pub struct Decoder {
93-
#[cfg(feature = "encoding")]
94-
pub(crate) encoding: &'static Encoding,
95-
}
96-
97-
impl Decoder {
98-
pub(crate) fn utf8() -> Self {
99-
Decoder {
100-
#[cfg(feature = "encoding")]
101-
encoding: UTF_8,
102-
}
103-
}
104-
105-
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
106-
pub(crate) fn utf16() -> Self {
107-
Decoder { encoding: UTF_16LE }
108-
}
109-
}
110-
111-
impl Decoder {
112-
/// Returns the `Reader`s encoding.
113-
///
114-
/// This encoding will be used by [`decode`].
115-
///
116-
/// [`decode`]: Self::decode
117-
#[cfg(feature = "encoding")]
118-
pub fn encoding(&self) -> &'static Encoding {
119-
self.encoding
120-
}
121-
122-
/// ## Without `encoding` feature
123-
///
124-
/// Decodes an UTF-8 slice regardless of XML declaration and ignoring BOM
125-
/// if it is present in the `bytes`.
126-
///
127-
/// ## With `encoding` feature
128-
///
129-
/// Decodes specified bytes using encoding, declared in the XML, if it was
130-
/// declared there, or UTF-8 otherwise, and ignoring BOM if it is present
131-
/// in the `bytes`.
132-
///
133-
/// ----
134-
/// Returns an error in case of malformed sequences in the `bytes`.
135-
pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
136-
#[cfg(not(feature = "encoding"))]
137-
let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));
138-
139-
#[cfg(feature = "encoding")]
140-
let decoded = decode(bytes, self.encoding);
141-
142-
decoded
143-
}
144-
}
145-
14678
/// Decodes the provided bytes using the specified encoding.
14779
///
14880
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.

src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ pub mod utils;
7171
pub mod writer;
7272

7373
// reexports
74-
pub use crate::encoding::Decoder;
7574
#[cfg(feature = "serialize")]
7675
pub use crate::errors::serialize::DeError;
7776
pub use crate::errors::{Error, Result};

src/reader/buffered_reader.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -445,9 +445,9 @@ mod test {
445445
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
446446
let mut buf = Vec::new();
447447

448-
assert_eq!(reader.decoder().encoding(), UTF_8);
448+
assert_eq!(reader.encoding(), UTF_8);
449449
reader.read_event_into(&mut buf).unwrap();
450-
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);
450+
assert_eq!(reader.encoding(), WINDOWS_1251);
451451

452452
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
453453
}
@@ -460,12 +460,12 @@ mod test {
460460
);
461461
let mut buf = Vec::new();
462462

463-
assert_eq!(reader.decoder().encoding(), UTF_8);
463+
assert_eq!(reader.encoding(), UTF_8);
464464
reader.read_event_into(&mut buf).unwrap();
465-
assert_eq!(reader.decoder().encoding(), UTF_16LE);
465+
assert_eq!(reader.encoding(), UTF_16LE);
466466

467467
reader.read_event_into(&mut buf).unwrap();
468-
assert_eq!(reader.decoder().encoding(), UTF_16LE);
468+
assert_eq!(reader.encoding(), UTF_16LE);
469469

470470
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
471471
}

src/reader/mod.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::ops::Range;
66
#[cfg(feature = "encoding")]
77
use encoding_rs::{Encoding, UTF_8};
88

9-
use crate::encoding::{Decoder, Utf8BytesReader};
9+
use crate::encoding::Utf8BytesReader;
1010
use crate::errors::{Error, Result};
1111
use crate::events::Event;
1212
use crate::reader::parser::Parser;
@@ -350,8 +350,7 @@ macro_rules! read_to_end {
350350
depth -= 1;
351351
}
352352
Ok(Event::Eof) => {
353-
let name = $self.decoder().decode($end.as_ref().as_bytes());
354-
return Err(Error::UnexpectedEof(format!("</{:?}>", name)));
353+
return Err(Error::UnexpectedEof(format!("</{:?}>", $end.as_ref())));
355354
}
356355
_ => (),
357356
}
@@ -598,16 +597,17 @@ impl<R> Reader<R> {
598597
}
599598
}
600599

601-
/// Get the decoder, used to decode bytes, read by this reader, to the strings.
600+
/// Get the encoding this reader is currently using to decode strings.
602601
///
603602
/// If `encoding` feature is enabled, the used encoding may change after
604603
/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
605604
///
606605
/// If `encoding` feature is enabled and no encoding is specified in declaration,
607606
/// defaults to UTF-8.
607+
#[cfg(feature = "encoding")]
608608
#[inline]
609-
pub fn decoder(&self) -> Decoder {
610-
self.parser.decoder()
609+
pub fn encoding(&self) -> &'static Encoding {
610+
self.parser.encoding.encoding()
611611
}
612612
}
613613

src/reader/ns_reader.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -775,13 +775,11 @@ impl<'i> NsReader<&'i [u8]> {
775775
/// it reads, and if, for example, it contains CDATA section, attempt to
776776
/// unescape its content will spoil data.
777777
///
778-
/// Any text will be decoded using the XML current [`decoder()`].
779-
///
780778
/// Actually, this method performs the following code:
781779
///
782780
/// ```ignore
783781
/// let span = reader.read_to_end(end)?;
784-
/// let text = reader.decoder().decode(&reader.inner_slice[span]);
782+
/// let text = std::str::from_utf8(&reader.inner_slice[span]);
785783
/// ```
786784
///
787785
/// # Examples
@@ -828,7 +826,6 @@ impl<'i> NsReader<&'i [u8]> {
828826
/// ```
829827
///
830828
/// [`Start`]: Event::Start
831-
/// [`decoder()`]: Reader::decoder()
832829
#[inline]
833830
pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
834831
self.reader.read_text(end)

src/reader/parser.rs

+10-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#[cfg(feature = "encoding")]
22
use encoding_rs::UTF_8;
33

4-
use crate::encoding::Decoder;
54
use crate::errors::{Error, Result};
65
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
76
#[cfg(feature = "encoding")]
@@ -55,6 +54,16 @@ pub(super) struct Parser {
5554

5655
#[cfg(feature = "encoding")]
5756
/// Reference to the encoding used to read an XML
57+
///
58+
/// If feature `encoding` is enabled, this encoding is taken from the `"encoding"`
59+
/// XML declaration or assumes UTF-8, if XML has no <?xml ?> declaration, encoding
60+
/// key is not defined or contains unknown encoding.
61+
///
62+
/// The library supports any UTF-8 compatible encodings that crate `encoding_rs`
63+
/// supports. [*UTF-16 and ISO-2022-JP are not supported at present*](https://github.com/tafia/quick-xml/issues/158).
64+
///
65+
/// If feature `encoding` is disabled, the decoder is always a UTF-8 decoder:
66+
/// any XML declarations are ignored.
5867
pub encoding: EncodingRef,
5968
}
6069

@@ -249,20 +258,6 @@ impl Parser {
249258
.split_off(self.opened_starts.pop().unwrap());
250259
Ok(Event::End(BytesEnd::new(name)))
251260
}
252-
253-
/// Get the decoder, used to decode bytes, read by this reader, to the strings.
254-
///
255-
/// If `encoding` feature is enabled, the used encoding may change after
256-
/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
257-
///
258-
/// If `encoding` feature is enabled and no encoding is specified in declaration,
259-
/// defaults to UTF-8.
260-
pub fn decoder(&self) -> Decoder {
261-
Decoder {
262-
#[cfg(feature = "encoding")]
263-
encoding: self.encoding.encoding(),
264-
}
265-
}
266261
}
267262

268263
impl Default for Parser {

src/reader/slice_reader.rs

+4-7
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,11 @@ impl<'a> Reader<&'a [u8]> {
152152
/// it reads, and if, for example, it contains CDATA section, attempt to
153153
/// unescape its content will spoil data.
154154
///
155-
/// Any text will be decoded using the XML current [`decoder()`].
156-
///
157155
/// Actually, this method performs the following code:
158156
///
159157
/// ```ignore
160158
/// let span = reader.read_to_end(end)?;
161-
/// let text = reader.decoder().decode(&reader.inner_slice[span]);
159+
/// let text = std::str::from_utf8(&reader.inner_slice[span]);
162160
/// ```
163161
///
164162
/// # Examples
@@ -206,13 +204,12 @@ impl<'a> Reader<&'a [u8]> {
206204
/// ```
207205
///
208206
/// [`Start`]: Event::Start
209-
/// [`decoder()`]: Self::decoder()
210207
pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> {
211208
// self.reader will be changed, so store original reference
212209
let buffer = self.reader;
213210
let span = self.read_to_end(end)?;
214211

215-
self.decoder().decode(&buffer[0..span.len()])
212+
Ok(Cow::Borrowed(std::str::from_utf8(&buffer[0..span.len()])?))
216213
}
217214
}
218215

@@ -364,9 +361,9 @@ mod test {
364361
fn str_always_has_utf8() {
365362
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");
366363

367-
assert_eq!(reader.decoder().encoding(), UTF_8);
364+
assert_eq!(reader.encoding(), UTF_8);
368365
reader.read_event().unwrap();
369-
assert_eq!(reader.decoder().encoding(), UTF_8);
366+
assert_eq!(reader.encoding(), UTF_8);
370367

371368
assert_eq!(reader.read_event().unwrap(), Event::Eof);
372369
}

tests/encodings.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,15 @@ mod detect {
5959
let mut r = Reader::from_reader(
6060
include_bytes!(concat!("documents/encoding/", $file, ".xml")).as_ref(),
6161
);
62-
assert_eq!(r.decoder().encoding(), UTF_8);
62+
assert_eq!(r.encoding(), UTF_8);
6363

6464
let mut buf = Vec::new();
6565
loop {
6666
match dbg!(r.read_event_into(&mut buf).unwrap()) {
6767
Event::Eof => break,
6868
_ => {}
6969
}
70-
assert_eq!(r.decoder().encoding(), $enc);
70+
assert_eq!(r.encoding(), $enc);
7171
buf.clear();
7272
$($break)?
7373
}

tests/xmlrs_reader_tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
394394
loop {
395395
let line = match reader.read_resolved_event_into(&mut Vec::new()) {
396396
Ok((_, Event::Decl(e))) => {
397-
// Declaration could change decoder
397+
// Declaration could change encoding
398398
let version = e.version().unwrap();
399399
let encoding = e.encoding().unwrap().unwrap();
400400
format!("StartDocument({}, {})", version, encoding)

0 commit comments

Comments
 (0)