Skip to content

Commit 95fb35f

Browse files
committed
Remove quick_xml::encoding::Decoder and Reader::decoder()
As quick-xml will pre-decode everything, it is unnecessary.
1 parent 0b76628 commit 95fb35f

12 files changed

+34
-111
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ async-tokio = ["tokio"]
8888
## let mut buf = Vec::new();
8989
## let mut unsupported = false;
9090
## loop {
91-
## if !reader.decoder().encoding().is_ascii_compatible() {
91+
## if !reader.encoding().is_ascii_compatible() {
9292
## unsupported = true;
9393
## break;
9494
## }

Changelog.md

+3
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363

6464
### Misc Changes
6565

66+
- [#441]: `Decoder` and `Reader::decoder()` removed as they are no longer necessary (`Reader` already
67+
decodes everything for you). `Reader::encoding()` is provided to make the current
68+
encoding accessible as it was before.
6669
- [#481]: Removed the uses of `const fn` added in version 0.24 in favor of a lower minimum
6770
supported Rust version (1.46.0). Minimum supported Rust version is now verified in the CI.
6871
- [#489]: Reduced the size of the package uploaded into the crates.io by excluding

src/de/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ mod var;
338338

339339
pub use crate::errors::serialize::DeError;
340340
use crate::{
341-
encoding::{Decoder, Utf8BytesReader},
341+
encoding::Utf8BytesReader,
342342
errors::Error,
343343
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
344344
name::QName,

src/encoding.rs

-68
Original file line numberDiff line numberDiff line change
@@ -75,74 +75,6 @@ impl<R: io::Read> io::BufRead for Utf8BytesReader<R> {
7575
}
7676
}
7777

78-
/// Decoder of byte slices into strings.
79-
///
80-
/// If feature `encoding` is enabled, this encoding taken from the `"encoding"`
81-
/// XML declaration or assumes UTF-8, if XML has no <?xml ?> declaration, encoding
82-
/// key is not defined or contains unknown encoding.
83-
///
84-
/// The library supports any UTF-8 compatible encodings that crate `encoding_rs`
85-
/// is supported. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
86-
///
87-
/// If feature `encoding` is disabled, the decoder is always UTF-8 decoder:
88-
/// any XML declarations are ignored.
89-
///
90-
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
91-
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
92-
pub struct Decoder {
93-
#[cfg(feature = "encoding")]
94-
pub(crate) encoding: &'static Encoding,
95-
}
96-
97-
impl Decoder {
98-
pub(crate) fn utf8() -> Self {
99-
Decoder {
100-
#[cfg(feature = "encoding")]
101-
encoding: UTF_8,
102-
}
103-
}
104-
105-
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
106-
pub(crate) fn utf16() -> Self {
107-
Decoder { encoding: UTF_16LE }
108-
}
109-
}
110-
111-
impl Decoder {
112-
/// Returns the `Reader`s encoding.
113-
///
114-
/// This encoding will be used by [`decode`].
115-
///
116-
/// [`decode`]: Self::decode
117-
#[cfg(feature = "encoding")]
118-
pub fn encoding(&self) -> &'static Encoding {
119-
self.encoding
120-
}
121-
122-
/// ## Without `encoding` feature
123-
///
124-
/// Decodes an UTF-8 slice regardless of XML declaration and ignoring BOM
125-
/// if it is present in the `bytes`.
126-
///
127-
/// ## With `encoding` feature
128-
///
129-
/// Decodes specified bytes using encoding, declared in the XML, if it was
130-
/// declared there, or UTF-8 otherwise, and ignoring BOM if it is present
131-
/// in the `bytes`.
132-
///
133-
/// ----
134-
/// Returns an error in case of malformed sequences in the `bytes`.
135-
pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
136-
#[cfg(not(feature = "encoding"))]
137-
let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));
138-
139-
#[cfg(feature = "encoding")]
140-
let decoded = decode(bytes, self.encoding);
141-
142-
decoded
143-
}
144-
}
145-
14678
/// Decodes the provided bytes using the specified encoding.
14779
///
14880
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.

src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ pub mod utils;
6565
pub mod writer;
6666

6767
// reexports
68-
pub use crate::encoding::Decoder;
6968
#[cfg(feature = "serialize")]
7069
pub use crate::errors::serialize::DeError;
7170
pub use crate::errors::{Error, Result};

src/reader/buffered_reader.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,9 @@ mod test {
442442
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
443443
let mut buf = Vec::new();
444444

445-
assert_eq!(reader.decoder().encoding(), UTF_8);
445+
assert_eq!(reader.encoding(), UTF_8);
446446
reader.read_event_into(&mut buf).unwrap();
447-
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);
447+
assert_eq!(reader.encoding(), WINDOWS_1251);
448448

449449
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
450450
}
@@ -457,12 +457,12 @@ mod test {
457457
);
458458
let mut buf = Vec::new();
459459

460-
assert_eq!(reader.decoder().encoding(), UTF_8);
460+
assert_eq!(reader.encoding(), UTF_8);
461461
reader.read_event_into(&mut buf).unwrap();
462-
assert_eq!(reader.decoder().encoding(), UTF_16LE);
462+
assert_eq!(reader.encoding(), UTF_16LE);
463463

464464
reader.read_event_into(&mut buf).unwrap();
465-
assert_eq!(reader.decoder().encoding(), UTF_16LE);
465+
assert_eq!(reader.encoding(), UTF_16LE);
466466

467467
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
468468
}

src/reader/mod.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::ops::Range;
66
#[cfg(feature = "encoding")]
77
use encoding_rs::{Encoding, UTF_8};
88

9-
use crate::encoding::{Decoder, Utf8BytesReader};
9+
use crate::encoding::Utf8BytesReader;
1010
use crate::errors::{Error, Result};
1111
use crate::events::Event;
1212
use crate::reader::parser::Parser;
@@ -283,8 +283,7 @@ macro_rules! read_to_end {
283283
depth -= 1;
284284
}
285285
Ok(Event::Eof) => {
286-
let name = $self.decoder().decode($end.as_ref().as_bytes());
287-
return Err(Error::UnexpectedEof(format!("</{:?}>", name)));
286+
return Err(Error::UnexpectedEof(format!("</{:?}>", $end.as_ref())));
288287
}
289288
_ => (),
290289
}
@@ -529,16 +528,17 @@ impl<R> Reader<R> {
529528
}
530529
}
531530

532-
/// Get the decoder, used to decode bytes, read by this reader, to the strings.
531+
/// Get the encoding this reader is currently using to decode strings.
533532
///
534533
/// If `encoding` feature is enabled, the used encoding may change after
535534
/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
536535
///
537536
/// If `encoding` feature is enabled and no encoding is specified in declaration,
538537
/// defaults to UTF-8.
538+
#[cfg(feature = "encoding")]
539539
#[inline]
540-
pub fn decoder(&self) -> Decoder {
541-
self.parser.decoder()
540+
pub fn encoding(&self) -> &'static Encoding {
541+
self.parser.encoding.encoding()
542542
}
543543
}
544544

src/reader/ns_reader.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -774,13 +774,11 @@ impl<'i> NsReader<&'i [u8]> {
774774
/// it reads, and if, for example, it contains CDATA section, attempt to
775775
/// unescape its content will spoil data.
776776
///
777-
/// Any text will be decoded using the XML current [`decoder()`].
778-
///
779777
/// Actually, this method performs the following code:
780778
///
781779
/// ```ignore
782780
/// let span = reader.read_to_end(end)?;
783-
/// let text = reader.decoder().decode(&reader.inner_slice[span]);
781+
/// let text = std::str::from_utf8(&reader.inner_slice[span]);
784782
/// ```
785783
///
786784
/// # Examples
@@ -827,7 +825,6 @@ impl<'i> NsReader<&'i [u8]> {
827825
/// ```
828826
///
829827
/// [`Start`]: Event::Start
830-
/// [`decoder()`]: Reader::decoder()
831828
#[inline]
832829
pub fn read_text(&mut self, end: QName) -> Result<Cow<'i, str>> {
833830
self.reader.read_text(end)

src/reader/parser.rs

+10-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#[cfg(feature = "encoding")]
22
use encoding_rs::UTF_8;
33

4-
use crate::encoding::Decoder;
54
use crate::errors::{Error, Result};
65
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
76
#[cfg(feature = "encoding")]
@@ -55,6 +54,16 @@ pub(super) struct Parser {
5554

5655
#[cfg(feature = "encoding")]
5756
/// Reference to the encoding used to read an XML
57+
///
58+
/// If feature `encoding` is enabled, this encoding is taken from the `"encoding"`
59+
/// XML declaration or assumes UTF-8, if XML has no `<?xml ?>` declaration, encoding
60+
/// key is not defined or contains unknown encoding.
61+
///
62+
/// The library supports any UTF-8 compatible encodings that crate `encoding_rs`
63+
/// supports. [*UTF-16 and ISO-2022-JP are not supported at present*](https://github.com/tafia/quick-xml/issues/158).
64+
///
65+
/// If feature `encoding` is disabled, the decoder is always a UTF-8 decoder:
66+
/// any XML declarations are ignored.
5867
pub encoding: EncodingRef,
5968
}
6069

@@ -228,20 +237,6 @@ impl Parser {
228237
.split_off(self.opened_starts.pop().unwrap());
229238
Ok(Event::End(BytesEnd::new(name)))
230239
}
231-
232-
/// Get the decoder, used to decode bytes, read by this reader, to the strings.
233-
///
234-
/// If `encoding` feature is enabled, the used encoding may change after
235-
/// parsing the XML declaration, otherwise encoding is fixed to UTF-8.
236-
///
237-
/// If `encoding` feature is enabled and no encoding is specified in declaration,
238-
/// defaults to UTF-8.
239-
pub fn decoder(&self) -> Decoder {
240-
Decoder {
241-
#[cfg(feature = "encoding")]
242-
encoding: self.encoding.encoding(),
243-
}
244-
}
245240
}
246241

247242
impl Default for Parser {

src/reader/slice_reader.rs

+4-7
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,11 @@ impl<'a> Reader<&'a [u8]> {
152152
/// it reads, and if, for example, it contains CDATA section, attempt to
153153
/// unescape its content will spoil data.
154154
///
155-
/// Any text will be decoded using the XML current [`decoder()`].
156-
///
157155
/// Actually, this method performs the following code:
158156
///
159157
/// ```ignore
160158
/// let span = reader.read_to_end(end)?;
161-
/// let text = reader.decoder().decode(&reader.inner_slice[span]);
159+
/// let text = std::str::from_utf8(&reader.inner_slice[span]);
162160
/// ```
163161
///
164162
/// # Examples
@@ -205,13 +203,12 @@ impl<'a> Reader<&'a [u8]> {
205203
/// ```
206204
///
207205
/// [`Start`]: Event::Start
208-
/// [`decoder()`]: Self::decoder()
209206
pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> {
210207
// self.reader will be changed, so store original reference
211208
let buffer = self.reader;
212209
let span = self.read_to_end(end)?;
213210

214-
self.decoder().decode(&buffer[0..span.len()])
211+
Ok(Cow::Borrowed(std::str::from_utf8(&buffer[0..span.len()])?))
215212
}
216213
}
217214

@@ -362,9 +359,9 @@ mod test {
362359
fn str_always_has_utf8() {
363360
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");
364361

365-
assert_eq!(reader.decoder().encoding(), UTF_8);
362+
assert_eq!(reader.encoding(), UTF_8);
366363
reader.read_event().unwrap();
367-
assert_eq!(reader.decoder().encoding(), UTF_8);
364+
assert_eq!(reader.encoding(), UTF_8);
368365

369366
assert_eq!(reader.read_event().unwrap(), Event::Eof);
370367
}

tests/encodings.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,15 @@ mod detect {
5959
let mut r = Reader::from_reader(
6060
include_bytes!(concat!("documents/encoding/", $file, ".xml")).as_ref(),
6161
);
62-
assert_eq!(r.decoder().encoding(), UTF_8);
62+
assert_eq!(r.encoding(), UTF_8);
6363

6464
let mut buf = Vec::new();
6565
loop {
6666
match dbg!(r.read_event_into(&mut buf).unwrap()) {
6767
Event::Eof => break,
6868
_ => {}
6969
}
70-
assert_eq!(r.decoder().encoding(), $enc);
70+
assert_eq!(r.encoding(), $enc);
7171
buf.clear();
7272
$($break)?
7373
}

tests/xmlrs_reader_tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
394394
loop {
395395
let line = match reader.read_resolved_event_into(&mut Vec::new()) {
396396
Ok((_, Event::Decl(e))) => {
397-
// Declaration could change decoder
397+
// Declaration could change encoding
398398
let version = e.version().unwrap();
399399
let encoding = e.encoding().unwrap().unwrap();
400400
format!("StartDocument({}, {})", version, encoding)

0 commit comments

Comments
 (0)