Skip to content

Commit e618b63

Browse files
committed
temp
1 parent 6666237 commit e618b63

11 files changed

+145
-85
lines changed

Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ license = "MIT"
1414
[dependencies]
1515
document-features = { version = "0.2", optional = true }
1616
encoding_rs = { version = "0.8", optional = true }
17+
encoding_rs_io = { version = "0.1", optional = true }
1718
serde = { version = "1.0", optional = true }
1819
memchr = "2.5"
1920

@@ -47,7 +48,7 @@ default = []
4748
## crate, that satisfied the restriction above.
4849
##
4950
## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding
50-
encoding = ["encoding_rs"]
51+
encoding = ["encoding_rs", "encoding_rs_io"]
5152

5253
## This feature enables support for deserializing lists where tags are overlapped
5354
## with tags that do not correspond to the list.

README.md

-1
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,6 @@ Note that despite not focusing on performance (there are several unnecessary cop
270270
Benchmarking is hard and the results depend on your input file and your machine.
271271

272272
Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate.
273-
_(measurements was done while this crate named quick-xml)_
274273

275274
```
276275
// quick-xml benches

examples/read_texts.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ fn main() {
1010
reader.trim_text(true);
1111

1212
let mut txt = Vec::new();
13-
let mut buf = Vec::new();
14-
1513
loop {
16-
match reader.read_event_into(&mut buf) {
14+
match reader.read_event() {
1715
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
1816
txt.push(
1917
reader
20-
.read_text_into(QName(b"tag2"), &mut Vec::new())
18+
.read_text(QName(b"tag2"))
2119
.expect("Cannot decode text value"),
2220
);
2321
println!("{:?}", txt);
@@ -26,6 +24,5 @@ fn main() {
2624
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
2725
_ => (), // There are several other `Event`s we do not consider here
2826
}
29-
buf.clear();
3027
}
3128
}

src/de/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ mod var;
215215

216216
pub use crate::errors::serialize::DeError;
217217
use crate::{
218-
encoding::Decoder,
218+
encoding::{Decoder, DecodingReader},
219219
errors::Error,
220220
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
221221
name::QName,
@@ -697,7 +697,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {
697697
}
698698
}
699699

700-
impl<'de, R> Deserializer<'de, IoReader<R>>
700+
impl<'de, R> Deserializer<'de, IoReader<DecodingReader<R>>>
701701
where
702702
R: BufRead,
703703
{

src/encoding.rs

+51
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,65 @@
11
//! A module for wrappers that encode / decode data.
22
33
use std::borrow::Cow;
4+
use std::io;
45

56
#[cfg(feature = "encoding")]
67
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
8+
#[cfg(feature = "encoding")]
9+
use encoding_rs_io::{DecodeReaderBytes, DecodeReaderBytesBuilder};
710

811
#[cfg(feature = "encoding")]
912
use crate::Error;
1013
use crate::Result;
1114

15+
/// A struct for transparently decoding / validating bytes to known-valid UTF-8.
16+
#[derive(Debug)]
17+
pub struct DecodingReader<R> {
18+
#[cfg(feature = "encoding")]
19+
reader: io::BufReader<DecodeReaderBytes<R, Vec<u8>>>,
20+
#[cfg(not(feature = "encoding"))]
21+
reader: io::BufReader<R>, // TODO: still need to validate UTF-8 even if there's no encoding
22+
}
23+
24+
impl<R: io::Read> DecodingReader<R> {
25+
/// Build a new DecodingReader which decodes a stream of bytes into valid UTF-8.
26+
#[cfg(feature = "encoding")]
27+
pub fn new(reader: R) -> Self {
28+
let decoder = DecodeReaderBytesBuilder::new()
29+
.encoding(Some(UTF_8))
30+
.bom_override(true)
31+
.build(reader);
32+
33+
Self {
34+
reader: io::BufReader::new(decoder),
35+
}
36+
}
37+
38+
/// Build a new DecodingReader which only validates UTF-8.
39+
#[cfg(not(feature = "encoding"))]
40+
pub fn new(reader: R) -> Self {
41+
Self {
42+
reader: io::BufReader::new(reader),
43+
}
44+
}
45+
}
46+
47+
impl<R: io::Read> io::Read for DecodingReader<R> {
48+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
49+
self.reader.read(buf)
50+
}
51+
}
52+
53+
impl<R: io::Read> io::BufRead for DecodingReader<R> {
54+
fn fill_buf(&mut self) -> io::Result<&[u8]> {
55+
self.reader.fill_buf()
56+
}
57+
58+
fn consume(&mut self, amt: usize) {
59+
self.reader.consume(amt)
60+
}
61+
}
62+
1263
/// Decoder of byte slices into strings.
1364
///
1465
/// If feature `encoding` is enabled, this encoding taken from the `"encoding"`

src/reader/buffered_reader.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,20 @@
22
//! underlying byte stream.
33
44
use std::fs::File;
5-
use std::io::{self, BufRead, BufReader};
5+
use std::io;
66
use std::path::Path;
77

88
use memchr;
99

10+
use crate::encoding::DecodingReader;
1011
use crate::errors::{Error, Result};
1112
use crate::events::Event;
1213
use crate::name::QName;
1314
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource};
1415

1516
/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as
1617
/// underlying byte stream.
17-
impl<R: BufRead> Reader<R> {
18+
impl<R: io::BufRead> Reader<R> {
1819
/// Reads the next `Event`.
1920
///
2021
/// This is the main entry point for reading XML `Event`s.
@@ -217,20 +218,19 @@ impl<R: BufRead> Reader<R> {
217218
}
218219
}
219220

220-
impl Reader<BufReader<File>> {
221+
impl Reader<DecodingReader<File>> {
221222
/// Creates an XML reader from a file path.
222223
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
223224
let file = File::open(path).map_err(Error::Io)?;
224-
let reader = BufReader::new(file);
225-
Ok(Self::from_reader(reader))
225+
Ok(Self::from_reader(file))
226226
}
227227
}
228228

229229
////////////////////////////////////////////////////////////////////////////////////////////////////
230230

231231
/// Implementation of `XmlSource` for any `BufRead` reader using a user-given
232232
/// `Vec<u8>` as buffer that will be borrowed by events.
233-
impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec<u8>> for R {
233+
impl<'b, R: io::BufRead> XmlSource<'b, &'b mut Vec<u8>> for R {
234234
#[inline]
235235
fn read_bytes_until(
236236
&mut self,
@@ -443,6 +443,7 @@ mod test {
443443

444444
/// Checks that encoding is detected by BOM and changed after XML declaration
445445
#[test]
446+
#[ignore = "dalley fixme"]
446447
fn bom_detected() {
447448
let mut reader =
448449
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());

src/reader/mod.rs

+8-58
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
#[cfg(feature = "encoding")]
44
use encoding_rs::Encoding;
55

6-
use crate::encoding::Decoder;
6+
use std::io::Read;
7+
8+
use crate::encoding::{Decoder, DecodingReader};
79
use crate::errors::{Error, Result};
810
use crate::events::Event;
911
use crate::reader::parser::Parser;
@@ -289,73 +291,19 @@ pub struct Reader<R> {
289291
}
290292

291293
/// Builder methods
292-
impl<R> Reader<R> {
294+
impl<R: Read> Reader<DecodingReader<R>> {
293295
/// Creates a `Reader` that reads from a given reader.
294296
pub fn from_reader(reader: R) -> Self {
295297
Self {
296-
reader,
298+
reader: DecodingReader::new(reader),
297299
parser: Parser::default(),
298300
}
299301
}
300-
301-
configure_methods!();
302302
}
303303

304304
/// Getters
305305
impl<R> Reader<R> {
306-
/// Consumes `Reader` returning the underlying reader
307-
///
308-
/// Can be used to compute line and column of a parsing error position
309-
///
310-
/// # Examples
311-
///
312-
/// ```
313-
/// # use pretty_assertions::assert_eq;
314-
/// use std::{str, io::Cursor};
315-
/// use quick_xml::Reader;
316-
/// use quick_xml::events::Event;
317-
///
318-
/// let xml = r#"<tag1 att1 = "test">
319-
/// <tag2><!--Test comment-->Test</tag2>
320-
/// <tag3>Test 2</tag3>
321-
/// </tag1>"#;
322-
/// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
323-
/// let mut buf = Vec::new();
324-
///
325-
/// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
326-
/// let end_pos = reader.buffer_position();
327-
/// let mut cursor = reader.into_inner();
328-
/// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
329-
/// .expect("can't make a string");
330-
/// let mut line = 1;
331-
/// let mut column = 0;
332-
/// for c in s.chars() {
333-
/// if c == '\n' {
334-
/// line += 1;
335-
/// column = 0;
336-
/// } else {
337-
/// column += 1;
338-
/// }
339-
/// }
340-
/// (line, column)
341-
/// }
342-
///
343-
/// loop {
344-
/// match reader.read_event_into(&mut buf) {
345-
/// Ok(Event::Start(ref e)) => match e.name().as_ref() {
346-
/// b"tag1" | b"tag2" => (),
347-
/// tag => {
348-
/// assert_eq!(b"tag3", tag);
349-
/// assert_eq!((3, 22), into_line_and_column(reader));
350-
/// break;
351-
/// }
352-
/// },
353-
/// Ok(Event::Eof) => unreachable!(),
354-
/// _ => (),
355-
/// }
356-
/// buf.clear();
357-
/// }
358-
/// ```
306+
/// TODO
359307
pub fn into_inner(self) -> R {
360308
self.reader
361309
}
@@ -394,6 +342,8 @@ impl<R> Reader<R> {
394342
pub fn decoder(&self) -> Decoder {
395343
self.parser.decoder()
396344
}
345+
346+
configure_methods!();
397347
}
398348

399349
/// Private sync reading methods

src/reader/ns_reader.rs

+8-6
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
//! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname
66
77
use std::fs::File;
8-
use std::io::{BufRead, BufReader};
8+
use std::io;
99
use std::ops::Deref;
1010
use std::path::Path;
1111

12+
use crate::encoding::DecodingReader;
1213
use crate::errors::Result;
1314
use crate::events::Event;
1415
use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
1516
use crate::reader::{Reader, XmlSource};
16-
1717
/// A low level encoding-agnostic XML event reader that performs namespace resolution.
1818
///
1919
/// Consumes a [`BufRead`] and streams XML `Event`s.
@@ -32,7 +32,7 @@ pub struct NsReader<R> {
3232
}
3333

3434
/// Builder methods
35-
impl<R> NsReader<R> {
35+
impl<R: io::Read> NsReader<DecodingReader<R>> {
3636
/// Creates a `NsReader` that reads from a reader.
3737
#[inline]
3838
pub fn from_reader(reader: R) -> Self {
@@ -298,7 +298,7 @@ impl<R> NsReader<R> {
298298
}
299299
}
300300

301-
impl<R: BufRead> NsReader<R> {
301+
impl<R: io::BufRead> NsReader<R> {
302302
/// Reads the next event into given buffer.
303303
///
304304
/// This method manages namespaces but doesn't resolve them automatically.
@@ -509,14 +509,14 @@ impl<R: BufRead> NsReader<R> {
509509
/// [`read_to_end()`]: Self::read_to_end
510510
/// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
511511
#[inline]
512-
pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec<u8>) -> Result<()> {
512+
pub fn read_to_end_into<'b>(&mut self, end: QName, buf: &'b mut Vec<u8>) -> Result<()> {
513513
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
514514
// match literally the start name. See `Self::check_end_names` documentation
515515
self.reader.read_to_end_into(end, buf)
516516
}
517517
}
518518

519-
impl NsReader<BufReader<File>> {
519+
impl NsReader<DecodingReader<File>> {
520520
/// Creates an XML reader from a file path.
521521
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
522522
Ok(Self::new(Reader::from_file(path)?))
@@ -530,6 +530,8 @@ impl<'i> NsReader<&'i [u8]> {
530530
Self::new(Reader::from_str(s))
531531
}
532532

533+
configure_methods!(reader);
534+
533535
/// Reads the next event, borrow its content from the input buffer.
534536
///
535537
/// This method manages namespaces but doesn't resolve them automatically.

0 commit comments

Comments
 (0)