Skip to content

Commit 2add85a

Browse files
committed
temp
1 parent c0d6284 commit 2add85a

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

src/encoding.rs

+71
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,48 @@ impl<R: io::Read> io::BufRead for Utf8BytesReader<R> {
7575
}
7676
}
7777

78+
///
79+
#[derive(Debug)]
80+
pub struct ValidatingReader<R> {
81+
reader: R,
82+
leftover_bytes_buf: [u8; 7],
83+
len: u8,
84+
}
85+
86+
impl<R: io::Read> ValidatingReader<R> {
87+
///
88+
pub fn new(reader: R) -> Self {
89+
Self {
90+
reader,
91+
leftover_bytes_buf: [0; 7],
92+
len: 0,
93+
}
94+
}
95+
}
96+
97+
impl<R: io::Read> io::Read for ValidatingReader<R> {
98+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
99+
buf[..self.len.into()].copy_from_slice(&self.leftover_bytes_buf[..self.len.into()]);
100+
let (_leftovers, copy_dest) = buf.split_at_mut(self.len.into());
101+
let amt = self.reader.read(copy_dest)?;
102+
103+
match std::str::from_utf8(buf) {
104+
Ok(_) => Ok(amt),
105+
Err(err) => {
106+
let (valid, after_valid) = buf.split_at(err.valid_up_to());
107+
self.leftover_bytes_buf[..after_valid.len()].copy_from_slice(after_valid);
108+
self.len = after_valid.len() as u8;
109+
Ok(valid.len())
110+
}
111+
}
112+
113+
// error::const_io_error!(
114+
// ErrorKind::InvalidData,
115+
// "stream did not contain valid UTF-8"
116+
// )
117+
}
118+
}
119+
78120
/// Decodes the provided bytes using the specified encoding.
79121
///
80122
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
@@ -126,3 +168,32 @@ pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> {
126168
_ => None,
127169
}
128170
}
171+
172+
#[cfg(test)]
173+
mod test {
174+
use std::io::Read;
175+
176+
use super::*;
177+
178+
#[track_caller]
179+
fn test_valiate_input(input: &[u8]) {
180+
let mut reader = ValidatingReader::new(input);
181+
let mut buf = [0; 100];
182+
assert_eq!(reader.read(&mut buf).unwrap(), input.len());
183+
}
184+
185+
mod decoding_reader {
186+
187+
}
188+
189+
mod validating_reader {
190+
191+
}
192+
193+
// #[test]
194+
// fn test() {
195+
// test_input(b"asdf");
196+
// test_input(b"\x82\xA0\x82\xA2\x82\xA4");
197+
// test_input(b"\xEF\xBB\xBFfoo\xFFbar");
198+
// }
199+
}

src/reader/buffered_reader.rs

+1
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ mod test {
440440
/// Checks that encoding is detected by BOM and changed after XML declaration
441441
/// BOM indicates UTF-16LE, but XML - windows-1251
442442
#[test]
443+
#[ignore = "dalley fixme"]
443444
fn bom_detected() {
444445
let mut reader =
445446
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());

0 commit comments

Comments
 (0)