diff --git a/marked-cli/build.rs b/marked-cli/build.rs index 4492e93..1192956 100644 --- a/marked-cli/build.rs +++ b/marked-cli/build.rs @@ -13,14 +13,21 @@ fn main() { if rustv < msrv { panic!( "{} v{} {} is {} > {} (this rustc)", - PACKAGE, VERSION, M_V, join(&msrv), join(&rustv)); + PACKAGE, + VERSION, + M_V, + join(&msrv), + join(&rustv) + ); } } fn join(ver: &[u16]) -> String { let mut out = String::new(); for v in ver { - if !out.is_empty() { out.push('.'); } + if !out.is_empty() { + out.push('.'); + } out.push_str(&v.to_string()); } out @@ -39,9 +46,7 @@ fn rustc_version() -> Vec { } let mut vp = v.split("-"); if let Some(v) = vp.next() { - let vs: Vec = v.split(".") - .filter_map(|vss| vss.parse().ok()) - .collect(); + let vs: Vec = v.split(".").filter_map(|vss| vss.parse().ok()).collect(); if !vs.is_empty() { return vs; } diff --git a/marked-cli/src/main.rs b/marked-cli/src/main.rs index 3078f3a..04faaef 100644 --- a/marked-cli/src/main.rs +++ b/marked-cli/src/main.rs @@ -2,24 +2,15 @@ use std::error::Error as StdError; use std::fmt; +use std::fs::File; use std::io; use std::process; -use std::fs::File; use encoding_rs as enc; -use marked::{ - chain_filters, - filter, - html::parse_buffered, - logger::setup_logger, - EncodingHint, -}; +use marked::{chain_filters, filter, html::parse_buffered, logger::setup_logger, EncodingHint}; -use clap::{ - crate_version, - Arg, App, AppSettings, SubCommand, -}; +use clap::{crate_version, App, AppSettings, Arg, SubCommand}; use log::{debug, error}; @@ -55,8 +46,7 @@ fn run() -> Result<(), Flaw> { let html = SubCommand::with_name("html") .setting(AppSettings::DeriveDisplayOrder) .about("HTML processing") - .after_help( - "Parses input, applies filters, and serializes to output.") + .after_help("Parses input, applies filters, and serializes to output.") .args(&[ Arg::with_name("output") .short("o") @@ -80,7 +70,7 @@ fn run() -> Result<(), Flaw> { Arg::with_name("file") .required(false) .value_name("INPUT-FILE") - .help("File path to read (default: STDIN)") + .help("File path to read (default: STDIN)"), ]); let app = App::new("marked") @@ -89,12 +79,14 @@ fn run() -> Result<(), Flaw> { .setting(AppSettings::SubcommandRequired) .setting(AppSettings::DeriveDisplayOrder) .max_term_width(100) - .arg(Arg::with_name("debug") - .short("d") - .long("debug") - .multiple(true) - .help("Enable more logging, and up to `-dddd`") - .global(true)) + .arg( + Arg::with_name("debug") + .short("d") + .long("debug") + .multiple(true) + .help("Enable more logging, and up to `-dddd`") + .global(true), + ) .subcommand(html); let mtch = app.get_matches(); @@ -148,7 +140,9 @@ fn run() -> Result<(), Flaw> { } else { quit!( "input {} same as output {} not supported", - fin.unwrap(), fout); + fin.unwrap(), + fout + ); } } else { Box::new(io::stdout()) diff --git a/marked/benches/round_trip.rs b/marked/benches/round_trip.rs index bddfb8f..d1accba 100644 --- a/marked/benches/round_trip.rs +++ b/marked/benches/round_trip.rs @@ -1,39 +1,35 @@ #![warn(rust_2018_idioms)] - #![feature(test)] extern crate test; // Still required, see rust-lang/rust#55133 use std::default::Default; -use std::io; use std::fs::File; +use std::io; use test::Bencher; use encoding_rs as enc; use html5ever::driver::ParseOpts; use html5ever::parse_document; -use markup5ever_rcdom::{SerializableHandle, RcDom}; use html5ever::serialize as rc_serialize; +use markup5ever_rcdom::{RcDom, SerializableHandle}; use marked; -use marked::{Decoder, Document, EncodingHint}; use marked::chain_filters; use marked::filter; use marked::html::parse_buffered; +use marked::{Decoder, Document, EncodingHint}; #[bench] fn b00_round_trip_rcdom(b: &mut Bencher) { b.iter(|| { - let parser_sink = - parse_document(RcDom::default(), ParseOpts::default()); + let parser_sink = parse_document(RcDom::default(), ParseOpts::default()); let decoder = Decoder::new(enc::UTF_8, parser_sink); - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let doc = decoder.read_to_end(&mut fin).expect("parse"); let mut out = Vec::with_capacity(273108); let ser_handle: SerializableHandle = doc.document.clone().into(); - rc_serialize(&mut out, &ser_handle, Default::default()) - .expect("serialization"); + rc_serialize(&mut out, &ser_handle, Default::default()).expect("serialization"); assert_eq!(out.len(), 272273); }); } @@ -41,8 +37,7 @@ fn b00_round_trip_rcdom(b: &mut Bencher) { #[bench] fn b01_round_trip_marked(b: &mut Bencher) { b.iter(|| { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let doc = parse_buffered(eh, &mut fin).expect("parse"); let mut out = Vec::with_capacity(273108); @@ -54,8 +49,7 @@ fn b01_round_trip_marked(b: &mut Bencher) { #[bench] fn b11_decode_eucjp_parse_marked(b: &mut Bencher) { b.iter(|| { - let mut fin = sample_file("matsunami_eucjp_meta.html") - .expect("sample_file"); + let mut fin = sample_file("matsunami_eucjp_meta.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); parse_buffered(eh, &mut fin).expect("parse"); }); @@ -64,8 +58,7 @@ fn b11_decode_eucjp_parse_marked(b: &mut Bencher) { #[bench] fn b12_decode_windows1251_parse_marked(b: &mut Bencher) { b.iter(|| { - let mut fin = sample_file("russez_windows1251_meta.html") - .expect("sample_file"); + let mut fin = sample_file("russez_windows1251_meta.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); parse_buffered(eh, &mut fin).expect("parse"); }); @@ -74,8 +67,7 @@ fn b12_decode_windows1251_parse_marked(b: &mut Bencher) { #[bench] fn b13_utf8_parse_marked(b: &mut Bencher) { b.iter(|| { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); parse_buffered(eh, &mut fin).expect("parse"); }); @@ -83,8 +75,7 @@ fn b13_utf8_parse_marked(b: &mut Bencher) { #[bench] fn b20_text_content(b: &mut Bencher) { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let doc = parse_buffered(eh, &mut fin).expect("parse"); @@ -96,8 +87,7 @@ fn b20_text_content(b: &mut Bencher) { #[bench] fn b30_text_normalize_content(b: &mut Bencher) { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let doc = parse_buffered(eh, &mut fin).expect("parse"); b.iter(|| { @@ -110,8 +100,7 @@ fn b30_text_normalize_content(b: &mut Bencher) { #[bench] fn b31_text_normalize_content_identity(b: &mut Bencher) { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let mut doc = parse_buffered(eh, &mut fin).expect("parse"); doc.filter(chain_filters!( @@ -144,8 +133,7 @@ fn b31_text_normalize_content_identity(b: &mut Bencher) { #[bench] fn b50_sparse_bulk_clone(b: &mut Bencher) { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let mut doc = parse_buffered(eh, &mut fin).expect("parse"); filter_all(&mut doc); @@ -157,8 +145,7 @@ fn b50_sparse_bulk_clone(b: &mut Bencher) { #[bench] fn b51_sparse_compact(b: &mut Bencher) { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let mut doc = parse_buffered(eh, &mut fin).expect("parse"); filter_all(&mut doc); @@ -171,8 +158,7 @@ fn b51_sparse_compact(b: &mut Bencher) { #[bench] fn b52_sparse_deep_clone(b: &mut Bencher) { - let mut fin = sample_file("github-dekellum.html") - .expect("sample_file"); + let mut fin = sample_file("github-dekellum.html").expect("sample_file"); let eh = EncodingHint::shared_default(enc::UTF_8); let mut doc = parse_buffered(eh, &mut fin).expect("parse"); filter_all(&mut doc); diff --git a/marked/build.rs b/marked/build.rs index 4251862..9721d97 100644 --- a/marked/build.rs +++ b/marked/build.rs @@ -13,14 +13,21 @@ fn main() { if rustv < msrv { panic!( "{} v{} {} is {} > {} (this rustc)", - PACKAGE, VERSION, M_V, join(&msrv), join(&rustv)); + PACKAGE, + VERSION, + M_V, + join(&msrv), + join(&rustv) + ); } } fn join(ver: &[u16]) -> String { let mut out = String::new(); for v in ver { - if !out.is_empty() { out.push('.'); } + if !out.is_empty() { + out.push('.'); + } out.push_str(&v.to_string()); } out @@ -39,9 +46,7 @@ fn rustc_version() -> Vec { } let mut vp = v.split("-"); if let Some(v) = vp.next() { - let vs: Vec = v.split(".") - .filter_map(|vss| vss.parse().ok()) - .collect(); + let vs: Vec = v.split(".").filter_map(|vss| vss.parse().ok()).collect(); if !vs.is_empty() { return vs; } diff --git a/marked/src/chars.rs b/marked/src/chars.rs index f1fa376..ae05f73 100644 --- a/marked/src/chars.rs +++ b/marked/src/chars.rs @@ -13,8 +13,8 @@ pub(crate) fn replace_chars( ws: bool, ctrl: bool, trim_start: bool, - trim_end: bool) -{ + trim_end: bool, +) { let mut last = 0; let mut ost = None; // output lazy allocated let mut replacing = 0u8; @@ -31,9 +31,7 @@ pub(crate) fn replace_chars( } replacing |= rmask; } else if replacing > 0 { - if replacing >= 2 && - (ost.as_ref().unwrap().len32() > 0 || !trim_start) - { + if replacing >= 2 && (ost.as_ref().unwrap().len32() > 0 || !trim_start) { ost.as_mut().unwrap().push_char(' '); } last = i; @@ -74,7 +72,9 @@ enum CharClass { /// True if all contained characters are classified as whitespace or controls. pub(crate) fn is_all_ctrl_ws(st: &StrTendril) -> bool { - st.as_ref().chars().all(|c| char_class(c) != CharClass::Unclassified) + st.as_ref() + .chars() + .all(|c| char_class(c) != CharClass::Unclassified) } // Return CharClass for a char @@ -137,61 +137,61 @@ mod tests { fn test_char_class() { use CharClass::*; assert_eq!(Unclassified, char_class('x')); - assert_eq!(Control, char_class('\u{0008}')); - assert_eq!(ZeroSpace, char_class('\u{2060}')); - assert_eq!(WhiteSpace, char_class('\n')); - assert_eq!(WhiteSpace, char_class('\n')); + assert_eq!(Control, char_class('\u{0008}')); + assert_eq!(ZeroSpace, char_class('\u{2060}')); + assert_eq!(WhiteSpace, char_class('\n')); + assert_eq!(WhiteSpace, char_class('\n')); } #[test] fn replace() { - assert_clean("", "" ); - assert_clean("", "\u{2060}" ); + assert_clean("", ""); + assert_clean("", "\u{2060}"); assert_clean(" ", " "); assert_clean(" ", "\t \r\n"); - assert_clean("x", "x" ); + assert_clean("x", "x"); assert_clean(" x ", " x "); - assert_clean(" x", " x\u{2060}" ); - assert_clean("x ", "x " ); + assert_clean(" x", " x\u{2060}"); + assert_clean("x ", "x "); - assert_clean("aa b ", "\u{009F}a\u{009F}a b " ); + assert_clean("aa b ", "\u{009F}a\u{009F}a b "); - assert_clean("aa b c ", "aa b c " ); - assert_clean("aa b c", "aa \t b c" ); + assert_clean("aa b c ", "aa b c "); + assert_clean("aa b c", "aa \t b c"); assert_clean(" aa b c", "\t aa \t b c"); } // Assert that super-ASCII character boundaries are properly observed #[test] fn replace_multibyte() { - assert_clean("Ψ", "Ψ" ); + assert_clean("Ψ", "Ψ"); assert_clean(" Ψ ", " Ψ "); - assert_clean(" Ψ", " Ψ\u{2060}" ); - assert_clean("Ψ ", "Ψ " ); + assert_clean(" Ψ", " Ψ\u{2060}"); + assert_clean("Ψ ", "Ψ "); - assert_clean("αα β ", "\u{009F}α\u{009F}α β " ); + assert_clean("αα β ", "\u{009F}α\u{009F}α β "); - assert_clean("αα β γ ", "αα β γ " ); - assert_clean("αα β γ", "αα \t β γ" ); + assert_clean("αα β γ ", "αα β γ "); + assert_clean("αα β γ", "αα \t β γ"); assert_clean(" αα β γ", "\t αα \t β γ"); } #[test] fn replace_ctrl_only() { - assert_clean_ctrl("", "" ); - assert_clean_ctrl("", "\u{2060}" ); + assert_clean_ctrl("", ""); + assert_clean_ctrl("", "\u{2060}"); assert_clean_ctrl(" ", " "); - assert_clean_ctrl("x", "x" ); + assert_clean_ctrl("x", "x"); assert_clean_ctrl(" x ", " x "); - assert_clean_ctrl(" x", " x\u{2060}" ); - assert_clean_ctrl("x ", "x " ); + assert_clean_ctrl(" x", " x\u{2060}"); + assert_clean_ctrl("x ", "x "); - assert_clean_ctrl("aaa β ", "\u{009F}a\u{009F}aa β " ); + assert_clean_ctrl("aaa β ", "\u{009F}a\u{009F}aa β "); - assert_clean_ctrl("aa β c ", "aa β c " ); - assert_clean_ctrl("aa \t β c", "aa \t β c" ); + assert_clean_ctrl("aa β c ", "aa β c "); + assert_clean_ctrl("aa \t β c", "aa \t β c"); assert_clean_ctrl("\t aa \t β c", "\t aa \t β c"); } @@ -203,15 +203,15 @@ mod tests { assert_clean_trim("", "\u{FFFE}"); //BAD BOM assert_clean_trim("", "\u{00A0}\u{2007}\u{202F}"); - assert_clean_trim("x", "x" ); + assert_clean_trim("x", "x"); assert_clean_trim("x", " x "); - assert_clean_trim("x", " x" ); - assert_clean_trim("x", "x " ); + assert_clean_trim("x", " x"); + assert_clean_trim("x", "x "); - assert_clean_trim("aa b", " a\u{009F}a\u{009F} b " ); + assert_clean_trim("aa b", " a\u{009F}a\u{009F} b "); - assert_clean_trim("aa b c", "aa b c " ); - assert_clean_trim("aa b c", "aa \t b c" ); + assert_clean_trim("aa b c", "aa b c "); + assert_clean_trim("aa b c", "aa \t b c"); assert_clean_trim("aa b c", "\t aa \t b c"); } diff --git a/marked/src/decode.rs b/marked/src/decode.rs index 341738a..6350bc3 100644 --- a/marked/src/decode.rs +++ b/marked/src/decode.rs @@ -13,19 +13,17 @@ use std::borrow::Cow; use std::io; -use log::trace; -use encoding_rs as enc; use enc::DecoderResult; +use encoding_rs as enc; +use log::trace; -use tendril::{Tendril, TendrilSink, Atomicity, NonAtomic}; use tendril::fmt as form; use tendril::stream::Utf8LossyDecoder; +use tendril::{Atomicity, NonAtomic, Tendril, TendrilSink}; mod encoding_hint; -pub use encoding_hint::{ - EncodingHint, SharedEncodingHint, -}; +pub use encoding_hint::{EncodingHint, SharedEncodingHint}; use crate::READ_BUFFER_SIZE; @@ -34,24 +32,29 @@ use crate::READ_BUFFER_SIZE; /// U+FFFD replacement characters, and emits Unicode (`StrTendril`). /// /// This allocates new tendrils for encodings other than UTF-8. -pub struct Decoder - where Sink: TendrilSink, A: Atomicity +pub struct Decoder +where + Sink: TendrilSink, + A: Atomicity, { mode: Mode, } enum Mode - where Sink: TendrilSink, A: Atomicity +where + Sink: TendrilSink, + A: Atomicity, { Utf8(Utf8LossyDecoder), Other(enc::Decoder, Sink), } impl Decoder - where Sink: TendrilSink, A: Atomicity +where + Sink: TendrilSink, + A: Atomicity, { pub fn new(encoding: &'static enc::Encoding, sink: Sink) -> Self { - let mode = if encoding == enc::UTF_8 { Mode::Utf8(Utf8LossyDecoder::new(sink)) } else { @@ -71,9 +74,10 @@ impl Decoder /// Read until EOF of stream, processing each buffer, and finish this /// decoder. Returns the sink output or any io::Error. - pub fn read_to_end(mut self, r: &mut R) - -> Result - where Self: Sized, R: io::Read + pub fn read_to_end(mut self, r: &mut R) -> Result + where + Self: Sized, + R: io::Read, { // Adapted from TendrilSink::read_from loop { @@ -90,7 +94,7 @@ impl Decoder break; } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e) + Err(e) => return Err(e), } } // repeat on interrupt } // repeat until EOF (0) or Err @@ -98,7 +102,9 @@ impl Decoder } impl TendrilSink for Decoder - where Sink: TendrilSink, A: Atomicity +where + Sink: TendrilSink, + A: Atomicity, { type Output = Sink::Output; @@ -110,7 +116,7 @@ impl TendrilSink for Decoder return; } decode_to_sink(t, decoder, sink, false); - }, + } } } @@ -136,8 +142,10 @@ fn decode_to_sink( mut inpt: Tendril, decoder: &mut enc::Decoder, sink: &mut Sink, - last: bool) - where Sink: TendrilSink, A: Atomicity + last: bool, +) where + Sink: TendrilSink, + A: Atomicity, { loop { let mut outt = >::new(); @@ -146,7 +154,9 @@ fn decode_to_sink( .unwrap_or(READ_BUFFER_SIZE as usize); let len = std::cmp::min(len as u32, READ_BUFFER_SIZE); trace!("decode buffer len {}", len); - unsafe { outt.push_uninitialized(len); } + unsafe { + outt.push_uninitialized(len); + } let (result, bytes_read, bytes_written) = decoder.decode_to_utf8_without_replacement(&inpt, &mut outt, last); @@ -160,12 +170,12 @@ fn decode_to_sink( DecoderResult::InputEmpty => break, DecoderResult::OutputFull => { trace!("decode OutputFull"); - }, + } DecoderResult::Malformed(_, _) => { // String matched in Sink, don't change sink.error(Cow::Borrowed("invalid byte sequence")); sink.process("\u{FFFD}".into()); - }, + } } inpt.pop_front(bytes_read as u32); if inpt.is_empty() { @@ -180,14 +190,16 @@ mod tests { use tendril::SliceExt; struct Accumulate - where A: Atomicity + where + A: Atomicity, { tendrils: Vec>, errors: Vec, } impl Accumulate - where A: Atomicity + where + A: Atomicity, { fn new() -> Accumulate { Accumulate { @@ -198,7 +210,8 @@ mod tests { } impl TendrilSink for Accumulate - where A: Atomicity + where + A: Atomicity, { type Output = (Vec>, Vec); @@ -219,8 +232,8 @@ mod tests { mut decoder: Decoder>, input: &[&[u8]], expected: &str, - errs: usize) - { + errs: usize, + ) { for x in input { decoder.process(x.to_tendril()); } @@ -240,19 +253,23 @@ mod tests { (&[b""], "", 0), (&[b"xyz"], "xyz", 0), (&[b"x", b"y", b"z"], "xyz", 0), - (&[b"\xEA\x99\xAE"], "\u{a66e}", 0), (&[b"\xEA", b"\x99\xAE"], "\u{a66e}", 0), (&[b"\xEA\x99", b"\xAE"], "\u{a66e}", 0), (&[b"\xEA", b"\x99", b"\xAE"], "\u{a66e}", 0), (&[b"\xEA", b"", b"\x99", b"", b"\xAE"], "\u{a66e}", 0), - (&[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], "\u{a66e}", 0), - + ( + &[b"", b"\xEA", b"", b"\x99", b"", b"\xAE", b""], + "\u{a66e}", + 0, + ), (&[b"xy\xEA", b"\x99\xAEz"], "xy\u{a66e}z", 0), - (&[b"xy\xEA", b"\xFF", b"\x99\xAEz"], - "xy\u{fffd}\u{fffd}\u{fffd}\u{fffd}z", 4), + ( + &[b"xy\xEA", b"\xFF", b"\x99\xAEz"], + "xy\u{fffd}\u{fffd}\u{fffd}\u{fffd}z", + 4, + ), (&[b"xy\xEA\x99", b"\xFFz"], "xy\u{fffd}\u{fffd}z", 2), - // incomplete char at end of input (&[b"\xC0"], "\u{fffd}", 1), (&[b"\xEA\x99"], "\u{fffd}", 1), @@ -270,7 +287,11 @@ mod tests { (&[b"\xfc\xce\xc5\xd2\xc7\xc9\xd1"], "Энергия", 0), (&[b"\xfc\xce", b"\xc5\xd2\xc7\xc9\xd1"], "Энергия", 0), (&[b"\xfc\xce", b"\xc5\xd2\xc7", b"\xc9\xd1"], "Энергия", 0), - (&[b"\xfc\xce", b"", b"\xc5\xd2\xc7", b"\xc9\xd1", b""], "Энергия", 0), + ( + &[b"\xfc\xce", b"", b"\xc5\xd2\xc7", b"\xc9\xd1", b""], + "Энергия", + 0, + ), ]; #[test] @@ -287,9 +308,12 @@ mod tests { (&[b"\xbe\xc8\xb3\xe7"], "안녕", 0), (&[b"\xbe", b"\xc8\xb3\xe7"], "안녕", 0), (&[b"\xbe", b"", b"\xc8\xb3\xe7"], "안녕", 0), - (&[b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4"], "안녕하세요", 0), + ( + &[b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4"], + "안녕하세요", + 0, + ), (&[b"\xbe\xc8\xb3\xe7\xc7"], "안녕\u{fffd}", 1), - (&[b"\xbe", b"", b"\xc8\xb3"], "안\u{fffd}", 1), (&[b"\xbe\x28\xb3\xe7"], "\u{fffd}(녕", 1), ]; diff --git a/marked/src/decode/encoding_hint.rs b/marked/src/decode/encoding_hint.rs index 271bc7c..a650283 100644 --- a/marked/src/decode/encoding_hint.rs +++ b/marked/src/decode/encoding_hint.rs @@ -1,4 +1,3 @@ - use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; @@ -46,9 +45,7 @@ impl EncodingHint { /// Construct a new Encoding hint with the specified encoding and /// confidence, wrapped for sharing. - pub fn shared_with_hint(enc: &'static enc::Encoding, confidence: f32) - -> SharedEncodingHint - { + pub fn shared_with_hint(enc: &'static enc::Encoding, confidence: f32) -> SharedEncodingHint { let mut eh = EncodingHint::new(); eh.add_hint(enc, confidence); eh.clear_changed(); @@ -59,9 +56,9 @@ impl EncodingHint { /// positive confidence value. If no encoding (or applicable replacement) /// is found for the specified label, returns false. Return true if an /// encoding is found _and_ this hint changes the top confidence encoding. - pub fn add_label_hint(&mut self, enc: L, confidence: f32) - -> bool - where L: AsRef<[u8]> + pub fn add_label_hint(&mut self, enc: L, confidence: f32) -> bool + where + L: AsRef<[u8]>, { if let Some(enc) = enc::Encoding::for_label(enc.as_ref()) { self.add_hint(enc, confidence) @@ -73,16 +70,14 @@ impl EncodingHint { /// Add a hint for the specified encoding and some positive confidence /// value. Return true if this hint changes the top most confident /// encoding. - pub fn add_hint(&mut self, enc: &'static enc::Encoding, confidence: f32) - -> bool - { + pub fn add_hint(&mut self, enc: &'static enc::Encoding, confidence: f32) -> bool { assert!(confidence > 0.0); - let new_conf = *( - self.encodings.entry(enc) - .and_modify(|c| *c += confidence) - .or_insert(confidence) - ); + let new_conf = *(self + .encodings + .entry(enc) + .and_modify(|c| *c += confidence) + .or_insert(confidence)); if new_conf > self.confidence { self.confidence = new_conf; @@ -115,9 +110,7 @@ impl EncodingHint { /// part of the document body. pub fn could_read_from(&self, enc: &'static enc::Encoding) -> bool { if let Some(t) = self.top { - if ( includes_ascii(t) && !includes_ascii(enc)) || - (!includes_ascii(t) && t != enc) - { + if (includes_ascii(t) && !includes_ascii(enc)) || (!includes_ascii(t) && t != enc) { return false; } } @@ -178,8 +171,12 @@ fn includes_ascii(enc: &'static enc::Encoding) -> bool { mod tests { use super::*; - fn is_send() -> bool { true } - fn is_sync() -> bool { true } + fn is_send() -> bool { + true + } + fn is_sync() -> bool { + true + } #[test] fn test_send_sync() { @@ -191,14 +188,18 @@ mod tests { // MIT/Apache licensed trait AmbiguousIfImpl { - fn some_f() -> bool { true } + fn some_f() -> bool { + true + } } impl AmbiguousIfImpl<()> for T {} - #[allow(unused)] struct NotSync; + #[allow(unused)] + struct NotSync; impl AmbiguousIfImpl for T {} - #[allow(unused)] struct NotSend; + #[allow(unused)] + struct NotSend; impl AmbiguousIfImpl for T {} #[test] @@ -209,11 +210,12 @@ mod tests { #[test] fn encoding_hint() { let mut encs = EncodingHint::new(); - assert!( encs.add_label_hint("LATIN1", 0.3)); + assert!(encs.add_label_hint("LATIN1", 0.3)); assert!(!encs.add_label_hint("iso-8859-1", 0.4)); - assert!(!encs.add_label_hint("utf-8", 0.5)); + assert!(!encs.add_label_hint("utf-8", 0.5)); assert_eq!( - "windows-1252", encs.top().unwrap().name(), + "windows-1252", + encs.top().unwrap().name(), "desired replacement for first two hints" ); assert_eq!(0.3 + 0.4, encs.confidence()); @@ -223,9 +225,9 @@ mod tests { fn could_read_from() { let mut eh = EncodingHint::new(); eh.add_hint(enc::UTF_8, 0.5); - assert!( eh.could_read_from(enc::UTF_8)); - assert!( eh.could_read_from(enc::WINDOWS_1252)); - assert!( eh.could_read_from(enc::ISO_2022_JP)); + assert!(eh.could_read_from(enc::UTF_8)); + assert!(eh.could_read_from(enc::WINDOWS_1252)); + assert!(eh.could_read_from(enc::ISO_2022_JP)); assert!(!eh.could_read_from(enc::UTF_16LE)); assert!(!eh.could_read_from(enc::UTF_16BE)); } @@ -234,7 +236,7 @@ mod tests { fn could_read_from_multi_byte() { let mut eh = EncodingHint::new(); eh.add_hint(enc::UTF_16LE, 0.5); - assert!( eh.could_read_from(enc::UTF_16LE)); + assert!(eh.could_read_from(enc::UTF_16LE)); assert!(!eh.could_read_from(enc::UTF_16BE)); assert!(!eh.could_read_from(enc::ISO_2022_JP)); assert!(!eh.could_read_from(enc::UTF_8)); diff --git a/marked/src/dom.rs b/marked/src/dom.rs index 905c8e7..44aa3e5 100644 --- a/marked/src/dom.rs +++ b/marked/src/dom.rs @@ -26,7 +26,8 @@ pub use tendril::StrTendril; mod node_ref; mod serializer; -#[macro_use] pub mod filter; +#[macro_use] +pub mod filter; pub mod html; #[cfg(feature = "xml")] @@ -100,14 +101,14 @@ pub enum NodeData { #[derive(Clone, Debug)] pub struct DocumentType { pub name: StrTendril, - _priv: () + _priv: (), } /// Processing instruction details. #[derive(Clone, Debug)] pub struct ProcessingInstruction { pub data: StrTendril, - _priv: () + _priv: (), } /// A markup element with name and attributes. @@ -115,15 +116,13 @@ pub struct ProcessingInstruction { pub struct Element { pub name: QualName, pub attrs: Vec, - _priv: () + _priv: (), } /// Core implementation. impl Document { /// The constant `NodeId` for the document node of all `Document`s. - pub const DOCUMENT_NODE_ID: NodeId = NodeId( - unsafe { NonZeroU32::new_unchecked(1) } - ); + pub const DOCUMENT_NODE_ID: NodeId = NodeId(unsafe { NonZeroU32::new_unchecked(1) }); /// Construct a new `Document` with the single empty document node. pub fn new() -> Self { @@ -134,8 +133,8 @@ impl Document { /// specified capacity. pub fn with_capacity(count: u32) -> Self { let mut nodes = Vec::with_capacity(count as usize); - nodes.push(Node::new(NodeData::Hole)); // Index 0: Padding - nodes.push(Node::new(NodeData::Document)); // Index 1: DOCUMENT_NODE_ID + nodes.push(Node::new(NodeData::Hole)); // Index 0: Padding + nodes.push(Node::new(NodeData::Document)); // Index 1: DOCUMENT_NODE_ID Document { nodes } } @@ -165,18 +164,21 @@ impl Document { pub fn root_element(&self) -> Option { let document_node = &self[Document::DOCUMENT_NODE_ID]; debug_assert!( - (if let NodeData::Document = document_node.data { true } - else { false }), - "not document node: {:?}", document_node); + (if let NodeData::Document = document_node.data { + true + } else { + false + }), + "not document node: {:?}", + document_node + ); debug_assert!(document_node.parent.is_none()); debug_assert!(document_node.next_sibling.is_none()); debug_assert!(document_node.prev_sibling.is_none()); let mut root = None; for child in self.children(Document::DOCUMENT_NODE_ID) { match &self[child].data { - NodeData::DocType(_) | - NodeData::Comment(_) | - NodeData::Pi(_) => {} + NodeData::DocType(_) | NodeData::Comment(_) | NodeData::Pi(_) => {} NodeData::Document => { debug_assert!(false, "Document child of Document"); root = None; @@ -206,10 +208,17 @@ impl Document { fn push_node(&mut self, node: Node) -> NodeId { debug_assert!( - (if let NodeData::Document | NodeData::Hole = node.data { false } - else { true }), - "Invalid push {:?}", node.data); - let next_index = self.nodes.len() + (if let NodeData::Document | NodeData::Hole = node.data { + false + } else { + true + }), + "Invalid push {:?}", + node.data + ); + let next_index = self + .nodes + .len() .try_into() .expect("Document (u32) node index overflow"); debug_assert!(next_index > 1); @@ -228,13 +237,16 @@ impl Document { pub fn detach(&mut self, id: NodeId) { assert!( id != Document::DOCUMENT_NODE_ID, - "Can't detach the synthetic document node"); + "Can't detach the synthetic document node" + ); let (parent, prev_sibling, next_sibling) = { let node = &mut self[id]; - (node.parent.take(), - node.prev_sibling.take(), - node.next_sibling.take()) + ( + node.parent.take(), + node.prev_sibling.take(), + node.next_sibling.take(), + ) }; if let Some(next_sibling) = next_sibling { @@ -251,9 +263,7 @@ impl Document { } /// Append node as new last child of parent, and return its new ID. - pub fn append_child(&mut self, parent: NodeId, node: Node) - -> NodeId - { + pub fn append_child(&mut self, parent: NodeId, node: Node) -> NodeId { let id = self.push_node(node); self.append(parent, id); id @@ -275,9 +285,7 @@ impl Document { } /// Insert node before the given sibling and return its new ID. - pub fn insert_before_sibling(&mut self, sibling: NodeId, node: Node) - -> NodeId - { + pub fn insert_before_sibling(&mut self, sibling: NodeId, node: Node) -> NodeId { let id = self.push_node(node); self.insert_before(sibling, id); id @@ -285,17 +293,15 @@ impl Document { fn insert_before(&mut self, sibling: NodeId, new_sibling: NodeId) { self.detach(new_sibling); - let parent = self[sibling].parent + let parent = self[sibling] + .parent .expect("insert_before sibling has parent"); self[parent].assert_suitable_parent(); self[new_sibling].parent = Some(parent); self[new_sibling].next_sibling = Some(sibling); if let Some(prev_sibling) = self[sibling].prev_sibling.take() { self[new_sibling].prev_sibling = Some(prev_sibling); - debug_assert_eq!( - self[prev_sibling].next_sibling, - Some(sibling) - ); + debug_assert_eq!(self[prev_sibling].next_sibling, Some(sibling)); self[prev_sibling].next_sibling = Some(new_sibling); } else { debug_assert_eq!(self[parent].first_child, Some(sibling)); @@ -333,38 +339,31 @@ impl Document { /// Return an iterator over this node's direct children. /// /// Will be empty if the node can not or does not have children. - pub fn children<'a>(&'a self, id: NodeId) - -> impl Iterator + 'a - { - iter::successors( - self[id].first_child, - move |&id| self[id].next_sibling - ) + pub fn children<'a>(&'a self, id: NodeId) -> impl Iterator + 'a { + iter::successors(self[id].first_child, move |&id| self[id].next_sibling) } /// Return an iterator over the specified node and all its following, /// direct siblings, within the same parent. - pub fn node_and_following_siblings<'a>(&'a self, id: NodeId) - -> impl Iterator + 'a - { + pub fn node_and_following_siblings<'a>( + &'a self, + id: NodeId, + ) -> impl Iterator + 'a { iter::successors(Some(id), move |&id| self[id].next_sibling) } /// Return an iterator over the specified node and all its ancestors, /// terminating at the document node. - pub fn node_and_ancestors<'a>(&'a self, id: NodeId) - -> impl Iterator + 'a - { + pub fn node_and_ancestors<'a>(&'a self, id: NodeId) -> impl Iterator + 'a { iter::successors(Some(id), move |&id| self[id].parent) } /// Return an iterator over all nodes, starting with the document node, and /// including all descendants in tree order. pub fn nodes<'a>(&'a self) -> impl Iterator + 'a { - iter::successors( - Some(Document::DOCUMENT_NODE_ID), - move |&id| self.next_in_tree_order(id) - ) + iter::successors(Some(Document::DOCUMENT_NODE_ID), move |&id| { + self.next_in_tree_order(id) + }) } fn next_in_tree_order(&self, id: NodeId) -> Option { @@ -382,7 +381,8 @@ impl Document { push_if_pair( &mut next, self[Document::DOCUMENT_NODE_ID].first_child, - Document::DOCUMENT_NODE_ID); + Document::DOCUMENT_NODE_ID, + ); while let Some((id, nid)) = next.pop() { let data = mem::replace(&mut self[id].data, NodeData::Hole); @@ -426,7 +426,9 @@ impl Document { /// same as the original. As compared with `deep_clone(DOCUMENT_NODE_ID)` /// this is faster but potentially much less memory efficient. pub fn bulk_clone(&self) -> Document { - Document { nodes: self.nodes.clone() } + Document { + nodes: self.nodes.clone(), + } } /// Replace the specified node ID with its children. @@ -443,7 +445,8 @@ impl Document { pub fn fold(&mut self, id: NodeId) { assert!( id != Document::DOCUMENT_NODE_ID, - "Can't fold the synthetic document node"); + "Can't fold the synthetic document node" + ); let mut next_child = self[id].first_child; while let Some(child) = next_child { @@ -486,18 +489,20 @@ impl std::ops::IndexMut for Document { impl Element { /// Construct new element by local name, with no attributes. pub fn new(lname: LN) -> Element - where LN: Into + where + LN: Into, { Element { name: QualName::new(None, ns!(), lname.into()), attrs: Vec::new(), - _priv: () + _priv: (), } } /// Return true if this element has the given local name. pub fn is_elem(&self, lname: LN) -> bool - where LN: Into + where + LN: Into, { self.name.local == lname.into() } @@ -514,7 +519,8 @@ impl Element { /// Return attribute value by local name, if present. pub fn attr(&self, lname: LN) -> Option<&StrTendril> - where LN: Into + where + LN: Into, { let lname = lname.into(); self.attrs @@ -530,7 +536,8 @@ impl Element { /// same named attributes or multiples might be introduced via manual /// mutations. pub fn remove_attr(&mut self, lname: LN) -> Option - where LN: Into + where + LN: Into, { let mut found = None; let mut i = 0; @@ -555,7 +562,9 @@ impl Element { /// returned. Parsers may allow same named attributes or multiples might be /// introduced via manual mutations. pub fn set_attr(&mut self, lname: LN, value: V) -> Option - where LN: Into, V: Into + where + LN: Into, + V: Into, { let mut found = None; let mut i = 0; @@ -583,7 +592,7 @@ impl Element { if found.is_none() { self.attrs.push(Attribute { name: QualName::new(None, ns!(), lname), - value: value.take().unwrap() + value: value.take().unwrap(), }); } found @@ -598,7 +607,8 @@ impl Node { /// Construct a new text node. pub fn new_text(text: T) -> Node - where T: Into + where + T: Into, { Node::new(NodeData::Text(text.into())) } @@ -667,7 +677,8 @@ impl NodeData { /// Return attribute value by given local attribute name, if this is an /// element with that attribute present. pub fn attr(&self, lname: LN) -> Option<&StrTendril> - where LN: Into + where + LN: Into, { if let Some(edata) = self.as_element() { edata.attr(lname) @@ -678,7 +689,8 @@ impl NodeData { /// Return true if this Node is an element with the given local name. pub fn is_elem(&self, lname: LN) -> bool - where LN: Into + where + LN: Into, { if let Some(edata) = self.as_element() { edata.is_elem(lname) @@ -690,9 +702,14 @@ impl NodeData { #[inline] fn assert_suitable_parent(&self) { debug_assert!( - (if let NodeData::Document | NodeData::Elem(_) = self { true } - else { false }), - "Not a suitable parent: {:?}", self) + (if let NodeData::Document | NodeData::Elem(_) = self { + true + } else { + false + }), + "Not a suitable parent: {:?}", + self + ) } } @@ -702,11 +719,7 @@ fn push_if(stack: &mut Vec, id: Option) { } } -fn push_if_pair( - stack: &mut Vec<(NodeId, NodeId)>, - id: Option, - oid: NodeId) -{ +fn push_if_pair(stack: &mut Vec<(NodeId, NodeId)>, id: Option, oid: NodeId) { if let Some(id) = id { stack.push((id, oid)); } diff --git a/marked/src/dom/filter.rs b/marked/src/dom/filter.rs index 5550808..8607775 100644 --- a/marked/src/dom/filter.rs +++ b/marked/src/dom/filter.rs @@ -7,7 +7,7 @@ use log::debug; use crate::chars::{is_all_ctrl_ws, replace_chars}; use crate::dom::{ html::{t, TAG_META}, - Document, Element, NodeData, NodeId, NodeRef, StrTendril + Document, Element, NodeData, NodeId, NodeRef, StrTendril, }; /// An instruction returned by the `Fn` closure used by [`Document::filter`]. @@ -32,7 +32,8 @@ impl Document { /// /// See [`Document::filter_at`] for additional details. pub fn filter(&mut self, mut f: F) - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { self.filter_at_ref(Document::DOCUMENT_NODE_ID, true, &mut f); } @@ -43,7 +44,8 @@ impl Document { /// /// See [`Document::filter_at`] for additional details. pub fn filter_breadth(&mut self, mut f: F) - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { self.filter_at_ref(Document::DOCUMENT_NODE_ID, false, &mut f); } @@ -88,7 +90,8 @@ impl Document { /// [`Document::deep_clone`][`Document::deep_clone`] and drop the original /// `Document`. pub fn filter_at(&mut self, id: NodeId, mut f: F) - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { self.filter_at_ref(id, true, &mut f); } @@ -98,14 +101,15 @@ impl Document { /// /// See [`Document::filter_at`] for additional details. pub fn filter_at_breadth(&mut self, id: NodeId, mut f: F) - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { self.filter_at_ref(id, false, &mut f); } - fn filter_at_ref(&mut self, id: NodeId, depth_first: bool, f: &mut F) - -> Action - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + fn filter_at_ref(&mut self, id: NodeId, depth_first: bool, f: &mut F) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { let res = if depth_first { self.walk_depth(id, f) @@ -114,7 +118,7 @@ impl Document { }; match res { - Action::Continue => {}, + Action::Continue => {} Action::Fold => { self.fold(id); } @@ -126,7 +130,8 @@ impl Document { } fn walk_depth(&mut self, id: NodeId, f: &mut F) -> Action - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { // Children first, recursively let mut next_child = self[id].first_child; @@ -140,7 +145,8 @@ impl Document { } fn walk_breadth(&mut self, id: NodeId, f: &mut F) -> Action - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { let res = self.filter_node(id, f); if res != Action::Continue { @@ -170,7 +176,8 @@ impl Document { } fn filter_node(&mut self, id: NodeId, f: &mut F) -> Action - where F: Fn(NodeRef<'_>, &mut NodeData) -> Action + where + F: Fn(NodeRef<'_>, &mut NodeData) -> Action, { // We need to replace node.data with a placeholder (Hole) to appease // the borrow checker. Otherwise there would be an aliasing problem @@ -192,7 +199,9 @@ impl Document { debug_assert!( node.first_child.is_none() && node.last_child.is_none(), "Filter changed node {:?} with children to {:?}", - id, ndata); + id, + ndata + ); } } node.data = ndata; @@ -254,10 +263,7 @@ pub fn detach_banned_elements(_p: NodeRef<'_>, data: &mut NodeData) -> Action { /// /// Should be run depth-first for complete filtering. pub fn fold_empty_inline(pos: NodeRef<'_>, data: &mut NodeData) -> Action { - if is_inline(data) && - !is_multi_media(data) && - pos.children().all(is_logical_ws) - { + if is_inline(data) && !is_multi_media(data) && pos.children().all(is_logical_ws) { Action::Fold } else { Action::Continue @@ -290,9 +296,7 @@ pub fn detach_pis(_p: NodeRef<'_>, data: &mut NodeData) -> Action { /// [`TagMeta`](crate::html::TagMeta) for each element. /// /// Compatible with depth or breadth-first filtering. -pub fn retain_basic_attributes(_p: NodeRef<'_>, data: &mut NodeData) - -> Action -{ +pub fn retain_basic_attributes(_p: NodeRef<'_>, data: &mut NodeData) -> Action { if let Some(ref mut elm) = data.as_element_mut() { if let Some(tmeta) = TAG_META.get(&elm.name.local) { elm.attrs.retain(|a| tmeta.has_basic_attr(&a.name.local)); @@ -328,9 +332,7 @@ pub fn text_normalize(pos: NodeRef<'_>, data: &mut NodeData) -> Action { // tendril to the merge queue and detach. let node_r = pos.next_sibling(); if node_r.map_or(false, |n| n.as_text().is_some()) { - MERGE_Q.with(|q| { - q.borrow_mut().push_tendril(t) - }); + MERGE_Q.with(|q| q.borrow_mut().push_tendril(t)); return Action::Detach; } @@ -417,14 +419,15 @@ fn is_logical_ws(n: NodeRef<'_>) -> bool { } fn is_multi_media(n: &NodeData) -> bool { - /**/n.is_elem(t::AUDIO) || - n.is_elem(t::EMBED) || - n.is_elem(t::IFRAME) || - n.is_elem(t::IMG) || - n.is_elem(t::METER) || - n.is_elem(t::OBJECT) || - n.is_elem(t::PICTURE) || - n.is_elem(t::PROGRESS) || - n.is_elem(t::SVG) || - n.is_elem(t::VIDEO) + /**/ + n.is_elem(t::AUDIO) + || n.is_elem(t::EMBED) + || n.is_elem(t::IFRAME) + || n.is_elem(t::IMG) + || n.is_elem(t::METER) + || n.is_elem(t::OBJECT) + || n.is_elem(t::PICTURE) + || n.is_elem(t::PROGRESS) + || n.is_elem(t::SVG) + || n.is_elem(t::VIDEO) } diff --git a/marked/src/dom/html.rs b/marked/src/dom/html.rs index 4cee42e..40949fa 100644 --- a/marked/src/dom/html.rs +++ b/marked/src/dom/html.rs @@ -16,29 +16,20 @@ use std::io; use encoding_rs as enc; -use html5ever::{ - parse_document, parse_fragment, - ExpandedName, QualName, Parser, ParseOpts -}; -use html5ever::interface::tree_builder::{ - ElementFlags, NodeOrText, QuirksMode, TreeSink -}; +use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::tendril::{StrTendril, TendrilSink}; +use html5ever::{parse_document, parse_fragment, ExpandedName, ParseOpts, Parser, QualName}; use log::{debug, info, trace}; use tendril::{fmt as form, Tendril}; use crate::{ - Attribute, Decoder, Document, DocumentType, Element, EncodingHint, - Node, NodeData, NodeId, ProcessingInstruction, SharedEncodingHint, - BOM_CONF, HTML_META_CONF, INITIAL_BUFFER_SIZE, + Attribute, Decoder, Document, DocumentType, Element, EncodingHint, Node, NodeData, NodeId, + ProcessingInstruction, SharedEncodingHint, BOM_CONF, HTML_META_CONF, INITIAL_BUFFER_SIZE, }; mod meta; -pub use self::meta::{ - a, ns, t, - TagMeta, TAG_META -}; +pub use self::meta::{a, ns, t, TagMeta, TAG_META}; /// Parse HTML document from UTF-8 bytes in RAM. pub fn parse_utf8(bytes: &[u8]) -> Document { @@ -60,9 +51,10 @@ pub fn parse_utf8_fragment(bytes: &[u8]) -> Document { sink, Default::default(), QualName::new(None, ns::HTML, t::DIV), - vec![]) - .from_utf8() - .one(bytes); + vec![], + ) + .from_utf8() + .one(bytes); // Note that the above context name, doesn't really get used. A matching // element is pushed but never linked, so unless we replace the doc (deep @@ -103,16 +95,17 @@ pub fn parse_utf8_fragment(bytes: &[u8]) -> Document { /// hint is found via a leading Byte-Order-Mark (BOM) or in the documents /// ``, the parse will be restarted from the beginning with that encoding /// and continuing until the end. -pub fn parse_buffered(hint: SharedEncodingHint, r: &mut R) - -> Result - where R: io::Read +pub fn parse_buffered(hint: SharedEncodingHint, r: &mut R) -> Result +where + R: io::Read, { - let enc = hint.borrow().top().expect("EnodingHint default encoding required"); + let enc = hint + .borrow() + .top() + .expect("EnodingHint default encoding required"); - let parser_sink: Parser = parse_document( - Sink::new(hint.clone(), true), - ParseOpts::default() - ); + let parser_sink: Parser = + parse_document(Sink::new(hint.clone(), true), ParseOpts::default()); // Decoders are "Sink adaptors" that also impl TendrilSink. // The decoder is consumed to finish the parse. @@ -161,7 +154,7 @@ pub fn parse_buffered(hint: SharedEncodingHint, r: &mut R) } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e) + Err(e) => return Err(e), } } // repeat on interrupt or short read. @@ -177,17 +170,16 @@ pub fn parse_buffered(hint: SharedEncodingHint, r: &mut R) if let Some(enc) = changed { info!( "Reparsing with enc {}, buffered: {}, prior enc errors: {}", - enc.name(), buff.len(), errors + enc.name(), + buff.len(), + errors ); hint.borrow_mut().clear_errors(); finished = None; // Replace decoder and re-process, consuming the original tendril // buffer, which was previously cloned. - let parser_sink = parse_document( - Sink::new(hint.clone(), false), - ParseOpts::default() - ); + let parser_sink = parse_document(Sink::new(hint.clone(), false), ParseOpts::default()); decoder = Some(Decoder::new(enc, parser_sink)); decoder.as_mut().unwrap().process(buff); } @@ -205,13 +197,12 @@ pub fn parse_buffered(hint: SharedEncodingHint, r: &mut R) } // Return encoding for any Byte-Order-Mark found at start of buff. -fn bom_enc(buff: &Tendril::) -> Option<&'static enc::Encoding> -{ +fn bom_enc(buff: &Tendril) -> Option<&'static enc::Encoding> { match (buff[0], buff[1], buff[2]) { - (0xFE, 0xFF, _) => Some(enc::UTF_16BE), - (0xFF, 0xFE, _) => Some(enc::UTF_16LE), + (0xFE, 0xFF, _) => Some(enc::UTF_16BE), + (0xFF, 0xFE, _) => Some(enc::UTF_16LE), (0xEF, 0xBB, 0xBF) => Some(enc::UTF_8), - _ => None + _ => None, } } @@ -230,7 +221,6 @@ impl Sink { /// If enc_check is true, encodings mentioned in html meta elements will be /// added to the encoding hint as soon as possible in the parse. pub fn new(enc_hint: SharedEncodingHint, enc_check: bool) -> Sink { - Sink { document: Document::new(), quirks_mode: QuirksMode::NoQuirks, @@ -243,13 +233,10 @@ impl Sink { self.document.push_node(Node::new(data)) } - fn append_common( - &mut self, - child: NodeOrText, - previous: P, - append: A) - where P: FnOnce(&mut Document) -> Option, - A: FnOnce(&mut Document, NodeId) + fn append_common(&mut self, child: NodeOrText, previous: P, append: A) + where + P: FnOnce(&mut Document) -> Option, + A: FnOnce(&mut Document, NodeId), { let new_node = match child { NodeOrText::AppendText(text) => { @@ -309,7 +296,8 @@ impl Sink { debug!( "found charsets: {:?} ({})", charsets.iter().map(|e| e.name()).collect::>(), - metas); + metas + ); let conf = HTML_META_CONF / (metas as f32); let mut hints = self.enc_hint.borrow_mut(); @@ -376,19 +364,20 @@ impl TreeSink for Sink { &mut self, name: QualName, attrs: Vec, - _flags: ElementFlags) - -> NodeId - { - self.new_node(NodeData::Elem(Element { name, attrs, _priv: () })) + _flags: ElementFlags, + ) -> NodeId { + self.new_node(NodeData::Elem(Element { + name, + attrs, + _priv: (), + })) } fn create_comment(&mut self, text: StrTendril) -> NodeId { self.new_node(NodeData::Comment(text)) } - fn create_pi(&mut self, _target: StrTendril, data: StrTendril) - -> NodeId - { + fn create_pi(&mut self, _target: StrTendril, data: StrTendril) -> NodeId { self.new_node(NodeData::Pi(ProcessingInstruction { data, _priv: () })) } @@ -400,11 +389,7 @@ impl TreeSink for Sink { ) } - fn append_before_sibling( - &mut self, - &sibling: &NodeId, - child: NodeOrText) - { + fn append_before_sibling(&mut self, &sibling: &NodeId, child: NodeOrText) { self.append_common( child, |document| document[sibling].prev_sibling, @@ -416,8 +401,8 @@ impl TreeSink for Sink { &mut self, element: &NodeId, prev_element: &NodeId, - child: NodeOrText) - { + child: NodeOrText, + ) { if self.document[*element].parent.is_some() { self.append_before_sibling(element, child) } else { @@ -429,19 +414,13 @@ impl TreeSink for Sink { &mut self, name: StrTendril, _p_id: StrTendril, - _s_id: StrTendril) - { - let node = self.new_node(NodeData::DocType( - DocumentType { name, _priv: () } - )); + _s_id: StrTendril, + ) { + let node = self.new_node(NodeData::DocType(DocumentType { name, _priv: () })); self.document.append(Document::DOCUMENT_NODE_ID, node) } - fn add_attrs_if_missing( - &mut self, - &target: &NodeId, - attrs: Vec) - { + fn add_attrs_if_missing(&mut self, &target: &NodeId, attrs: Vec) { // Note this is only used in few, strange cases involving re-working of // html and body node attributes, but it definitely needs to be // implemented. diff --git a/marked/src/dom/html/meta.rs b/marked/src/dom/html/meta.rs index d81e276..705633b 100644 --- a/marked/src/dom/html/meta.rs +++ b/marked/src/dom/html/meta.rs @@ -96,272 +96,272 @@ impl Default for TagMeta { /// `Namespace` constants pub mod ns { - use html5ever::ns; use crate::dom::Namespace; + use html5ever::ns; - pub const HTML: Namespace = ns!(html); + pub const HTML: Namespace = ns!(html); } /// HTML tag constants pub mod t { - use html5ever::local_name as lname; use crate::dom::LocalName; + use html5ever::local_name as lname; /// Tag ``: anchor. /// (meta: inline) - pub const A: LocalName = lname!("a"); + pub const A: LocalName = lname!("a"); /// Tag ``: abbreviation. /// (meta: inline) - pub const ABBR: LocalName = lname!("abbr"); + pub const ABBR: LocalName = lname!("abbr"); /// Tag ``: acronym. /// (meta: deprecated inline) - pub const ACRONYM: LocalName = lname!("acronym"); + pub const ACRONYM: LocalName = lname!("acronym"); /// Tag `
`: contact information for the author or owner. - pub const ADDRESS: LocalName = lname!("address"); + pub const ADDRESS: LocalName = lname!("address"); /// Tag ``: embedded applet. /// (meta: deprecated) - pub const APPLET: LocalName = lname!("applet"); + pub const APPLET: LocalName = lname!("applet"); /// Tag ``: area inside an image-map. /// (meta: empty) - pub const AREA: LocalName = lname!("area"); + pub const AREA: LocalName = lname!("area"); /// Tag `
`: Structure: an independent content element. - pub const ARTICLE: LocalName = lname!("article"); + pub const ARTICLE: LocalName = lname!("article"); /// Tag `