From dacfd249ae587b39a1b72b6994dbc5df0d99957f Mon Sep 17 00:00:00 2001 From: c Date: Tue, 10 Dec 2024 11:04:00 +0100 Subject: [PATCH 01/16] clippy --- examples/html2term.rs | 20 ++++++++------------ src/ansi_colours.rs | 1 + src/css.rs | 10 ++++------ src/css/parser.rs | 12 +++++------- src/lib.rs | 37 +++++++++++++++++++------------------ src/markup5ever_rcdom.rs | 2 +- 6 files changed, 38 insertions(+), 44 deletions(-) diff --git a/examples/html2term.rs b/examples/html2term.rs index a1c2937..141479f 100644 --- a/examples/html2term.rs +++ b/examples/html2term.rs @@ -269,21 +269,17 @@ mod top { Key::Char('k') | Key::Up => { if inspect_path.is_empty() { doc_y = doc_y.saturating_sub(1); - } else { - if *inspect_path.last().unwrap() > 1 { - *inspect_path.last_mut().unwrap() -= 1; - annotated = rerender(&dom, &inspect_path, width, &options); - } + } else if *inspect_path.last().unwrap() > 1 { + *inspect_path.last_mut().unwrap() -= 1; + annotated = rerender(&dom, &inspect_path, width, &options); } } Key::Char('h') | Key::Left => { if inspect_path.is_empty() { doc_x = doc_x.saturating_sub(1); - } else { - if inspect_path.len() > 1 { - inspect_path.pop(); - annotated = rerender(&dom, &inspect_path, width, &options); - } + } else if inspect_path.len() > 1 { + inspect_path.pop(); + annotated = rerender(&dom, &inspect_path, width, &options); } } Key::Char('l') | Key::Right => { @@ -378,7 +374,7 @@ mod top { }; if inspect_path.is_empty() { let render_tree = config - .dom_to_render_tree(&dom) + .dom_to_render_tree(dom) .expect("Failed to build render tree"); config .render_to_lines(render_tree, width) @@ -405,7 +401,7 @@ mod top { ) .expect("Invalid CSS"); let render_tree = config - .dom_to_render_tree(&dom) + .dom_to_render_tree(dom) .expect("Failed to build render tree"); config .render_to_lines(render_tree, width) diff --git a/src/ansi_colours.rs b/src/ansi_colours.rs index 9dd54a1..edf8da4 100644 --- a/src/ansi_colours.rs +++ b/src/ansi_colours.rs @@ -8,6 +8,7 @@ use crate::{parse, RichAnnotation, RichDecorator}; use std::io; /// Reads HTML from `input`, and returns text wrapped to `width` columns. +/// /// The text is returned as a `Vec>`; the annotations are vectors /// of `RichAnnotation`. The "outer" annotation comes first in the `Vec`. /// diff --git a/src/css.rs b/src/css.rs index f3207e9..219ed73 100644 --- a/src/css.rs +++ b/src/css.rs @@ -132,10 +132,8 @@ impl Selector { if Rc::ptr_eq(child, node) { break; } - } else { - if Rc::ptr_eq(child, node) { - return false; - } + } else if Rc::ptr_eq(child, node) { + return false; } } } @@ -148,14 +146,14 @@ impl Selector { */ let idx_offset = idx - b; if *a == 0 { - return idx_offset == 0 && Self::do_matches(&comps[1..], &node); + return idx_offset == 0 && Self::do_matches(&comps[1..], node); } if (idx_offset % a) != 0 { // Not a multiple return false; } let n = idx_offset / a; - n >= 0 && Self::do_matches(&comps[1..], &node) + n >= 0 && Self::do_matches(&comps[1..], node) } }, } diff --git a/src/css/parser.rs b/src/css/parser.rs index 0d64f0c..83886dd 100644 --- a/src/css/parser.rs +++ b/src/css/parser.rs @@ -397,13 +397,11 @@ fn parse_faulty_color( text: &str, ) -> Result>> { let text = text.trim(); - if text.chars().all(|c| c.is_hex_digit()) { - if text.len() == 6 { - let r = u8::from_str_radix(&text[0..2], 16).unwrap(); - let g = u8::from_str_radix(&text[2..4], 16).unwrap(); - let b = u8::from_str_radix(&text[4..6], 16).unwrap(); - return Ok(Colour::Rgb(r, g, b)); - } + if text.chars().all(|c| c.is_hex_digit()) && text.len() == 6 { + let r = u8::from_str_radix(&text[0..2], 16).unwrap(); + let g = u8::from_str_radix(&text[2..4], 16).unwrap(); + let b = u8::from_str_radix(&text[4..6], 16).unwrap(); + return Ok(Colour::Rgb(r, g, b)); } Err(e) } diff --git a/src/lib.rs b/src/lib.rs index 41bea8a..b546477 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -846,7 +846,7 @@ impl RenderNode { if style.internal_pre { write!(f, " internal_pre")?; } - writeln!(f, "") + writeln!(f) } fn write_self( &self, @@ -858,52 +858,52 @@ impl RenderNode { match &self.info { RenderNodeInfo::Text(s) => writeln!(f, "{:indent$}{s:?}", "")?, RenderNodeInfo::Container(v) => { - self.write_container("Container", &v, f, indent)?; + self.write_container("Container", v, f, indent)?; } RenderNodeInfo::Link(targ, v) => { - self.write_container(&format!("Link({})", targ), &v, f, indent)?; + self.write_container(&format!("Link({})", targ), v, f, indent)?; } RenderNodeInfo::Em(v) => { - self.write_container("Em", &v, f, indent)?; + self.write_container("Em", v, f, indent)?; } RenderNodeInfo::Strong(v) => { - self.write_container("Strong", &v, f, indent)?; + self.write_container("Strong", v, f, indent)?; } RenderNodeInfo::Strikeout(v) => { - self.write_container("Strikeout", &v, f, indent)?; + self.write_container("Strikeout", v, f, indent)?; } RenderNodeInfo::Code(v) => { - self.write_container("Code", &v, f, indent)?; + self.write_container("Code", v, f, indent)?; } RenderNodeInfo::Img(src, title) => { writeln!(f, "{:indent$}Img src={:?} title={:?}:", "", src, title)?; } RenderNodeInfo::Block(v) => { - self.write_container("Block", &v, f, indent)?; + self.write_container("Block", v, f, indent)?; } RenderNodeInfo::Header(depth, v) => { - self.write_container(&format!("Header({})", depth), &v, f, indent)?; + self.write_container(&format!("Header({})", depth), v, f, indent)?; } RenderNodeInfo::Div(v) => { - self.write_container("Div", &v, f, indent)?; + self.write_container("Div", v, f, indent)?; } RenderNodeInfo::BlockQuote(v) => { - self.write_container("BlockQuote", &v, f, indent)?; + self.write_container("BlockQuote", v, f, indent)?; } RenderNodeInfo::Ul(v) => { - self.write_container("Ul", &v, f, indent)?; + self.write_container("Ul", v, f, indent)?; } RenderNodeInfo::Ol(start, v) => { - self.write_container(&format!("Ol({})", start), &v, f, indent)?; + self.write_container(&format!("Ol({})", start), v, f, indent)?; } RenderNodeInfo::Dl(v) => { - self.write_container("Dl", &v, f, indent)?; + self.write_container("Dl", v, f, indent)?; } RenderNodeInfo::Dt(v) => { - self.write_container("Dt", &v, f, indent)?; + self.write_container("Dt", v, f, indent)?; } RenderNodeInfo::Dd(v) => { - self.write_container("Dd", &v, f, indent)?; + self.write_container("Dd", v, f, indent)?; } RenderNodeInfo::Break => { writeln!(f, "{:indent$}Break", "", indent = indent)?; @@ -942,10 +942,10 @@ impl RenderNode { writeln!(f, "{:indent$}FragStart({}):", "", frag)?; } RenderNodeInfo::ListItem(v) => { - self.write_container("ListItem", &v, f, indent)?; + self.write_container("ListItem", v, f, indent)?; } RenderNodeInfo::Sup(v) => { - self.write_container("Sup", &v, f, indent)?; + self.write_container("Sup", v, f, indent)?; } } Ok(()) @@ -2764,6 +2764,7 @@ where } /// Reads HTML from `input`, and returns text wrapped to `width` columns. +/// /// The text is returned as a `Vec>`; the annotations are vectors /// of `RichAnnotation`. The "outer" annotation comes first in the `Vec`. pub fn from_read_rich(input: R, width: usize) -> Result>>> diff --git a/src/markup5ever_rcdom.rs b/src/markup5ever_rcdom.rs index dfdccd9..ef3ea24 100644 --- a/src/markup5ever_rcdom.rs +++ b/src/markup5ever_rcdom.rs @@ -151,7 +151,7 @@ impl Node { /// Return the element type (if an element) pub fn element_name(&self) -> Option { if let NodeData::Element { ref name, .. } = self.data { - Some(format!("{}", &*name.local_name())) + Some(format!("{}", name.local_name())) } else { None } From 52055fb1baefbe3c74431edfa6e064748e10084c Mon Sep 17 00:00:00 2001 From: c Date: Tue, 10 Dec 2024 11:47:07 +0100 Subject: [PATCH 02/16] Express fn parse_with_context() with the help of fn parse_html() --- src/lib.rs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b546477..0cb51a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,6 @@ use render::{Renderer, RichDecorator, TextDecorator}; use html5ever::driver::ParseOpts; use html5ever::parse_document; -use html5ever::tendril::TendrilSink; use html5ever::tree_builder::TreeBuilderOpts; mod markup5ever_rcdom; pub use markup5ever_rcdom::RcDom; @@ -2722,17 +2721,8 @@ impl RenderedText { } } -fn parse_with_context(mut input: impl io::Read, context: &mut HtmlContext) -> Result { - let opts = ParseOpts { - tree_builder: TreeBuilderOpts { - drop_doctype: true, - ..Default::default() - }, - ..Default::default() - }; - let dom = parse_document(RcDom::default(), opts) - .from_utf8() - .read_from(&mut input)?; +fn parse_with_context(input: impl io::Read, context: &mut HtmlContext) -> Result { + let dom = config::plain().parse_html(input)?; let render_tree = dom_to_render_tree_with_context(dom.document.clone(), &mut io::sink(), context)? .ok_or(Error::Fail)?; From b29ec277560da1984dbde3610eb9ce058acaf28c Mon Sep 17 00:00:00 2001 From: c Date: Tue, 10 Dec 2024 11:48:38 +0100 Subject: [PATCH 03/16] Express configs with default constructor fn with_decorator --- src/lib.rs | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0cb51a7..71a56c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2601,38 +2601,12 @@ pub mod config { /// Return a Config initialized with a `RichDecorator`. pub fn rich() -> Config { - Config { - decorator: RichDecorator::new(), - #[cfg(feature = "css")] - style: Default::default(), - #[cfg(feature = "css")] - use_doc_css: false, - max_wrap_width: None, - pad_block_width: false, - allow_width_overflow: false, - min_wrap_width: MIN_WIDTH, - raw: false, - draw_borders: true, - wrap_links: true, - } + with_decorator(RichDecorator::new()) } /// Return a Config initialized with a `PlainDecorator`. pub fn plain() -> Config { - Config { - decorator: PlainDecorator::new(), - #[cfg(feature = "css")] - style: Default::default(), - #[cfg(feature = "css")] - use_doc_css: false, - max_wrap_width: None, - pad_block_width: false, - allow_width_overflow: false, - min_wrap_width: MIN_WIDTH, - raw: false, - draw_borders: true, - wrap_links: true, - } + with_decorator(PlainDecorator::new()) } /// Return a Config initialized with a custom decorator. From cfdadd9265b84e3d6d4b0be6b384c20e3e316b41 Mon Sep 17 00:00:00 2001 From: c Date: Tue, 10 Dec 2024 12:58:53 +0100 Subject: [PATCH 04/16] Derive PartialEq, Eq impls --- src/css.rs | 14 +++++++------- src/lib.rs | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/css.rs b/src/css.rs index 219ed73..68256e9 100644 --- a/src/css.rs +++ b/src/css.rs @@ -16,7 +16,7 @@ use crate::{ use self::parser::Importance; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum SelectorComponent { Class(String), Element(String), @@ -46,7 +46,7 @@ impl std::fmt::Display for SelectorComponent { } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct Selector { // List of components, right first so we match from the leaf. components: Vec, @@ -189,7 +189,7 @@ impl Selector { } } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum Display { /// display: none None, @@ -198,7 +198,7 @@ pub(crate) enum Display { ExtRawDom, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) enum Style { Colour(Colour), BgColour(Colour), @@ -206,7 +206,7 @@ pub(crate) enum Style { WhiteSpace(WhiteSpace), } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct StyleDecl { style: Style, importance: Importance, @@ -230,7 +230,7 @@ impl std::fmt::Display for StyleDecl { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] struct Ruleset { selector: Selector, styles: Vec, @@ -248,7 +248,7 @@ impl std::fmt::Display for Ruleset { } /// Stylesheet data which can be used while building the render tree. -#[derive(Clone, Default, Debug)] +#[derive(Clone, Default, Debug, PartialEq, Eq)] pub(crate) struct StyleData { agent_rules: Vec, user_rules: Vec, diff --git a/src/lib.rs b/src/lib.rs index 71a56c7..05502a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,7 +88,7 @@ use std::io; use std::io::Write; use std::iter::{once, repeat}; -#[derive(Debug, Copy, Clone, Default, PartialEq)] +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] pub(crate) enum WhiteSpace { #[default] Normal, @@ -1306,7 +1306,7 @@ where } } -#[derive(Default, Debug)] +#[derive(Default, Debug, PartialEq, Eq)] struct HtmlContext { #[cfg(feature = "css")] style_data: css::StyleData, From 0019a6a97711e13ca6c59f5035cb970fe720336c Mon Sep 17 00:00:00 2001 From: c Date: Tue, 10 Dec 2024 13:56:26 +0100 Subject: [PATCH 05/16] Express fn from_read_coloured() with fn coloured() --- src/ansi_colours.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/ansi_colours.rs b/src/ansi_colours.rs index edf8da4..5c9a19e 100644 --- a/src/ansi_colours.rs +++ b/src/ansi_colours.rs @@ -4,7 +4,7 @@ //! can be achieved using inline characters sent to the terminal such as //! underlining in some terminals). -use crate::{parse, RichAnnotation, RichDecorator}; +use crate::RichAnnotation; use std::io; /// Reads HTML from `input`, and returns text wrapped to `width` columns. @@ -25,16 +25,5 @@ where R: io::Read, FMap: Fn(&[RichAnnotation], &str) -> String, { - let lines = parse(input)? - .render(width, RichDecorator::new())? - .into_lines()?; - - let mut result = String::new(); - for line in lines { - for ts in line.tagged_strings() { - result.push_str(&colour_map(&ts.tag, &ts.s)); - } - result.push('\n'); - } - Ok(result) + super::config::rich().coloured(input, width, colour_map) } From c5b106d4a2cb79cfaa6a998ac858bce757e0cd42 Mon Sep 17 00:00:00 2001 From: c Date: Tue, 10 Dec 2024 14:43:00 +0100 Subject: [PATCH 06/16] Construct context inside do_parse to make obvious it isnt used to communicate to later stages The following code can easily be mistaken for using the context to store information retrieved by render_to_lines. This is actually not the case. let mut context = self.make_context(); self.do_parse(&mut context, input) .render_with_context(&mut context, 100, self.decorator); --- src/lib.rs | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 05502a7..7eeb72d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,7 @@ pub mod render; use render::text_renderer::{ RenderLine, RenderOptions, RichAnnotation, SubRenderer, TaggedLine, TextRenderer, }; -use render::{Renderer, RichDecorator, TextDecorator}; +use render::{Renderer, TextDecorator}; use html5ever::driver::ParseOpts; use html5ever::parse_document; @@ -2371,12 +2371,8 @@ pub mod config { } } /// Parse with context. - fn do_parse( - &mut self, - context: &mut HtmlContext, - input: R, - ) -> Result { - super::parse_with_context(input, context) + fn do_parse(&self, input: R) -> Result { + super::parse_with_context(input, &mut self.make_context()) } /// Parse the HTML into a DOM structure. @@ -2438,14 +2434,10 @@ pub mod config { /// Reads HTML from `input`, and returns a `String` with text wrapped to /// `width` columns. - pub fn string_from_read( - mut self, - input: R, - width: usize, - ) -> Result { + pub fn string_from_read(self, input: R, width: usize) -> Result { let mut context = self.make_context(); let s = self - .do_parse(&mut context, input)? + .do_parse(input)? .render_with_context(&mut context, width, self.decorator)? .into_string()?; Ok(s) @@ -2456,12 +2448,12 @@ pub mod config { /// of the provided text decorator's `Annotation`. The "outer" annotation comes first in /// the `Vec`. pub fn lines_from_read( - mut self, + self, input: R, width: usize, ) -> Result>>> { let mut context = self.make_context(); - self.do_parse(&mut context, input)? + self.do_parse(input)? .render_with_context(&mut context, width, self.decorator)? .into_lines() } @@ -2548,19 +2540,14 @@ pub mod config { /// a list of `RichAnnotation` and some text, and returns the text /// with any terminal escapes desired to indicate those annotations /// (such as colour). - pub fn coloured( - mut self, - input: R, - width: usize, - colour_map: FMap, - ) -> Result + pub fn coloured(self, input: R, width: usize, colour_map: FMap) -> Result where R: std::io::Read, FMap: Fn(&[RichAnnotation], &str) -> String, { let mut context = self.make_context(); let lines = self - .do_parse(&mut context, input)? + .do_parse(input)? .render_with_context(&mut context, width, self.decorator)? .into_lines()?; From 64b1ac8eef37e90ecd250e30d161b16cd1075c73 Mon Sep 17 00:00:00 2001 From: c Date: Thu, 12 Dec 2024 13:51:55 +0100 Subject: [PATCH 07/16] Express functions consistently with pub API functions when available --- src/lib.rs | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7eeb72d..5e1b06a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2435,12 +2435,8 @@ pub mod config { /// Reads HTML from `input`, and returns a `String` with text wrapped to /// `width` columns. pub fn string_from_read(self, input: R, width: usize) -> Result { - let mut context = self.make_context(); - let s = self - .do_parse(input)? - .render_with_context(&mut context, width, self.decorator)? - .into_string()?; - Ok(s) + let render_tree = self.do_parse(input)?; + self.render_to_string(render_tree, width) } /// Reads HTML from `input`, and returns text wrapped to `width` columns. @@ -2452,10 +2448,8 @@ pub mod config { input: R, width: usize, ) -> Result>>> { - let mut context = self.make_context(); - self.do_parse(input)? - .render_with_context(&mut context, width, self.decorator)? - .into_lines() + let render_tree = self.do_parse(input)?; + self.render_to_lines(render_tree, width) } #[cfg(feature = "css")] @@ -2545,20 +2539,8 @@ pub mod config { R: std::io::Read, FMap: Fn(&[RichAnnotation], &str) -> String, { - let mut context = self.make_context(); - let lines = self - .do_parse(input)? - .render_with_context(&mut context, width, self.decorator)? - .into_lines()?; - - let mut result = String::new(); - for line in lines { - for ts in line.tagged_strings() { - result.push_str(&colour_map(&ts.tag, &ts.s)); - } - result.push('\n'); - } - Ok(result) + let render_tree = self.do_parse(input)?; + self.render_coloured(render_tree, width, colour_map) } /// Return coloured text from a RenderTree. `colour_map` is a function which takes a list From f2512360ad2537fdc2e156bc619d2fb6d41dac4b Mon Sep 17 00:00:00 2001 From: c Date: Thu, 12 Dec 2024 14:20:28 +0100 Subject: [PATCH 08/16] Remove fn render as it is only used in tests --- src/lib.rs | 5 ----- src/tests.rs | 10 +++------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5e1b06a..2cc803d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2636,11 +2636,6 @@ impl RenderTree { render_tree_to_string(context, builder, &test_decorator, self.0, &mut io::sink())?; Ok(RenderedText(builder)) } - - /// Render this document using the given `decorator` and wrap it to `width` columns. - fn render(self, width: usize, decorator: D) -> Result> { - self.render_with_context(&mut Default::default(), width, decorator) - } } /// A rendered HTML document. diff --git a/src/tests.rs b/src/tests.rs index c06c12c..70e9be4 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1531,11 +1531,8 @@ fn test_finalise() { } assert_eq!( - crate::parse("test".as_bytes()) - .unwrap() - .render(80, TestDecorator) - .unwrap() - .into_lines() + config::with_decorator(TestDecorator) + .lines_from_read("test".as_bytes(), 80) .unwrap(), vec![ TaggedLine::from_string("test".to_owned(), &Vec::new()), @@ -1935,8 +1932,7 @@ fn test_issue_93_x() { 104, 100, 100, 107, 105, 60, 120, 98, 255, 255, 255, 0, 60, 255, 127, 46, 60, 113, 127, ]; let _local0 = crate::parse(&data[..]).unwrap(); - let d1 = TrivialDecorator::new(); - let _local1 = crate::RenderTree::render(_local0, 1, d1); + let _local1 = config::with_decorator(TrivialDecorator::new()).render_to_string(_local0, 1); } #[test] From e7aa74e757b8e3349c002c8f43fea6f0286dc79d Mon Sep 17 00:00:00 2001 From: c Date: Thu, 12 Dec 2024 14:22:03 +0100 Subject: [PATCH 09/16] Express HtmlContext::default() as config::plain().make_context() Note that it does not hold that HtmlContext::default() == config::plain().make_context() but the relevant fields used by fn parse_with_context() are the same. This is a bit unclear, so this change makes this more transparent. --- src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2cc803d..55ce31a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1306,7 +1306,7 @@ where } } -#[derive(Default, Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] struct HtmlContext { #[cfg(feature = "css")] style_data: css::StyleData, @@ -2669,7 +2669,8 @@ fn parse_with_context(input: impl io::Read, context: &mut HtmlContext) -> Result /// Reads and parses HTML from `input` and prepares a render tree. pub fn parse(input: impl io::Read) -> Result { - parse_with_context(input, &mut Default::default()) + let mut context = config::plain().make_context(); + parse_with_context(input, &mut context) } /// Reads HTML from `input`, decorates it using `decorator`, and From 436dea431a48697a5b7d336f248e72a8c5cec383 Mon Sep 17 00:00:00 2001 From: c Date: Mon, 16 Dec 2024 11:14:47 +0100 Subject: [PATCH 10/16] Express fn parse() with fn do_parse() --- src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 55ce31a..ca5c7e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2371,7 +2371,7 @@ pub mod config { } } /// Parse with context. - fn do_parse(&self, input: R) -> Result { + pub(crate) fn do_parse(&self, input: R) -> Result { super::parse_with_context(input, &mut self.make_context()) } @@ -2669,8 +2669,7 @@ fn parse_with_context(input: impl io::Read, context: &mut HtmlContext) -> Result /// Reads and parses HTML from `input` and prepares a render tree. pub fn parse(input: impl io::Read) -> Result { - let mut context = config::plain().make_context(); - parse_with_context(input, &mut context) + config::plain().do_parse(input) } /// Reads HTML from `input`, decorates it using `decorator`, and From dd17579034e61692f0ad1a58ae490ae7d99cfa91 Mon Sep 17 00:00:00 2001 From: c Date: Thu, 12 Dec 2024 14:27:47 +0100 Subject: [PATCH 11/16] Inline parse_with_context into callsites --- src/lib.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ca5c7e9..06e641b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2372,7 +2372,12 @@ pub mod config { } /// Parse with context. pub(crate) fn do_parse(&self, input: R) -> Result { - super::parse_with_context(input, &mut self.make_context()) + let mut context = self.make_context(); + let doc = plain().parse_html(input)?.document; + let render_tree = + super::dom_to_render_tree_with_context(doc, &mut io::sink(), &mut context)? + .ok_or(Error::Fail)?; + Ok(RenderTree(render_tree)) } /// Parse the HTML into a DOM structure. @@ -2659,14 +2664,6 @@ impl RenderedText { } } -fn parse_with_context(input: impl io::Read, context: &mut HtmlContext) -> Result { - let dom = config::plain().parse_html(input)?; - let render_tree = - dom_to_render_tree_with_context(dom.document.clone(), &mut io::sink(), context)? - .ok_or(Error::Fail)?; - Ok(RenderTree(render_tree)) -} - /// Reads and parses HTML from `input` and prepares a render tree. pub fn parse(input: impl io::Read) -> Result { config::plain().do_parse(input) From 2eb8f255430ef4ad2ed82ab196f54cbd86d5c076 Mon Sep 17 00:00:00 2001 From: c Date: Thu, 12 Dec 2024 14:31:28 +0100 Subject: [PATCH 12/16] Express do_parse using dom_to_render_tree --- src/lib.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 06e641b..c5f1c11 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2372,12 +2372,8 @@ pub mod config { } /// Parse with context. pub(crate) fn do_parse(&self, input: R) -> Result { - let mut context = self.make_context(); - let doc = plain().parse_html(input)?.document; - let render_tree = - super::dom_to_render_tree_with_context(doc, &mut io::sink(), &mut context)? - .ok_or(Error::Fail)?; - Ok(RenderTree(render_tree)) + let doc = plain().parse_html(input)?; + self.dom_to_render_tree(&doc) } /// Parse the HTML into a DOM structure. From 0c0cf620840a79f6191340042912393d921a0d66 Mon Sep 17 00:00:00 2001 From: c Date: Fri, 13 Dec 2024 17:20:00 +0100 Subject: [PATCH 13/16] Use the config instead of plain(), this is more obvious although equivalent right now --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5f1c11..a18401d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2372,7 +2372,7 @@ pub mod config { } /// Parse with context. pub(crate) fn do_parse(&self, input: R) -> Result { - let doc = plain().parse_html(input)?; + let doc = self.parse_html(input)?; self.dom_to_render_tree(&doc) } From e9786ac3f0012a545ac6eea65820b28a42820b88 Mon Sep 17 00:00:00 2001 From: c Date: Mon, 16 Dec 2024 11:31:39 +0100 Subject: [PATCH 14/16] tests: use higher-level functions to express tests in equivalent way --- src/tests.rs | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/tests.rs b/src/tests.rs index 70e9be4..86ad6c2 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -5,7 +5,7 @@ use crate::{config, Error}; #[cfg(feature = "css")] use super::render::text_renderer::RichDecorator; use super::render::text_renderer::{RichAnnotation, TaggedLine, TrivialDecorator}; -use super::{from_read, from_read_with_decorator, parse, TextDecorator}; +use super::{from_read, from_read_with_decorator, TextDecorator}; /// Like assert_eq!(), but prints out the results normally as well macro_rules! assert_eq_str { @@ -1361,33 +1361,32 @@ fn test_s() { #[test] fn test_multi_parse() { + let cfg = config::plain(); let html: &[u8] = b"one two three four five six seven eight nine ten eleven twelve thirteen \ fourteen fifteen sixteen seventeen"; - let tree = parse(html).unwrap(); + let tree = cfg.do_parse(html).unwrap(); assert_eq!( "one two three four five six seven eight nine ten eleven twelve thirteen fourteen\n\ fifteen sixteen seventeen\n", - config::plain().render_to_string(tree.clone(), 80).unwrap() + cfg.render_to_string(tree.clone(), 80).unwrap() ); assert_eq!( "one two three four five six seven eight nine ten eleven twelve\n\ thirteen fourteen fifteen sixteen seventeen\n", - config::plain().render_to_string(tree.clone(), 70).unwrap() + cfg.render_to_string(tree.clone(), 70).unwrap() ); assert_eq!( "one two three four five six seven eight nine ten\n\ eleven twelve thirteen fourteen fifteen sixteen\n\ seventeen\n", - config::plain().render_to_string(tree.clone(), 50).unwrap() + cfg.render_to_string(tree.clone(), 50).unwrap() ); } #[test] fn test_read_rich() { let html: &[u8] = b"bold"; - let lines = config::rich() - .render_to_lines(parse(html).unwrap(), 80) - .unwrap(); + let lines = config::rich().lines_from_read(html, 80).unwrap(); let tag = vec![RichAnnotation::Strong]; let line = TaggedLine::from_string("*bold*".to_owned(), &tag); assert_eq!(vec![line], lines); @@ -1397,7 +1396,7 @@ fn test_read_rich() { fn test_read_custom() { let html: &[u8] = b"bold"; let lines = config::with_decorator(TrivialDecorator::new()) - .render_to_lines(parse(html).unwrap(), 80) + .lines_from_read(html, 80) .unwrap(); let tag = vec![()]; let line = TaggedLine::from_string("bold".to_owned(), &tag); @@ -1409,7 +1408,7 @@ fn test_pre_rich() { use RichAnnotation::*; assert_eq!( config::rich() - .render_to_lines(parse(&b"
test
"[..]).unwrap(), 100) + .lines_from_read(&b"
test
"[..], 100) .unwrap(), [TaggedLine::from_string( "test".into(), @@ -1419,7 +1418,7 @@ fn test_pre_rich() { assert_eq!( config::rich() - .render_to_lines(crate::parse("
testlong
".as_bytes()).unwrap(), 4) + .lines_from_read("
testlong
".as_bytes(), 4) .unwrap(), [ TaggedLine::from_string("test".into(), &vec![Preformat(false)]), @@ -1431,10 +1430,7 @@ fn test_pre_rich() { // tags. assert_eq!( config::rich() - .render_to_lines( - crate::parse(r#"

testlong

"#.as_bytes()).unwrap(), - 4 - ) + .lines_from_read(r#"

testlong

"#.as_bytes(), 4) .unwrap(), [ TaggedLine::from_string("test".into(), &vec![]), @@ -1931,8 +1927,9 @@ fn test_issue_93_x() { 114, 104, 60, 47, 101, 109, 62, 60, 99, 99, 172, 97, 97, 58, 60, 119, 99, 64, 126, 118, 104, 100, 100, 107, 105, 60, 120, 98, 255, 255, 255, 0, 60, 255, 127, 46, 60, 113, 127, ]; - let _local0 = crate::parse(&data[..]).unwrap(); - let _local1 = config::with_decorator(TrivialDecorator::new()).render_to_string(_local0, 1); + let cfg = config::with_decorator(TrivialDecorator::new()); + let _local0 = cfg.do_parse(&data[..]).unwrap(); + let _local1 = cfg.render_to_string(_local0, 1); } #[test] From f0f7bd124cb6e085fbc1e009ecf4db49644aa1cd Mon Sep 17 00:00:00 2001 From: c Date: Mon, 16 Dec 2024 12:23:00 +0100 Subject: [PATCH 15/16] Less indexing, less unwraps --- src/css/parser.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/css/parser.rs b/src/css/parser.rs index 83886dd..5e33bfa 100644 --- a/src/css/parser.rs +++ b/src/css/parser.rs @@ -390,6 +390,10 @@ pub(crate) fn parse_color_attribute( parse_color(&value.tokens).or_else(|e| parse_faulty_color(e, text)) } +fn parse_color_part(text: &str, index: std::ops::Range) -> Option { + u8::from_str_radix(text.get(index)?, 16).ok() +} + // Both Firefox and Chromium accept "00aabb" as a bgcolor - I'm not sure this has ever been legal, // but regrettably I've had e-mails which were unreadable without doing this. fn parse_faulty_color( @@ -397,10 +401,10 @@ fn parse_faulty_color( text: &str, ) -> Result>> { let text = text.trim(); - if text.chars().all(|c| c.is_hex_digit()) && text.len() == 6 { - let r = u8::from_str_radix(&text[0..2], 16).unwrap(); - let g = u8::from_str_radix(&text[2..4], 16).unwrap(); - let b = u8::from_str_radix(&text[4..6], 16).unwrap(); + let r = parse_color_part(text, 0..2); + let g = parse_color_part(text, 2..4); + let b = parse_color_part(text, 4..6); + if let (Some(r), Some(g), Some(b)) = (r, g, b) { return Ok(Colour::Rgb(r, g, b)); } Err(e) @@ -618,7 +622,7 @@ fn parse_string_token(text: &str) -> IResult<&str, Token> { loop { match chars.next() { - None => return Ok((&text[text.len()..], Token::String(s.into()))), + None => return Ok(("", Token::String(s.into()))), Some((i, c)) if c == end_char => { return Ok((&text[i + 1..], Token::String(s.into()))); } From fbaf8b32e7ff98dd2163f04baa3a1cd0b9f959cd Mon Sep 17 00:00:00 2001 From: c Date: Thu, 12 Dec 2024 14:45:07 +0100 Subject: [PATCH 16/16] Remove fn parse() from public API and replace with pub fn do_parse() method on Config The parse function is an odd one in the current API, other functions can be separated cleanly into functions that are convenient and functions that need to be configured. The parse function does not advertise what configuration it uses, and will silently drop css information, as it parses with config::plain() as default configuration. This can lead to the following misuse of the public API let render_tree = parse(html)?; customcfg.render_to_string(render_tree, 80)?; --- src/lib.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a18401d..f40b4f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2370,8 +2370,9 @@ pub mod config { wrap_links: self.wrap_links, } } - /// Parse with context. - pub(crate) fn do_parse(&self, input: R) -> Result { + + /// Reads and parses HTML from `input` and prepares a render tree. + pub fn do_parse(&self, input: R) -> Result { let doc = self.parse_html(input)?; self.dom_to_render_tree(&doc) } @@ -2660,11 +2661,6 @@ impl RenderedText { } } -/// Reads and parses HTML from `input` and prepares a render tree. -pub fn parse(input: impl io::Read) -> Result { - config::plain().do_parse(input) -} - /// Reads HTML from `input`, decorates it using `decorator`, and /// returns a `String` with text wrapped to `width` columns. pub fn from_read_with_decorator(input: R, width: usize, decorator: D) -> Result