diff --git a/c-api/include/lol_html.h b/c-api/include/lol_html.h index 27e37931..a259fc0c 100644 --- a/c-api/include/lol_html.h +++ b/c-api/include/lol_html.h @@ -523,7 +523,15 @@ int lol_html_element_tag_name_set( size_t name_len ); -// Whether the element is explicitly self-closing, e.g. `<foo />`. +// Whether the tag syntactically ends with `/>`. In HTML content this is purely decorative and unnecessary, and has no effect of any kind. +// +// The `/>` syntax only affects parsing of elements in foreign content (SVG and MathML). +// It will never close any HTML tags that aren't already defined as void in HTML. +// +// This function only reports the parsed syntax, and will not report which elements are actually void in HTML. +// Use `lol_html_element_can_have_content` to check if the element is non-void. +// +// If the `/` is part of an unquoted attribute, it's not parsed as the self-closing syntax. bool lol_html_element_is_self_closing( lol_html_element_t *element ); diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index 562d86a6..b3e13cf8 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -121,6 +121,10 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } /// Sets the tag name of the element. + /// + /// The new tag name must be in the same namespace, have the same content model, and be valid in its location. + /// Otherwise, changing the tag name may cause the resulting document to be parsed in an unexpected way, + /// out of sync with this library. #[inline] pub fn set_tag_name(&mut self, name: &str) -> Result<(), TagNameError> { let name = self.tag_name_bytes_from_str(name)?; @@ -134,16 +138,31 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { Ok(()) } - /// Whether the element is explicitly self-closing, e.g. `<foo />`. + /// Whether the tag syntactically ends with `/>`. In HTML content this is purely decorative and unnecessary, and has no effect of any kind. 
+ /// + /// The `/>` syntax only affects parsing of elements in foreign content (SVG and MathML). + /// It will never close any HTML tags that aren't already defined as [void][spec] in HTML. + /// + /// This function only reports the parsed syntax, and will not report which elements are actually void in HTML. + /// Use [`can_have_content()`][Self::can_have_content] to check if the element is non-void. + /// + /// [spec]: https://html.spec.whatwg.org/multipage/syntax.html#start-tags + /// + /// If the `/` is part of an unquoted attribute, it's not parsed as the self-closing syntax. #[inline] #[must_use] pub fn is_self_closing(&self) -> bool { self.start_tag.self_closing() } - /// Whether the element can have inner content. Returns `true` unless the element is an [HTML void - /// element](https://html.spec.whatwg.org/multipage/syntax.html#void-elements) or has a - /// self-closing tag (eg, `<foo />`). + /// Whether the element can have inner content. + /// + /// Returns `true` if the element isn't a [void element in HTML][void], + /// or is in **foreign content** and doesn't have a self-closing tag (eg, `<svg />`). + /// + /// [void]: https://html.spec.whatwg.org/multipage/syntax.html#void-elements + /// + /// Note that the self-closing syntax has no effect in HTML content. 
#[inline] #[must_use] pub fn can_have_content(&self) -> bool { @@ -351,6 +370,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { fn prepend_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { + self.start_tag.set_self_closing_syntax(false); self.start_tag .mutations .mutate() @@ -415,6 +435,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { fn append_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { + self.start_tag.set_self_closing_syntax(false); self.end_tag_mutations_mut().content_before.push_back(chunk); } } @@ -473,6 +494,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { fn set_inner_content_chunk(&mut self, chunk: StringChunk) { if self.can_have_content { + self.start_tag.set_self_closing_syntax(false); self.remove_content(); self.start_tag .mutations diff --git a/src/rewritable_units/tokens/start_tag.rs b/src/rewritable_units/tokens/start_tag.rs index d4adeb19..a026f6eb 100644 --- a/src/rewritable_units/tokens/start_tag.rs +++ b/src/rewritable_units/tokens/start_tag.rs @@ -102,12 +102,25 @@ impl<'i> StartTag<'i> { } } - /// Whether the tag is explicitly self-closing, e.g. `<foo />`. + /// Whether the tag syntactically ends with `/>`. In HTML content this is purely decorative and unnecessary, and has no effect of any kind. + /// + /// The `/>` syntax only affects parsing of elements in foreign content (SVG and MathML). + /// It will never close any HTML tags that aren't already defined as [void][spec] in HTML. + /// + /// This function only reports the parsed syntax, and will not report which elements are actually void in HTML. + /// + /// [spec]: https://html.spec.whatwg.org/multipage/syntax.html#start-tags + /// + /// If the `/` is part of an unquoted attribute, it's not parsed as the self-closing syntax. 
#[inline] pub fn self_closing(&self) -> bool { self.self_closing } + pub(crate) fn set_self_closing_syntax(&mut self, has_slash: bool) { + self.self_closing = has_slash; + } + /// Inserts `content` before the start tag. /// /// Consequent calls to the method append `content` to the previously inserted content. diff --git a/src/rewriter/mod.rs b/src/rewriter/mod.rs index 73511b61..ac336960 100644 --- a/src/rewriter/mod.rs +++ b/src/rewriter/mod.rs @@ -446,6 +446,30 @@ mod tests { assert_eq!(res, ""); } + #[test] + fn rewrite_incorrect_self_closing() { + let res = rewrite_str::( + "
+

", + RewriteStrSettings { + element_content_handlers: vec![element!("*:not(svg)", |el| { + el.set_attribute("s", if el.is_self_closing() { "y" } else { "n" })?; + el.set_attribute("c", if el.can_have_content() { "y" } else { "n" })?; + el.append("…", ContentType::Text); + Ok(()) + })], + ..RewriteStrSettings::new() + }, + ) + .unwrap(); + + assert_eq!( + res, + r#"
+

"# + ); + } + #[test] fn rewrite_arbitrary_settings() { let res = rewrite_str("Some text", Settings::new()).unwrap();