Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
455 changes: 356 additions & 99 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[workspace]
resolver = "2"
members = [ "marked", "marked-cli" ]
exclude = [ "ammonia-compare", "marked-sanitizer" ]

Expand Down
2 changes: 1 addition & 1 deletion marked-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ edition = "2018"

[dependencies]
marked = { version=">=0.3.0, <0.4.0" }
html5ever = { version=">=0.25.1, <0.26" }
html5ever = { version=">=0.27.0, <0.28" }
encoding_rs = { version=">=0.8.13, <0.9" }
clap = { version=">=2.33.0, <2.34", default-features=false, features=["wrap_help"] }
log = { version=">=0.4.4, <0.4.15", features = ["std"] }
Expand Down
19 changes: 10 additions & 9 deletions marked/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
name = "marked"
version = "0.3.0"
authors = ["David Kellum <[email protected]>"]
edition = "2018"
edition = "2021"
rust-version = "1.80"
license = "MIT/Apache-2.0"
description = "Parsing, filtering, selecting and serializing HTML/XML markup."
repository = "https://github.com/dekellum/marked"
Expand All @@ -15,18 +16,18 @@ build = "build.rs"
doctest = false

[dependencies]
html5ever = { version=">=0.25.1, <0.26" }
tendril = { version=">=0.4.1, <0.5", features=["encoding_rs"] }
encoding_rs = { version=">=0.8.13, <0.9" }
xml-rs = { version=">=0.8, <0.9", package="xml-rs", optional=true }
string_cache = { version=">=0.8.0, <0.9" }
mime = { version=">=0.3.14, <0.4" }
log = { version=">=0.4.4, <0.5", features = ["std"] }
lazy_static = { version=">=1.3.0, <1.5" }
html5ever = { version="0.27.*" }
tendril = { version="0.4.*", features=["encoding_rs"] }
encoding_rs = { version="0.8.12" } # Match tendril dependency
xml-rs = { version="0.8.*", package="xml-rs", optional=true }
string_cache = { version="0.8.*" }
mime = { version="0.3.14" }
log = { version="0.4.*", features = ["std"] }

[features]
default = []
xml = ["xml-rs"]
cargo-clippy = []

[dev-dependencies]
rand = { version=">=0.7.0, <0.8" }
Expand Down
2 changes: 1 addition & 1 deletion marked/clippy.toml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
msrv = "1.38.0"
msrv = "1.80.0"
23 changes: 13 additions & 10 deletions marked/src/dom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,20 +96,23 @@ pub enum NodeData {
Pi(ProcessingInstruction),
}

#[allow(clippy::manual_non_exhaustive)] // TODO: maybe replace the private `_priv: ()` field with `#[non_exhaustive]`?
/// Document type definition details.
#[derive(Clone, Debug)]
pub struct DocumentType {
pub name: StrTendril,
_priv: ()
}

#[allow(clippy::manual_non_exhaustive)] // TODO: maybe replace the private `_priv: ()` field with `#[non_exhaustive]`?
/// Processing instruction details.
#[derive(Clone, Debug)]
pub struct ProcessingInstruction {
pub data: StrTendril,
_priv: ()
}

#[allow(clippy::manual_non_exhaustive)] // Maybe?
/// A markup element with name and attributes.
#[derive(Clone, Debug)]
pub struct Element {
Expand Down Expand Up @@ -174,9 +177,9 @@ impl Document {
pub fn root_element(&self) -> Option<NodeId> {
let document_node = &self[Document::DOCUMENT_NODE_ID];
debug_assert!(
(if let NodeData::Document = document_node.data { true }
else { false }),
"not document node: {:?}", document_node);
matches!(document_node.data, NodeData::Document),
"not document node: {document_node:?}",
);
debug_assert!(document_node.parent.is_none());
debug_assert!(document_node.next_sibling.is_none());
debug_assert!(document_node.prev_sibling.is_none());
Expand Down Expand Up @@ -215,9 +218,9 @@ impl Document {

fn push_node(&mut self, node: Node) -> NodeId {
debug_assert!(
(if let NodeData::Document | NodeData::Hole = node.data { false }
else { true }),
"Invalid push {:?}", node.data);
! matches!(node.data, NodeData::Document | NodeData::Hole),
"Invalid push {:?}", node.data,
);
let next_index = self.nodes.len()
.try_into()
.expect("Document (u32) node index overflow");
Expand Down Expand Up @@ -424,7 +427,7 @@ impl Document {
if let NodeData::Text(t) = &node.data {
match &mut text {
None => text = Some(t.clone()),
Some(text) => text.push_tendril(&t),
Some(text) => text.push_tendril(t),
}
ns.push_if(node.next_sibling);
} else {
Expand Down Expand Up @@ -825,9 +828,9 @@ impl NodeData {
#[inline]
fn assert_suitable_parent(&self) {
debug_assert!(
(if let NodeData::Document | NodeData::Elem(_) = self { true }
else { false }),
"Not a suitable parent: {:?}", self)
matches!(self, NodeData::Document | NodeData::Elem(_)),
"Not a suitable parent: {:?}", self,
)
}
}

Expand Down
47 changes: 16 additions & 31 deletions marked/src/dom/html/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,15 @@
//! also checked in.

use std::collections::HashMap;

use lazy_static::lazy_static;
use std::sync::LazyLock;

use crate::dom::LocalName;

lazy_static! {
/// A static lookup table for metadata on known HTML tags.
pub static ref TAG_META: HashMap<LocalName, TagMeta> = init_tag_metadata();
}
/// A static lookup table for metadata on known HTML tags.
pub static TAG_META: LazyLock<HashMap<LocalName, TagMeta>> = LazyLock::new(init_tag_metadata);

/// Metadata about HTML tags and their attributes.
#[derive(Default)]
pub struct TagMeta {
is_empty: bool,
is_deprecated: bool,
Expand Down Expand Up @@ -81,18 +79,7 @@ impl TagMeta {
}
}

impl Default for TagMeta {
fn default() -> TagMeta {
TagMeta {
is_empty: false,
is_deprecated: false,
is_inline: false,
is_meta: false,
is_banned: false,
basic_attrs: vec![],
}
}
}


/// `Namespace` constants
pub mod ns {
Expand All @@ -106,6 +93,7 @@ pub mod ns {
pub mod t {
use html5ever::local_name as lname;
use crate::dom::LocalName;
use std::sync::LazyLock;

/// Tag `<a>`: anchor.
/// (meta: inline)
Expand Down Expand Up @@ -362,13 +350,11 @@ pub mod t {
pub const Q: LocalName = lname!("q");
/// Tag `<rb>`: ruby base text.
pub const RB: LocalName = lname!("rb");
lazy_static::lazy_static! {
/// Tag `<rbc>`: ruby base container (complex).
/// (meta: undefined)
///
/// This is a lazy static (struct) as its not defined by html5ever.
pub static ref RBC: LocalName = "rbc".into();
}
/// Tag `<rbc>`: ruby base container (complex).
/// (meta: undefined)
///
/// This is a lazy static (struct) as it's not defined by html5ever.
pub static RBC: LazyLock<LocalName> = LazyLock::new(|| "rbc".into());
/// Tag `<rp>`: ruby simple text container.
pub const RP: LocalName = lname!("rp");
/// Tag `<rt>`: ruby annotation text.
Expand Down Expand Up @@ -475,6 +461,7 @@ pub mod t {
pub mod a {
use html5ever::local_name as lname;
use crate::dom::LocalName;
use std::sync::LazyLock;

pub const ABBR: LocalName = lname!("abbr");
/// Attribute accept: (file) types accepted.
Expand Down Expand Up @@ -504,12 +491,10 @@ pub mod a {
pub const COORDS: LocalName = lname!("coords");
pub const DATA: LocalName = lname!("data");
pub const DATETIME: LocalName = lname!("datetime");
lazy_static::lazy_static! {
/// Attribute decoding: preferred method to decode.
///
/// This is a lazy static (struct) as its not defined by html5ever.
pub static ref DECODING: LocalName = "decoding".into();
}
/// Attribute decoding: preferred method to decode.
///
/// This is a lazy static (struct) as it's not defined by html5ever.
pub static DECODING: LazyLock<LocalName> = LazyLock::new(|| "decoding".into());
/// Attribute dir: Text direction; ltr or rtl.
pub const DIR: LocalName = lname!("dir");
pub const FRAME: LocalName = lname!("frame");
Expand Down
6 changes: 1 addition & 5 deletions marked/src/dom/node_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,7 @@ impl<'a> NodeRef<'a> {

#[inline]
fn for_some_node(&self, id: Option<NodeId>) -> Option<NodeRef<'a>> {
if let Some(id) = id {
Some(NodeRef::new(self.doc, id))
} else {
None
}
id.map(|id| NodeRef::new(self.doc, id))
}
}

Expand Down
8 changes: 5 additions & 3 deletions marked/src/dom/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,19 @@ impl<'a> Serialize for NodeRef<'a> {
serializer.write_doctype(&dt.name)
}
(IncludeNode, Text(ref t)) => {
serializer.write_text(&t)
serializer.write_text(t)
}
(IncludeNode, Comment(ref t)) => {
serializer.write_comment(&t)
serializer.write_comment(t)
}
(IncludeNode, Pi(ref pi)) => {
serializer.write_processing_instruction(&"", &pi.data)
serializer.write_processing_instruction("", &pi.data)
}
}
}
}

#[allow(clippy::to_string_trait_impl)]
/// Implemented via [`Document::serialize`].
impl ToString for Document {
fn to_string(&self) -> String {
Expand Down Expand Up @@ -127,6 +128,7 @@ impl<'a> NodeRef<'a> {
}
}

#[allow(clippy::to_string_trait_impl)]
/// Implemented via [`NodeRef::serialize`].
impl<'a> ToString for NodeRef<'a> {
fn to_string(&self) -> String {
Expand Down
4 changes: 2 additions & 2 deletions marked/src/dom/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ use rand::Rng;
#[cfg(target_pointer_width = "64")]
fn size_of() {
use std::mem::size_of;
assert_eq!(size_of::<Node>(), 80);
assert_eq!(size_of::<Node>(), 72);
assert_eq!(size_of::<NodeId>(), 4);
assert_eq!(size_of::<NodeData>(), 56);
assert_eq!(size_of::<NodeData>(), 48);
assert_eq!(size_of::<Element>(), 48);
assert_eq!(size_of::<Attribute>(), 40);
assert_eq!(size_of::<Vec<Attribute>>(), 24);
Expand Down
2 changes: 1 addition & 1 deletion marked/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#![warn(rust_2018_idioms)]

#[macro_use] extern crate html5ever;
#[macro_use] pub extern crate html5ever;

/// Initial parse buffer size in which encoding hints are considered, possibly
/// triggering reparse.
Expand Down