diff --git a/Cargo.lock b/Cargo.lock index ae01b1a..5ca1abd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -122,6 +122,7 @@ dependencies = [ "markup5ever_rcdom", "mime", "rand", + "smallvec", "string_cache", "tendril", "xml-rs", @@ -335,6 +336,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83da420ee8d1a89e640d0948c646c1c088758d3a3c538f943bfa97bdac17929d" +[[package]] +name = "smallvec" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c2fb2ec9bcd216a5b0d0ccf31ab17b5ed1d627960edff65bbe95d3ce221cefc" + [[package]] name = "string_cache" version = "0.8.0" diff --git a/ammonia-compare/Cargo.lock b/ammonia-compare/Cargo.lock index 86115d2..8d0376b 100644 --- a/ammonia-compare/Cargo.lock +++ b/ammonia-compare/Cargo.lock @@ -98,9 +98,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.67" +version = "0.2.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" +checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0" [[package]] name = "log" @@ -132,6 +132,7 @@ dependencies = [ "lazy_static", "log", "mime", + "smallvec", "string_cache", "tendril", "xml-rs", @@ -317,9 +318,9 @@ checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" [[package]] name = "ryu" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" +checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76" [[package]] name = "serde" diff --git a/marked-xml/CHANGELOG.md b/marked-xml/CHANGELOG.md new file mode 100644 index 0000000..075e11f --- /dev/null +++ b/marked-xml/CHANGELOG.md @@ -0,0 +1,3 @@ +## 0.0.0 (2020-3-16) + +* Just a name reservation. diff --git a/marked-xml/Cargo.lock b/marked-xml/Cargo.lock new file mode 100644 index 0000000..b101178 --- /dev/null +++ b/marked-xml/Cargo.lock @@ -0,0 +1,508 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "ammonia" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89eac85170f4b3fb3dc5e442c1cfb036cb8eecf9dbbd431a161ffad15d90ea3b" +dependencies = [ + "html5ever", + "lazy_static", + "maplit", + "markup5ever_rcdom", + "matches", + "tendril", + "url", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "encoding_rs" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd8d03faa7fe0c1431609dfad7bbe827af30f82e1e2ae6f7ee4fca6bd764bc28" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + +[[package]] +name = "getrandom" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "idna" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "itoa" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" + +[[package]] +name = "log" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "marked" +version = "0.1.0" +dependencies = [ + "encoding_rs", + "html5ever", + "lazy_static", + "log", + "mime", + "string_cache", + "tendril", + "xml-rs", +] + +[[package]] +name = "marked-sanitizer" +version = "0.0.0" +dependencies = [ + "ammonia", + "marked", +] + +[[package]] +name = "markup5ever" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" +dependencies = [ + "log", + "phf", + "phf_codegen", + "serde", + "serde_derive", + "serde_json", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + +[[package]] +name = "matches" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" + +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro2" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c09721c6781493a2a492a96b5a5bf19b65917fe6728884e7c44dd0c60ca3435" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom", + "libc", + "rand_chacha", + "rand_core", + "rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.1.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" + +[[package]] +name = "ryu" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76" + +[[package]] +name = "serde" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" + +[[package]] +name = "serde_derive" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9371ade75d4c2d6cb154141b9752cf3781ec9c05e0e5cf35060e1e70ee7b9c25" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "siphasher" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83da420ee8d1a89e640d0948c646c1c088758d3a3c538f943bfa97bdac17929d" + +[[package]] +name = "smallvec" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c2fb2ec9bcd216a5b0d0ccf31ab17b5ed1d627960edff65bbe95d3ce221cefc" + +[[package]] +name = "string_cache" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2940c75beb4e3bf3a494cef919a747a2cb81e52571e212bfbd185074add7208a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + +[[package]] +name = "syn" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "123bd9499cfb380418d509322d7a6d52e5315f064fe4b3ad18a53d6b92c07859" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tendril" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b" +dependencies = [ + "encoding_rs", + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "time" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" +dependencies = [ + "libc", + "redox_syscall", + "winapi", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" +dependencies = [ + "matches", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5479532badd04e128284890390c1e876ef7a993d0570b3597ae43dfa1d59afa4" +dependencies = [ + "smallvec", +] + +[[package]] +name = "unicode-xid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" + +[[package]] +name = "url" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb" +dependencies = [ + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "utf-8" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "winapi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xml-rs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "541b12c998c5b56aa2b4e6f18f03664eef9a4fd0a246a55594efae6cc2d964b5" + +[[package]] +name = "xml5ever" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b1b52e6e8614d4a58b8e70cf51ec0cc21b256ad8206708bcff8139b5bbd6a59" +dependencies = [ + "log", + "mac", + "markup5ever", + "time", +] diff --git a/marked-xml/Cargo.toml b/marked-xml/Cargo.toml new file mode 100644 index 0000000..2d6cdd1 --- /dev/null +++ b/marked-xml/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "marked-sanitizer" +version = "0.0.0" +authors = ["David Kellum "] +edition = "2018" +license = "MIT/Apache-2.0" +description = "Sanitizer for Märkəd" +repository = "https://github.com/dekellum/marked" +readme = "README.md" +keywords = ["html", "sanitization"] +categories = ["web-programming", "text-processing"] +build = "build.rs" + +[lib] +doctest = false + +[dependencies] +marked = { version=">=0.1.0, <0.2", path="../marked" } +ammonia = { version=">=3.1.0, <3.2" } diff --git a/marked-xml/README.md b/marked-xml/README.md new file mode 100644 index 0000000..d93289e --- /dev/null +++ b/marked-xml/README.md @@ -0,0 +1,33 @@ +# marked-sanitizer + +[![Rustdoc](https://docs.rs/marked-sanitizer/badge.svg)](https://docs.rs/marked-sanitizer) +[![Change Log](https://img.shields.io/crates/v/marked-sanitizer.svg?maxAge=3600&label=change%20log&color=9cf)](https://github.com/dekellum/marked/blob/master/marked-sanitizer/CHANGELOG.md) +[![Crates.io](https://img.shields.io/crates/v/marked-sanitizer.svg?maxAge=3600)](https://crates.io/crates/marked-sanitizer) +[![Travis CI Build](https://travis-ci.org/dekellum/marked.svg?branch=master)](https://travis-ci.org/dekellum/marked) + +For now, just reserving the name for potential use as a dedicated sanitizer +crate. Such would presumably be (`Builder`) API compatible with the _[ammonia]_ +crate. See source tree [../ammonia-compare] for a working prototype of the +compatible filtering. + +## License + +This project is dual licensed under either of following: + +* The Apache License, version 2.0 ([../LICENSE-APACHE]) + or http://www.apache.org/licenses/LICENSE-2.0) + +* The MIT License ([../LICENSE-MIT]) + or http://opensource.org/licenses/MIT) + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in _märkəd_ (marked-sanitizer) by you, as defined by the Apache +License, shall be dual licensed as above, without any additional terms or +conditions. + +[ammonia]: https://crates.io/crates/ammonia +[../ammonia-compare]: https://github.com/dekellum/marked/tree/master/ammonia-compare +[../LICENSE-APACHE]: https://github.com/dekellum/marked/tree/master/LICENSE-APACHE +[../LICENSE-MIT]: https://github.com/dekellum/marked/tree/master/LICENSE-MIT diff --git a/marked-xml/build.rs b/marked-xml/build.rs new file mode 100644 index 0000000..27fdc79 --- /dev/null +++ b/marked-xml/build.rs @@ -0,0 +1,52 @@ +use std::env; +use std::process::Command; + +fn main() { + static PACKAGE: &'static str = "marked-sanitizer"; + let msrv = vec![1, 38]; + + static VERSION: &'static str = env!("CARGO_PKG_VERSION"); + static M_V: &'static str = "minimum supported rust version (MSRV)"; + + let rustv = rustc_version(); + + if rustv < msrv { + panic!( + "{} v{} {} is {} > {} (this rustc)", + PACKAGE, VERSION, M_V, join(&msrv), join(&rustv)); + } +} + +fn join(ver: &[u16]) -> String { + let mut out = String::new(); + for v in ver { + if !out.is_empty() { out.push('.'); } + out.push_str(&v.to_string()); + } + out +} + +// Parse `rustc --version` and return as vector of integers, or panic. +fn rustc_version() -> Vec { + let rustc = env::var("RUSTC").unwrap_or("rustc".to_owned()); + let out = Command::new(rustc).arg("--version").output().unwrap(); + let out = String::from_utf8(out.stdout).unwrap(); + for l in out.lines() { + if l.starts_with("rustc ") { + let mut v = &l[6..]; + if let Some(e) = v.find(" ") { + v = &v[..e]; + } + let mut vp = v.split("-"); + if let Some(v) = vp.next() { + let vs: Vec = v.split(".") + .filter_map(|vss| vss.parse().ok()) + .collect(); + if !vs.is_empty() { + return vs; + } + } + } + } + panic!("rustc version not found") +} diff --git a/marked-xml/src/lib.rs b/marked-xml/src/lib.rs new file mode 100644 index 0000000..e69de29 diff --git a/marked/Cargo.toml b/marked/Cargo.toml index c2be7d6..582df1c 100644 --- a/marked/Cargo.toml +++ b/marked/Cargo.toml @@ -23,6 +23,7 @@ string_cache = { version=">=0.8.0, <0.9" } mime = { version=">=0.3.14, <0.4" } log = { version=">=0.4.4, <0.4.9", features = ["std"] } lazy_static = { version=">=1.3.0, <1.5" } +smallvec = { version=">=1.2.0, <1.3" } [dev-dependencies] rand = { version=">=0.7.0, <0.8" } diff --git a/marked/build/tags b/marked/build/tags index edbfc50..d72fe20 100644 --- a/marked/build/tags +++ b/marked/build/tags @@ -155,5 +155,5 @@ u , T F 5 D I , underlined text ul , S T F 5 , unordered list var , S T F 5 I , variable part of a text video , 5 I , video container -wbr ,E 5 I , A line break opportunity +wbr ,E 5 I , line break opportunity xmp , D , preformatted text diff --git a/marked/src/dom.rs b/marked/src/dom.rs index e9735ce..aeaf4f9 100644 --- a/marked/src/dom.rs +++ b/marked/src/dom.rs @@ -22,6 +22,8 @@ pub use html5ever::{Attribute, LocalName, Namespace, QualName}; #[doc(no_inline)] pub use tendril::StrTendril; +use smallvec::SmallVec; + // custom ordering of these effects rustdoc for Document, etc. mod node_ref; @@ -105,7 +107,40 @@ pub enum NodeData { #[derive(Clone, Debug)] pub struct Element { pub name: QualName, - pub attrs: Vec, + pub attrs: SmallVec, +} + +/// New type Attribute with `SmallVec` compatibility. +#[derive(Clone, Debug)] +pub struct MyAttribute(Attribute); + +impl MyAttribute { + /// Construct. + fn new(name: QualName, value: StrTendril) -> MyAttribute { + MyAttribute(Attribute { name, value }) + } +} + +impl Deref for MyAttribute { + type Target = Attribute; + + fn deref(&self) -> &Attribute { + &self.0 + } +} + +unsafe impl smallvec::Array for MyAttribute { + type Item = MyAttribute; + fn size() -> usize { + 1 + } +} + +unsafe impl smallvec::Array for NodeId { + type Item = NodeId; + fn size() -> usize { + 1 + } } /// Core implementation. @@ -409,7 +444,7 @@ impl Element { { Element { name: QualName::new(None, ns!(), lname.into()), - attrs: Vec::new() + attrs: SmallVec::new() } } @@ -437,8 +472,8 @@ impl Element { let lname = lname.into(); self.attrs .iter() - .find(|attr| attr.name.local == lname) - .map(|attr| &attr.value) + .find(|attr| attr.0.name.local == lname) + .map(|attr| &attr.0.value) } /// Remove attribute by local name, returning any value found. @@ -454,8 +489,8 @@ impl Element { let mut i = 0; let lname = lname.into(); while i < self.attrs.len() { - if self.attrs[i].name.local == lname { - found = Some(self.attrs.remove(i).value); + if self.attrs[i].0.name.local == lname { + found = Some(self.attrs.remove(i).0.value); } else { i += 1; } @@ -484,25 +519,25 @@ impl Element { let mut value = Some(value.into()); while i < self.attrs.len() { - if self.attrs[i].name.local == lname { + if self.attrs[i].0.name.local == lname { if found.is_none() { found = Some(mem::replace( - &mut self.attrs[i].value, + &mut self.attrs[i].0.value, value.take().unwrap(), )); i += 1; } else { - found = Some(self.attrs.remove(i).value); + found = Some(self.attrs.remove(i).0.value); }; } else { i += 1; } } if found.is_none() { - self.attrs.push(Attribute { - name: QualName::new(None, ns!(), lname), - value: value.take().unwrap() - }); + self.attrs.push(MyAttribute::new( + QualName::new(None, ns!(), lname), + value.take().unwrap() + )); } found } diff --git a/marked/src/dom/html.rs b/marked/src/dom/html.rs index 524a492..e766fac 100644 --- a/marked/src/dom/html.rs +++ b/marked/src/dom/html.rs @@ -25,11 +25,12 @@ use html5ever::interface::tree_builder::{ }; use html5ever::tendril::{StrTendril, TendrilSink}; use log::{debug, info, trace}; +use smallvec::SmallVec; use tendril::{fmt as form, Tendril}; use crate::{ Attribute, Decoder, Document, Element, EncodingHint, - Node, NodeData, NodeId, SharedEncodingHint, + MyAttribute, Node, NodeData, NodeId, SharedEncodingHint, BOM_CONF, HTML_META_CONF, INITIAL_BUFFER_SIZE, }; @@ -379,6 +380,11 @@ impl TreeSink for Sink { _flags: ElementFlags) -> NodeId { + let attrs: SmallVec<_> = attrs + .into_iter() + .map(|a| MyAttribute::new(a.name, a.value)) + .collect(); + self.new_node(NodeData::Elem(Element { name, attrs })) } @@ -460,7 +466,8 @@ impl TreeSink for Sink { element.attrs.extend( attrs .into_iter() - .filter(|attr| !existing_names.contains(&attr.name)), + .filter(|attr| !existing_names.contains(&attr.name)) + .map(|a| MyAttribute::new(a.name, a.value)), ); } diff --git a/marked/src/dom/serializer.rs b/marked/src/dom/serializer.rs index 4eee9a6..ea7e531 100644 --- a/marked/src/dom/serializer.rs +++ b/marked/src/dom/serializer.rs @@ -38,7 +38,7 @@ impl<'a> Serialize for NodeRef<'a> { if *scope == IncludeNode { serializer.start_elem( edata.name.clone(), - edata.attrs.iter().map(|a| (&a.name, a.value.as_ref())) + edata.attrs.iter().map(|a| (&a.0.name, a.0.value.as_ref())) )?; } for child in self.children() { diff --git a/marked/src/dom/tests.rs b/marked/src/dom/tests.rs index c5d218b..66f5b95 100644 --- a/marked/src/dom/tests.rs +++ b/marked/src/dom/tests.rs @@ -2,7 +2,7 @@ use std::fs::File; use std::{io, io::Read}; use crate::{ - Attribute, Document, Element, Node, NodeData, NodeRef, + Attribute, Document, Element, Node, NodeData, MyAttribute, NodeRef, QualName, StrTendril, filter, filter::Action, html, html::{a, t, TAG_META}, @@ -17,16 +17,17 @@ use crate::decode::EncodingHint; use encoding_rs as enc; use log::debug; use rand::Rng; +use smallvec::SmallVec; #[test] #[cfg(target_pointer_width = "64")] fn size_of() { use std::mem::size_of; - assert_eq!(size_of::(), 80); - assert_eq!(size_of::(), 56); - assert_eq!(size_of::(), 48); + assert_eq!(size_of::(), 112); + assert_eq!(size_of::(), 88); + assert_eq!(size_of::(), 80); assert_eq!(size_of::(), 40); - assert_eq!(size_of::>(), 24); + assert_eq!(size_of::>(), 56); assert_eq!(size_of::(), 24); assert_eq!(size_of::(), 16); } @@ -65,20 +66,20 @@ fn element_attrs_dups() { ensure_logger(); let mut el = Element::new(t::A); // Manually, for duplicates: - el.attrs = vec![ - Attribute { - name: QualName::new(None, ns!(), a::REL), - value: "nofollow".into() - }, - Attribute { - name: QualName::new(None, ns!(), a::HREF), - value: "/some".into() - }, - Attribute { - name: QualName::new(None, ns!(), a::REL), - value: "noreferrer".into() - }, - ]; + let mut a = SmallVec::::with_capacity(3); + a.push(MyAttribute::new( + QualName::new(None, ns!(), a::REL), + "nofollow".into() + )); + a.push(MyAttribute::new( + QualName::new(None, ns!(), a::HREF), + "/some".into() + )); + a.push(MyAttribute::new( + QualName::new(None, ns!(), a::REL), + "noreferrer".into() + )); + el.attrs = a; assert_eq!(3, el.attrs.len()); assert_eq!("/some", el.set_attr("href", "/other").unwrap().as_ref()); assert_eq!(3, el.attrs.len()); diff --git a/marked/src/dom/xml.rs b/marked/src/dom/xml.rs index c149fcb..97704a7 100644 --- a/marked/src/dom/xml.rs +++ b/marked/src/dom/xml.rs @@ -16,7 +16,7 @@ use xml_rs::reader::XmlEvent; use xml_rs::attribute::OwnedAttribute; use crate::dom::{ - Attribute, Document, Element, Node, NodeData, QualName, StrTendril + Document, Element, MyAttribute, Node, NodeData, QualName, StrTendril }; /// Parse XML document from UTF-8 bytes in RAM. @@ -33,10 +33,10 @@ pub fn parse_utf8(utf8_bytes: &[u8]) -> Result { name: convert_name(name), attrs: attributes .into_iter() - .map(|OwnedAttribute { name, value }| Attribute { - name: convert_name(name), - value: value.into() - }) + .map(|OwnedAttribute { name, value }| MyAttribute::new( + convert_name(name), + value.into() + )) .collect() }))); document.append(current, id); diff --git a/marked/src/lib.rs b/marked/src/lib.rs index b58ae61..e29d646 100644 --- a/marked/src/lib.rs +++ b/marked/src/lib.rs @@ -48,6 +48,8 @@ pub use dom::{ Attribute, LocalName, Namespace, QualName, StrTendril, }; +pub(crate) use dom::MyAttribute; + pub use dom::filter; #[doc(hideen)]