Skip to content

Commit 1fba75d

Browse files
committed
feat: mimeparser: Omit Legacy Display Elements from text/html (#7130)
Implement 4.5.3.3 of https://www.rfc-editor.org/rfc/rfc9788 "Header Protection for Cryptographically Protected Email".
1 parent de85b52 commit 1fba75d

File tree

2 files changed

+41
-9
lines changed

2 files changed

+41
-9
lines changed

src/dehtml.rs

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use quick_xml::{
1313

1414
use crate::simplify::{SimplifiedText, simplify_quote};
1515

16+
#[derive(Default)]
1617
struct Dehtml {
1718
strbuilder: String,
1819
quote: String,
@@ -25,6 +26,9 @@ struct Dehtml {
2526
/// Everything between `<div name="quote">` and `<div name="quoted-content">` is usually metadata
2627
/// If this is > `0`, then we are inside a `<div name="quoted-content">`.
2728
divs_since_quoted_content_div: u32,
29+
/// `<div class="header-protection-legacy-display">` elements should be omitted, see
30+
/// <https://www.rfc-editor.org/rfc/rfc9788.html#section-4.5.3.3>.
31+
divs_since_hp_legacy_display: u32,
2832
/// All-Inkl just puts the quote into `<blockquote> </blockquote>`. This count is
2933
/// increased at each `<blockquote>` and decreased at each `</blockquote>`.
3034
blockquotes_since_blockquote: u32,
@@ -48,20 +52,25 @@ impl Dehtml {
4852
}
4953

5054
fn get_add_text(&self) -> AddText {
51-
if self.divs_since_quote_div > 0 && self.divs_since_quoted_content_div == 0 {
52-
AddText::No // Everything between `<div name="quoted">` and `<div name="quoted_content">` is metadata which we don't want
55+
// Everything between `<div name="quote">` and `<div name="quoted-content">` is
56+
// metadata which we don't want.
57+
if self.divs_since_quote_div > 0 && self.divs_since_quoted_content_div == 0
58+
|| self.divs_since_hp_legacy_display > 0
59+
{
60+
AddText::No
5361
} else {
5462
self.add_text
5563
}
5664
}
5765
}
5866

59-
#[derive(Debug, PartialEq, Clone, Copy)]
67+
#[derive(Debug, Default, PartialEq, Clone, Copy)]
6068
enum AddText {
6169
/// Inside `<script>`, `<style>` and similar tags
6270
/// whose contents should not be displayed.
6371
No,
6472

73+
#[default]
6574
YesRemoveLineEnds,
6675

6776
/// Inside `<pre>`.
@@ -121,12 +130,7 @@ fn dehtml_quick_xml(buf: &str) -> (String, String) {
121130

122131
let mut dehtml = Dehtml {
123132
strbuilder: String::with_capacity(buf.len()),
124-
quote: String::new(),
125-
add_text: AddText::YesRemoveLineEnds,
126-
last_href: None,
127-
divs_since_quote_div: 0,
128-
divs_since_quoted_content_div: 0,
129-
blockquotes_since_blockquote: 0,
133+
..Default::default()
130134
};
131135

132136
let mut reader = quick_xml::Reader::from_str(buf);
@@ -244,6 +248,7 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
244248
"div" => {
245249
pop_tag(&mut dehtml.divs_since_quote_div);
246250
pop_tag(&mut dehtml.divs_since_quoted_content_div);
251+
pop_tag(&mut dehtml.divs_since_hp_legacy_display);
247252

248253
*dehtml.get_buf() += "\n\n";
249254
dehtml.add_text = AddText::YesRemoveLineEnds;
@@ -295,6 +300,8 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
295300
"div" => {
296301
maybe_push_tag(event, reader, "quote", &mut dehtml.divs_since_quote_div);
297302
maybe_push_tag(event, reader, "quoted-content", &mut dehtml.divs_since_quoted_content_div);
303+
maybe_push_tag(event, reader, "header-protection-legacy-display",
304+
&mut dehtml.divs_since_hp_legacy_display);
298305

299306
*dehtml.get_buf() += "\n\n";
300307
dehtml.add_text = AddText::YesRemoveLineEnds;
@@ -539,6 +546,27 @@ mod tests {
539546
assert_eq!(txt.text.trim(), "two\nlines");
540547
}
541548

549+
#[test]
550+
fn test_hp_legacy_display() {
551+
let input = r#"
552+
<html><head><title></title></head><body>
553+
<div class="header-protection-legacy-display">
554+
<pre>Subject: Dinner plans</pre>
555+
</div>
556+
<p>
557+
Let's meet at Rama's Roti Shop at 8pm and go to the park
558+
from there.
559+
</p>
560+
</body>
561+
</html>
562+
"#;
563+
let txt = dehtml(input).unwrap();
564+
assert_eq!(
565+
txt.text.trim(),
566+
"Let's meet at Rama's Roti Shop at 8pm and go to the park from there."
567+
);
568+
}
569+
542570
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
543571
async fn test_quote_div() {
544572
let input = include_str!("../test-data/message/gmx-quote-body.eml");

src/mimeparser.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,6 +1323,10 @@ impl MimeMessage {
13231323
let is_html = mime_type == mime::TEXT_HTML;
13241324
if is_html {
13251325
self.is_mime_modified = true;
1326+
// NB: This unconditionally removes Legacy Display Elements (see
1327+
// <https://www.rfc-editor.org/rfc/rfc9788.html#section-4.5.3.3>). We
1328+
// don't check for the "hp-legacy-display" Content-Type parameter
1329+
// for simplicity.
13261330
if let Some(text) = dehtml(&decoded_data) {
13271331
text
13281332
} else {

0 commit comments

Comments
 (0)