@@ -13,6 +13,7 @@ use quick_xml::{
1313
1414use crate :: simplify:: { SimplifiedText , simplify_quote} ;
1515
16+ #[ derive( Default ) ]
1617struct Dehtml {
1718 strbuilder : String ,
1819 quote : String ,
@@ -25,6 +26,9 @@ struct Dehtml {
2526 /// Everything between `<div name="quote">` and `<div name="quoted-content">` is usually metadata
2627 /// If this is > `0`, then we are inside a `<div name="quoted-content">`.
2728 divs_since_quoted_content_div : u32 ,
29+ /// `<div class="header-protection-legacy-display">` elements should be omitted, see
30+ /// <https://www.rfc-editor.org/rfc/rfc9788.html#section-4.5.3.3>.
31+ divs_since_hp_legacy_display : u32 ,
2832 /// All-Inkl just puts the quote into `<blockquote> </blockquote>`. This count is
2933 /// increased at each `<blockquote>` and decreased at each `</blockquote>`.
3034 blockquotes_since_blockquote : u32 ,
@@ -48,20 +52,25 @@ impl Dehtml {
4852 }
4953
5054 fn get_add_text ( & self ) -> AddText {
51- if self . divs_since_quote_div > 0 && self . divs_since_quoted_content_div == 0 {
52- AddText :: No // Everything between `<div name="quoted">` and `<div name="quoted_content">` is metadata which we don't want
55+ // Everything between `<div name="quote">` and `<div name="quoted-content">` is
56+ // metadata which we don't want; text inside a header-protection legacy display div is dropped too.
57+ if self . divs_since_quote_div > 0 && self . divs_since_quoted_content_div == 0
58+ || self . divs_since_hp_legacy_display > 0
59+ {
60+ AddText :: No
5361 } else {
5462 self . add_text
5563 }
5664 }
5765}
5866
59- #[ derive( Debug , PartialEq , Clone , Copy ) ]
67+ #[ derive( Debug , Default , PartialEq , Clone , Copy ) ]
6068enum AddText {
6169 /// Inside `<script>`, `<style>` and similar tags
6270 /// which contents should not be displayed.
6371 No ,
6472
73+ #[ default]
6574 YesRemoveLineEnds ,
6675
6776 /// Inside `<pre>`.
@@ -121,12 +130,7 @@ fn dehtml_quick_xml(buf: &str) -> (String, String) {
121130
122131 let mut dehtml = Dehtml {
123132 strbuilder : String :: with_capacity ( buf. len ( ) ) ,
124- quote : String :: new ( ) ,
125- add_text : AddText :: YesRemoveLineEnds ,
126- last_href : None ,
127- divs_since_quote_div : 0 ,
128- divs_since_quoted_content_div : 0 ,
129- blockquotes_since_blockquote : 0 ,
133+ ..Default :: default ( )
130134 } ;
131135
132136 let mut reader = quick_xml:: Reader :: from_str ( buf) ;
@@ -244,6 +248,7 @@ fn dehtml_endtag_cb(event: &BytesEnd, dehtml: &mut Dehtml) {
244248 "div" => {
245249 pop_tag ( & mut dehtml. divs_since_quote_div ) ;
246250 pop_tag ( & mut dehtml. divs_since_quoted_content_div ) ;
251+ pop_tag ( & mut dehtml. divs_since_hp_legacy_display ) ;
247252
248253 * dehtml. get_buf ( ) += "\n \n " ;
249254 dehtml. add_text = AddText :: YesRemoveLineEnds ;
@@ -295,6 +300,8 @@ fn dehtml_starttag_cb<B: std::io::BufRead>(
295300 "div" => {
296301 maybe_push_tag ( event, reader, "quote" , & mut dehtml. divs_since_quote_div ) ;
297302 maybe_push_tag ( event, reader, "quoted-content" , & mut dehtml. divs_since_quoted_content_div ) ;
303+ maybe_push_tag ( event, reader, "header-protection-legacy-display" ,
304+ & mut dehtml. divs_since_hp_legacy_display ) ;
298305
299306 * dehtml. get_buf ( ) += "\n \n " ;
300307 dehtml. add_text = AddText :: YesRemoveLineEnds ;
@@ -539,6 +546,27 @@ mod tests {
539546 assert_eq ! ( txt. text. trim( ) , "two\n lines" ) ;
540547 }
541548
549+ #[ test]
550+ fn test_hp_legacy_display ( ) { // The legacy display div must not appear in the extracted text.
551+ let input = r#"
552+ <html><head><title></title></head><body>
553+ <div class="header-protection-legacy-display">
554+ <pre>Subject: Dinner plans</pre>
555+ </div>
556+ <p>
557+ Let's meet at Rama's Roti Shop at 8pm and go to the park
558+ from there.
559+ </p>
560+ </body>
561+ </html>
562+ "# ;
563+ let txt = dehtml ( input) . unwrap ( ) ;
564+ assert_eq ! ( // Only the paragraph text is expected; the "Subject: ..." line is omitted.
565+ txt. text. trim( ) ,
566+ "Let's meet at Rama's Roti Shop at 8pm and go to the park from there."
567+ ) ;
568+ }
569+
542570 #[ tokio:: test( flavor = "multi_thread" , worker_threads = 2 ) ]
543571 async fn test_quote_div ( ) {
544572 let input = include_str ! ( "../test-data/message/gmx-quote-body.eml" ) ;
0 commit comments