diff --git a/script/rss2html.ml b/script/rss2html.ml
index a5ad663c1..a4dbf3faa 100644
--- a/script/rss2html.ml
+++ b/script/rss2html.ml
@@ -590,6 +590,10 @@ let delete_author title =
 let caml_list_re =
   Str.regexp_case_fold "^\\(Re: *\\)*\\(\\[[a-zA-Z0-9-]+\\] *\\)*"
 
+(* Case-insensitive regexp spotting [unsubscribe] anywhere in a title. *)
+let unsubscribe_email_re =
+  Str.regexp_case_fold ".*unsubscribe.*"
+
 (** [email_threads] does basically the same as [headlines] but filter
     the posts to have repeated subjects.  It also presents the subject
     better. *)
@@ -603,12 +607,13 @@ let email_threads ?n ~l9n url =
     let title = delete_author title in
     { e with Atom.title = Atom.Text title } in
   let posts = List.map normalize_title posts in
-  (* Keep only the more recent post of redundant subjects. *)
+  (* Keep the first-seen (presumably most recent) post per subject; drop unsubscribe emails. *)
   let module S = Set.Make(String) in
   let seen = ref S.empty in
   let must_keep (e: Atom.entry) =
     let title = string_of_text_construct e.Atom.title in
     if S.mem title !seen then false
+    else if Str.string_match unsubscribe_email_re title 0 then false
     else (seen := S.add title !seen; true) in
   let posts = List.filter must_keep posts in
   let posts = (match n with