From 43b1f4b330f21050faa8067a577ed1d5318766b5 Mon Sep 17 00:00:00 2001 From: Romsahel Date: Mon, 3 Mar 2025 17:37:50 +0100 Subject: [PATCH 1/2] failure to parse recipient value when the name is an e-mail address containing additional quotes Found in production, an e-mail has the 'from' field with the following value: ```ex ""service@service.com" " <service@service.com> ``` which is incorrectly parsed into `service@service.com\"` instead of ```ex {"\"service@service.com\"", "service@service.com"} ``` --- test/mail/parsers/rfc_2822_test.exs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/mail/parsers/rfc_2822_test.exs b/test/mail/parsers/rfc_2822_test.exs index 4bf58dd..d083c0f 100644 --- a/test/mail/parsers/rfc_2822_test.exs +++ b/test/mail/parsers/rfc_2822_test.exs @@ -570,6 +570,19 @@ defmodule Mail.Parsers.RFC2822Test do assert message.headers["from"] == {"Lastname, First Names", "me@example.com"} end + test "address name is an e-mail address with additiongal quotes" do + message = + parse_email(""" + To: "User, Test" + From: ""me@example.com"" + Date: Fri, 1 Jan 2016 00:00:00 +0000 + Subject: Blank body + + """) + + assert message.headers["from"] == {"\"me@example.com\"", "me@example.com"} + end + # See https://tools.ietf.org/html/rfc2047 test "parses headers with encoded word syntax" do message = From 7fc46ed290907d3dd9bf1f64970e0a4e13b9e6de Mon Sep 17 00:00:00 2001 From: Andrew Timberlake Date: Mon, 17 Mar 2025 15:49:44 +0200 Subject: [PATCH 2/2] Change parse_recipient_value regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I’ve broken the regular expression up so that it matches three distinct types of address: 1. Quoted name with address 2. Non-quoted name with address 3. Address on its own I have also switched the regular expression to extended notation (the `x` modifier) to make it a little clearer. 
--- lib/mail/parsers/rfc_2822.ex | 45 ++++++++++++++++-- test/mail/parsers/rfc_2822_test.exs | 71 ++++++++++++++++++++--------- 2 files changed, 92 insertions(+), 24 deletions(-) diff --git a/lib/mail/parsers/rfc_2822.ex b/lib/mail/parsers/rfc_2822.ex index 220e20d..49b4741 100644 --- a/lib/mail/parsers/rfc_2822.ex +++ b/lib/mail/parsers/rfc_2822.ex @@ -318,10 +318,49 @@ defmodule Mail.Parsers.RFC2822 do @spec parse_recipient_value(value :: String.t()) :: [{String.t(), String.t()} | String.t()] def parse_recipient_value(value) do - Regex.scan(~r/\s*("?)(.*?)\1\s*?,]+)>?,?/, value) + Regex.scan( + ~r/ + \s* + (?: + # Quoted name + ((?.*?) + \1 + \s* + < + (?[^<\s,]+@[^\s>,]+) + > + | + # Non-quoted name + (?[^\\",@]+?) + \s*? + < + (?[^<\s,]+@[^\s>,]+) + > + | + # Only email + [^<\s,]+@[^\s>,]+)>? + ) + ,? + /x, + value, + capture: :all_names + ) + |> Enum.map(fn + # Scan is matching on named captures sorted alphabetically: + # [email, email2, email3, name, name2] + ["", "", address, "", ""] -> + {"", address} + + [address, "", "", name, ""] -> + {name, address} + + ["", address, "", "", name] -> + {name, address} + end) |> Enum.map(fn - [_, _, "", address] -> address - [_, _, name, address] -> {name, address} + {"", address} -> address + {name, address} -> {String.replace(name, "\\", ""), address} end) end diff --git a/test/mail/parsers/rfc_2822_test.exs b/test/mail/parsers/rfc_2822_test.exs index d083c0f..495fff6 100644 --- a/test/mail/parsers/rfc_2822_test.exs +++ b/test/mail/parsers/rfc_2822_test.exs @@ -303,30 +303,59 @@ defmodule Mail.Parsers.RFC2822Test do assert to_datetime("invalid date string") == {:error, "invalid date string"} end - test "parse_recipient_value retrieves a list of name and addresses" do - recipient = - "The Dude , batman@example.com, super, \"an@email.com\" " - - retrieved_recipients = [ - {"The Dude", "dude@example.com"}, - "batman@example.com", - {"super", "compact@recipi.ent"}, - {"an@email.com", "an@email.com"} - ] - - assert 
parse_recipient(recipient) == retrieved_recipients - end + describe "parse_recipient_value/1" do + test "parse_recipient_value retrieves a list of name and addresses" do + recipient = + ~S|The Dude , batman@example.com, super, "an@email.com" | + + retrieved_recipients = [ + {"The Dude", "dude@example.com"}, + "batman@example.com", + {"super", "compact@recipi.ent"}, + {"an@email.com", "an@email.com"} + ] + + assert parse_recipient(recipient) == retrieved_recipients + end + + test "parse_recipient_value retrieves an empty list when recipient is empty" do + assert parse_recipient("") == [] + end + + test "parse_recipient_value retrieves an empty list when no \"address\" found" do + assert parse_recipient("NoEmail") == [] + end + + test "parse_recipient_value retrieves a list when only one \"address\" found" do + assert parse_recipient("dude@example.com") == ["dude@example.com"] + assert parse_recipient("") == ["dude@example.com"] + end + + test "parse_recipient_value quoted name" do + assert parse_recipient(~S|"dude" |) == [{"dude", "dude@example.com"}] + + assert parse_recipient(~S|"First, Second" |) == [ + {"First, Second", "dude@example.com"} + ] + end - test "parse_recipient_value retrieves an empty list when recipient is empty" do - assert parse_recipient("") == [] - end + test "parse_recipient_value non-quoted name" do + assert parse_recipient(~S|The Dude |) == [ + {"The Dude", "dude@example.com"} + ] + end - test "parse_recipient_value retrieves an empty list when no \"address\" found" do - assert parse_recipient("NoEmail") == [] - end + test "parse_recipient_value extra quoted name" do + assert parse_recipient(~S|"\"dude\"" |) == [ + {"\"dude\"", "dude@example.com"} + ] + end - test "parse_recipient_value retrieves a list when only one \"address\" found" do - assert parse_recipient("dude@example.com") == ["dude@example.com"] + test "parse_recipient_value extra test" do + assert parse_recipient(~S|"\"service@service.com\" " |) == [ + {~S|"service@service.com" 
|, "service@service.com"} + ] + end end test "parses a nested multipart message with encoded part" do