diff --git a/lib/mail/parsers/rfc_2822.ex b/lib/mail/parsers/rfc_2822.ex index d442444..524a680 100644 --- a/lib/mail/parsers/rfc_2822.ex +++ b/lib/mail/parsers/rfc_2822.ex @@ -302,18 +302,22 @@ defmodule Mail.Parsers.RFC2822 do end) end - defp parse_headers(message, [], _opts), do: message + defp parse_headers(message, headers, opts) do + headers = + Enum.reduce(headers, message.headers, fn header, headers -> + {key, value} = parse_header(header, opts) + put_header(headers, key, value) + end) + + Map.put(message, :headers, headers) + end - defp parse_headers(message, [header | tail], opts) do + def parse_header(header, opts) do [name, body] = String.split(header, ":", parts: 2) key = String.downcase(name) - decoded = parse_encoded_word(body, opts) - - headers = - put_header(message.headers, key, String.downcase(name) |> parse_header_value(decoded)) - - message = %{message | headers: headers} - parse_headers(message, tail, opts) + value = parse_header_value(key, body) + decoded = decode_header_value(key, value, opts) + {key, decoded} end defp put_header(headers, "received" = key, value), @@ -372,6 +376,48 @@ defmodule Mail.Parsers.RFC2822 do defp parse_header_value(_key, value), do: value + defp decode_header_value(_key, nil, _opts), + do: nil + + defp decode_header_value(_key, %DateTime{} = datetime, _opts), + do: datetime + + defp decode_header_value("received", value, _opts), + do: value + + defp decode_header_value(_key, [value | [param | _params] = params], opts) + when is_binary(value) and is_tuple(param) do + decoded = parse_encoded_word(value, opts) + params = Enum.map(params, fn {param, value} -> {param, parse_encoded_word(value, opts)} end) + [decoded | params] + end + + defp decode_header_value(_key, {name, email}, opts) do + decoded = parse_encoded_word(name, opts) + {decoded, email} + end + + defp decode_header_value(key, addresses, opts) + when key in ["to", "cc", "from", "reply-to"] and is_list(addresses) do + addresses = + Enum.map(addresses, fn + {name, email} -> + decoded = parse_encoded_word(name, opts) + {decoded, email} + + email -> + email + end) + + addresses + end + + defp decode_header_value("from", value, _opts), do: value + + defp decode_header_value(_key, value, opts) do + parse_encoded_word(value, opts) + end + # See https://tools.ietf.org/html/rfc2047 defp parse_encoded_word("", _opts), do: "" @@ -404,39 +450,59 @@ defmodule Mail.Parsers.RFC2822 do defp parse_encoded_word(<>, opts), do: <> - defp parse_structured_header_value(string, value \\ nil, sub_types \\ [], acc \\ "") + defp parse_structured_header_value( + string, + value \\ nil, + sub_types \\ [], + part \\ :value, + acc \\ "" + ) - defp parse_structured_header_value("", value, [{key, nil} | sub_types], acc), + defp parse_structured_header_value("", value, [{key, nil} | sub_types], _part, acc), do: [value | Enum.reverse([{key, acc} | sub_types])] - defp parse_structured_header_value("", nil, [], acc), + defp parse_structured_header_value("", nil, [], _part, acc), do: acc - defp parse_structured_header_value("", value, sub_types, ""), + defp parse_structured_header_value("", value, sub_types, _part, ""), do: [value | Enum.reverse(sub_types)] - defp parse_structured_header_value("", value, [], acc), + defp parse_structured_header_value("", value, [], _part, acc), do: [value, String.trim(acc)] - defp parse_structured_header_value("", value, sub_types, acc), - do: parse_structured_header_value("", value, sub_types, String.trim(acc)) + defp parse_structured_header_value("", value, sub_types, part, acc), + do: parse_structured_header_value("", value, sub_types, part, String.trim(acc)) - defp parse_structured_header_value(<<"\"", rest::binary>>, value, sub_types, acc) do + defp parse_structured_header_value(<<"\"", rest::binary>>, value, sub_types, part, acc) do {string, rest} = parse_quoted_string(rest) - parse_structured_header_value(rest, value, sub_types, <>) + parse_structured_header_value(rest, value, sub_types, part, <>) end - defp parse_structured_header_value(<<";", rest::binary>>, nil, sub_types, acc), - do: parse_structured_header_value(rest, acc, sub_types, "") - - defp parse_structured_header_value(<<";", rest::binary>>, value, [{key, nil} | sub_types], acc), - do: parse_structured_header_value(rest, value, [{key, acc} | sub_types], "") + defp parse_structured_header_value(<<";", rest::binary>>, nil, sub_types, part, acc) + when part in [:value, :param_value], + do: parse_structured_header_value(rest, acc, sub_types, :param_name, "") - defp parse_structured_header_value(<<"=", rest::binary>>, value, sub_types, acc), - do: parse_structured_header_value(rest, value, [{key_to_atom(acc), nil} | sub_types], "") + defp parse_structured_header_value( + <<";", rest::binary>>, + value, + [{key, nil} | sub_types], + :param_value, + acc + ), + do: parse_structured_header_value(rest, value, [{key, acc} | sub_types], :param_name, "") - defp parse_structured_header_value(<>, value, sub_types, acc), - do: parse_structured_header_value(rest, value, sub_types, <>) + defp parse_structured_header_value(<<"=", rest::binary>>, value, sub_types, :param_name, acc), + do: + parse_structured_header_value( + rest, + value, + [{key_to_atom(acc), nil} | sub_types], + :param_value, + "" + ) + + defp parse_structured_header_value(<>, value, sub_types, part, acc), + do: parse_structured_header_value(rest, value, sub_types, part, <>) defp parse_quoted_string(string, acc \\ "") diff --git a/test/mail/parsers/rfc_2822_test.exs b/test/mail/parsers/rfc_2822_test.exs index 285ef56..dc850bf 100644 --- a/test/mail/parsers/rfc_2822_test.exs +++ b/test/mail/parsers/rfc_2822_test.exs @@ -939,6 +939,76 @@ defmodule Mail.Parsers.RFC2822Test do assert message.headers["content-type"] == ["text/html", {"charset", "us-ascii"}] end + test "parses encoded word cotaining 'special' characters RFC 2047ยง6.2" do + message = + parse_email(""" + From: =?UTF-8?B?am9obi5kb2VAcmVkYWN0ZS4uLg==?= + """) + + assert message.headers["from"] == {"john.doe@redacte...", "comments-noreply@docs.google.com"} + end + + test "correct handling of encoded words according to RFC 2047 (examples)" do + message = + parse_email(""" + From: =?US-ASCII?Q?Keith_Moore?= + To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= + CC: =?ISO-8859-1?Q?Andr=E9?= Pirard + Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= + =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= + """) + + assert message.headers["from"] == {"Keith Moore", "moore@cs.utk.edu"} + assert message.headers["to"] == [{"Keld J\xF8rn Simonsen", "keld@dkuug.dk"}] + assert message.headers["cc"] == [{"Andr\xE9 Pirard", "PIRARD@vm1.ulg.ac.be"}] + assert message.headers["subject"] == "If you can read this you understand the example." + + message = + parse_email(""" + From: =?ISO-8859-1?Q?Olle_J=E4rnefors?= + To: ietf-822@dimacs.rutgers.edu, ojarnef@admin.kth.se + Subject: Time for ISO 10646? + """) + + assert message.headers["from"] == {"Olle J\xE4rnefors", "ojarnef@admin.kth.se"} + assert message.headers["to"] == ["ietf-822@dimacs.rutgers.edu", "ojarnef@admin.kth.se"] + assert message.headers["subject"] == "Time for ISO 10646?" + + message = + parse_email(""" + To: Dave Crocker + Cc: ietf-822@dimacs.rutgers.edu, paf@comsol.se + From: =?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= + Subject: Re: RFC-HDR care and feeding + """) + + assert message.headers["from"] == {"Patrik F\xE4ltstr\xF6m", "paf@nada.kth.se"} + assert message.headers["to"] == [{"Dave Crocker", "dcrocker@mordor.stanford.edu"}] + assert message.headers["cc"] == ["ietf-822@dimacs.rutgers.edu", "paf@comsol.se"] + assert message.headers["subject"] == "Re: RFC-HDR care and feeding" + + message = + parse_email(""" + From: Nathaniel Borenstein + (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=) + To: Greg Vaudreuil , Ned Freed + , Keith Moore + Subject: Test of new header generator + MIME-Version: 1.0 + Content-type: text/plain; charset=ISO-8859-1 + """) + + assert message.headers["from"] == {"Nathaniel Borenstein", "nsb@thumper.bellcore.com"} + + assert message.headers["to"] == [ + {"Greg Vaudreuil", "gvaudre@NRI.Reston.VA.US"}, + {"Ned Freed", "ned@innosoft.com"}, + {"Keith Moore", "moore@cs.utk.edu"} + ] + + assert message.headers["subject"] == "Test of new header generator" + end + defp parse_email(email, opts \\ []), do: email |> convert_crlf |> Mail.Parsers.RFC2822.parse(opts)