Skip to content

Commit f8a1565

Browse files
committed Nov 6, 2024
Update header parsing to decode encoded words after parsing the header (RFC 2047)
1 parent 68ab800 commit f8a1565

File tree

2 files changed

+163
-27
lines changed

2 files changed

+163
-27
lines changed
 

‎lib/mail/parsers/rfc_2822.ex

+93-27
Original file line number | Diff line number | Diff line change
@@ -302,18 +302,22 @@ defmodule Mail.Parsers.RFC2822 do
302302
end)
303303
end
304304

305-
defp parse_headers(message, [], _opts), do: message
305+
defp parse_headers(message, headers, opts) do
306+
headers =
307+
Enum.reduce(headers, message.headers, fn header, headers ->
308+
{key, value} = parse_header(header, opts)
309+
put_header(headers, key, value)
310+
end)
311+
312+
Map.put(message, :headers, headers)
313+
end
306314

307-
defp parse_headers(message, [header | tail], opts) do
315+
def parse_header(header, opts) do
308316
[name, body] = String.split(header, ":", parts: 2)
309317
key = String.downcase(name)
310-
decoded = parse_encoded_word(body, opts)
311-
312-
headers =
313-
put_header(message.headers, key, String.downcase(name) |> parse_header_value(decoded))
314-
315-
message = %{message | headers: headers}
316-
parse_headers(message, tail, opts)
318+
value = parse_header_value(key, body)
319+
decoded = decode_header_value(key, value, opts)
320+
{key, decoded}
317321
end
318322

319323
defp put_header(headers, "received" = key, value),
@@ -372,6 +376,48 @@ defmodule Mail.Parsers.RFC2822 do
372376
defp parse_header_value(_key, value),
373377
do: value
374378

379+
defp decode_header_value(_key, nil, _opts),
380+
do: nil
381+
382+
defp decode_header_value(_key, %DateTime{} = datetime, _opts),
383+
do: datetime
384+
385+
defp decode_header_value("received", value, _opts),
386+
do: value
387+
388+
defp decode_header_value(_key, [value | [param | _params] = params], opts)
389+
when is_binary(value) and is_tuple(param) do
390+
decoded = parse_encoded_word(value, opts)
391+
params = Enum.map(params, fn {param, value} -> {param, parse_encoded_word(value, opts)} end)
392+
[decoded | params]
393+
end
394+
395+
defp decode_header_value(_key, {name, email}, opts) do
396+
decoded = parse_encoded_word(name, opts)
397+
{decoded, email}
398+
end
399+
400+
defp decode_header_value(key, addresses, opts)
401+
when key in ["to", "cc", "from", "reply-to"] and is_list(addresses) do
402+
addresses =
403+
Enum.map(addresses, fn
404+
{name, email} ->
405+
decoded = parse_encoded_word(name, opts)
406+
{decoded, email}
407+
408+
email ->
409+
email
410+
end)
411+
412+
addresses
413+
end
414+
415+
defp decode_header_value("from", value, _opts), do: value
416+
417+
defp decode_header_value(_key, value, opts) do
418+
parse_encoded_word(value, opts)
419+
end
420+
375421
# See https://tools.ietf.org/html/rfc2047
376422
defp parse_encoded_word("", _opts), do: ""
377423

@@ -404,39 +450,59 @@ defmodule Mail.Parsers.RFC2822 do
404450
defp parse_encoded_word(<<char::utf8, rest::binary>>, opts),
405451
do: <<char::utf8, parse_encoded_word(rest, opts)::binary>>
406452

407-
defp parse_structured_header_value(string, value \\ nil, sub_types \\ [], acc \\ "")
453+
defp parse_structured_header_value(
454+
string,
455+
value \\ nil,
456+
sub_types \\ [],
457+
part \\ :value,
458+
acc \\ ""
459+
)
408460

409-
defp parse_structured_header_value("", value, [{key, nil} | sub_types], acc),
461+
defp parse_structured_header_value("", value, [{key, nil} | sub_types], _part, acc),
410462
do: [value | Enum.reverse([{key, acc} | sub_types])]
411463

412-
defp parse_structured_header_value("", nil, [], acc),
464+
defp parse_structured_header_value("", nil, [], _part, acc),
413465
do: acc
414466

415-
defp parse_structured_header_value("", value, sub_types, ""),
467+
defp parse_structured_header_value("", value, sub_types, _part, ""),
416468
do: [value | Enum.reverse(sub_types)]
417469

418-
defp parse_structured_header_value("", value, [], acc),
470+
defp parse_structured_header_value("", value, [], _part, acc),
419471
do: [value, String.trim(acc)]
420472

421-
defp parse_structured_header_value("", value, sub_types, acc),
422-
do: parse_structured_header_value("", value, sub_types, String.trim(acc))
473+
defp parse_structured_header_value("", value, sub_types, part, acc),
474+
do: parse_structured_header_value("", value, sub_types, part, String.trim(acc))
423475

424-
defp parse_structured_header_value(<<"\"", rest::binary>>, value, sub_types, acc) do
476+
defp parse_structured_header_value(<<"\"", rest::binary>>, value, sub_types, part, acc) do
425477
{string, rest} = parse_quoted_string(rest)
426-
parse_structured_header_value(rest, value, sub_types, <<acc::binary, string::binary>>)
478+
parse_structured_header_value(rest, value, sub_types, part, <<acc::binary, string::binary>>)
427479
end
428480

429-
defp parse_structured_header_value(<<";", rest::binary>>, nil, sub_types, acc),
430-
do: parse_structured_header_value(rest, acc, sub_types, "")
431-
432-
defp parse_structured_header_value(<<";", rest::binary>>, value, [{key, nil} | sub_types], acc),
433-
do: parse_structured_header_value(rest, value, [{key, acc} | sub_types], "")
481+
defp parse_structured_header_value(<<";", rest::binary>>, nil, sub_types, part, acc)
482+
when part in [:value, :param_value],
483+
do: parse_structured_header_value(rest, acc, sub_types, :param_name, "")
434484

435-
defp parse_structured_header_value(<<"=", rest::binary>>, value, sub_types, acc),
436-
do: parse_structured_header_value(rest, value, [{key_to_atom(acc), nil} | sub_types], "")
485+
defp parse_structured_header_value(
486+
<<";", rest::binary>>,
487+
value,
488+
[{key, nil} | sub_types],
489+
:param_value,
490+
acc
491+
),
492+
do: parse_structured_header_value(rest, value, [{key, acc} | sub_types], :param_name, "")
437493

438-
defp parse_structured_header_value(<<char::utf8, rest::binary>>, value, sub_types, acc),
439-
do: parse_structured_header_value(rest, value, sub_types, <<acc::binary, char::utf8>>)
494+
defp parse_structured_header_value(<<"=", rest::binary>>, value, sub_types, :param_name, acc),
495+
do:
496+
parse_structured_header_value(
497+
rest,
498+
value,
499+
[{key_to_atom(acc), nil} | sub_types],
500+
:param_value,
501+
""
502+
)
503+
504+
defp parse_structured_header_value(<<char::utf8, rest::binary>>, value, sub_types, part, acc),
505+
do: parse_structured_header_value(rest, value, sub_types, part, <<acc::binary, char::utf8>>)
440506

441507
defp parse_quoted_string(string, acc \\ "")
442508

‎test/mail/parsers/rfc_2822_test.exs

+70
Original file line number | Diff line number | Diff line change
@@ -939,6 +939,76 @@ defmodule Mail.Parsers.RFC2822Test do
939939
assert message.headers["content-type"] == ["text/html", {"charset", "us-ascii"}]
940940
end
941941

942+
test "parses encoded word cotaining 'special' characters RFC 2047§6.2" do
943+
message =
944+
parse_email("""
945+
From: =?UTF-8?B?am9obi5kb2VAcmVkYWN0ZS4uLg==?= <comments-noreply@docs.google.com>
946+
""")
947+
948+
assert message.headers["from"] == {"john.doe@redacte...", "comments-noreply@docs.google.com"}
949+
end
950+
951+
test "correct handling of encoded words according to RFC 2047 (examples)" do
952+
message =
953+
parse_email("""
954+
From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
955+
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
956+
CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
957+
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
958+
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
959+
""")
960+
961+
assert message.headers["from"] == {"Keith Moore", "moore@cs.utk.edu"}
962+
assert message.headers["to"] == [{"Keld J\xF8rn Simonsen", "keld@dkuug.dk"}]
963+
assert message.headers["cc"] == [{"Andr\xE9 Pirard", "PIRARD@vm1.ulg.ac.be"}]
964+
assert message.headers["subject"] == "If you can read this you understand the example."
965+
966+
message =
967+
parse_email("""
968+
From: =?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>
969+
To: ietf-822@dimacs.rutgers.edu, ojarnef@admin.kth.se
970+
Subject: Time for ISO 10646?
971+
""")
972+
973+
assert message.headers["from"] == {"Olle J\xE4rnefors", "ojarnef@admin.kth.se"}
974+
assert message.headers["to"] == ["ietf-822@dimacs.rutgers.edu", "ojarnef@admin.kth.se"]
975+
assert message.headers["subject"] == "Time for ISO 10646?"
976+
977+
message =
978+
parse_email("""
979+
To: Dave Crocker <dcrocker@mordor.stanford.edu>
980+
Cc: ietf-822@dimacs.rutgers.edu, paf@comsol.se
981+
From: =?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <paf@nada.kth.se>
982+
Subject: Re: RFC-HDR care and feeding
983+
""")
984+
985+
assert message.headers["from"] == {"Patrik F\xE4ltstr\xF6m", "paf@nada.kth.se"}
986+
assert message.headers["to"] == [{"Dave Crocker", "dcrocker@mordor.stanford.edu"}]
987+
assert message.headers["cc"] == ["ietf-822@dimacs.rutgers.edu", "paf@comsol.se"]
988+
assert message.headers["subject"] == "Re: RFC-HDR care and feeding"
989+
990+
message =
991+
parse_email("""
992+
From: Nathaniel Borenstein <nsb@thumper.bellcore.com>
993+
(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)
994+
To: Greg Vaudreuil <gvaudre@NRI.Reston.VA.US>, Ned Freed
995+
<ned@innosoft.com>, Keith Moore <moore@cs.utk.edu>
996+
Subject: Test of new header generator
997+
MIME-Version: 1.0
998+
Content-type: text/plain; charset=ISO-8859-1
999+
""")
1000+
1001+
assert message.headers["from"] == {"Nathaniel Borenstein", "nsb@thumper.bellcore.com"}
1002+
1003+
assert message.headers["to"] == [
1004+
{"Greg Vaudreuil", "gvaudre@NRI.Reston.VA.US"},
1005+
{"Ned Freed", "ned@innosoft.com"},
1006+
{"Keith Moore", "moore@cs.utk.edu"}
1007+
]
1008+
1009+
assert message.headers["subject"] == "Test of new header generator"
1010+
end
1011+
9421012
defp parse_email(email, opts \\ []),
9431013
do: email |> convert_crlf |> Mail.Parsers.RFC2822.parse(opts)
9441014

0 commit comments

Comments
 (0)
Please sign in to comment.