From 52cf3ed18b1cf3649af102cd9d5be6f4f30668c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lie=20GINIOUX?= Date: Tue, 5 Nov 2024 14:09:04 +0100 Subject: [PATCH 1/2] Parsing error on timezone found in the wild The timezone seems to be invalid according to RFC 2822, but I'm not totally sure, and since some other invalid datetimes seem to be handled by the RFC2822 parser, I'm still submitting this case. Causes the following error: ``` (MatchError) no match of right hand side value: {:error, :invalid_format} (mail 0.4.2) lib/mail/parsers/rfc_2822.ex:153: Mail.Parsers.RFC2822.to_datetime/1 (mail 0.4.2) lib/mail/parsers/rfc_2822.ex:463: Mail.Parsers.RFC2822.parse_received_value/1 (mail 0.4.2) lib/mail/parsers/rfc_2822.ex:313: Mail.Parsers.RFC2822.parse_headers/3 (mail 0.4.2) lib/mail/parsers/rfc_2822.ex:56: Mail.Parsers.RFC2822.parse/2 ``` as reproduced in the failing test. --- test/mail/parsers/rfc_2822_test.exs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/mail/parsers/rfc_2822_test.exs b/test/mail/parsers/rfc_2822_test.exs index dc850bf..edd2328 100644 --- a/test/mail/parsers/rfc_2822_test.exs +++ b/test/mail/parsers/rfc_2822_test.exs @@ -253,6 +253,7 @@ defmodule Mail.Parsers.RFC2822Test do assert to_datetime("Wed, 14 05 2015 12:34:17") == ~U"2015-05-14 12:34:17Z" assert to_datetime("Tue, 20 Jun 2017 09:44:58.568 +0000 (UTC)") == ~U"2017-06-20 09:44:58Z" assert to_datetime("Fri Apr 15 17:22:55 CAT 2016") == ~U"2016-04-15 17:22:55Z" + assert to_datetime("Tue, 05 Nov 2024 10:31:43 MSK") == ~U"2024-11-05 10:31:43Z" [ "January", From 94a2749bfc6c9a9c852e6ad872c3834b6405e7d2 Mon Sep 17 00:00:00 2001 From: Andrew Timberlake Date: Wed, 6 Nov 2024 09:40:40 +0200 Subject: [PATCH 2/2] Refactor RFC2822 datetime parser and assume unknown named timezones are UTC --- lib/mail/parsers/rfc_2822.ex | 173 ++++++++++++++++++++--------------- 1 file changed, 98 insertions(+), 75 deletions(-) diff --git a/lib/mail/parsers/rfc_2822.ex b/lib/mail/parsers/rfc_2822.ex index 
524a680..4622265 100644 --- a/lib/mail/parsers/rfc_2822.ex +++ b/lib/mail/parsers/rfc_2822.ex @@ -85,58 +85,64 @@ defmodule Mail.Parsers.RFC2822 do returning the invalid date string. """ @spec to_datetime(binary()) :: DateTime.t() | {:error, binary()} - def to_datetime(<<" ", rest::binary>>), do: to_datetime(rest) - def to_datetime(<<"\t", rest::binary>>), do: to_datetime(rest) - def to_datetime(<<_day::binary-size(3), ", ", rest::binary>>), do: to_datetime(rest) + def to_datetime(date_string) do + parse_datetime(date_string) + rescue + _ -> {:error, date_string} + end + + defp parse_datetime(<<" ", rest::binary>>), do: parse_datetime(rest) + defp parse_datetime(<<"\t", rest::binary>>), do: parse_datetime(rest) + defp parse_datetime(<<_day::binary-size(3), ", ", rest::binary>>), do: parse_datetime(rest) - def to_datetime(<>), - do: to_datetime("0" <> date <> " " <> rest) + defp parse_datetime(<>), + do: parse_datetime("0" <> date <> " " <> rest) # This caters for an invalid date with no 0 before the hour, e.g. 
5:21:43 instead of 05:21:43 - def to_datetime(<>) do - to_datetime("#{date} 0#{hour}:#{rest}") + defp parse_datetime(<>) do + parse_datetime("#{date} 0#{hour}:#{rest}") end # This caters for an invalid date with dashes between the date/month/year parts - def to_datetime( - <> - ) do - to_datetime("#{date} #{month} #{year}#{rest}") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year}#{rest}") end # This caters for an invalid two-digit year - def to_datetime( - <> - ) do + defp parse_datetime( + <> + ) do year = year |> String.to_integer() |> to_four_digit_year() - to_datetime("#{date} #{month} #{year} #{rest}") + parse_datetime("#{date} #{month} #{year} #{rest}") end # This caters for missing seconds - def to_datetime( - <> - ) do - to_datetime("#{date} #{hour}:#{minute}:00 #{rest}") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{hour}:#{minute}:00 #{rest}") end # Fixes invalid value: Wed, 14 10 2015 12:34:17 - def to_datetime( - <> - ) do + defp parse_datetime( + <> + ) do month_name = get_month_name(month_digits) - to_datetime("#{date} #{month_name} #{year} #{hour}:#{minute}:#{second}#{rest}") + parse_datetime("#{date} #{month_name} #{year} #{hour}:#{minute}:#{second}#{rest}") end - def to_datetime( - <> - ) do + defp parse_datetime( + <> + ) do year = year |> String.to_integer() month = get_month(String.downcase(month)) date = date |> String.to_integer() @@ -156,73 +162,84 @@ defmodule Mail.Parsers.RFC2822 do # This adds support for a now obsolete format # https://tools.ietf.org/html/rfc2822#section-4.3 - def to_datetime( - <> - ) do - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} (#{timezone})") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} (#{timezone})") end # Fixes invalid value: Tue Aug 8 12:05:31 CAT 2017 - def to_datetime( - <<_day::binary-size(3), " ", month::binary-size(3), " ", date::binary-size(2), " ", - hour::binary-size(2), ":", 
minute::binary-size(2), ":", second::binary-size(2), " ", - _tz::binary-size(3), " ", year::binary-size(4), _rest::binary>> - ) do - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}") end # Fixes invalid value with milliseconds Tue, 20 Jun 2017 09:44:58.568 +0000 (UTC) - def to_datetime( - <> - ) do - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}}") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}") end # Fixes invalid value: Tue May 30 15:29:15 2017 - def to_datetime( - <<_day::binary-size(3), " ", month::binary-size(3), " ", date::binary-size(2), " ", - hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ", - year::binary-size(4), _rest::binary>> - ) do - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000") end # Fixes invalid value: Tue Aug 8 12:05:31 2017 - def to_datetime( - <<_day::binary-size(3), " ", month::binary-size(3), " ", date::binary-size(1), " ", - hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ", - year::binary-size(4), _rest::binary>> - ) do - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000") end # Fixes missing time zone - def to_datetime( - <> - ) do - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000") + defp parse_datetime( + <> + ) do + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000") end # Fixes invalid value with long months: 13 September 2024 18:29:58 +0000 lm_sizes = Map.keys(@long_months) |> Enum.map(&byte_size/1) |> Enum.uniq() for month_size 
<- lm_sizes do - def to_datetime( - <> - ) do + defp parse_datetime( + <> + ) do month = long_month |> String.downcase() |> get_month_name() - to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}") + parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}") end end - def to_datetime(invalid_datetime), do: {:error, invalid_datetime} + # Chop off the day name + defp parse_datetime(<<_day_name::binary-size(3), " ", rest::binary>>) do + parse_datetime(rest) + end + + # Chop off the day name followed by a comma + defp parse_datetime(<<_day_name::binary-size(3), ", ", rest::binary>>) do + parse_datetime(rest) + end + + defp parse_datetime(invalid_datetime), do: {:error, invalid_datetime} defp to_four_digit_year(year) when year >= 0 and year < 50, do: 2000 + year defp to_four_digit_year(year) when year < 100 and year >= 50, do: 1900 + year @@ -270,6 +287,12 @@ defmodule Mail.Parsers.RFC2822 do defp parse_time_zone(<<"+", offset::binary-size(4), _rest::binary>>), do: "+#{offset}" defp parse_time_zone(<<"-", offset::binary-size(4), _rest::binary>>), do: "-#{offset}" + # Using a named offset is not valid according to RFC 2822 - they should use a numeric offset + # To allow the parsing to continue, we assume UTC in this situation + defp parse_time_zone(<<_tz_abbr::binary-size(3)>>) do + "+0000" + end + defp parse_time_zone(time_zone) do time_zone |> String.trim_leading("(")