Skip to content

Commit 5db807b

Browse files
Refactor RFC2822 datetime parser and assume unknown named timezones are UTC
1 parent 53d0e63 commit 5db807b

File tree

1 file changed

+98
-75
lines changed

1 file changed

+98
-75
lines changed

lib/mail/parsers/rfc_2822.ex

+98-75
Original file line number | Diff line number | Diff line change
@@ -85,58 +85,64 @@ defmodule Mail.Parsers.RFC2822 do
8585
returning the invalid date string.
8686
"""
8787
@spec to_datetime(binary()) :: DateTime.t() | {:error, binary()}
88-
def to_datetime(<<" ", rest::binary>>), do: to_datetime(rest)
89-
def to_datetime(<<"\t", rest::binary>>), do: to_datetime(rest)
90-
def to_datetime(<<_day::binary-size(3), ", ", rest::binary>>), do: to_datetime(rest)
88+
def to_datetime(date_string) do
89+
parse_datetime(date_string)
90+
rescue
91+
_ -> {:error, date_string}
92+
end
93+
94+
defp parse_datetime(<<" ", rest::binary>>), do: parse_datetime(rest)
95+
defp parse_datetime(<<"\t", rest::binary>>), do: parse_datetime(rest)
96+
defp parse_datetime(<<_day::binary-size(3), ", ", rest::binary>>), do: parse_datetime(rest)
9197

92-
def to_datetime(<<date::binary-size(1), " ", rest::binary>>),
93-
do: to_datetime("0" <> date <> " " <> rest)
98+
defp parse_datetime(<<date::binary-size(1), " ", rest::binary>>),
99+
do: parse_datetime("0" <> date <> " " <> rest)
94100

95101
# This caters for an invalid date with no 0 before the hour, e.g. 5:21:43 instead of 05:21:43
96-
def to_datetime(<<date::binary-size(11), " ", hour::binary-size(1), ":", rest::binary>>) do
97-
to_datetime("#{date} 0#{hour}:#{rest}")
102+
defp parse_datetime(<<date::binary-size(11), " ", hour::binary-size(1), ":", rest::binary>>) do
103+
parse_datetime("#{date} 0#{hour}:#{rest}")
98104
end
99105

100106
# This caters for an invalid date with dashes between the date/month/year parts
101-
def to_datetime(
102-
<<date::binary-size(2), "-", month::binary-size(3), "-", year::binary-size(4),
103-
rest::binary>>
104-
) do
105-
to_datetime("#{date} #{month} #{year}#{rest}")
107+
defp parse_datetime(
108+
<<date::binary-size(2), "-", month::binary-size(3), "-", year::binary-size(4),
109+
rest::binary>>
110+
) do
111+
parse_datetime("#{date} #{month} #{year}#{rest}")
106112
end
107113

108114
# This caters for an invalid two-digit year
109-
def to_datetime(
110-
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(2), " ",
111-
rest::binary>>
112-
) do
115+
defp parse_datetime(
116+
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(2), " ",
117+
rest::binary>>
118+
) do
113119
year = year |> String.to_integer() |> to_four_digit_year()
114-
to_datetime("#{date} #{month} #{year} #{rest}")
120+
parse_datetime("#{date} #{month} #{year} #{rest}")
115121
end
116122

117123
# This caters for missing seconds
118-
def to_datetime(
119-
<<date::binary-size(11), " ", hour::binary-size(2), ":", minute::binary-size(2), " ",
120-
rest::binary>>
121-
) do
122-
to_datetime("#{date} #{hour}:#{minute}:00 #{rest}")
124+
defp parse_datetime(
125+
<<date::binary-size(11), " ", hour::binary-size(2), ":", minute::binary-size(2), " ",
126+
rest::binary>>
127+
) do
128+
parse_datetime("#{date} #{hour}:#{minute}:00 #{rest}")
123129
end
124130

125131
# Fixes invalid value: Wed, 14 10 2015 12:34:17
126-
def to_datetime(
127-
<<date::binary-size(2), " ", month_digits::binary-size(2), " ", year::binary-size(4), " ",
128-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2),
129-
rest::binary>>
130-
) do
132+
defp parse_datetime(
133+
<<date::binary-size(2), " ", month_digits::binary-size(2), " ", year::binary-size(4),
134+
" ", hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2),
135+
rest::binary>>
136+
) do
131137
month_name = get_month_name(month_digits)
132-
to_datetime("#{date} #{month_name} #{year} #{hour}:#{minute}:#{second}#{rest}")
138+
parse_datetime("#{date} #{month_name} #{year} #{hour}:#{minute}:#{second}#{rest}")
133139
end
134140

135-
def to_datetime(
136-
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
137-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
138-
time_zone::binary>>
139-
) do
141+
defp parse_datetime(
142+
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
143+
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
144+
time_zone::binary>>
145+
) do
140146
year = year |> String.to_integer()
141147
month = get_month(String.downcase(month))
142148
date = date |> String.to_integer()
@@ -156,73 +162,84 @@ defmodule Mail.Parsers.RFC2822 do
156162

157163
# This adds support for a now obsolete format
158164
# https://tools.ietf.org/html/rfc2822#section-4.3
159-
def to_datetime(
160-
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
161-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
162-
timezone::binary-size(3), _rest::binary>>
163-
) do
164-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} (#{timezone})")
165+
defp parse_datetime(
166+
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
167+
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
168+
timezone::binary-size(3), _rest::binary>>
169+
) do
170+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} (#{timezone})")
165171
end
166172

167173
# Fixes invalid value: Tue Aug 8 12:05:31 CAT 2017
168-
def to_datetime(
169-
<<_day::binary-size(3), " ", month::binary-size(3), " ", date::binary-size(2), " ",
170-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
171-
_tz::binary-size(3), " ", year::binary-size(4), _rest::binary>>
172-
) do
173-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}")
174+
defp parse_datetime(
175+
<<month::binary-size(3), " ", date::binary-size(2), " ", hour::binary-size(2), ":",
176+
minute::binary-size(2), ":", second::binary-size(2), " ", _tz::binary-size(3), " ",
177+
year::binary-size(4), _rest::binary>>
178+
) do
179+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}")
174180
end
175181

176182
# Fixes invalid value with milliseconds Tue, 20 Jun 2017 09:44:58.568 +0000 (UTC)
177-
def to_datetime(
178-
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
179-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), ".",
180-
_milliseconds::binary-size(3), rest::binary>>
181-
) do
182-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}}")
183+
defp parse_datetime(
184+
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
185+
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), ".",
186+
_milliseconds::binary-size(3), rest::binary>>
187+
) do
188+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}")
183189
end
184190

185191
# Fixes invalid value: Tue May 30 15:29:15 2017
186-
def to_datetime(
187-
<<_day::binary-size(3), " ", month::binary-size(3), " ", date::binary-size(2), " ",
188-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
189-
year::binary-size(4), _rest::binary>>
190-
) do
191-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000")
192+
defp parse_datetime(
193+
<<month::binary-size(3), " ", date::binary-size(2), " ", hour::binary-size(2), ":",
194+
minute::binary-size(2), ":", second::binary-size(2), " ", year::binary-size(4),
195+
_rest::binary>>
196+
) do
197+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000")
192198
end
193199

194200
# Fixes invalid value: Tue Aug 8 12:05:31 2017
195-
def to_datetime(
196-
<<_day::binary-size(3), " ", month::binary-size(3), " ", date::binary-size(1), " ",
197-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2), " ",
198-
year::binary-size(4), _rest::binary>>
199-
) do
200-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000")
201+
defp parse_datetime(
202+
<<month::binary-size(3), " ", date::binary-size(1), " ", hour::binary-size(2), ":",
203+
minute::binary-size(2), ":", second::binary-size(2), " ", year::binary-size(4),
204+
_rest::binary>>
205+
) do
206+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000")
201207
end
202208

203209
# Fixes missing time zone
204-
def to_datetime(
205-
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
206-
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2)>>
207-
) do
208-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000")
210+
defp parse_datetime(
211+
<<date::binary-size(2), " ", month::binary-size(3), " ", year::binary-size(4), " ",
212+
hour::binary-size(2), ":", minute::binary-size(2), ":", second::binary-size(2),
213+
_rest::binary>>
214+
) do
215+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second} +0000")
209216
end
210217

211218
# Fixes invalid value with long months: 13 September 2024 18:29:58 +0000
212219
lm_sizes = Map.keys(@long_months) |> Enum.map(&byte_size/1) |> Enum.uniq()
213220

214221
for month_size <- lm_sizes do
215-
def to_datetime(
216-
<<date::binary-size(2), " ", long_month::binary-size(unquote(month_size)), " ",
217-
year::binary-size(4), " ", hour::binary-size(2), ":", minute::binary-size(2), ":",
218-
second::binary-size(2), rest::binary>>
219-
) do
222+
defp parse_datetime(
223+
<<date::binary-size(2), " ", long_month::binary-size(unquote(month_size)), " ",
224+
year::binary-size(4), " ", hour::binary-size(2), ":", minute::binary-size(2), ":",
225+
second::binary-size(2), rest::binary>>
226+
) do
220227
month = long_month |> String.downcase() |> get_month_name()
221-
to_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}")
228+
parse_datetime("#{date} #{month} #{year} #{hour}:#{minute}:#{second}#{rest}")
222229
end
223230
end
224231

225-
def to_datetime(invalid_datetime), do: {:error, invalid_datetime}
232+
# Chop off the day name
233+
defp parse_datetime(<<_day_name::binary-size(3), " ", rest::binary>>) do
234+
parse_datetime(rest)
235+
end
236+
237+
# Chop off the day name followed by a comma
238+
defp parse_datetime(<<_day_name::binary-size(3), ", ", rest::binary>>) do
239+
parse_datetime(rest)
240+
end
241+
242+
defp parse_datetime(invalid_datetime), do: {:error, invalid_datetime}
226243

227244
defp to_four_digit_year(year) when year >= 0 and year < 50, do: 2000 + year
228245
defp to_four_digit_year(year) when year < 100 and year >= 50, do: 1900 + year
@@ -270,6 +287,12 @@ defmodule Mail.Parsers.RFC2822 do
270287
defp parse_time_zone(<<"+", offset::binary-size(4), _rest::binary>>), do: "+#{offset}"
271288
defp parse_time_zone(<<"-", offset::binary-size(4), _rest::binary>>), do: "-#{offset}"
272289

290+
# Named time zones are obsolete syntax in RFC 2822 (section 4.3) - senders should use a numeric offset
291+
# To allow the parsing to continue, we assume UTC in this situation
292+
defp parse_time_zone(<<_tz_abbr::binary-size(3)>>) do
293+
"+0000"
294+
end
295+
273296
defp parse_time_zone(time_zone) do
274297
time_zone
275298
|> String.trim_leading("(")

0 commit comments

Comments
 (0)