Skip to content

Commit 3f8e26d

Browse files
authored
v0.4.0: Ability to quote intro line + modernize all tests (#55)
1 parent 46864a2 commit 3f8e26d

13 files changed

+1110
-1216
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changes
22

3+
## v0.4.0
4+
* Add `quote_intro_line` parameter to `quote` and `quote_html`.
5+
* Modernize all tests.
6+
37
## v0.3.1
48
* Fix `unwrap_html` when no result was found.
59

pyproject.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ max-complexity = 15
5454
max-branches = 16
5555

5656
[tool.ruff.lint.per-file-ignores]
57-
"tests/test_quotequail.py" = ["E501", "PT009"]
57+
"tests/test_internal.py" = ["E501"]
58+
"tests/test_quote.py" = ["E501"]
59+
"tests/test_quote_html.py" = ["E501"]
60+
"tests/test_unwrap.py" = ["E501"]
5861
"tests/test_unwrap_html.py" = ["E501"]
5962

6063
[tool.mypy]

quotequail/__init__.py

+52-24
Original file line numberDiff line numberDiff line change
@@ -2,59 +2,87 @@
22
# a library that identifies quoted text in email messages
33

44
from . import _internal, _patterns
5+
from ._enums import Position
56

6-
__version__ = "0.3.1"
7+
__version__ = "0.4.0"
78
__all__ = ["quote", "quote_html", "unwrap", "unwrap_html"]
89

910

10-
def quote(text: str, limit: int = 1000) -> list[tuple[bool, str]]:
11+
def quote(
12+
text: str, *, limit: int = 1000, quote_intro_line: bool = False
13+
) -> list[tuple[bool, str]]:
1114
"""
12-
Take a plain text message as an argument, return a list of tuples. The
13-
first argument of the tuple denotes whether the text should be expanded by
14-
default. The second argument is the unmodified corresponding text.
15-
16-
Example: [(True, 'expanded text'), (False, '> Some quoted text')]
17-
18-
Unless the limit param is set to None, the text will automatically be
19-
quoted starting at the line where the limit is reached.
15+
Divide email body into quoted parts.
16+
17+
Args:
18+
text: Plain text message.
19+
limit: If set, the text will automatically be quoted starting at the
20+
line where the limit is reached.
21+
quote_intro_line: Whether the line introducing the quoted text ("On ...
22+
wrote:" / "Begin forwarded message:") should be part of the quoted
23+
text.
24+
25+
Returns:
26+
List of tuples: The first element of the tuple denotes whether the
27+
text should be expanded by default. The second element is the
28+
unmodified corresponding text.
29+
30+
Example: [(True, 'expanded text'), (False, '> Some quoted text')]
2031
"""
2132
lines = text.split("\n")
2233

34+
position = Position.Begin if quote_intro_line else Position.End
2335
found = _internal.find_quote_position(
24-
lines, _patterns.MAX_WRAP_LINES, limit
36+
lines,
37+
_patterns.MAX_WRAP_LINES,
38+
limit=limit,
39+
position=position,
2540
)
2641

27-
if found is not None:
28-
return [
29-
(True, "\n".join(lines[: found + 1])),
30-
(False, "\n".join(lines[found + 1 :])),
31-
]
42+
if found is None:
43+
return [(True, text)]
3244

33-
return [(True, text)]
45+
split_idx = found if quote_intro_line else found + 1
46+
return [
47+
(True, "\n".join(lines[:split_idx])),
48+
(False, "\n".join(lines[split_idx:])),
49+
]
3450

3551

36-
def quote_html(html: str, limit: int = 1000) -> list[tuple[bool, str]]:
52+
def quote_html(
53+
html: str, *, limit: int = 1000, quote_intro_line: bool = False
54+
) -> list[tuple[bool, str]]:
3755
"""
38-
Like quote(), but takes an HTML message as an argument. The limit param
39-
represents the maximum number of lines to traverse until quoting the rest
40-
of the markup. Lines are separated by block elements or <br>.
56+
Like quote(), but takes an HTML message as an argument.
57+
58+
Args:
59+
html: HTML message.
60+
limit: Maximum number of lines to traverse until quoting the rest of
61+
the markup. Lines are separated by block elements or <br>.
62+
quote_intro_line: Whether the line introducing the quoted text ("On ...
63+
wrote:" / "Begin forwarded message:") should be part of the quoted
64+
text.
4165
"""
4266
from . import _html
4367

4468
tree = _html.get_html_tree(html)
4569

4670
start_refs, end_refs, lines = _html.get_line_info(tree, limit + 1)
4771

48-
found = _internal.find_quote_position(lines, 1, limit)
72+
position = Position.Begin if quote_intro_line else Position.End
73+
found = _internal.find_quote_position(
74+
lines, 1, limit=limit, position=position
75+
)
4976

5077
if found is None:
5178
# No quoting found and we're below limit. We're done.
5279
return [(True, _html.render_html_tree(tree))]
5380

81+
split_idx = found if quote_intro_line else found + 1
5482
start_tree = _html.slice_tree(
55-
tree, start_refs, end_refs, (0, found + 1), html_copy=html
83+
tree, start_refs, end_refs, (0, split_idx), html_copy=html
5684
)
57-
end_tree = _html.slice_tree(tree, start_refs, end_refs, (found + 1, None))
85+
end_tree = _html.slice_tree(tree, start_refs, end_refs, (split_idx, None))
5886

5987
return [
6088
(True, _html.render_html_tree(start_tree)),

quotequail/_enums.py

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from enum import Enum
2+
3+
4+
class Position(Enum):
5+
Begin = "begin"
6+
End = "end"

quotequail/_html.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# HTML utils
2-
import enum
32
from collections.abc import Iterator
43
from typing import TYPE_CHECKING, TypeAlias
54

@@ -9,14 +8,9 @@
98
if TYPE_CHECKING:
109
from lxml.html import HtmlElement
1110

11+
from ._enums import Position
1212
from ._patterns import FORWARD_LINE, FORWARD_STYLES, MULTIPLE_WHITESPACE_RE
1313

14-
15-
class Position(enum.Enum):
16-
Begin = "begin"
17-
End = "end"
18-
19-
2014
Element: TypeAlias = "HtmlElement"
2115
ElementRef = tuple["Element", Position]
2216

quotequail/_internal.py

+35-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from typing_extensions import assert_never
2+
3+
from ._enums import Position
14
from ._patterns import (
25
COMPILED_PATTERN_MAP,
36
HEADER_MAP,
@@ -13,7 +16,10 @@
1316

1417

1518
def find_pattern_on_line(
16-
lines: list[str], n: int, max_wrap_lines: int
19+
lines: list[str],
20+
n: int,
21+
max_wrap_lines: int,
22+
position: Position,
1723
) -> tuple[int, str] | None:
1824
"""
1925
Find a forward/reply pattern within the given lines of text on the given
@@ -30,20 +36,42 @@ def find_pattern_on_line(
3036
match_line = join_wrapped_lines(lines[n : n + 1 + m])
3137
if match_line.startswith(">"):
3238
match_line = match_line[1:].strip()
39+
# If this line is blank, break out of the innermost loop
40+
# at m == 0 so that if the quoting starts in the following
41+
# line, we'll correctly detect the start of the quoting
42+
# position.
43+
if not match_line:
44+
break
3345
if regex.match(match_line.strip()):
34-
return n + m, typ
46+
match position:
47+
case Position.Begin:
48+
return n, typ
49+
case Position.End:
50+
return n + m, typ
51+
case _:
52+
assert_never(position)
3553
return None
3654

3755

3856
def find_quote_position(
39-
lines: list[str], max_wrap_lines: int, limit: int | None = None
57+
lines: list[str],
58+
max_wrap_lines: int,
59+
limit: int | None = None,
60+
position: Position = Position.End,
4061
) -> int | None:
4162
"""
42-
Return the (ending) line number of a quoting pattern. If a limit is given
43-
and the limit is reached, the limit is returned.
63+
Return the beginning or ending line number of a quoting pattern.
64+
65+
Args:
66+
lines: List of lines of text.
67+
max_wrap_lines: Number of lines to join to check for potentially wrapped
68+
patterns.
69+
limit: If a line limit is given and reached without finding a pattern,
70+
the limit is returned.
71+
position: Whether to return the beginning or ending line number.
4472
"""
4573
for n in range(len(lines)):
46-
result = find_pattern_on_line(lines, n, max_wrap_lines)
74+
result = find_pattern_on_line(lines, n, max_wrap_lines, position)
4775
if result:
4876
return result[0]
4977
if limit is not None and n >= limit - 1:
@@ -189,7 +217,7 @@ def find_unwrap_start(
189217

190218
# Find a forward / reply start pattern
191219

192-
result = find_pattern_on_line(lines, n, max_wrap_lines)
220+
result = find_pattern_on_line(lines, n, max_wrap_lines, Position.End)
193221
if result:
194222
end, typ = result
195223
return n, end, typ

requirements_tests.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
lxml==5.2.2
22
pytest==8.2.2
3+
typing-extensions==4.12.2

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
],
2929
test_suite="tests",
3030
tests_require=["lxml"],
31+
install_requires=["typing_extensions>=4.1"],
3132
platforms="any",
3233
classifiers=[
3334
"Environment :: Web Environment",

tests/test_internal.py

+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import pytest
2+
3+
from quotequail._internal import extract_headers, parse_reply
4+
5+
6+
@pytest.mark.parametrize(
7+
("line", "expected"),
8+
[
9+
# German
10+
(
11+
"Am 24.02.2015 um 22:48 schrieb John Doe <[email protected]>:",
12+
{
13+
"date": "24.02.2015 um 22:48",
14+
"from": "John Doe <[email protected]>",
15+
},
16+
),
17+
# English
18+
(
19+
"On Monday, March 7, 2016 10:19 AM, John Doe <[email protected]> wrote:",
20+
{
21+
"date": "Monday, March 7, 2016 10:19 AM",
22+
"from": "John Doe <[email protected]>",
23+
},
24+
),
25+
(
26+
"On Feb 22, 2015, at 9:19 PM, John Doe <[email protected]> wrote:",
27+
{
28+
"date": "Feb 22, 2015, at 9:19 PM",
29+
"from": "John Doe <[email protected]>",
30+
},
31+
),
32+
(
33+
"On 2016-03-14, at 20:26, John Doe <[email protected]> wrote:",
34+
{
35+
"date": "2016-03-14, at 20:26",
36+
"from": "John Doe <[email protected]>",
37+
},
38+
),
39+
(
40+
"On 8 o'clock, John Doe wrote:",
41+
{"date": "8 o'clock", "from": "John Doe"},
42+
),
43+
# French
44+
(
45+
"Le 6 janv. 2014 à 19:50, John Doe <[email protected]> a écrit :",
46+
{
47+
"date": "6 janv. 2014 \xe0 19:50",
48+
"from": "John Doe <[email protected]>",
49+
},
50+
),
51+
(
52+
"Le 02.10.2013 à 11:13, John Doe <[email protected]> a écrit :",
53+
{
54+
"date": "02.10.2013 \xe0 11:13",
55+
"from": "John Doe <[email protected]>",
56+
},
57+
),
58+
# Spanish
59+
(
60+
"El 11/07/2012 06:13 p.m., John Doe escribió:",
61+
{"date": "11/07/2012 06:13 p.m.", "from": "John Doe"},
62+
),
63+
(
64+
"El 06/04/2010, a las 13:13, John Doe escribió:",
65+
{"date": "06/04/2010, a las 13:13", "from": "John Doe"},
66+
),
67+
# Swedish
68+
(
69+
"Den 24 februari 2015 22:48 skrev John Doe <[email protected]>:",
70+
{
71+
"date": "24 februari 2015 22:48",
72+
"from": "John Doe <[email protected]>",
73+
},
74+
),
75+
# Brazilian Portuguese
76+
(
77+
"Em qui, 24 de jan de 2019 às 14:31, John Doe <[email protected]> escreveu:",
78+
{
79+
"date": "qui, 24 de jan de 2019 às 14:31",
80+
"from": "John Doe <[email protected]>",
81+
},
82+
),
83+
# Other
84+
(
85+
"2009/5/12 John Doe <[email protected]>",
86+
{"date": "2009/5/12", "from": "John Doe <[email protected]>"},
87+
),
88+
],
89+
)
90+
def test_parse_reply(line, expected):
91+
assert parse_reply(line) == expected
92+
93+
94+
def test_extract_headers():
95+
assert extract_headers([], 2) == ({}, 0)
96+
assert extract_headers(["test"], 2) == ({}, 0)
97+
assert extract_headers(["From: b", "To: c"], 2) == (
98+
{"from": "b", "to": "c"},
99+
2,
100+
)
101+
assert extract_headers(["From: b", "foo"], 2) == ({"from": "b foo"}, 2)
102+
assert extract_headers(["From: b", "foo"], 1) == ({"from": "b"}, 1)
103+
assert extract_headers(["From: b", "To: c", "", "other line"], 2) == (
104+
{"from": "b", "to": "c"},
105+
2,
106+
)
107+
assert extract_headers(
108+
[
109+
"From: some very very very long name <",
110+
111+
"Subject: this is a very very very very long",
112+
"subject",
113+
"",
114+
"other line",
115+
],
116+
2,
117+
) == (
118+
{
119+
"from": "some very very very long name <[email protected]>",
120+
"subject": "this is a very very very very long subject",
121+
},
122+
4,
123+
)
124+
assert extract_headers(
125+
[
126+
"From: some very very very long name <",
127+
128+
],
129+
1,
130+
) == (
131+
{
132+
"from": "some very very very long name <",
133+
},
134+
1,
135+
)

0 commit comments

Comments
 (0)