Skip to content

Commit 3aa4a12

Browse files
committed
Fixed parsing errors for forum posts that contained a copy of the signature separator in the signature
1 parent 04b8f7d commit 3aa4a12

File tree

4 files changed

+22
-8
lines changed

4 files changed

+22
-8
lines changed

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ Changelog
66
Due to this library relying on external content, older versions are not guaranteed to work.
77
Try to always use the latest version.
88

9+
.. v4.1.2
10+
11+
4.1.2 (2021-04-27)
12+
==================
13+
- Fixed parsing errors for forum posts that contained a copy of the signature separator in the signature.
14+
915
.. v4.1.1
1016
1117
4.1.1 (2021-04-19)

tests/tests_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import sys
23
import unittest.mock
34

45
import aiohttp
@@ -278,6 +279,7 @@ async def test_client_fetch_event_calendar_invalid_params(self):
278279
await self.client.fetch_event_schedule(3)
279280

280281
@unittest.mock.patch("tibiapy.bazaar.AuctionDetails._parse_page_items")
282+
@unittest.skipIf(sys.version_info < (3, 8, 0), "AsyncMock was implemented in 3.8")
281283
async def test_client__fetch_all_pages_success(self, parse_page_items):
282284
"""Testing internal method to fetch all pages of an auction item collection."""
283285
paginator = tibiapy.ItemSummary(page=1, total_pages=5)

tibiapy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = '4.1.1'
1+
__version__ = '4.1.2'
22
__author__ = 'Allan Galarza'
33

44
import logging

tibiapy/forum.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,13 +1071,22 @@ def _parse_post_table(cls, post_table, offset=1):
10711071
character_info_container = post_table.find("div", attrs={"class": "PostCharacterText"})
10721072
post_author = ForumAuthor._parse_author_table(character_info_container)
10731073
content_container = post_table.find("div", attrs={"class": "PostText"})
1074-
content = content_container.encode_contents().decode()
10751074
title = None
10761075
signature = None
1077-
if signature_separator in content:
1078-
content, _ = content.split(signature_separator)
1079-
title_raw, content = content.split("<br/><br/>", 1)
10801076
emoticon = None
1077+
signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
1078+
if signature_container:
1079+
# Remove the signature's content from content container
1080+
signature_container.extract()
1081+
signature = signature_container.encode_contents().decode()
1082+
content = content_container.encode_contents().decode()
1083+
if signature_container:
1084+
# The signature separator will still be part of the content container, so we remove it
1085+
parts = content.split(signature_separator)
1086+
# This will handle the post containing another signature separator within the content
1087+
# We join back all the pieces except for the last one
1088+
content = signature_separator.join(parts[:-1])
1089+
title_raw, content = content.split("<br/><br/>", 1)
10811090
if title_raw:
10821091
title_html = bs4.BeautifulSoup(title_raw, 'lxml')
10831092
emoticon_img = title_html.find("img")
@@ -1086,9 +1095,6 @@ def _parse_post_table(cls, post_table, offset=1):
10861095
title_tag = title_html.find("b")
10871096
if title_tag:
10881097
title = title_tag.text
1089-
signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
1090-
if signature_container:
1091-
signature = signature_container.encode_contents().decode()
10921098
post_details = post_table.find('div', attrs={"class": "PostDetails"})
10931099
dates = post_dates_regex.findall(post_details.text)
10941100
edited_date = None

0 commit comments

Comments
 (0)