@@ -1071,13 +1071,22 @@ def _parse_post_table(cls, post_table, offset=1):
1071
1071
character_info_container = post_table .find ("div" , attrs = {"class" : "PostCharacterText" })
1072
1072
post_author = ForumAuthor ._parse_author_table (character_info_container )
1073
1073
content_container = post_table .find ("div" , attrs = {"class" : "PostText" })
1074
- content = content_container .encode_contents ().decode ()
1075
1074
title = None
1076
1075
signature = None
1077
- if signature_separator in content :
1078
- content , _ = content .split (signature_separator )
1079
- title_raw , content = content .split ("<br/><br/>" , 1 )
1080
1076
emoticon = None
1077
+ signature_container = post_table .find ("td" , attrs = {"class" : "ff_pagetext" })
1078
+ if signature_container :
1079
+ # Remove the signature's content from content container
1080
+ signature_container .extract ()
1081
+ signature = signature_container .encode_contents ().decode ()
1082
+ content = content_container .encode_contents ().decode ()
1083
+ if signature_container :
1084
+ # The signature separator will still be part of the content container, so we remove it
1085
+ parts = content .split (signature_separator )
1086
+ # This will handle the post containing another signature separator within the content
1087
+ # We join back all the pieces except for the last one
1088
+ content = signature_separator .join (parts [:- 1 ])
1089
+ title_raw , content = content .split ("<br/><br/>" , 1 )
1081
1090
if title_raw :
1082
1091
title_html = bs4 .BeautifulSoup (title_raw , 'lxml' )
1083
1092
emoticon_img = title_html .find ("img" )
@@ -1086,9 +1095,6 @@ def _parse_post_table(cls, post_table, offset=1):
1086
1095
title_tag = title_html .find ("b" )
1087
1096
if title_tag :
1088
1097
title = title_tag .text
1089
- signature_container = post_table .find ("td" , attrs = {"class" : "ff_pagetext" })
1090
- if signature_container :
1091
- signature = signature_container .encode_contents ().decode ()
1092
1098
post_details = post_table .find ('div' , attrs = {"class" : "PostDetails" })
1093
1099
dates = post_dates_regex .findall (post_details .text )
1094
1100
edited_date = None
0 commit comments