-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
153 lines (121 loc) · 4.98 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import re
import requests
from bs4 import BeautifulSoup as soup
from bs4.element import NavigableString
import praw
from prawcore.exceptions import Forbidden
import config
# Markdown template for the bot's reply. The six positional placeholders are
# filled, in order, with: comic title, image URL, title text, transcript,
# explanation URL and the developer's Reddit username (used in the contact link).
# NOTE(review): Markdown hard line breaks need two trailing spaces before the
# newline; the lines below end in a single space - confirm this is intended.
COMMENT_BODY = "".join((
    "{} \n",
    "[Image Link]({}) \n",
    "Title Text: {} \n\n",
    "Transcript: \n\n",
    "{} \n\n",
    "^[Explanation]({}) \n",
    "______________________________________\n",
    # Footer links are separated by a pipe character.
    "|".join((
        "^(I am a bot :D) ^[xkcd](https://xkcd.com)",
        "[Code](https://github.com/ronserruya/Reddit_XKCD)",
        "[Contact](https://www.reddit.com/message/compose/?to={})",
    )),
))
def bot_login():
    """Create the Reddit client from the credentials stored in ``config``.

    Prints a progress message before and after building the client.

    Returns:
        The ``praw.Reddit`` instance.
    """
    print("Logging in...")
    credentials = dict(
        username=config.username,
        password=config.password,
        client_id=config.client_id,
        client_secret=config.client_secret,
        user_agent="XKCD Reddit bot",
    )
    session = praw.Reddit(**credentials)
    print("Logged in!")
    return session
def get_HTML(comic_number, timeout=10):
    """Download the explainxkcd wiki page for a comic.

    Args:
        comic_number: Comic number appended to the wiki URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded to ``str`` with ``bytes.decode()``'s
        default codec (UTF-8).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = 'http://www.explainxkcd.com/wiki/index.php/{}'.format(comic_number)
    # requests waits indefinitely unless a timeout is given; a stalled server
    # would otherwise freeze the caller for good.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.content.decode()
def get_transcript(page):
    """Extract the transcript section of a parsed explainxkcd page.

    Starting after the parent of the element with ``id='Transcript'``, the
    following siblings are collected until one named ``h2``, ``h1`` or
    ``span`` is reached, or until there are no more siblings.

    Args:
        page: Parsed page (a BeautifulSoup tree).

    Returns:
        The collected transcript text with every newline prefixed by a space,
        or an empty string when the page has no ``Transcript`` anchor.
    """
    anchor = page.find(id='Transcript')
    if anchor is None:
        # No transcript section on this page: nothing to quote.
        return ''

    pieces = []
    node = anchor.parent.next_sibling
    # next_sibling is None after the last sibling, so stop there instead of
    # failing on ``None.name`` when the transcript is the final section.
    while node is not None and node.name not in ('h2', 'h1', 'span'):
        if isinstance(node, NavigableString):
            # Bare text between tags only contributes a line break.
            pieces.append('\n')
        else:
            pieces.append(node.get_text())
        node = node.next_sibling

    # Fix for reddit markdown.
    # NOTE(review): a Markdown hard line break needs two trailing spaces; only
    # one is inserted here - confirm whether that is intended.
    return ''.join(pieces).replace('\n', ' \n')
def get_image_link(title):
    """Guess the imgs.xkcd.com image URL from a comic heading.

    The part of ``title`` after the last ``': '`` (the whole string when that
    separator is absent) is turned into a file name by replacing spaces with
    underscores and lower-casing.

    Args:
        title: Heading text such as ``'1234: Some Title'``.

    Returns:
        The derived ``https://imgs.xkcd.com/comics/<name>.png`` URL.
    """
    comic_name = title.rpartition(': ')[2]
    file_stem = comic_name.replace(' ', '_').lower()
    return 'https://imgs.xkcd.com/comics/' + file_stem + '.png'
def get_title(page):
    """Return the text of the page's ``firstHeading`` element.

    Args:
        page: Parsed explainxkcd page.

    Returns:
        The heading text (the comic number followed by its title).
    """
    heading = page.find(id='firstHeading')
    return heading.getText()
def get_explanation_url(comic_number):
    """Build the link to the Explanation section of a comic's wiki page.

    Args:
        comic_number: Comic number inserted into the explainxkcd URL.

    Returns:
        The wiki page URL with the ``#Explanation`` fragment appended.
    """
    wiki_root = 'http://www.explainxkcd.com/wiki/index.php/'
    return '{}{}#Explanation'.format(wiki_root, comic_number)
def get_title_text(page):
    """Return what follows the ``'Title text:'`` label on the page.

    Args:
        page: Parsed explainxkcd page.

    Returns:
        ``str()`` of the element parsed immediately after the
        ``'Title text:'`` string.
    """
    label = page.find(text='Title text:')
    following = label.next_element
    return str(following)
def report_exception(reddit, comment, exception):
    """Send the developer a private message describing ``exception``.

    The exception is also printed. If building or sending the message fails,
    that failure is printed and the process exits.

    Args:
        reddit: Reddit client used to message ``config.developer``.
        comment: Comment that was being processed, or ``None`` when no
            comment had been read yet.
        exception: The exception to report.

    Raises:
        SystemExit: With status 1 when the report cannot be sent.
    """
    try:
        print(exception)
        # main() starts with no current comment, so an error raised before the
        # first comment arrives reaches this function with comment=None.
        # Reading .permalink on it would fail and abort the bot without a report.
        if comment is None:
            permalink = 'n/a (no comment processed yet)'
        else:
            permalink = comment.permalink
        error_msg = ('{} Error report: \n\n'
                     'Comment: {} \n\n'
                     'Error: {}').format(config.username, permalink, exception)
        subject = '{} Error report'.format(config.username)
        reddit.redditor(config.developer).message(subject, error_msg)
    except Exception as e:
        print(e)
        # quit() is injected by the site module for interactive use and may be
        # missing; raising SystemExit always works.
        raise SystemExit(1)
def report_shutdown(reddit):
    """Tell the developer by private message that the bot is shutting down.

    Args:
        reddit: Reddit client used to message ``config.developer``.

    Raises:
        SystemExit: With status 1 when the message cannot be sent.
    """
    try:
        error_msg = ('{} Error report: \n\n'
                     'Shutting down').format(config.username)
        subject = '{} Error report'.format(config.username)
        reddit.redditor(config.developer).message(subject, error_msg)
    except Exception as e:
        print(e)
        # quit() is injected by the site module for interactive use and may be
        # missing; raising SystemExit always works.
        raise SystemExit(1)
# Matches "xkcd.com/<digits>" and captures the digits; the lookbehind skips
# what-if.xkcd.com links. Compiled once as a raw string so the backslash
# escapes reach the regex engine unchanged.
_XKCD_LINK = re.compile(r'(?<!what-if.)xkcd\.com\/(\d*).*')

# Number of reported errors after which the bot shuts itself down.
_MAX_ERRORS = 10


def _find_comic_number(body):
    """Return the comic number of the first xkcd link in ``body``, else None.

    Only the first match is considered; a first link without a number also
    yields ``None``.
    """
    matches = _XKCD_LINK.findall(body)
    if not matches or matches[0] == '':
        return None
    return matches[0]


def _build_reply(comic_number):
    """Fetch the explainxkcd page for ``comic_number`` and format the reply."""
    html = get_HTML(comic_number)
    page = soup(html, 'html5lib')
    title = get_title(page)
    image = get_image_link(title)
    transcript = get_transcript(page)
    title_text = get_title_text(page)
    explanation_url = get_explanation_url(comic_number)
    return COMMENT_BODY.format(title, image, title_text, transcript,
                               explanation_url, config.developer)


def main():
    """Run the bot: watch /r/all comments and reply to xkcd links.

    Comments in subreddits whose prefixed name contains ``xkcd`` and comments
    that identify themselves as a bot are skipped. Any exception raised while
    streaming or replying is reported to the developer and the stream is
    restarted; after ``_MAX_ERRORS`` reported errors the bot reports the
    shutdown and exits.

    Raises:
        SystemExit: With status 1 once the error limit is reached.
    """
    current_comment = None
    errors = 0
    reddit = bot_login()
    while True:
        try:
            # Stream comments on /r/all, skipping those that existed before
            # the bot started running.
            for comment in reddit.subreddit('all').stream.comments(skip_existing=True):
                # Keep the comment around for the exception handler below.
                current_comment = comment
                if 'xkcd' in comment.subreddit_name_prefixed:
                    # These subs have their own bots.
                    continue
                body_lower = comment.body.lower()
                if 'i am a bot' in body_lower or "i'm a bot" in body_lower:
                    # Don't reply to ourselves or to other bots.
                    continue
                comic_number = _find_comic_number(comment.body)
                if comic_number is None:
                    # Not referencing a numbered xkcd comic.
                    continue
                reply = _build_reply(comic_number)
                try:
                    comment.reply(reply)
                except Forbidden:
                    # Banned from this subreddit.
                    continue
                print('Replied to comment {}'.format(comment.permalink))
        except Exception as e:
            report_exception(reddit, current_comment, e)
            errors += 1
            if errors >= _MAX_ERRORS:
                report_shutdown(reddit)
                # quit() is meant for interactive sessions and may be missing;
                # SystemExit is always available.
                raise SystemExit(1)
# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()