-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrobot.py
148 lines (125 loc) · 4.75 KB
/
robot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# coding=utf-8
# !/usr/bin/env python
import os
import logging
import time
import math
import threading
import ConfigParser
import pymongo
import requests
from flask import Flask, render_template
import base62
from weibo import Client
# Suppress warnings emitted by the urllib3 copy bundled with requests.
requests.packages.urllib3.disable_warnings()
# Root logger: INFO and above, with timestamp, logger name, line and level.
logging.basicConfig(
level=logging.INFO,
format="[%(asctime)s] %(name)s:%(lineno)d:%(levelname)s: %(message)s")
# Settings are read from config.ini located next to this file.
conf = ConfigParser.ConfigParser()
conf.read(os.path.abspath(os.path.dirname(__file__)) + '/config.ini')
app = Flask(__name__)
# weibo_url() splits a mid into chunks of 7 decimal digits (mid % N); any
# value below 10**7 fits in 4 base62 characters because 62**4 = 14776336.
N = 10**7
# Number of weibos listed on one page by show().
pagenum = 10
def _datetime(x=None):
return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(x))
def weibo_url(uid, mid):
    """Build the public weibo.com URL for a post.

    The numeric ``mid`` is cut, from the right, into chunks of 7 decimal
    digits (``N``); each chunk is base62-encoded and the pieces are joined
    most-significant first.

    Args:
        uid: Id of the post's author; used as the first path segment.
        mid: Numeric id of the post, as an ``int``.

    Returns:
        A unicode string ``http://weibo.com/<uid>/<encoded mid>``. A ``mid``
        of 0 yields an empty last segment.
    """
    # NOTE(review): confirm that base62.encode uses the alphabet Weibo
    # expects for its short ids.
    pieces = []
    while mid:
        # divmod floors on both Python 2 and 3, so mid stays an integer
        # (a plain "/" would turn it into a float under true division).
        mid, chunk = divmod(mid, N)
        encoded = base62.encode(chunk)
        if mid:
            # Every chunk except the leading one must be exactly 4 characters
            # wide; without the zero padding a small chunk shortens the id
            # and different mids could produce the same string.
            encoded = encoded.rjust(4, '0')
        pieces.append(encoded)
    pieces.reverse()
    return u'http://weibo.com/{0}/{1}'.format(uid, ''.join(pieces))
# Shared MongoDB connection; host and port come from the [mongo] section of
# the loaded configuration.
mongo = pymongo.MongoClient(
    host=conf.get('mongo', 'host'),
    port=conf.getint('mongo', 'port'))
class MyClient(Client):
    """Weibo client that archives mentioned posts into MongoDB.

    ``run`` polls the ``comments/mentions`` endpoint and stores the weibo each
    mentioning comment belongs to in the ``sinaweibo.weibos`` collection.
    """

    def __init__(self,
                 api_key,
                 api_secret,
                 redirect_uri,
                 token=None,
                 username=None,
                 password=None):
        """Pass the credentials on to ``Client`` and bind the collection."""
        super(MyClient, self).__init__(api_key, api_secret, redirect_uri,
                                       token, username, password)
        self.weibos = mongo.sinaweibo.weibos
        # Template for thumbnail URLs; {0} is filled with a picture id.
        self._picurl = 'http://ww1.sinaimg.cn/thumbnail/{0}.jpg'

    def run(self):
        """Fetch mentions periodically; loops forever, sleeping one hour.

        A failed fetch is logged and retried on the next cycle. A failure
        while handling a single comment is logged and the remaining comments
        are still processed, so one bad record cannot stop the polling.
        """
        times = 1
        while True:
            try:
                comments = self.get('comments/mentions')['comments']
            except Exception as e:
                logging.error(e, exc_info=True)
            else:
                logging.info('scrapy %s times.', times)
                times += 1
                for comment in comments:
                    try:
                        self._save_mention(comment)
                    except Exception as e:
                        # Keep the polling thread alive; skip this comment.
                        logging.error(e, exc_info=True)
            time.sleep(3600)

    def _save_mention(self, comment):
        """Store the weibo behind one mentioning comment, if it is new."""
        # Filtering out mentions from unknown users (by comment author name)
        # is currently disabled.
        status = comment['status']
        # For a repost, archive the original weibo rather than the repost.
        origin = status[
            'retweeted_status'] if 'retweeted_status' in status else status
        if origin.get('deleted') == '1':
            # The weibo has been deleted.
            return
        if self.weibos.find_one({'id': origin['id']}):
            # The weibo is already saved.
            return
        logging.info(u'new weibo: {0}, time: {1}, @ by {2}'.format(
            origin['id'], _datetime(), comment['user']['name']))
        self.weibos.insert_one(self._build_record(comment, status, origin))

    def _build_record(self, comment, status, origin):
        """Assemble the MongoDB document for one archived weibo."""
        record = {
            'id': origin['id'],
            'text': origin['text'],
            'author': origin['user']['name'],
            'addtime': self._format(status['created_at']),
            'at_time': self._format(comment['created_at']),
            'at_by': comment['user']['name'],
            'url': weibo_url(origin['user']['id'],
                             int(origin['mid'])),
        }
        if origin.get('pic_ids'):
            record['pics'] = [
                self._picurl.format(pic_id) for pic_id in origin['pic_ids']
            ]
        return record

    def _format(self, t):
        """Convert a ``created_at`` string to a Unix timestamp.

        Args:
            t: Text such as ``'Tue May 31 17:46:55 +0800 2011'``.

        Returns:
            Seconds since the epoch as a float.

        Raises:
            ValueError: If ``t`` does not match the expected layout.
        """
        # NOTE(review): the "+0800" offset is matched literally and the
        # parsed time is interpreted in this process's local timezone.
        return time.mktime(time.strptime(t, '%a %b %d %H:%M:%S +0800 %Y'))
@app.route('/')
def index():
    """Serve the first page of saved weibos at the site root.

    ``show`` is called directly, which acts like a server-side forward; the
    alternative would be a client-side redirect to the ``show`` URL for
    page 1.
    """
    first_page = 1
    return show(first_page)
@app.route('/page/<page>')
def show(page):
    """Render one page of saved weibos, most recently mentioned first.

    Args:
        page: 1-based page number; a string when it comes from the URL, or
            an int when the function is called directly.

    Returns:
        The rendered ``weibo.html`` template.

    Responds with HTTP 404 when ``page`` is not an integer or is below 1.
    """
    # Imported here so the function does not depend on the module-level
    # import list.
    from flask import abort

    try:
        page = int(page)
    except (TypeError, ValueError):
        abort(404)
    if page < 1:
        # A non-positive page would give a negative skip value below.
        abort(404)

    collection = mongo.sinaweibo.weibos
    weibo_count = collection.count()
    total_page = int(math.ceil(float(weibo_count) / pagenum))
    skip = (page - 1) * pagenum
    # The template receives False when there is no next / previous page.
    next_page = page + 1 if page * pagenum < weibo_count else False
    prev_page = page - 1 if page > 1 else False
    weibos = collection.find().sort(
        'at_time', pymongo.DESCENDING).skip(skip).limit(pagenum)
    return render_template(
        'weibo.html',
        weibos=weibos,
        next=next_page,
        last=prev_page,
        page=page,
        total_page=total_page)
@app.context_processor
def utility_processor():
    """Make ``_datetime`` available in templates under the name ``datetime``."""
    return dict(datetime=_datetime)
if __name__ == '__main__':
    # Weibo credentials come from the [weibo] section of the configuration.
    api_key = conf.get('weibo', 'api_key')
    api_secret = conf.get('weibo', 'api_secret')
    redirect_uri = conf.get('weibo', 'redirect_uri')
    username = conf.get('weibo', 'username')
    password = conf.get('weibo', 'password')
    c = MyClient(
        api_key,
        api_secret,
        redirect_uri,
        username=username,
        password=password)
    # Poll for mentions in a background thread. It is a daemon thread so it
    # does not keep the process alive once the web server exits.
    t = threading.Thread(target=c.run, args=())
    t.daemon = True  # attribute form; Thread.setDaemon() is deprecated
    t.start()
    app.run()