rss_single_crawler.py
# coding: UTF-8
# Crawl a single RSS feed and emit its title, link, and items as JSON.
import getopt
import json
import sys

import feedparser
class RssSingleCrawler:
    def transJson(self, rss):
        # Build the result as a dict and serialize it with json.dumps so that
        # quotes, backslashes, and non-ASCII text are escaped correctly and a
        # feed with no entries still yields a valid (empty) itemList.
        result = {
            'title': rss.feed.get('title', ''),
            'link': rss.feed.get('link', ''),
            'subtitle': '',
            'isSuccess': True,
            'itemList': [
                {'text': entry.get('title', ''), 'href': entry.get('link', '')}
                for entry in rss.entries
            ],
        }
        return json.dumps(result, ensure_ascii=False)

    def crawlRssByUrl(self, url):
        # Fetch and parse the feed; on failure return a JSON error object
        # rather than a bare numeric flag, so the caller always gets a string.
        try:
            rss = feedparser.parse(url)
            return self.transJson(rss)
        except Exception:
            return json.dumps({'isSuccess': False})


if __name__ == '__main__':
    # Parse a single --url option, e.g.:
    #   python rss_single_crawler.py --url http://example.com/feed.xml
    opts, args = getopt.getopt(sys.argv[1:], '', ['url='])
    url = None
    for name, value in opts:
        if name == '--url':
            url = value
    if url is not None:
        crawler = RssSingleCrawler()
        sys.stdout.write(crawler.crawlRssByUrl(url))
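
# Illustrative programmatic use (a sketch; the feed URL below is a placeholder):
#
#   from rss_single_crawler import RssSingleCrawler
#
#   crawler = RssSingleCrawler()
#   print(crawler.crawlRssByUrl("https://example.com/feed.xml"))
#   # -> {"title": "...", "link": "...", "subtitle": "", "isSuccess": true,
#   #     "itemList": [{"text": "...", "href": "..."}, ...]}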