-
Notifications
You must be signed in to change notification settings - Fork 2
/
import.py
66 lines (53 loc) · 2.09 KB
/
import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
"""OPML importer"""
from asyncio import (Semaphore, ensure_future, gather, get_event_loop,
set_event_loop_policy)
from config import (DATABASE_NAME, DATE_FORMAT, FETCH_INTERVAL, MONGO_SERVER,
log)
from datetime import datetime
from time import sleep, strftime, time
from xml.etree import ElementTree
from aiohttp import ClientSession
from common import REDIS_NAMESPACE, connect_redis, safe_id
from motor.motor_asyncio import AsyncIOMotorClient
from pymongo import ASCENDING, DESCENDING
from pymongo.errors import DuplicateKeyError
from uvloop import EventLoopPolicy
def feeds_from_opml(filename):
    """Extract useful attributes from OPML.

    Parses the document with ElementTree and walks every ``outline``
    element at any depth, so feeds nested inside other outlines are
    included. Outlines without a non-empty ``xmlUrl`` attribute are skipped.

    Args:
        filename: Path or file object accepted by ``ElementTree.parse``.

    Yields:
        dict: ``{'title': ..., 'url': ...}`` for each outline that has an
        ``xmlUrl``. ``title`` is the outline's ``title`` attribute; when
        that is missing or empty and a non-empty ``text`` attribute is
        present, ``text`` is used instead. If neither is usable the value
        of ``title`` is passed through unchanged (``None`` when absent).
    """
    tree = ElementTree.parse(filename)
    for outline in tree.findall('.//outline'):
        url = outline.get('xmlUrl')
        if not url:
            # No feed URL on this outline, so there is nothing to import.
            continue

        title = outline.get('title')
        text = outline.get('text')
        if not title and text:
            # `title` is optional in OPML while `text` is the outline's
            # primary label, so fall back to it rather than yield None.
            title = text

        yield {'title': title, 'url': url}
async def update_database(db, filename):
    """Create indexes and import feeds.

    Creates the indexes on the ``entries`` and ``feeds`` collections,
    inserts every feed from the OPML file whose ``url`` is not already
    stored, then writes the total number of feed documents to the
    ``feed_count`` field of the Redis status hash.

    Args:
        db: Database handle exposing ``entries`` and ``feeds`` collections
            with awaitable ``create_index`` / ``find_one`` / ``insert_one``
            / ``count_documents`` methods.
        filename: OPML source passed straight to ``feeds_from_opml``.
    """
    entries = db.entries
    await entries.create_index([("date", DESCENDING)])
    await entries.create_index([("url", ASCENDING)])

    feeds = db.feeds
    await feeds.create_index([("url", ASCENDING)])

    # TODO: turn this into a bulk upsert
    for feed in feeds_from_opml(filename):
        if await feeds.find_one({'url': feed['url']}):
            # Already imported; leave the stored document untouched.
            continue

        # Pass the argument lazily so nothing is formatted unless debug
        # logging is actually enabled.
        log.debug("Inserting %s", feed)
        document = dict(feed, **{
            '_id': safe_id(feed['url']),
            'created': datetime.now(),
            # Epoch placeholder -- presumably marks the feed as never
            # fetched; confirm against the fetcher.
            'last_fetched': datetime(1970, 1, 1)
        })
        try:
            await feeds.insert_one(document)
        except DuplicateKeyError as exc:
            # The find_one check above is keyed on `url`, but `_id` comes
            # from safe_id(url), so an insert can still collide; such
            # collisions are logged and skipped.
            log.debug(exc)

    # NOTE(review): the Redis connection is not closed here -- confirm
    # whether connect_redis() returns something that needs releasing.
    redis = await connect_redis()
    feed_count = await feeds.count_documents({})
    await redis.hset(REDIS_NAMESPACE + 'status', 'feed_count', feed_count)
if __name__ == '__main__':
    # Install the uvloop policy first so the loop requested below is
    # created by it.
    set_event_loop_policy(EventLoopPolicy())
    event_loop = get_event_loop()

    mongo_client = AsyncIOMotorClient(MONGO_SERVER)
    database = mongo_client[DATABASE_NAME]

    try:
        # Run the one-shot import of the OPML file in the working directory.
        import_job = update_database(database, 'feeds.opml')
        event_loop.run_until_complete(import_job)
    finally:
        # Close the loop whether or not the import succeeded.
        event_loop.close()