-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhal_findAll.py
33 lines (26 loc) · 893 Bytes
/
hal_findAll.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from libraries import hal
import threading
import pymongo
from datetime import datetime
def findAll(start=2177430, batch_size=12000):
    """Copy documents returned by ``hal.findByFilter`` into MongoDB in batches.

    A total is requested from ``hal.findByFilter`` first; then, while the
    current offset is below that total, a batch is fetched with the
    ``'shs.info'`` filter and inserted into the ``documents_w_files``
    collection of the ``hal`` database on ``mongodb://localhost:27017/``.
    After every batch the current time and ``Processing...`` are printed.

    Args:
        start: Offset to begin from. Set it to the number of documents
            already collected to resume an interrupted run (0 for a fresh
            run). The default is the offset that used to be hard-coded.
        batch_size: Amount by which the offset advances on each iteration.

    Raises:
        ValueError: If ``start`` is negative or ``batch_size`` is not
            positive (a non-positive step would never reach the total).
    """
    # Validate before any network or database access.
    if start < 0:
        raise ValueError("start must be non-negative")
    if batch_size <= 0:
        raise ValueError("batch_size must be positive")

    # NOTE(review): the total is requested with an empty filter while the
    # batches use 'shs.info'; presumably the third argument switches
    # findByFilter into "count only" mode -- confirm against libraries.hal.
    count = hal.findByFilter('', 0, True, 0)

    # One client for the whole run instead of a new, never-closed client per
    # batch; the context manager closes it even if a fetch or insert raises.
    with pymongo.MongoClient("mongodb://localhost:27017/") as server:
        col = server['hal']['documents_w_files']

        i = start
        while i < count:
            # NOTE(review): the last argument looks like the upper bound of
            # the requested window -- confirm against libraries.hal.
            articles = hal.findByFilter('shs.info', i, False, i + batch_size)
            i += batch_size

            # insert_many() raises on an empty list, so a window that
            # yielded nothing is skipped rather than aborting the run.
            if articles:
                col.insert_many(articles)

            print(datetime.now().strftime("%H:%M:%S"))
            print('Processing...')
if __name__ == '__main__':
    # Script entry point: run the import on its own thread.
    # Two tweaks tried earlier are kept here, still disabled:
    #   sys.setrecursionlimit(100000)
    #   threading.stack_size(64*1024)
    worker = threading.Thread(target=findAll)
    worker.start()