-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmongo_extract.py
97 lines (77 loc) · 2.61 KB
/
mongo_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import configparser
import csv
import datetime
from datetime import timedelta
from urllib.parse import quote_plus

import boto3
from pymongo import MongoClient
# Read the MongoDB connection settings from the [mongo_config] section of
# pipeline.conf.
parser = configparser.ConfigParser()
parser.read('pipeline.conf')
hostname = parser.get('mongo_config', 'hostname')
username = parser.get('mongo_config', 'username')
password = parser.get('mongo_config', 'password')
database_name = parser.get('mongo_config', 'database')
collection_name = parser.get('mongo_config', 'collection')

# Build the connection URI. The username and password are percent-escaped
# because reserved characters such as ":", "/", "@" or "%" inside a credential
# would otherwise be parsed as URI delimiters. Credentials in pipeline.conf
# must therefore be stored raw, not pre-escaped.
uri = (
    "mongodb://"
    + quote_plus(username)
    + ":" + quote_plus(password)
    + "@" + hostname
    + "/"
    + database_name
    + "?retryWrites=true&"
    + "w=majority&ssl=false&"
    + "authSource=admin"
)
mongo_client = MongoClient(uri)

# Select the database, then the collection the events are queried from.
mongo_db = mongo_client[database_name]
mongo_collection = mongo_db[collection_name]
# Query window: the 24 hours that end at the moment the script runs.
# NOTE(review): datetime.today() is a naive local-time value; confirm that
# event_timestamp is stored on the same clock.
end_date = datetime.datetime.today()
start_date = end_date - timedelta(days=1)

# Half-open range [start_date, end_date) on event_timestamp, expressed as two
# separate conditions joined with $and.
on_or_after_start = {"event_timestamp": {"$gte": start_date}}
before_end = {"event_timestamp": {"$lt": end_date}}
mongo_query = {"$and": [on_or_after_start, before_end]}

# The cursor fetches matching documents from the server 3000 at a time.
event_docs = mongo_collection.find(mongo_query, batch_size=3000)
# Walk the cursor and flatten each document into a three-column row:
# [event_id, event_timestamp, event_name]. A missing event_id becomes the
# string "-1"; a missing timestamp or name becomes None.
all_events = [
    [
        str(doc.get("event_id", -1)),
        doc.get("event_timestamp", None),
        doc.get("event_name", None),
    ]
    for doc in event_docs
]
# Write the extracted events to a pipe-delimited CSV file.
export_file = "export_file.csv"
# newline="" lets the csv module control line endings itself (without it,
# rows end in "\r\r\n" on Windows). An explicit UTF-8 encoding keeps the
# output independent of the machine's locale.
with open(export_file, "w", newline="", encoding="utf-8") as fp:
    csvw = csv.writer(fp, delimiter="|")
    csvw.writerows(all_events)
# No explicit fp.close() is needed: leaving the with-block closes the file.

# Release the MongoDB connection.
mongo_client.close()
# Read the AWS credentials and the destination bucket from the
# [aws_boto_credentials] section of the already-loaded configuration.
aws_section = 'aws_boto_credentials'
access_key = parser.get(aws_section, 'access_key')
secret_key = parser.get(aws_section, 'secret_key')
bucket_name = parser.get(aws_section, 'bucket_name')

s3 = boto3.client(
    's3',
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)

# Upload the export; the S3 object key is the same as the local file name.
s3_file = export_file
s3.upload_file(Filename=export_file, Bucket=bucket_name, Key=s3_file)