-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql2csv.py
24 lines (19 loc) · 882 Bytes
/
sql2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def obtain_raw(sql_dump):
    '''Return the raw row payload of the `w4grb_votes` INSERT statements.

    The dump at path ``sql_dump`` is scanned line by line for statements
    that start with "INSERT INTO `w4grb_votes` VALUES".  The returned string
    is everything after the opening parenthesis of the first row, still
    carrying the closing ");" of the statement, for example
    ``"1,2,5,'1.1.1.1',100),(3,4,1,'2.2.2.2',200);"``.

    When the table is spread over several INSERT statements, their rows are
    joined with "),(" so the result reads as if it came from one statement.

    Raises ValueError if the dump contains no such statement.
    '''
    prefix = "INSERT INTO `w4grb_votes` VALUES"
    payloads = []
    with open(sql_dump, 'r') as fin:
        for line in fin:
            if not line.startswith(prefix):
                continue
            # Slice by the known prefix instead of splitting on "VALUES (" so
            # that the same text appearing inside a value cannot cut the row
            # data short.
            rest = line[len(prefix):].strip()
            if not rest.startswith('('):
                continue
            payloads.append(rest[1:])
    if not payloads:
        raise ValueError(
            'no "%s" statement found in %s' % (prefix, sql_dump))
    # Drop the ");" terminator from every statement but the last, so the
    # chunks can be glued together with the regular "),(" row separator.
    merged = [chunk[:-2] if chunk.endswith(');') else chunk
              for chunk in payloads[:-1]]
    merged.append(payloads[-1])
    return '),('.join(merged)
def raw2list_of_tuples(raw):
    '''Split a raw VALUES payload into one list of field strings per vote.

    ``raw`` is the text that follows the first opening parenthesis of an
    INSERT statement, e.g. ``"1,2,5,'ip',100),(3,4,1,'ip',200);"``.  Rows are
    separated on "),(" and fields on ",".  The number of rows is printed as
    a side effect.  Fields are returned as the untouched substrings (quotes
    around SQL string values are kept).
    '''
    rows = raw.split('),(')
    # The final row still carries the statement terminator; remove it
    last_row = rows.pop()
    rows.append(last_row.replace(');', ''))
    print('total number of votes: %d' % len(rows))
    fields_per_row = []
    for row in rows:
        fields_per_row.append(row.split(','))
    return fields_per_row
if __name__ == '__main__':
    # Read the vote rows from the dump 'w4grb.sql', then write them to
    # 'rating_pid.csv' without the fourth (user IP) column.
    votes = raw2list_of_tuples(obtain_raw('w4grb.sql'))
    with open('rating_pid.csv', 'w') as csv_out:
        csv_out.write('userID,pageID,rating,time\n')
        # Each row must hold exactly five fields; the IP is unpacked but
        # deliberately not written out.
        csv_out.writelines(
            '%s,%s,%s,%s\n' % (user, page, score, stamp)
            for user, page, score, _ip, stamp in votes
        )