-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
66 lines (53 loc) · 1.83 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from os import walk
import re
import mmap
import struct
import plotly.express as px
import pandas as pd
from datetime import datetime
# Byte pattern for the stored URL: the marker FF FF "url", 13 bytes that are
# skipped, then the shortest run of bytes that is followed by 09 00.
urlRegex = re.compile(b'(?<=\xFF\xFF\x75\x72\x6C[\x00-\xFF]{13})[\x00-\xFF]+?(?=\x09\x00)')
# Byte pattern for the stored timestamp: the 8 bytes that come right after the
# marker FF FF "timeStamp" and seven 00 bytes.
timestampRegex = re.compile(b'(?<=\xFF\xFF\x74\x69\x6D\x65\x53\x74\x61\x6D\x70\x00\x00\x00\x00\x00\x00\x00)[\x00-\xFF]{8}')
# URL prefix removed from every parsed URL to obtain the resource name.
completeUrl = "http://7wcnwsu3oqzm7b4zgputusb4yxng5memwchkovasee4u2nwbfnuaieqd.onion/"
# Running record number; parseFile() increments it before use, so the first
# parsed file gets Id 0.
id = -1


def parseFile(filename):
    """Extract the URL and timestamp stored in ``data/<filename>``.

    The file is scanned with ``urlRegex`` and ``timestampRegex``. When a
    pattern matches several times, the last match wins (for URLs: the last
    match that decodes successfully). Every call increments the module-level
    ``id`` counter, whether or not anything was found.

    Args:
        filename: Name of a file inside the ``data/`` directory.

    Returns:
        A ``(plot_row, csv_row)`` tuple of dicts:

        * ``plot_row`` has ``Id`` (the counter value), ``Time`` (a naive
          ``datetime`` built from the timestamp divided by 1000), ``Y`` (the
          integer name of a ``.png`` resource, otherwise 0) and ``Resource``
          (the URL with ``completeUrl`` removed).
        * ``csv_row`` has ``Time`` (the raw integer timestamp) and
          ``Resource``.

        If nothing matched, "Not found <filename>" is printed and the
        defaults (timestamp 0, empty resource) are returned.
    """
    global id
    id += 1

    with open("data/" + filename, 'rb') as f:
        # mmap.mmap() raises ValueError for a zero-length file; treat an empty
        # file as "nothing matched" instead of aborting the whole run.
        if f.seek(0, 2) == 0:  # whence=2 seeks to the end and returns the size
            urlChunks = []
            timeChunks = []
        else:
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
                # group(0) copies the matched bytes out of the mapping, and
                # the finditer iterators are gone once each comprehension
                # finishes, so nothing still references the buffer when the
                # mapping is closed on leaving this block.
                urlChunks = [match.group(0) for match in urlRegex.finditer(m)]
                timeChunks = [match.group(0) for match in timestampRegex.finditer(m)]

    url = ""
    for chunk in urlChunks:
        try:
            url = chunk.decode("utf-16").replace('\x00', '')
        except UnicodeDecodeError:
            # Keep the previously decoded URL (if any) and report the bad one.
            print(chunk)
            print("Parsing error " + filename)

    time = 0
    for chunk in timeChunks:
        # The timestamp is stored as a little-endian double.
        time = int(struct.unpack("<d", chunk)[0])

    if not urlChunks or not timeChunks:
        print("Not found " + filename)

    resource = url.replace(completeUrl, "")
    # fromtimestamp() takes seconds, so the stored value is divided by 1000.
    start = datetime.fromtimestamp(time / 1000)

    y = 0
    if ".png" in resource:
        try:
            y = int(resource.replace(".png", ""))
        except ValueError:
            # A .png whose name is not a plain integer keeps Y at 0 rather
            # than crashing the run.
            print("Non-numeric image name " + resource + " in " + filename)

    return (dict(Id=id, Time=start, Y=y, Resource=resource), dict(Time=time, Resource=resource))
# Driver: parse every file that sits directly inside data/, export the raw
# (Time, Resource) rows to CSV, and show a scatter plot of the parsed rows.

# walk() yields (dirpath, dirnames, filenames); only the top-level file list
# of data/ is needed.
filenames = next(walk("data/"))[2]

data = []
dataCSV = []
for filename in filenames:
    plotRow, csvRow = parseFile(filename)
    data.append(plotRow)
    dataCSV.append(csvRow)
print(data)

df = pd.DataFrame(data)
dfCsv = pd.DataFrame(dataCSV)
dfCsv.to_csv("data_exported.csv")

fig = px.scatter(df, x="Time", y="Y", color="Resource")
fig.show()