-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathactivity
executable file
·437 lines (384 loc) · 13.2 KB
/
activity
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
#!/usr/bin/env python3
#
# activity TummyTrials activity summary for one TractDB account
#
import sys
import io
import datetime
import json
# Number of milliseconds in one 24-hour day (24 h * 60 min * 60 s * 1000 ms).
g_msecpd = 24 * 60 * 60 * 1000 # Millisec per day
def usage():
    """Print the command-line synopsis to stderr and exit with status 1."""
    synopsis = (
        'usage: activity account-json [ trial-id ]',
        ' trial-id Couch trial id; none = list trials',
    )
    for text in synopsis:
        print(text, file=sys.stderr)
    sys.exit(1)
def is_trial(doc):
    """Return True iff doc is a TummyTrials trial with start and end times.

    A trial has doctype name 'activity', datatype name
    'tummytrials_experiment', and both a 'start_time' and an 'end_time'
    key. Anything that does not have that shape (missing keys, or a value
    that cannot be indexed by key) is reported as False.
    """
    try:
        return (doc['doctype']['name'] == 'activity'
                and doc['datatype']['name'] == 'tummytrials_experiment'
                and 'start_time' in doc
                and 'end_time' in doc)
    except (KeyError, IndexError, TypeError):
        # Only malformed documents are treated as "not a trial"; unrelated
        # exceptions such as KeyboardInterrupt must propagate.
        return False
def is_log(doc):
    """Return True iff doc is a TummyTrials log with a start time and messages.

    A log has doctype name 'log', datatype name 'tummytrials_log', and both
    a 'start_time' and a 'messages' key. Anything that does not have that
    shape (missing keys, or a value that cannot be indexed by key) is
    reported as False.
    """
    try:
        return (doc['doctype']['name'] == 'log'
                and doc['datatype']['name'] == 'tummytrials_log'
                and 'start_time' in doc
                and 'messages' in doc)
    except (KeyError, IndexError, TypeError):
        # Only malformed documents are treated as "not a log"; unrelated
        # exceptions such as KeyboardInterrupt must propagate.
        return False
def get_documents(jsonfn):
    """Load the list of account documents from a JSON file.

    jsonfn is the path of the account JSON file. The top-level JSON value
    must be a list; it is returned as parsed. If it is anything else, an
    error message is printed to stderr and the program exits with status 1.
    Errors from opening or parsing the file propagate to the caller.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # locale's default encoding.
    with io.open(jsonfn, encoding='utf-8') as f:
        res = json.load(f)
    if not isinstance(res, list):
        print('activity: ill formed account json file:', jsonfn,
              file=sys.stderr)
        sys.exit(1)
    return res
def list_trials(docs):
    """Print one line per trial in docs, ordered by ascending start time.

    Each line shows the trial's '_id', the calendar date of its
    'start_time' (converted with datetime.date.fromtimestamp, i.e. in local
    time), and its 'name'.
    """
    ordered = sorted((d for d in docs if is_trial(d)),
                     key=lambda d: d['start_time'])
    for trial in ordered:
        ident = trial['_id']
        started = datetime.date.fromtimestamp(trial['start_time'])
        print('{:20} {!s:10} {}'.format(ident, started, trial['name']))
def find_cur_trial(docs):
    """Return the trial with the latest 'start_time', or None if none exist.

    When several trials share the latest start time, the first one
    encountered in docs is returned.
    """
    trials = [d for d in docs if is_trial(d)]
    if not trials:
        return None
    # max() keeps the first of equal maxima, like a strict '>' scan.
    return max(trials, key=lambda d: d['start_time'])
def find_trial(docs, id):
    """Return the first trial in docs whose '_id' equals id, else None."""
    matching = (d for d in docs if is_trial(d) and d['_id'] == id)
    return next(matching, None)
def find_following_trial(docs, trial):
    """Return the earliest trial that starts at or after trial's end.

    A candidate is any trial in docs whose 'start_time' is >= the given
    trial's 'end_time'. Of those, the one with the smallest start time is
    returned (the first encountered on ties), or None if there is none.
    """
    later = [d for d in docs
             if is_trial(d) and d['start_time'] >= trial['end_time']]
    if not later:
        return None
    # min() keeps the first of equal minima, like a strict '<' scan.
    return min(later, key=lambda d: d['start_time'])
def find_log_docs(docs):
    """Return the TummyTrials log documents found in docs, in order."""
    return [d for d in docs if is_log(d)]
# A log message looks like this:
#
# Category~DATE [SEVERITY] rest of message
#
# Categories right now are: Activity, Navigation
#
# DATE has different forms, probably depending on iOS preferences
# settings.
#
# SEVERITY is the severity level of the message. Currently the logging
# module uses only the INFO level.
#
# Rest of message has whitespace-separated fields. Assume any new fields
# will be added at the end. Right now for Activity messages there are
# the following:
#
# 0 Timestamp
# 1 URL of currently active page
# 2 Trial id
# 3 Event type (appActive, appPause, etc.)
#
#
# Navigation messages have the following rest of message:
#
# 0 Timestamp
# 1 URL of currently active page
# 2 Trial id
# 3 Event type ($locationChangeSuccess)
# 4 Timestamp of navigational touch
# 5 Attribute that did navigation (ui-sref)
# 6 Containing HTML element (A)
# 7+ Text inside the HTML element (quoted string)
#
def log_line_parse(line):
    """Split one log line into (category, rest).

    A line has the layout "Category~DATE [SEVERITY] rest of message".
    Category is returned with surrounding whitespace stripped; rest is the
    text after the first ']' following the '~', split on whitespace into a
    list of strings. Returns None if the line has no '~' or no ']' after it.

    For simplicity the message is just split on whitespace. Later, might
    want to parse the quoted text field of Navigation messages more
    carefully.
    """
    # Split on the FIRST '~' only: the category never contains one, but the
    # message (e.g. a URL or link text) may, and must not be truncated.
    category, tilde, body = line.partition('~')
    if not tilde:
        return None
    close = body.find(']')  # Skip past severity
    if close < 0:
        return None
    return (category.strip(), body[close + 1:].split())
def end_of_log(logs):
    """Return the timestamp just past the last parseable log message.

    The result is one more than the largest leading timestamp field
    (millisec since Epoch) over all messages that log_line_parse accepts.
    With no such messages the result is 0 (i.e., Jan 1970).
    """
    latest = -1
    parsed_lines = (log_line_parse(msg)
                    for log in logs
                    for msg in log['messages'])
    for parsed in parsed_lines:
        if parsed is None:
            continue
        _category, fields = parsed
        latest = max(latest, int(fields[0]))
    return latest + 1
def active_app_intervals(logs, start, end):
    """Return the intervals during which the app was active.

    Only 'Activity' messages are considered. Events with start <= ts < end
    are sorted by timestamp and reduced to alternating start/stop edges.
    The result is a list of (start, duration) tuples: starts are millisec
    epoch values, durations are in millisec.

    A leading stop edge (no matching start in range) is dropped. A trailing
    start edge is closed by the earliest stop event at or after `end`,
    provided it falls within the close limit of that start; otherwise the
    trailing start is dropped.
    """
    start_events = ('appActive', 'appResume', 'appStartup')
    stop_events = ('appPause', 'appResign')

    # Allow final app close up to 10 min after last open
    close_limit = 600000

    in_range = []
    closer = None  # Earliest stop event at or after `end`, if any
    for log in logs:
        for msg in log['messages']:
            parsed = log_line_parse(msg)
            if parsed is None:
                continue
            category, fields = parsed
            if category != 'Activity':
                continue
            ts = int(fields[0])
            if start <= ts < end:
                in_range.append((ts, fields[3]))
            elif ts >= end and fields[3] in stop_events:
                if closer is None or ts < closer[0]:
                    closer = (ts, fields[3])
    in_range.sort(key=lambda event: event[0])

    # Start in the state opposite to what the first event would establish,
    # so that the first start event (if it comes first) is kept as an edge.
    active = not (in_range and in_range[0][1] in start_events)
    edges = []
    for event in in_range:
        # Only an event that flips the current state is an edge.
        flips = stop_events if active else start_events
        if event[1] in flips:
            edges.append(event)
            active = not active

    # Make the edge list begin with a start and end with a stop.
    if edges and edges[0][1] in stop_events:
        edges.pop(0)
    if edges and edges[-1][1] in start_events:
        if closer is not None and closer[0] < edges[-1][0] + close_limit:
            edges.append(closer)
        else:
            edges.pop()

    return [(opened[0], closed[0] - opened[0])
            for opened, closed in zip(edges[0::2], edges[1::2])]
def visited_pages(logs, start, end):
    """Return the pages visited on each day of the interval [start, end).

    start and end are milliseconds since Epoch. Only 'Navigation' messages
    whose timestamp lies in the interval are counted. The interval is cut
    into g_msecpd-long days starting at `start` (the last may be partial).
    The return value looks like this:

        [
            [ { 'url': URL, 'visits': count }, ... ],  # Pages of day 1
            [ { 'url': URL, 'visits': count }, ... ],  # Pages of day 2
            ...
        ]

    Within a day, URLs appear in the order they were first seen.
    """
    # Round the number of days up so a partial last day gets its own slot.
    day_count = int((end - start + g_msecpd - 1) / g_msecpd)
    days = [[] for _ in range(day_count)]
    for log in logs:
        for msg in log['messages']:
            parsed = log_line_parse(msg)
            if parsed is None:
                continue
            category, fields = parsed
            if category != 'Navigation':
                continue
            ts = int(fields[0])
            if not (start <= ts < end):
                continue
            pages = days[int((ts - start) / g_msecpd)]
            url = fields[1]
            entry = next((p for p in pages if p['url'] == url), None)
            if entry is None:
                pages.append({ 'url': url, 'visits': 1 })
            else:
                entry['visits'] = entry['visits'] + 1
    return days
def time_of_day(ms):
    """Format milliseconds since midnight as a 12-hour clock string.

    ms is rounded to whole seconds. The result looks like "9:30:00 am".
    The suffix is 'midnight' for exactly 0 s, 'noon' for exactly 12 h,
    'am' before noon and 'pm' after. Hour 0 is shown as 12.

    Note: values of 24 hours or more are not wrapped; 12 hours are simply
    subtracted once for anything at or past noon.
    """
    total = round(ms / 1000)
    half_day = 12 * 60 * 60
    if total == 0:
        suffix = 'midnight'
    elif total == half_day:
        suffix = 'noon'
    else:
        suffix = 'am' if total < half_day else 'pm'
    clock = total - half_day if total >= half_day else total
    seconds = clock % 60
    clock = int(clock / 60)
    minutes = clock % 60
    hours = int(clock / 60) or 12  # Hour 0 reads as 12 on a 12-hour clock
    return '{:d}:{:0>2d}:{:0>2d} {}'.format(hours, minutes, seconds, suffix)
def activity(trial, fotrial, logs):
    """Summarize activity from the start of trial to the following trial.

    The report period runs from trial's start to the start of fotrial, or
    to the end of the logs when fotrial is falsy. It is cut into
    g_msecpd-long days beginning at the trial start. Returns one dict per
    day, with these keys:

        day, month, year
        reminder_times    { type => time of day }, types from the trial's
                          'remdescrs' entries
        reports           { type => { 'time': time of day,
                                      'value': value of report } }
                          types: morning, breakfast, lunch, symptom
        report_histories  { type => list of time of day }
                          types: breakfast, lunch, symptom
        active            list of (time of day, seconds of duration)
        visited_pages     list of { 'url': url, 'visits': count }

    A time of day looks like this: "9:30:00 am".
    """
    def clock(midn, ts):
        # midn (start of the report day) and ts are both Unix seconds; the
        # difference is scaled to millisec for time_of_day.
        return time_of_day((ts - midn) * 1000)

    # (summary type, key holding the report time, key holding the value)
    report_fields = (
        ('morning', 'morning_reminded_time', 'morning_reminded'),
        ('breakfast', 'breakfast_report_time', 'breakfast_compliance'),
        ('lunch', 'lunch_report_time', 'lunch_compliance'),
        ('symptom', 'symptom_report_time', 'symptom_scores'),
    )
    # (summary type, key holding the list of earlier reports)
    history_fields = (
        ('breakfast', 'breakfast_history'),
        ('lunch', 'lunch_history'),
        ('symptom', 'symptom_history'),
    )

    # Our report period is from the start of the trial to the start of
    # the next trial (or the end of the logs if there's no following
    # trial).
    startms = trial['start_time'] * 1000
    endms = fotrial['start_time'] * 1000 if fotrial else end_of_log(logs)

    intervals = active_app_intervals(logs, startms, endms)
    next_interval = 0  # Intervals are consumed in order across the days
    vpages = visited_pages(logs, startms, endms)

    summary = []
    for dx in range(round((endms - startms) / g_msecpd)):
        midn = round((startms + dx * g_msecpd) / 1000)
        date = datetime.date.fromtimestamp(midn)
        dayob = {'day': date.day, 'month': date.month, 'year': date.year}

        dayob['reminder_times'] = {
            rd['type']: time_of_day(rd['time'] * 1000)
            for rd in trial.get('remdescrs', ())
        }

        reports = {}
        histories = {}
        if 'reports' in trial and len(trial['reports']) > dx:
            repdx = trial['reports'][dx]
            if repdx is None:
                repdx = {}
            for kind, time_key, value_key in report_fields:
                if time_key in repdx:
                    reports[kind] = {'time': clock(midn, repdx[time_key]),
                                     'value': repdx[value_key]}
            # 'confirmed_time' is deliberately not summarized.
            for kind, history_key in history_fields:
                if history_key in repdx:
                    histories[kind] = [clock(midn, past['report_time'])
                                       for past in repdx[history_key]]
        dayob['reports'] = reports
        dayob['report_histories'] = histories

        # Take every not-yet-used interval that starts before tomorrow.
        tomw = midn * 1000 + g_msecpd
        active = []
        while (next_interval < len(intervals)
               and intervals[next_interval][0] < tomw):
            began, lasted = intervals[next_interval]
            active.append((time_of_day(began - (midn * 1000)), lasted / 1000))
            next_interval += 1
        dayob['active'] = active

        dayob['visited_pages'] = vpages[dx]
        summary.append(dayob)
    return summary
def main():
    """Command-line entry point.

    With one argument (the account JSON file), list the account's trials
    and exit with status 0. With two arguments, the second selects a trial
    by Couch id, or the most recently started trial when it is the literal
    'cur'; the activity summary for that trial is written to stdout as
    indented JSON with sorted keys. Any other argument count prints the
    usage message. Exits with status 1 if no matching trial is found.
    """
    if len(sys.argv) == 2:
        docs = get_documents(sys.argv[1])
        list_trials(docs)
        sys.exit(0)
    if len(sys.argv) != 3:
        usage()
    docs = get_documents(sys.argv[1])
    if sys.argv[2] == 'cur':
        trial = find_cur_trial(docs)
    else:
        trial = find_trial(docs, sys.argv[2])
    if trial is None:
        print('activity: no trial found', file=sys.stderr)
        sys.exit(1)
    fotrial = find_following_trial(docs, trial)
    logs = find_log_docs(docs)
    actob = activity(trial, fotrial, logs)
    json.dump(actob, sys.stdout, default=lambda o: o.__dict__,
              sort_keys=True, indent=4)


# Run only when executed as a script, so importing this file (e.g. from a
# test) does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    main()