Skip to content

Commit 9eed035

Browse files
committed
returns count, mb size, and nodes for each file type
1 parent 3a9ade2 commit 9eed035

File tree

2 files changed

+83
-1
lines changed

2 files changed

+83
-1
lines changed

api/api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def prefix(path, routes):
8585
# General-purpose upload & download
8686

8787
route('/download', Download, h='download', m=['GET', 'POST']),
88+
route('/download/summary', Download, h='summary', m=['POST']),
8889
route('/upload/<strategy:label|uid|uid-match|reaper>', Upload, h='upload', m=['POST']),
8990
route('/clean-packfiles', Upload, h='clean_packfile_tokens', m=['POST']),
9091
route('/engine', Upload, h='engine', m=['POST']),

api/download.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
from . import validators
1414
import os
1515
from .dao.containerutil import pluralize
16-
1716
log = config.log
1817

18+
# 2**20 bytes; kept as a float so dividing a byte count by it yields fractional megabytes.
BYTES_IN_MEGABYTE = float(1<<20)
1919

2020
def _filter_check(property_filter, property_values):
2121
minus = set(property_filter.get('-', []))
@@ -315,3 +315,84 @@ def download(self):
315315
log.debug(json.dumps(req_spec, sort_keys=True, indent=4, separators=(',', ': ')))
316316

317317
return self._preflight_archivestream(req_spec, collection=self.get_param('collection'))
318+
319+
def summary(self):
    """Return a per-file-type summary of the files under a project or session.

    The JSON request body must contain:
        ``_id``   -- id of the container, as a string accepted by ``bson.ObjectId``
        ``level`` -- ``'projects'`` or ``'sessions'``

    For the requested container and every container collection below it
    (sessions, acquisitions, analyses) the files are aggregated by file type.

    Returns:
        dict keyed by file type. Each value is a dict with
        ``_id`` (the file type), ``count`` (number of files),
        ``mb_total`` (summed file size in megabytes) and ``nodes``
        (list of ``{'level': <collection name>, '_id': <container id>}``
        entries for the containers holding files of that type).

    Raises:
        ValueError: if ``level`` is neither ``'projects'`` nor ``'sessions'``.
        Exception: any error raised by the aggregation command is logged
            and re-raised.
    """
    req = self.request.json_body
    cont_id = bson.ObjectId(req['_id'])
    # The parsed body is updated in place, as the original implementation did.
    req['_id'] = cont_id
    level = req['level']

    if level == 'projects':
        session_ids = [
            s['_id']
            for s in config.db.sessions.find({'project': cont_id}, {'_id': 1})
        ]
        acquisition_ids = [
            a['_id']
            for a in config.db.acquisitions.find(
                {'session': {'$in': session_ids}}, {'_id': 1}
            )
        ]
        # Analyses may hang off the project, its sessions or its acquisitions.
        parent_ids = [cont_id] + session_ids + acquisition_ids

        containers = ['projects', 'sessions', 'acquisitions', 'analyses']
        # Each collection needs a slightly different match query.
        cont_query = {
            # Match the project document itself by its id.
            'projects': {'_id': cont_id},
            'sessions': {'project': cont_id},
            'acquisitions': {'session': {'$in': session_ids}},
            'analyses': {'parent.id': {'$in': parent_ids}},
        }
    elif level == 'sessions':
        acquisition_ids = [
            a['_id']
            for a in config.db.acquisitions.find({'session': cont_id}, {'_id': 1})
        ]
        # Analyses may hang off the session or its acquisitions.
        parent_ids = [cont_id] + acquisition_ids

        containers = ['sessions', 'acquisitions', 'analyses']
        cont_query = {
            'sessions': {'_id': cont_id},
            'acquisitions': {'session': cont_id},
            'analyses': {'parent.id': {'$in': parent_ids}},
        }
    else:
        # Previously this fell through to an opaque KeyError on 'projects'.
        # NOTE(review): if the handler base class offers an HTTP abort helper,
        # a 400 response would be a better fit here -- confirm.
        raise ValueError(
            "Unsupported level {!r}; expected 'projects' or 'sessions'".format(level)
        )

    res = {}
    for cont_name in containers:
        # Aggregate the files of every matching container by file type.
        pipeline = [
            {'$match': cont_query[cont_name]},
            {'$unwind': '$files'},
            {'$project': {
                '_id': '$_id',
                'type': '$files.type',
                'mbs': {'$divide': ['$files.size', BYTES_IN_MEGABYTE]},
            }},
            {'$group': {
                '_id': '$type',
                'count': {'$sum': 1},
                'mb_total': {'$sum': '$mbs'},
                'nodes': {
                    '$addToSet': {'level': {'$literal': cont_name}, '_id': '$_id'}
                },
            }},
        ]

        try:
            result = config.db.command('aggregate', cont_name, pipeline=pipeline)
        except Exception:  # pylint: disable=broad-except
            # Surface the failure instead of returning the exception object
            # as if it were a valid summary.
            log.exception('File summary aggregation failed for %s', cont_name)
            raise

        if not result.get('ok'):
            log.warning('File summary aggregation on %s was not ok; skipping', cont_name)
            continue

        # Fold this collection's per-type totals into the overall summary.
        for doc in result.get('result') or []:
            type_ = doc['_id']
            if type_ in res:
                merged = res[type_]
                merged['count'] += doc.get('count', 0)
                merged['mb_total'] += doc.get('mb_total', 0)
                merged['nodes'] += doc.get('nodes', [])
            else:
                res[type_] = doc
    return res
398+

0 commit comments

Comments
 (0)