|
13 | 13 | from . import validators |
14 | 14 | import os |
15 | 15 | from .dao.containerutil import pluralize |
16 | | - |
17 | 16 | log = config.log |
18 | 17 |
|
# 1 MiB expressed as a float so integer byte counts divide into fractional megabytes.
BYTES_IN_MEGABYTE = float(2 ** 20)
19 | 19 |
|
20 | 20 | def _filter_check(property_filter, property_values): |
21 | 21 | minus = set(property_filter.get('-', [])) |
@@ -315,3 +315,84 @@ def download(self): |
315 | 315 | log.debug(json.dumps(req_spec, sort_keys=True, indent=4, separators=(',', ': '))) |
316 | 316 |
|
317 | 317 | return self._preflight_archivestream(req_spec, collection=self.get_param('collection')) |
| 318 | + |
def summary(self):
    """Return a summary of what has been/will be downloaded based on a given query.

    Expects a JSON request body containing:
        _id   -- container id as a hex string (converted to ObjectId here)
        level -- 'projects' or 'sessions'; any other value is rejected

    Returns a dict keyed by file type; each value holds:
        count    -- number of files of that type
        mb_total -- cumulative size of those files in megabytes
        nodes    -- set of {'level': <collection>, '_id': <container id>}
                    entries identifying where the files live

    Raises:
        ValueError -- if `level` is not a supported container level.
    """
    req = self.request.json_body
    req['_id'] = bson.ObjectId(req['_id'])
    level = req['level']

    # Collections to aggregate over, and a per-collection match query.
    containers = ['projects', 'sessions', 'acquisitions', 'analyses']
    if level == 'projects':
        # Collect ids of all sessions and acquisitions under this project so
        # the analyses query can match on any of them as a parent.
        session_ids = [s['_id'] for s in
                       config.db.sessions.find({'project': req['_id']}, {'_id': 1})]
        acquisition_ids = [a['_id'] for a in
                           config.db.acquisitions.find({'session': {'$in': session_ids}}, {'_id': 1})]
        parent_ids = [req['_id']] + session_ids + acquisition_ids

        cont_query = {
            # Fixed: was {'_id': {'project': req['_id']}}, which compares the
            # id field to a subdocument and can never match a project.
            'projects': {'_id': req['_id']},
            'sessions': {'project': req['_id']},
            'acquisitions': {'session': {'$in': session_ids}},
            'analyses': {'parent.id': {'$in': parent_ids}}
        }
    elif level == 'sessions':
        # Collect acquisition ids under this session for the analyses query.
        acquisition_ids = [a['_id'] for a in
                           config.db.acquisitions.find({'session': req['_id']}, {'_id': 1})]
        parent_ids = [req['_id']] + acquisition_ids

        cont_query = {
            'sessions': {'_id': req['_id']},
            'acquisitions': {'session': req['_id']},
            'analyses': {'parent.id': {'$in': parent_ids}}
        }
        # Projects are not part of a session-level summary.
        containers = containers[1:]
    else:
        # Previously an unknown level fell through with an undefined query and
        # crashed with a KeyError in the loop below; fail explicitly instead.
        raise ValueError('Unsupported level: {}'.format(level))

    res = {}
    for cont_name in containers:
        # Unwind each container's files and group by file type, summing counts
        # and sizes (in MB) and collecting the containers the files live in.
        pipeline = [
            {'$match': cont_query[cont_name]},
            {'$unwind': '$files'},
            {'$project': {'_id': '$_id', 'type': '$files.type',
                          'mbs': {'$divide': ['$files.size', BYTES_IN_MEGABYTE]}}},
            {'$group': {
                '_id': '$type',
                'count': {'$sum': 1},
                'mb_total': {'$sum': '$mbs'},
                'nodes': {
                    '$addToSet': {'level': {'$literal': cont_name}, '_id': '$_id'}
                }
            }}
        ]

        try:
            result = config.db.command('aggregate', cont_name, pipeline=pipeline)
        except Exception:  # pylint: disable=broad-except
            # Previously the exception object itself was returned as the
            # response body; log and re-raise so the framework can produce a
            # proper error response instead.
            log.exception('summary aggregation failed for %s', cont_name)
            raise

        if result.get('ok'):
            # Merge this collection's per-type totals into the overall summary.
            for doc in result.get('result'):
                type_ = doc['_id']
                if type_ in res:
                    res[type_]['count'] += doc.get('count', 0)
                    res[type_]['mb_total'] += doc.get('mb_total', 0)
                    res[type_]['nodes'] += doc.get('nodes', [])
                else:
                    res[type_] = doc
    return res
0 commit comments