|
13 | 13 | from . import validators |
14 | 14 | import os |
15 | 15 | from .dao.containerutil import pluralize |
16 | | - |
17 | 16 | log = config.log |
18 | 17 |
|
# 1 MiB expressed as a float so integer byte counts divide into fractional megabytes.
BYTES_IN_MEGABYTE = float(2 ** 20)
19 | 19 |
|
20 | 20 | def _filter_check(property_filter, property_values): |
21 | 21 | minus = set(property_filter.get('-', [])) |
@@ -315,3 +315,84 @@ def download(self): |
315 | 315 | log.debug(json.dumps(req_spec, sort_keys=True, indent=4, separators=(',', ': '))) |
316 | 316 |
|
317 | 317 | return self._preflight_archivestream(req_spec, collection=self.get_param('collection')) |
| 318 | + |
def summary(self):
    """Return a summary of what has been/will be downloaded based on a given query.

    Expects a JSON request body containing:
        _id   -- container id as a hex string (converted to ObjectId here)
        level -- 'projects' or 'sessions'; any other value is rejected

    Returns a dict keyed by file type; each value holds:
        count    -- number of files of that type
        mb_total -- cumulative size of those files in megabytes
        nodes    -- set of {'level': <collection>, '_id': <container id>}
                    entries identifying where the files live

    Raises:
        ValueError -- if `level` is not a supported container level.
    """
    req = self.request.json_body
    req['_id'] = bson.ObjectId(req['_id'])
    level = req['level']

    # Collections to aggregate over, and a per-collection match query.
    containers = ['projects', 'sessions', 'acquisitions', 'analyses']
    if level == 'projects':
        # Collect ids of all sessions and acquisitions under this project so
        # the analyses query can match on any of them as a parent.
        session_ids = [s['_id'] for s in
                       config.db.sessions.find({'project': req['_id']}, {'_id': 1})]
        acquisition_ids = [a['_id'] for a in
                           config.db.acquisitions.find({'session': {'$in': session_ids}}, {'_id': 1})]
        parent_ids = [req['_id']] + session_ids + acquisition_ids

        cont_query = {
            # Fixed: was {'_id': {'project': req['_id']}}, which compares the
            # id field to a subdocument and can never match a project.
            'projects': {'_id': req['_id']},
            'sessions': {'project': req['_id']},
            'acquisitions': {'session': {'$in': session_ids}},
            'analyses': {'parent.id': {'$in': parent_ids}}
        }
    elif level == 'sessions':
        # Collect acquisition ids under this session for the analyses query.
        acquisition_ids = [a['_id'] for a in
                           config.db.acquisitions.find({'session': req['_id']}, {'_id': 1})]
        parent_ids = [req['_id']] + acquisition_ids

        cont_query = {
            'sessions': {'_id': req['_id']},
            'acquisitions': {'session': req['_id']},
            'analyses': {'parent.id': {'$in': parent_ids}}
        }
        # Projects are not part of a session-level summary.
        containers = containers[1:]
    else:
        # Previously an unknown level fell through with an undefined query and
        # crashed with a KeyError in the loop below; fail explicitly instead.
        raise ValueError('Unsupported level: {}'.format(level))

    res = {}
    for cont_name in containers:
        # Unwind each container's files and group by file type, summing counts
        # and sizes (in MB) and collecting the containers the files live in.
        pipeline = [
            {'$match': cont_query[cont_name]},
            {'$unwind': '$files'},
            {'$project': {'_id': '$_id', 'type': '$files.type',
                          'mbs': {'$divide': ['$files.size', BYTES_IN_MEGABYTE]}}},
            {'$group': {
                '_id': '$type',
                'count': {'$sum': 1},
                'mb_total': {'$sum': '$mbs'},
                'nodes': {
                    '$addToSet': {'level': {'$literal': cont_name}, '_id': '$_id'}
                }
            }}
        ]

        try:
            result = config.db.command('aggregate', cont_name, pipeline=pipeline)
        except Exception:  # pylint: disable=broad-except
            # Previously the exception object itself was returned as the
            # response body; log and re-raise so the framework can produce a
            # proper error response instead.
            log.exception('summary aggregation failed for %s', cont_name)
            raise

        if result.get('ok'):
            # Merge this collection's per-type totals into the overall summary.
            for doc in result.get('result'):
                type_ = doc['_id']
                if type_ in res:
                    res[type_]['count'] += doc.get('count', 0)
                    res[type_]['mb_total'] += doc.get('mb_total', 0)
                    res[type_]['nodes'] += doc.get('nodes', [])
                else:
                    res[type_] = doc
    return res
0 commit comments