diff --git a/scripts/etl.py b/scripts/etl.py
new file mode 100644
index 000000000..a90dcade7
--- /dev/null
+++ b/scripts/etl.py
@@ -0,0 +1,383 @@
+###
+# Copyright 2015-2020, Institute for Systems Biology
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###
+
+from __future__ import print_function
+
+from builtins import str
+from builtins import object
+import datetime
+import logging
+import traceback
+import os
+import re
+from csv import reader as csv_reader
+import csv
+from argparse import ArgumentParser
+import sys
+import time
+from copy import deepcopy
+from itertools import combinations, product
+
+from idc import secret_settings, settings
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "idc.settings")
+
+# Django has to be configured before the model modules below are imported.
+import django
+django.setup()
+
+from idc_collections.models import Program, Collection, Attribute, Attribute_Ranges, \
+    Attribute_Display_Values, DataSource, DataSourceJoin, DataVersion, DataSetType, \
+    Attribute_Set_Type, Attribute_Display_Category, ImagingDataCommonsVersion
+
+from django.contrib.auth.models import User
+# Owner used for programs and collections that do not name one.
+idc_superuser = User.objects.get(username="idc")
+
+logger = logging.getLogger('main_logger')
+
+BQ_PROJ_DATASET = 'idc-dev-etl.idc_tcia_views_mvp_wave0'
+
+
+def new_attribute(name, displ_name, type, display_default, cross_collex=False, units=None):
+    """Return the dict describing one attribute, in the shape add_attributes() reads.
+
+    The list-valued entries ('solr_collex', 'bq_tables', 'set_types',
+    'categories') start out empty.
+    """
+    return {
+        'name': name,
+        "display_name": displ_name,
+        "type": type,
+        'units': units,
+        'cross_collex': cross_collex,
+        'solr_collex': [],
+        'bq_tables': [],
+        'set_types': [],
+        'display': display_default,
+        'categories': []
+    }
+
+
+def add_data_sets(sets_set):
+    """Create or update one DataSetType per dict in sets_set.
+
+    Each dict supplies 'name', 'data_type' and 'set_type'. A failure is logged
+    for that entry and the remaining entries are still processed.
+    """
+    for dss in sets_set:
+        try:
+            obj, created = DataSetType.objects.update_or_create(
+                name=dss['name'], data_type=dss['data_type'], set_type=dss['set_type']
+            )
+
+            print("Data Set Type created:")
+            print(obj)
+        except Exception as e:
+            logger.error("[ERROR] Data Set Type {} may not have been added!".format(dss['name']))
+            logger.exception(e)
+
+
+def add_data_versions(dv_set):
+    """Create or update DataVersions and link them to programs and the IDC release.
+
+    Each dict in dv_set supplies 'name', 'ver' and 'progs' (program names).
+    Any failure is logged and stops the rest of the batch.
+    """
+    idc_dev, created = ImagingDataCommonsVersion.objects.update_or_create(
+        name="Imaging Data Commons Data Release", version_number="1.0"
+    )
+    # Both link lists accumulate over the whole batch and are written once,
+    # so every version keeps its program links and no link is inserted twice.
+    ver_to_prog = []
+    ver_to_idc = []
+    try:
+        for dv in dv_set:
+            obj, created = DataVersion.objects.update_or_create(name=dv['name'], version=dv['ver'])
+
+            for prog in Program.objects.filter(name__in=dv['progs']):
+                ver_to_prog.append(DataVersion.programs.through(dataversion_id=obj.id, program_id=prog.id))
+
+            ver_to_idc.append(DataVersion.idc_versions.through(dataversion_id=obj.id, imagingdatacommonsversion_id=idc_dev.id))
+
+        DataVersion.programs.through.objects.bulk_create(ver_to_prog)
+        DataVersion.idc_versions.through.objects.bulk_create(ver_to_idc)
+
+        logger.info("[STATUS] Data Versions loaded:")
+        logger.info("{}".format(DataVersion.objects.all()))
+    except Exception as e:
+        logger.error("[ERROR] Data Versions may not have been added!")
+        logger.exception(e)
+
+
+def add_programs(program_set):
+    """Create or update Programs and return them keyed by short name.
+
+    Each dict supplies 'short_name', 'full_name' and 'public'; an optional
+    'owner' e-mail selects the owning user, otherwise idc_superuser is used.
+    Entries that fail are logged and left out of the result.
+    """
+    results = {}
+    for prog in program_set:
+        try:
+            owner = User.objects.get(email=prog['owner']) if 'owner' in prog else idc_superuser
+            obj, created = Program.objects.update_or_create(
+                short_name=prog['short_name'], name=prog['full_name'], is_public=prog['public'],
+                owner=owner
+            )
+
+            print("Program created:")
+            print(obj)
+
+            results[obj.short_name] = obj
+        except Exception as e:
+            logger.error("[ERROR] Program {} may not have been added!".format(prog['short_name']))
+            logger.exception(e)
+    return results
+
+
+def _link_data_source(source_id, versions=None, programs=None, data_sets=None):
+    """Link the DataSource with id source_id to versions, data sets and programs.
+
+    versions and data_sets are matched on name, programs on short_name.
+    Arguments that are empty or None are skipped.
+    """
+    if versions:
+        DataSource.versions.through.objects.bulk_create([
+            DataSource.versions.through(dataversion_id=dv.id, datasource_id=source_id)
+            for dv in DataVersion.objects.filter(name__in=versions)
+        ])
+    if data_sets:
+        DataSource.data_sets.through.objects.bulk_create([
+            DataSource.data_sets.through(datasource_id=source_id, datasettype_id=data_set.id)
+            for data_set in DataSetType.objects.filter(name__in=data_sets)
+        ])
+    if programs:
+        DataSource.programs.through.objects.bulk_create([
+            DataSource.programs.through(datasource_id=source_id, program_id=prog.id)
+            for prog in Program.objects.filter(short_name__in=programs)
+        ])
+
+
+def add_data_source(source_set, versions, programs, data_sets, source_type):
+    """Create or update one DataSource per name in source_set and link it up.
+
+    Sources whose name contains "tcga" are counted by "case_barcode", all
+    others by "PatientID". Failures are logged per source.
+    """
+    for source in source_set:
+        try:
+            obj, created = DataSource.objects.update_or_create(
+                name=source,
+                count_col="case_barcode" if "tcga" in source else "PatientID",
+                source_type=source_type
+            )
+            _link_data_source(obj.id, versions=versions, programs=programs, data_sets=data_sets)
+
+            print("DataSource entry created: {}".format(source))
+        except Exception as e:
+            logger.error("[ERROR] DataSource {} may not have been added!".format(source))
+            logger.exception(e)
+
+
+def add_source_joins(froms, from_col, tos=None, to_col=None):
+    """Create DataSourceJoin rows between data sources looked up by name.
+
+    With neither tos nor to_col given, every unordered pair drawn from froms
+    is joined on from_col on both sides. Otherwise every (from, to) pair is
+    joined from_col -> to_col.
+    """
+    if not tos and not to_col:
+        pairs = combinations(froms, 2)
+        to_col = from_col
+    else:
+        pairs = product(froms, tos)
+
+    src_joins = []
+    for from_name, to_name in pairs:
+        for from_join in DataSource.objects.filter(name=from_name):
+            for to_join in DataSource.objects.filter(name=to_name):
+                src_joins.append(DataSourceJoin(
+                    from_src=from_join,
+                    to_src=to_join,
+                    from_src_col=from_col,
+                    to_src_col=to_col)
+                )
+
+    if src_joins:
+        DataSourceJoin.objects.bulk_create(src_joins)
+
+
+def add_collections(collection_set):
+    """Bulk-create Collections, then link each one to its data versions.
+
+    Each dict supplies 'data' (keyword arguments for Collection, including
+    'collection_id') and optionally 'owner' (e-mail) and 'data_versions'
+    (DataVersion names). Any failure is logged and stops the batch.
+    """
+    collex_list = []
+    # What was being processed when a failure happened, for the error message.
+    current = None
+    try:
+        for collex in collection_set:
+            current = collex.get('data', {}).get('collection_id')
+            owner = User.objects.get(email=collex['owner']) if 'owner' in collex else idc_superuser
+            collex_list.append(Collection(**collex['data'], owner=owner))
+
+        current = "batch (bulk_create)"
+        Collection.objects.bulk_create(collex_list)
+
+        for collex in collection_set:
+            current = collex['data']['collection_id']
+            version_names = collex.get('data_versions')
+            if not version_names:
+                continue
+            obj = Collection.objects.get(collection_id=current)
+            Collection.data_versions.through.objects.bulk_create([
+                Collection.data_versions.through(collection_id=obj.id, dataversion_id=dv.id)
+                for dv in DataVersion.objects.filter(name__in=version_names)
+            ])
+    except Exception as e:
+        logger.error("[ERROR] Collection {} may not have been added!".format(current))
+        logger.exception(e)
+
+
+def add_attributes(attr_set):
+    """Create or update Attributes together with their dependent records.
+
+    Each dict has the shape produced by new_attribute(), optionally extended
+    with 'preformatted_values', 'range' and 'display_vals'. Failures are
+    logged per attribute.
+    """
+    for attr in attr_set:
+        try:
+            obj, created = Attribute.objects.update_or_create(
+                name=attr['name'], display_name=attr['display_name'], data_type=attr['type'],
+                # Presence of the key is the flag here, whatever its value.
+                preformatted_values='preformatted_values' in attr,
+                # new_attribute() always sets this key, so its value (not its
+                # presence) has to decide the flag.
+                is_cross_collex=bool(attr.get('cross_collex')),
+                default_ui_display=attr['display'],
+                units=attr.get('units')
+            )
+
+            if 'range' in attr:
+                if attr['range']:
+                    for attr_range in attr['range']:
+                        Attribute_Ranges.objects.update_or_create(**attr_range, attribute=obj)
+                else:
+                    # An empty 'range' still creates one row with only the attribute set.
+                    # NOTE(review): assumes this fallback is for the empty-list case - confirm.
+                    Attribute_Ranges.objects.update_or_create(attribute=obj)
+
+            for dv in attr.get('display_vals') or []:
+                Attribute_Display_Values.objects.update_or_create(
+                    raw_value=dv['raw_value'], display_value=dv['display_value'], attribute=obj
+                )
+
+            # Both name lists refer to DataSource rows, so one lookup covers them.
+            source_names = list(attr.get('solr_collex') or []) + list(attr.get('bq_tables') or [])
+            if source_names:
+                obj.data_sources.add(*DataSource.objects.filter(name__in=source_names))
+
+            for set_type in attr.get('set_types') or []:
+                Attribute_Set_Type.objects.update_or_create(
+                    datasettype=DataSetType.objects.get(data_type=set_type['set']),
+                    attribute=obj,
+                    child_record_search=set_type['child_record_search']
+                )
+
+            for cat in attr.get('categories') or []:
+                Attribute_Display_Category.objects.update_or_create(
+                    category=cat['name'], category_display_name=cat['display_name'], attribute=obj
+                )
+
+        except Exception as e:
+            logger.error("[ERROR] Attribute {} may not have been added!".format(attr['name']))
+            logger.exception(e)
+
+
+def move_attrs(from_data_sources, to_data_sources):
+    """Attach attributes of the from-sources to every to-source.
+
+    Attributes whose id is already reported by to_sources.get_source_attrs()
+    are left alone. The from-sources keep their own links.
+    """
+    to_sources = DataSource.objects.filter(name__in=to_data_sources)
+    from_sources = DataSource.objects.filter(name__in=from_data_sources)
+    to_sources_attrs = to_sources.get_source_attrs()
+    through = Attribute.data_sources.through
+    bulk_add = []
+    # An attribute shared by several from-sources must be linked only once.
+    seen = set()
+
+    for fds in from_sources:
+        from_source_attrs = fds.attribute_set.exclude(id__in=to_sources_attrs['ids'])
+        print("Moving attributes from {}: {}".format(fds.name, "; ".join(from_source_attrs.values_list('name',flat=True))))
+
+        for attr in from_source_attrs:
+            for ds in to_sources:
+                link = (attr.id, ds.id)
+                if link not in seen:
+                    seen.add(link)
+                    bulk_add.append(through(attribute_id=attr.id, datasource_id=ds.id))
+
+    through.objects.bulk_create(bulk_add)
+
+
+def update_data_sources(to_data_sources,set_types=None,versions=None,progs=None):
+    """Add version, data-set-type and program links to existing data sources.
+
+    Every DataSource named in to_data_sources receives the same links;
+    arguments left empty or None add nothing.
+    """
+    for ds in DataSource.objects.filter(name__in=to_data_sources):
+        _link_data_source(ds.id, versions=versions, programs=progs, data_sets=set_types)
+
+
+def disable_data_sources(sources):
+    """Detach the named data sources from versions, data sets, attributes and programs."""
+    for ds in DataSource.objects.filter(name__in=sources):
+        ds.versions.clear()
+        ds.data_sets.clear()
+        ds.attribute_set.clear()
+        ds.programs.clear()
+
+
+def main():
+    """Fold the three wave-0 view sources into the dicom_pivot_wave0 source.
+
+    Their attributes are attached to the pivot source, the pivot source gets
+    its extra links, and the three view sources are then cleared.
+    """
+    view_sources = [
+        "{}.segmentations".format(BQ_PROJ_DATASET),
+        "{}.qualitative_measurements".format(BQ_PROJ_DATASET),
+        "{}.quantitative_measurements".format(BQ_PROJ_DATASET),
+    ]
+    pivot_source = ["idc-dev.metadata.dicom_pivot_wave0"]
+
+    try:
+        move_attrs(view_sources, pivot_source)
+        update_data_sources(pivot_source, ['Derived Data'], ['TCIA Derived Data'], ["TCGA", "QIN", "ISPY", "LIDC"])
+        disable_data_sources(view_sources)
+    except Exception as e:
+        logger.exception(e)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/shell/vagrant-set-env.sh b/shell/vagrant-set-env.sh
index 99f770165..a6ef27c28 100755
--- a/shell/vagrant-set-env.sh
+++ b/shell/vagrant-set-env.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 echo 'export PYTHONPATH=/home/vagrant/www:/home/vagrant/www/lib:/home/vagrant/www/IDC-Common' | tee -a /home/vagrant/.bash_profile
-echo 'export SECURE_LOCAL_PATH=../secure_files/idc/' | tee -a /home/vagrant/.bash_profile
+echo 'export SECURE_LOCAL_PATH=../parentDir/secure_files/idc/' | tee -a /home/vagrant/.bash_profile
 echo 'export DJANGO_SETTINGS_MODULE=idc.settings' | tee 
-a /home/vagrant/.bash_profile source /home/vagrant/.bash_profile chmod +x /home/vagrant/www/shell/python-su.sh diff --git a/static/css/style.css b/static/css/style.css index a70711875..e7381bc5b 100755 --- a/static/css/style.css +++ b/static/css/style.css @@ -3904,13 +3904,25 @@ html { background: none; } -#save-cohort-btn, #download-manifest { +#save-cohort-btn { margin-top: 12px; } -#save-cohort-btn.disabled, #download-manifest.disabled { +.cohort-manifest { + margin-top: 8px; +} + +.manifest-size-warning { + display: inline-block; +} +.cohort-manifest i { + margin-right: 15px; + color: #ecf609; + font-size: 22px; + margin-top: 12px; } + .collex-table th { background-color: #e4d8f9; border-top: 1px solid #ddd; diff --git a/static/js/cohorts/cohort-details.js b/static/js/cohorts/cohort-details.js index 2ad8f4a4e..a5431ae8c 100644 --- a/static/js/cohorts/cohort-details.js +++ b/static/js/cohorts/cohort-details.js @@ -25,13 +25,22 @@ require.config({ underscore: 'libs/underscore-min', assetscore: 'libs/assets.core', assetsresponsive: 'libs/assets.responsive', - base: 'base' + base: 'base', + tippy: 'libs/tippy-bundle.umd.min', + '@popperjs/core': 'libs/popper.min' }, shim: { 'bootstrap': ['jquery'], 'jqueryui': ['jquery'], 'assetscore': ['jquery', 'bootstrap', 'jqueryui'], - 'assetsresponsive': ['jquery', 'bootstrap', 'jqueryui'] + 'assetsresponsive': ['jquery', 'bootstrap', 'jqueryui'], + '@popperjs/core': { + exports: "@popperjs/core" + }, + 'tippy': { + exports: 'tippy', + deps: ['@popperjs/core'] + } } }); @@ -39,10 +48,11 @@ require([ 'jquery', 'jqueryui', 'base', + 'tippy', 'bootstrap', 'assetscore' ,'assetsresponsive', -], function($, jqueryui, base, bootstrap) { +], function($, jqueryui, base, tippy, bootstrap) { A11y.Core(); var downloadToken = new Date().getTime(); @@ -62,4 +72,11 @@ require([ },downloadToken, 'downloadToken'); }); + tippy('.manifest-size-warning',{ + content: 'Your cohort is too large to be downloaded in its entirety, and will be 
truncated at 65,000 records ' + + 'ordered by PatientID, StudyID, SeriesID, and InstanceID.', + theme: 'light', + placement: 'left', + arrow: false + }); }); \ No newline at end of file diff --git a/static/js/explore.js b/static/js/explore.js index 4c5693970..d0b310fda 100644 --- a/static/js/explore.js +++ b/static/js/explore.js @@ -10,7 +10,6 @@ require.config({ imagesearch: 'image_search', cohortfilelist: 'cohort_filelist', tippy: 'libs/tippy-bundle.umd.min', - //d3: 'libs/d3.v5.min', '@popperjs/core': 'libs/popper.min' }, shim: { @@ -26,8 +25,7 @@ require.config({ 'assetscore': ['jquery', 'bootstrap', 'jqueryui'], 'assetsresponsive': ['jquery', 'bootstrap', 'jqueryui'], 'tablesorter': ['jquery'], - 'base': ['jquery'], - //'imagesearch':['d3'], + 'base': ['jquery'] } }); diff --git a/static/js/image_search.js b/static/js/image_search.js index 2eee6db5f..f1a2a5b6d 100644 --- a/static/js/image_search.js +++ b/static/js/image_search.js @@ -27,72 +27,74 @@ require([ 'base' ], function($, _, jqueryui, bootstrap, jquerydt ) { - window.filterObj = {}; - window.projIdSel = []; - window.studyIdSel = []; - //window.tcgaColls = ["tcga_blca", "tcga_brca", "tcga_cesc", "tcga_coad", "tcga_esca", "tcga_gbm", "tcga_hnsc", "tcga_kich", "tcga_kirc", "tcga_kirp", "tcga_lgg", "tcga_lihc", "tcga_luad", "tcga_lusc", "tcga_ov", "tcga_prad", "tcga_read", "tcga_sarc", "tcga_stad", "tcga_thca", "tcga_ucec"]; - window.projSets = new Object(); - window.projSets['tcga']=["tcga_blca", "tcga_brca", "tcga_cesc", "tcga_coad", "tcga_esca", "tcga_gbm", "tcga_hnsc", "tcga_kich", "tcga_kirc", "tcga_kirp", "tcga_lgg", "tcga_lihc", "tcga_luad", "tcga_lusc", "tcga_ov", "tcga_prad", "tcga_read", "tcga_sarc", "tcga_stad", "tcga_thca", "tcga_ucec"]; - window.projSets['rider']=["rider_lung_ct", "rider_phantom_pet_ct","rider_breast_mri", "rider_neuro_mri","rider_phantom_mri", "rider_lung_pet_ct"]; - window.projSets['qin'] = ["qin_headneck","qin_lung_ct","qin_pet_phantom","qin_breast_dce_mri"]; - - var 
plotLayout = { - title: '', - autosize: true, - margin: { - l: 30, - r: 30, - b: 60, - t: 30, - pad: 0 - }, - xaxis: {type: 'category', dtick: 1} - }; + $('.manifest-size-warning').hide(); + + window.filterObj = {}; + window.projIdSel = []; + window.studyIdSel = []; + //window.tcgaColls = ["tcga_blca", "tcga_brca", "tcga_cesc", "tcga_coad", "tcga_esca", "tcga_gbm", "tcga_hnsc", "tcga_kich", "tcga_kirc", "tcga_kirp", "tcga_lgg", "tcga_lihc", "tcga_luad", "tcga_lusc", "tcga_ov", "tcga_prad", "tcga_read", "tcga_sarc", "tcga_stad", "tcga_thca", "tcga_ucec"]; + window.projSets = new Object(); + window.projSets['tcga']=["tcga_blca", "tcga_brca", "tcga_cesc", "tcga_coad", "tcga_esca", "tcga_gbm", "tcga_hnsc", "tcga_kich", "tcga_kirc", "tcga_kirp", "tcga_lgg", "tcga_lihc", "tcga_luad", "tcga_lusc", "tcga_ov", "tcga_prad", "tcga_read", "tcga_sarc", "tcga_stad", "tcga_thca", "tcga_ucec"]; + window.projSets['rider']=["rider_lung_ct", "rider_phantom_pet_ct","rider_breast_mri", "rider_neuro_mri","rider_phantom_mri", "rider_lung_pet_ct"]; + window.projSets['qin'] = ["qin_headneck","qin_lung_ct","qin_pet_phantom","qin_breast_dce_mri"]; + + var plotLayout = { + title: '', + autosize: true, + margin: { + l: 30, + r: 30, + b: 60, + t: 30, + pad: 0 + }, + xaxis: {type: 'category', dtick: 1} + }; - var pieLayout = { - title: '', - autosize: true, - margin: { - l: 30, - r: 30, - b: 60, - t: 30, - pad: 0 + var pieLayout = { + title: '', + autosize: true, + margin: { + l: 30, + r: 30, + b: 60, + t: 30, + pad: 0 + }, + showlegend: false, + legend: { + x: 2, + y: 0, + traceorder: 'normal', + font: { + family: 'sans-serif', + size: 4, + color: '#000' }, - showlegend: false, - legend: { - x: 2, - y: 0, - traceorder: 'normal', - font: { - family: 'sans-serif', - size: 4, - color: '#000' - }, - bgcolor: '#E2E2E2', - bordercolor: '#FFFFFF', - borderwidth: 2 - } - }; - - window.hidePanel=function(){ - $('#lh_panel').hide(); - $('#show_lh').show(); - $('#show_lh').removeClass('hidden'); - 
$('#rh_panel').removeClass('col-lg-9'); - $('#rh_panel').removeClass('col-md-9'); - $('#rh_panel').addClass('col-lg-12'); - $('#rh_panel').addClass('col-md-12'); + bgcolor: '#E2E2E2', + bordercolor: '#FFFFFF', + borderwidth: 2 } + }; - window.showPanel=function(){ - $('#lh_panel').show(); - $('#show_lh').hide(); - $('#rh_panel').removeClass('col-lg-12'); - $('#rh_panel').removeClass('col-md-12'); - $('#rh_panel').addClass('col-lg-9'); - $('#rh_panel').addClass('col-md-9'); - } + window.hidePanel=function(){ + $('#lh_panel').hide(); + $('#show_lh').show(); + $('#show_lh').removeClass('hidden'); + $('#rh_panel').removeClass('col-lg-9'); + $('#rh_panel').removeClass('col-md-9'); + $('#rh_panel').addClass('col-lg-12'); + $('#rh_panel').addClass('col-md-12'); + }; + + window.showPanel=function(){ + $('#lh_panel').show(); + $('#show_lh').hide(); + $('#rh_panel').removeClass('col-lg-12'); + $('#rh_panel').removeClass('col-md-12'); + $('#rh_panel').addClass('col-lg-9'); + $('#rh_panel').addClass('col-md-9'); + }; window.setSlider = function (slideDiv, reset, strt, end, isInt, updateNow) { //var slideDiv = divName + "_slide"; @@ -1419,17 +1421,25 @@ require([ contentType: 'application/x-www-form-urlencoded', success: function (data) { var isFiltered = Boolean($('#search_def p').length>0); - if (isFiltered && data.total > 0){ - $('#save-cohort-btn').prop('disabled',''); - if(user_is_auth) { - $('#save-cohort-btn').prop('title',''); + if(is_cohort) { + if(data.total > 65000) { + $('.manifest-size-warning').show(); + } else { + $('.manifest-size-warning').hide(); } } else { - $('#save-cohort-btn').prop('disabled','disabled'); - if(user_is_auth) { - $('#save-cohort-btn').prop('title',data.total > 0 ? 'Please select at least one filter.' 
: 'There are no cases in this cohort.'); + if (isFiltered && data.total > 0){ + $('#save-cohort-btn').prop('disabled',''); + if(user_is_auth) { + $('#save-cohort-btn').prop('title',''); + } } else { - $('#save-cohort-btn').prop('title','Log in to save.'); + $('#save-cohort-btn').prop('disabled','disabled'); + if(user_is_auth) { + $('#save-cohort-btn').prop('title',data.total > 0 ? 'Please select at least one filter.' : 'There are no cases in this cohort.'); + } else { + $('#save-cohort-btn').prop('title','Log in to save.'); + } } } //updateCollectionTotals(data.total, data.origin_set.attributes.collection_id); diff --git a/templates/cohorts/cohort_details.html b/templates/cohorts/cohort_details.html index 41f28d05c..1877ce227 100644 --- a/templates/cohorts/cohort_details.html +++ b/templates/cohorts/cohort_details.html @@ -41,9 +41,12 @@