From db4b4810e3930c6d29a788f28ffd77181513c75b Mon Sep 17 00:00:00 2001 From: "S. Paquette" Date: Fri, 7 Apr 2023 01:20:49 -0700 Subject: [PATCH] -> s5cmd manifest option -> Prep to add hierarchical URLs --- idc/settings.py | 5 +- shell/backup-solr.sh | 76 ++-- shell/database-setup.sh | 12 +- static/css/style.css | 9 + static/js/cohorts/export-manifest.js | 65 ++-- templates/cohorts/export-manifest-modal.html | 363 ++++++++++++------- 6 files changed, 320 insertions(+), 210 deletions(-) diff --git a/idc/settings.py b/idc/settings.py index aa3d32154..c949ca235 100644 --- a/idc/settings.py +++ b/idc/settings.py @@ -103,9 +103,10 @@ PAIRWISE_SERVICE_URL = os.environ.get('PAIRWISE_SERVICE_URL', None) # Data Buckets -GCLOUD_BUCKET = os.environ.get('GOOGLE_STORAGE_BUCKET') +GCLOUD_BUCKET = os.environ.get('GOOGLE_STORAGE_BUCKET', 'FAKE_BUCKET') +AWS_BUCKET = os.environ.get('AWS_BUCKET', 'FAKE_BUCKET') -DCF_GUID_SUFFIX = os.environ.get('DCF_GUID_SUFFIX','') +DCF_GUID_SUFFIX = os.environ.get('DCF_GUID_SUFFIX', '') # BigQuery cohort storage settings BIGQUERY_COHORT_DATASET_ID = os.environ.get('BIGQUERY_COHORT_DATASET_ID', 'cohort_dataset') diff --git a/shell/backup-solr.sh b/shell/backup-solr.sh index 80035aff8..a0659fa28 100644 --- a/shell/backup-solr.sh +++ b/shell/backup-solr.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -euo pipefail + CORE_LIST_FILE="" CORE_LIST="" FILE_NAME="" @@ -7,7 +9,7 @@ MAX_WAIT=3 SOLR_DATA="/opt/bitnami/solr/server/solr" RUN=`date +%s` BACKUPS_DIR="${SOLR_DATA}/backups_${RUN}" -PARSE_RESPONSE="import sys, json; print(json.load(sys.stdin)['status'])" +PARSE_RESPONSE="import sys, json; print(json.load(sys.stdin)['details']['backup'].get('snapshotCompletedAt',None) or 'INCOMPLETE')" while getopts ":c:l:f:d:h" flag do @@ -57,7 +59,7 @@ fi cores=[] if [[ $FILE_NAME == "" ]]; then - FILE_NAME="solr_cores_backup_${RUN}.tar" + FILE_NAME="solr_cores_backup_${RUN}.tar.gz" fi echo "[STATUS] Backups will be tar'd to ${FILE_NAME}" @@ -72,39 +74,51 @@ echo "[STATUS] 
Building backup script: " for core in "${cores[@]}"; do if [[ $core != "" ]]; then - echo "-----> Backup for core ${core} <-----" + echo "----------------> Backup for core ${core} <-----------------" echo "Copying schema for ${core}..." - sudo -u solr cp ${SOLR_DATA}/${core}/conf/managed-schema ${BACKUPS_DIR}/$core.managed-schema - echo "Backup command for ${core}:" - echo "curl -u ${SOLR_USER}:${SOLR_PWD} -X GET \"https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=${core}\" --cacert solr-ssl.pem" - echo "Status command for ${core}": - echo "curl -u ${SOLR_USER}:${SOLR_PWD} -X GET \"https://localhost:8983/solr/${core}/replication?command=details\" --cacert solr-ssl.pem" -# curl -u $SOLR_USER:$SOLR_PWD -X GET "https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=$core" --cacert solr-ssl.pem -# status=`curl -u $SOLR_USER:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"` -# retries=0 -# while [[ "$status" != "OK" && "$retries" -lt "$MAX_WAIT" ]]; do -# echo "Backup for core ${core} isn't completed, waiting..." -# sleep 2 -# ((retries++)) -# status=`curl -u $SOLR_USER:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"` -# done -# if [ "$status" == "OK" ]; then -# echo "Core ${core} backup completed." -# else -# echo "Core ${core} backup is still pending." -# echo "You may need to re-run TAR on the snapshots from ${BACKUPS_DIR} if you see an error message about files changing during the TAR process." 
-# fi - echo "-------------> Done <-------------" + sudo -u solr cp ${SOLR_DATA}/${core}/conf/managed-schema.xml ${BACKUPS_DIR}/$core.managed-schema.xml + echo "Executing backup command for ${core}:" + curl -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=${core}" --cacert solr-ssl.pem + curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem + status=`curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"` + retries=0 + while [[ "$status" == "INCOMPLETE" && "$retries" -lt "$MAX_WAIT" ]]; do + echo "Backup for core ${core} isn't completed, waiting..." + sleep 2 + retries=$((retries + 1)) # not ((retries++)): that returns status 1 when retries is 0, which aborts the script under 'set -e' + status=`curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"` + done + if [ "$status" != "INCOMPLETE" ]; then + echo "Core ${core} backup completed at ${status}." + else + echo "Core ${core} backup is still pending." + echo "You may need to re-run TAR on the snapshots from ${BACKUPS_DIR} if you see an error message about files changing during the TAR process." + fi + echo "----------------> /Backup for core ${core} <-----------------" fi done -echo "Tar command: " -echo "tar -cvf ${FILE_NAME} -C ${BACKUPS_DIR} ." +if [ $? -ne 0 ]; then + echo "There was a problem backing up some of the cores! Exiting." + exit 1 +fi + +echo "" +echo -n "Allowing extra time for all writes to the backup directory to complete" +for k in `seq 1 3`; do + echo -n "." + sleep 1 +done +echo ".done." + +echo "Taring contents of ${BACKUPS_DIR}..." +tar -cvzf ${FILE_NAME} -C ${BACKUPS_DIR} . + # -#if [[ ! -z ${DEST_BUCKET} ]]; then -# gsutil cp ${FILE_NAME} gs://${DEST_BUCKET}/ -#else -# echo "[STATUS] Backups stored in tarfile ${FILE_NAME}." 
-#fi +if [[ ! -z ${DEST_BUCKET} ]]; then + gsutil cp ${FILE_NAME} gs://${DEST_BUCKET}/ +else + echo "[STATUS] Backups stored in tarfile ${FILE_NAME}." +fi exit 0 diff --git a/shell/database-setup.sh b/shell/database-setup.sh index f627258f0..edc1d28d9 100755 --- a/shell/database-setup.sh +++ b/shell/database-setup.sh @@ -30,12 +30,12 @@ echo "Increase group_concat max, for longer data type names" mysql -u$MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "SET GLOBAL group_concat_max_len=18446744073709547520;" echo "Creating database users..." -mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER '${DATABASE_USER}'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';" -mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER '${DATABASE_USER}'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';" -mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'api-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';" -mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'api-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';" -mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'dev-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';" -mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'dev-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';" +mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS '${DATABASE_USER}'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';" +mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS '${DATABASE_USER}'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';" +mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'api-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';" +mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT 
EXISTS 'api-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';" +mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'dev-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';" +mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'dev-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';" echo "Granting permissions to database users..." mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "GRANT ALL PRIVILEGES ON *.* TO '${DATABASE_USER}'@'%';" diff --git a/static/css/style.css b/static/css/style.css index 5247595ac..31d105273 100755 --- a/static/css/style.css +++ b/static/css/style.css @@ -4085,6 +4085,15 @@ html { display: inline-block; } +.manifest-name { + padding: 5px 0px 10px 0px; +} + +.bq-table-name p { + font-style: italic; + color: #1a344c; +} + .bq-disabled { color: darkred; font-size: 15px; diff --git a/static/js/cohorts/export-manifest.js b/static/js/cohorts/export-manifest.js index 2b34ccc6d..d61bd1493 100644 --- a/static/js/cohorts/export-manifest.js +++ b/static/js/cohorts/export-manifest.js @@ -76,31 +76,16 @@ require([ download_manifest("json", $(this), e) }); - $('.export-option input').on('click', function(){ - let export_option = $(this).attr("value") - update_export_option(export_option); - if(export_option == 'bq-manifest') { - $('.file-name').hide(); - $('.table-name').show(); - $('.bq-only').show(); - } else { - $('.file-name').show(); - $('.table-name').hide(); - $('.bq-only').hide(); - } + $('#download-s5cmd').on('click', function(e) { + download_manifest("s5cmd", $(this), e) }); - var update_export_option = function(export_option) { - $('#bq-manifest').hide(); - $('#file-manifest').hide(); - $('#' + export_option).show(); - export_option !== 'bq-manifest' ? 
$('.bq-only').hide() : $('.bq-only').show(); - }; - - update_export_option("file-manifest"); + $('#get-bq-table').on('click',function(e){ + download_manifest('bq',$(this), e); + }); var download_manifest = function(file_type, clicked_button, e) { - let manifest_type = $('input[name="manifest-type"]:checked').val(); + let manifest_type = file_type === 'bq' ? 'bq-manifest' : 'file-manifest'; $('#unallowed-chars-alert').hide(); $('#name-too-long-alert-modal').hide(); @@ -133,10 +118,7 @@ require([ return false; } - $('#download-csv').attr('disabled','disabled'); - $('#download-tsv').attr('disabled','disabled'); - $('#download-json').attr('disabled','disabled'); - $('#get-bq-table').attr('disabled','disabled'); + $('.get-manifest').attr('disabled','disabled'); $('#manifest-in-progress').modal('show'); @@ -148,7 +130,7 @@ require([ } var checked_fields = []; - $('.field-checkbox').each(function() { + clicked_button.parents('.tab-pane.manifest').find('.field-checkbox').each(function() { var cb = $(this)[0]; if (cb.checked) { checked_fields.push(cb.value); @@ -156,9 +138,9 @@ require([ }); var checked_columns = []; - $('.column-checkbox').each(function() { + clicked_button.parents('.tab-pane.manifest').find('.column-checkbox').each(function() { var cb = $(this)[0]; - if (cb.checked && (manifest_type !== 'file-manifest' || ! $(this).hasClass('bq-only'))) { + if (cb.checked) { checked_columns.push(cb.value); } }); @@ -167,7 +149,15 @@ require([ $('input[name="header_fields"]').val(JSON.stringify(checked_fields)); $('input[name="columns"]').val(JSON.stringify(checked_columns)); $('input[name="downloadToken"]').val(downloadToken); - $('input[name="include_header"]').val($('#include-header-checkbox').is(':checked') ? 'true': 'false'); + $('input[name="manifest-type"]').val(manifest_type); + + if(file_type !== 'bq') { + $('input[name="include_header"]').val($('#include-header-' + + (file_type === 's5cmd' ? 's5cmd' : 'file') /* parenthesized: '+' binds tighter than '===', which made the selector always 'file-checkbox' */ + + '-checkbox').is(':checked') ? 
'true': 'false'); + } else { + $('input[name="include_header"]').val('false'); + } var select_box_div = $('#file-part-select-box'); var select_box = select_box_div.find('select'); @@ -211,17 +201,11 @@ require([ } }; - // The Cohort Details page button (single export at a time) - $('#export-manifest').on('click',function(){ - $('#export-manifest-name').val(cohort_id + "_" + cohort_name.replaceAll(" ","_")+$('#export-manifest-name').data('name-base')); - update_download_manifest_buttons(); - }); - $('.column-checkbox').change(function() { update_download_manifest_buttons($(this)); }); - $("#export-manifest-name").change(function(){ + $('.manifest-file-name').find('input.form-control').change(function(){ update_download_manifest_buttons(); }); @@ -233,8 +217,7 @@ require([ var input_cohort_name_len = $('#export-manifest-name').val().length; if (input_cohort_name_len == 0 || num_selected_column == 0 || (is_list && checked_cohorts == 0)) { - $('.download-file').attr('disabled', 'disabled'); - $('#get-bq-table').attr('disabled', 'disabled'); + $('.get-manifest').attr('disabled', 'disabled'); } else { if( is_list && checked_cohorts > 1 ) { $('.download-file,.file-manifest').attr('disabled', 'disabled'); @@ -295,10 +278,6 @@ require([ } }; - $('#get-bq-table').on('click',function(){ - download_manifest('',$(this)); - }); - // The Cohort list page export button (a set of cohorts) $('#export-manifest-set').on('click',function(){ var cohort_ids = $('input[name="id"]:checked').map(function () { @@ -312,7 +291,7 @@ require([ $('input[name="ids"]').val(cohort_ids.join(",")) - $('#export-manifest-name').val("cohorts_"+cohort_ids.join("_")+$('#export-manifest-name').data('name-base')); + $('.manifest-name').find('input.form-control').val("cohorts_"+cohort_ids.join("_")+$('#export-manifest-name').data('name-base')); update_download_manifest_buttons(); }); diff --git a/templates/cohorts/export-manifest-modal.html b/templates/cohorts/export-manifest-modal.html index 
753a4f95d..fee9dff50 100644 --- a/templates/cohorts/export-manifest-modal.html +++ b/templates/cohorts/export-manifest-modal.html @@ -33,143 +33,249 @@