Skip to content

Commit

Permalink
-> s5cmd manifest option
Browse files Browse the repository at this point in the history
-> Prep to add hierarchical URLs
  • Loading branch information
s-paquette committed Apr 7, 2023
1 parent dbd4d48 commit db4b481
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 210 deletions.
5 changes: 3 additions & 2 deletions idc/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,10 @@
PAIRWISE_SERVICE_URL = os.environ.get('PAIRWISE_SERVICE_URL', None)

# Data Buckets
GCLOUD_BUCKET = os.environ.get('GOOGLE_STORAGE_BUCKET')
GCLOUD_BUCKET = os.environ.get('GOOGLE_STORAGE_BUCKET', 'FAKE_BUCKET')
AWS_BUCKET = os.environ.get('AWS_BUCKET', 'FAKE_BUCKET')

DCF_GUID_SUFFIX = os.environ.get('DCF_GUID_SUFFIX','')
DCF_GUID_SUFFIX = os.environ.get('DCF_GUID_SUFFIX', '')

# BigQuery cohort storage settings
BIGQUERY_COHORT_DATASET_ID = os.environ.get('BIGQUERY_COHORT_DATASET_ID', 'cohort_dataset')
Expand Down
76 changes: 45 additions & 31 deletions shell/backup-solr.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash
set -euo pipefail

CORE_LIST_FILE=""
CORE_LIST=""
FILE_NAME=""
Expand All @@ -7,7 +9,7 @@ MAX_WAIT=3
SOLR_DATA="/opt/bitnami/solr/server/solr"
RUN=`date +%s`
BACKUPS_DIR="${SOLR_DATA}/backups_${RUN}"
PARSE_RESPONSE="import sys, json; print(json.load(sys.stdin)['status'])"
PARSE_RESPONSE="import sys, json; print(json.load(sys.stdin)['details']['backup'].get('snapshotCompletedAt',None) or 'INCOMPLETE')"

while getopts ":c:l:f:d:h" flag
do
Expand Down Expand Up @@ -57,7 +59,7 @@ fi
cores=[]

if [[ $FILE_NAME == "" ]]; then
FILE_NAME="solr_cores_backup_${RUN}.tar"
FILE_NAME="solr_cores_backup_${RUN}.tar.gz"
fi
echo "[STATUS] Backups will be tar'd to ${FILE_NAME}"

Expand All @@ -72,39 +74,51 @@ echo "[STATUS] Building backup script: "

for core in "${cores[@]}"; do
if [[ $core != "" ]]; then
echo "-----> Backup for core ${core} <-----"
echo "----------------> Backup for core ${core} <-----------------"
echo "Copying schema for ${core}..."
sudo -u solr cp ${SOLR_DATA}/${core}/conf/managed-schema ${BACKUPS_DIR}/$core.managed-schema
echo "Backup command for ${core}:"
echo "curl -u ${SOLR_USER}:${SOLR_PWD} -X GET \"https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=${core}\" --cacert solr-ssl.pem"
echo "Status command for ${core}":
echo "curl -u ${SOLR_USER}:${SOLR_PWD} -X GET \"https://localhost:8983/solr/${core}/replication?command=details\" --cacert solr-ssl.pem"
# curl -u $SOLR_USER:$SOLR_PWD -X GET "https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=$core" --cacert solr-ssl.pem
# status=`curl -u $SOLR_USER:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
# retries=0
# while [[ "$status" != "OK" && "$retries" -lt "$MAX_WAIT" ]]; do
# echo "Backup for core ${core} isn't completed, waiting..."
# sleep 2
# ((retries++))
# status=`curl -u $SOLR_USER:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
# done
# if [ "$status" == "OK" ]; then
# echo "Core ${core} backup completed."
# else
# echo "Core ${core} backup is still pending."
# echo "You may need to re-run TAR on the snapshots from ${BACKUPS_DIR} if you see an error message about files changing during the TAR process."
# fi
echo "-------------> Done <-------------"
sudo -u solr cp ${SOLR_DATA}/${core}/conf/managed-schema.xml ${BACKUPS_DIR}/$core.managed-schema.xml
echo "Executing backup command for ${core}:"
curl -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=${core}" --cacert solr-ssl.pem
curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem
status=`curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
# Poll the core's replication details until the backup reports a completion
# timestamp, or until MAX_WAIT polls have been made.
retries=0
while [[ "$status" == "INCOMPLETE" && "$retries" -lt "$MAX_WAIT" ]]; do
    echo "Backup for core ${core} isn't completed, waiting..."
    sleep 2
    # Use an arithmetic assignment rather than ((retries++)): the (( ))
    # command returns status 1 when its expression evaluates to 0, which is
    # the case for the post-increment on the first pass (retries == 0), and
    # under `set -e` that would abort the whole script on the first retry.
    retries=$((retries + 1))
    status=`curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
done
if [ "$status" != "INCOMPLETE" ]; then
echo "Core ${core} backup completed at ${status}."
else
echo "Core ${core} backup is still pending."
echo "You may need to re-run TAR on the snapshots from ${BACKUPS_DIR} if you see an error message about files changing during the TAR process."
fi
echo "----------------> /Backup for core ${core} <-----------------"
fi
done

echo "Tar command: "
echo "tar -cvf ${FILE_NAME} -C ${BACKUPS_DIR} ."
# NOTE(review): $? here is the exit status of whichever command ran
# immediately before this test; it is not an aggregate over the per-core
# backups. With `set -e` enabled at the top of the script, a failing backup
# command would presumably have aborted the script already, so this branch
# looks unreachable -- confirm, and track failures explicitly inside the
# per-core loop if a summary check is wanted.
if [ $? -ne 0 ]; then
echo "There was a problem backing up some of the cores! Exiting."
exit 1
fi

echo ""
echo -n "Allowing extra time for all writes to the backup directory to complete"
for k in `seq 1 3`; do
echo -n "."
sleep 1
done
echo ".done."

echo "Taring contents of ${BACKUPS_DIR}..."
tar -cvzf ${FILE_NAME} -C ${BACKUPS_DIR} .

#
#if [[ ! -z ${DEST_BUCKET} ]]; then
# gsutil cp ${FILE_NAME} gs://${DEST_BUCKET}/
#else
# echo "[STATUS] Backups stored in tarfile ${FILE_NAME}."
#fi
if [[ ! -z ${DEST_BUCKET} ]]; then
gsutil cp ${FILE_NAME} gs://${DEST_BUCKET}/
else
echo "[STATUS] Backups stored in tarfile ${FILE_NAME}."
fi

exit 0
12 changes: 6 additions & 6 deletions shell/database-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ echo "Increase group_concat max, for longer data type names"
mysql -u$MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "SET GLOBAL group_concat_max_len=18446744073709547520;"
echo "Creating database users..."
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER '${DATABASE_USER}'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER '${DATABASE_USER}'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'api-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'api-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'dev-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'dev-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS '${DATABASE_USER}'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS '${DATABASE_USER}'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'api-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'api-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'dev-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'dev-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
echo "Granting permissions to database users..."
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "GRANT ALL PRIVILEGES ON *.* TO '${DATABASE_USER}'@'%';"
Expand Down
9 changes: 9 additions & 0 deletions static/css/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -4085,6 +4085,15 @@ html {
display: inline-block;
}

.manifest-name {
padding: 5px 0px 10px 0px;
}

.bq-table-name p {
font-style: italic;
color: #1a344c;
}

.bq-disabled {
color: darkred;
font-size: 15px;
Expand Down
65 changes: 22 additions & 43 deletions static/js/cohorts/export-manifest.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,31 +76,16 @@ require([
download_manifest("json", $(this), e)
});

$('.export-option input').on('click', function(){
let export_option = $(this).attr("value")
update_export_option(export_option);
if(export_option == 'bq-manifest') {
$('.file-name').hide();
$('.table-name').show();
$('.bq-only').show();
} else {
$('.file-name').show();
$('.table-name').hide();
$('.bq-only').hide();
}
$('#download-s5cmd').on('click', function(e) {
download_manifest("s5cmd", $(this), e)
});

var update_export_option = function(export_option) {
$('#bq-manifest').hide();
$('#file-manifest').hide();
$('#' + export_option).show();
export_option !== 'bq-manifest' ? $('.bq-only').hide() : $('.bq-only').show();
};

update_export_option("file-manifest");
$('#get-bq-table').on('click',function(e){
download_manifest('bq',$(this), e);
});

var download_manifest = function(file_type, clicked_button, e) {
let manifest_type = $('input[name="manifest-type"]:checked').val();
let manifest_type = file_type === 'bq' ? 'bq-manifest' : 'file-manifest';

$('#unallowed-chars-alert').hide();
$('#name-too-long-alert-modal').hide();
Expand Down Expand Up @@ -133,10 +118,7 @@ require([
return false;
}

$('#download-csv').attr('disabled','disabled');
$('#download-tsv').attr('disabled','disabled');
$('#download-json').attr('disabled','disabled');
$('#get-bq-table').attr('disabled','disabled');
$('.get-manifest').attr('disabled','disabled');

$('#manifest-in-progress').modal('show');

Expand All @@ -148,17 +130,17 @@ require([
}

var checked_fields = [];
$('.field-checkbox').each(function() {
clicked_button.parents('.tab-pane.manifest').find('.field-checkbox').each(function() {
var cb = $(this)[0];
if (cb.checked) {
checked_fields.push(cb.value);
}
});

var checked_columns = [];
$('.column-checkbox').each(function() {
clicked_button.parents('.tab-pane.manifest').find('.column-checkbox').each(function() {
var cb = $(this)[0];
if (cb.checked && (manifest_type !== 'file-manifest' || ! $(this).hasClass('bq-only'))) {
if (cb.checked) {
checked_columns.push(cb.value);
}
});
Expand All @@ -167,7 +149,15 @@ require([
$('input[name="header_fields"]').val(JSON.stringify(checked_fields));
$('input[name="columns"]').val(JSON.stringify(checked_columns));
$('input[name="downloadToken"]').val(downloadToken);
$('input[name="include_header"]').val($('#include-header-checkbox').is(':checked') ? 'true': 'false');
$('input[name="manifest-type"]').val(manifest_type);

if (file_type !== 'bq') {
    // Pick the include-header checkbox for this manifest flavor.
    // The ternary must be evaluated before concatenation: '+' binds tighter
    // than '===', which binds tighter than '?:', so an unparenthesized
    // '#include-header-' + file_type === 's5cmd' ? ... compares the
    // concatenated string to 's5cmd' (never equal), yields the selector
    // 'file-checkbox', matches no element, and always submits 'false'.
    // NOTE(review): assumes the template defines #include-header-s5cmd-checkbox
    // and #include-header-file-checkbox -- confirm against the markup.
    let header_variant = (file_type === 's5cmd') ? 's5cmd' : 'file';
    let header_checkbox = $('#include-header-' + header_variant + '-checkbox');
    $('input[name="include_header"]').val(header_checkbox.is(':checked') ? 'true' : 'false');
} else {
    // BigQuery exports never include a header row.
    $('input[name="include_header"]').val('false');
}

var select_box_div = $('#file-part-select-box');
var select_box = select_box_div.find('select');
Expand Down Expand Up @@ -211,17 +201,11 @@ require([
}
};

// The Cohort Details page button (single export at a time)
$('#export-manifest').on('click',function(){
$('#export-manifest-name').val(cohort_id + "_" + cohort_name.replaceAll(" ","_")+$('#export-manifest-name').data('name-base'));
update_download_manifest_buttons();
});

$('.column-checkbox').change(function() {
update_download_manifest_buttons($(this));
});

$("#export-manifest-name").change(function(){
$('.manifest-file-name').find('input.form-control').change(function(){
update_download_manifest_buttons();
});

Expand All @@ -233,8 +217,7 @@ require([
var input_cohort_name_len = $('#export-manifest-name').val().length;

if (input_cohort_name_len == 0 || num_selected_column == 0 || (is_list && checked_cohorts == 0)) {
$('.download-file').attr('disabled', 'disabled');
$('#get-bq-table').attr('disabled', 'disabled');
$('.get-manifest').attr('disabled', 'disabled');
} else {
if( is_list && checked_cohorts > 1 ) {
$('.download-file,.file-manifest').attr('disabled', 'disabled');
Expand Down Expand Up @@ -295,10 +278,6 @@ require([
}
};

$('#get-bq-table').on('click',function(){
download_manifest('',$(this));
});

// The Cohort list page export button (a set of cohorts)
$('#export-manifest-set').on('click',function(){
var cohort_ids = $('input[name="id"]:checked').map(function () {
Expand All @@ -312,7 +291,7 @@ require([

$('input[name="ids"]').val(cohort_ids.join(","))

$('#export-manifest-name').val("cohorts_"+cohort_ids.join("_")+$('#export-manifest-name').data('name-base'));
$('.manifest-name').find('input.form-control').val("cohorts_"+cohort_ids.join("_")+$('#export-manifest-name').data('name-base'));
update_download_manifest_buttons();
});

Expand Down
Loading

0 comments on commit db4b481

Please sign in to comment.