Skip to content

Commit db4b481

Browse files
committed
-> s5cmd manifest option
-> Prep to add hierarchical URLs
1 parent dbd4d48 commit db4b481

File tree

6 files changed

+320
-210
lines changed

6 files changed

+320
-210
lines changed

idc/settings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,10 @@
103103
PAIRWISE_SERVICE_URL = os.environ.get('PAIRWISE_SERVICE_URL', None)
104104

105105
# Data Buckets
106-
GCLOUD_BUCKET = os.environ.get('GOOGLE_STORAGE_BUCKET')
106+
GCLOUD_BUCKET = os.environ.get('GOOGLE_STORAGE_BUCKET', 'FAKE_BUCKET')
107+
AWS_BUCKET = os.environ.get('AWS_BUCKET', 'FAKE_BUCKET')
107108

108-
DCF_GUID_SUFFIX = os.environ.get('DCF_GUID_SUFFIX','')
109+
DCF_GUID_SUFFIX = os.environ.get('DCF_GUID_SUFFIX', '')
109110

110111
# BigQuery cohort storage settings
111112
BIGQUERY_COHORT_DATASET_ID = os.environ.get('BIGQUERY_COHORT_DATASET_ID', 'cohort_dataset')

shell/backup-solr.sh

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#!/bin/bash
2+
set -euo pipefail
3+
24
CORE_LIST_FILE=""
35
CORE_LIST=""
46
FILE_NAME=""
@@ -7,7 +9,7 @@ MAX_WAIT=3
79
SOLR_DATA="/opt/bitnami/solr/server/solr"
810
RUN=`date +%s`
911
BACKUPS_DIR="${SOLR_DATA}/backups_${RUN}"
10-
PARSE_RESPONSE="import sys, json; print(json.load(sys.stdin)['status'])"
12+
PARSE_RESPONSE="import sys, json; print(json.load(sys.stdin)['details']['backup'].get('snapshotCompletedAt',None) or 'INCOMPLETE')"
1113

1214
while getopts ":c:l:f:d:h" flag
1315
do
@@ -57,7 +59,7 @@ fi
5759
cores=[]
5860

5961
if [[ $FILE_NAME == "" ]]; then
60-
FILE_NAME="solr_cores_backup_${RUN}.tar"
62+
FILE_NAME="solr_cores_backup_${RUN}.tar.gz"
6163
fi
6264
echo "[STATUS] Backups will be tar'd to ${FILE_NAME}"
6365

@@ -72,39 +74,51 @@ echo "[STATUS] Building backup script: "
7274

7375
for core in "${cores[@]}"; do
7476
if [[ $core != "" ]]; then
75-
echo "-----> Backup for core ${core} <-----"
77+
echo "----------------> Backup for core ${core} <-----------------"
7678
echo "Copying schema for ${core}..."
77-
sudo -u solr cp ${SOLR_DATA}/${core}/conf/managed-schema ${BACKUPS_DIR}/$core.managed-schema
78-
echo "Backup command for ${core}:"
79-
echo "curl -u ${SOLR_USER}:${SOLR_PWD} -X GET \"https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=${core}\" --cacert solr-ssl.pem"
80-
echo "Status command for ${core}":
81-
echo "curl -u ${SOLR_USER}:${SOLR_PWD} -X GET \"https://localhost:8983/solr/${core}/replication?command=details\" --cacert solr-ssl.pem"
82-
# curl -u $SOLR_USER:$SOLR_PWD -X GET "https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=$core" --cacert solr-ssl.pem
83-
# status=`curl -u $SOLR_USER:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
84-
# retries=0
85-
# while [[ "$status" != "OK" && "$retries" -lt "$MAX_WAIT" ]]; do
86-
# echo "Backup for core ${core} isn't completed, waiting..."
87-
# sleep 2
88-
# ((retries++))
89-
# status=`curl -u $SOLR_USER:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
90-
# done
91-
# if [ "$status" == "OK" ]; then
92-
# echo "Core ${core} backup completed."
93-
# else
94-
# echo "Core ${core} backup is still pending."
95-
# echo "You may need to re-run TAR on the snapshots from ${BACKUPS_DIR} if you see an error message about files changing during the TAR process."
96-
# fi
97-
echo "-------------> Done <-------------"
79+
sudo -u solr cp ${SOLR_DATA}/${core}/conf/managed-schema.xml ${BACKUPS_DIR}/$core.managed-schema.xml
80+
echo "Executing backup command for ${core}:"
81+
curl -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/$core/replication?command=backup&location=${BACKUPS_DIR}/&name=${core}" --cacert solr-ssl.pem
82+
curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem
83+
status=`curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
84+
retries=0
85+
while [[ "$status" == "INCOMPLETE" && "$retries" -lt "$MAX_WAIT" ]]; do
86+
echo "Backup for core ${core} isn't completed, waiting..."
87+
sleep 2
88+
# Use an arithmetic assignment rather than ((retries++)): the post-increment
# evaluates to 0 on the first pass, so (( )) returns status 1 and the script
# would abort under `set -e` before a single retry is attempted.
retries=$((retries + 1))
89+
status=`curl -s -u ${SOLR_USER}:${SOLR_PWD} -X GET "https://localhost:8983/solr/${core}/replication?command=details" --cacert solr-ssl.pem | python3 -c "${PARSE_RESPONSE}"`
90+
done
91+
if [ "$status" != "INCOMPLETE" ]; then
92+
echo "Core ${core} backup completed at ${status}."
93+
else
94+
echo "Core ${core} backup is still pending."
95+
echo "You may need to re-run TAR on the snapshots from ${BACKUPS_DIR} if you see an error message about files changing during the TAR process."
96+
fi
97+
echo "----------------> /Backup for core ${core} <-----------------"
9898
fi
9999
done
100100

101-
echo "Tar command: "
102-
echo "tar -cvf ${FILE_NAME} -C ${BACKUPS_DIR} ."
101+
if [ $? -ne 0 ]; then
102+
echo "There was a problem backing up some of the cores! Exiting."
103+
exit 1
104+
fi
105+
106+
echo ""
107+
echo -n "Allowing extra time for all writes to the backup directory to complete"
108+
for k in `seq 1 3`; do
109+
echo -n "."
110+
sleep 1
111+
done
112+
echo ".done."
113+
114+
echo "Taring contents of ${BACKUPS_DIR}..."
115+
tar -cvzf ${FILE_NAME} -C ${BACKUPS_DIR} .
116+
103117
#
104-
#if [[ ! -z ${DEST_BUCKET} ]]; then
105-
# gsutil cp ${FILE_NAME} gs://${DEST_BUCKET}/
106-
#else
107-
# echo "[STATUS] Backups stored in tarfile ${FILE_NAME}."
108-
#fi
118+
if [[ ! -z ${DEST_BUCKET} ]]; then
119+
gsutil cp ${FILE_NAME} gs://${DEST_BUCKET}/
120+
else
121+
echo "[STATUS] Backups stored in tarfile ${FILE_NAME}."
122+
fi
109123

110124
exit 0

shell/database-setup.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@ echo "Increase group_concat max, for longer data type names"
3030
mysql -u$MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "SET GLOBAL group_concat_max_len=18446744073709547520;"
3131
3232
echo "Creating database users..."
33-
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER '${DATABASE_USER}'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
34-
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER '${DATABASE_USER}'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
35-
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'api-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
36-
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'api-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
37-
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'dev-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
38-
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER 'dev-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
33+
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS '${DATABASE_USER}'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
34+
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS '${DATABASE_USER}'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
35+
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'api-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
36+
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'api-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
37+
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'dev-user'@'%' IDENTIFIED BY '${DATABASE_PASSWORD}';"
38+
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "CREATE USER IF NOT EXISTS 'dev-user'@'localhost' IDENTIFIED BY '${DATABASE_PASSWORD}';"
3939
4040
echo "Granting permissions to database users..."
4141
mysql -u $MYSQL_ROOT_USER -h $MYSQL_DB_HOST -p$MYSQL_ROOT_PASSWORD -e "GRANT ALL PRIVILEGES ON *.* TO '${DATABASE_USER}'@'%';"

static/css/style.css

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4085,6 +4085,15 @@ html {
40854085
display: inline-block;
40864086
}
40874087

4088+
.manifest-name {
4089+
padding: 5px 0px 10px 0px;
4090+
}
4091+
4092+
.bq-table-name p {
4093+
font-style: italic;
4094+
color: #1a344c;
4095+
}
4096+
40884097
.bq-disabled {
40894098
color: darkred;
40904099
font-size: 15px;

static/js/cohorts/export-manifest.js

Lines changed: 22 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -76,31 +76,16 @@ require([
7676
download_manifest("json", $(this), e)
7777
});
7878

79-
$('.export-option input').on('click', function(){
80-
let export_option = $(this).attr("value")
81-
update_export_option(export_option);
82-
if(export_option == 'bq-manifest') {
83-
$('.file-name').hide();
84-
$('.table-name').show();
85-
$('.bq-only').show();
86-
} else {
87-
$('.file-name').show();
88-
$('.table-name').hide();
89-
$('.bq-only').hide();
90-
}
79+
$('#download-s5cmd').on('click', function(e) {
80+
download_manifest("s5cmd", $(this), e)
9181
});
9282

93-
var update_export_option = function(export_option) {
94-
$('#bq-manifest').hide();
95-
$('#file-manifest').hide();
96-
$('#' + export_option).show();
97-
export_option !== 'bq-manifest' ? $('.bq-only').hide() : $('.bq-only').show();
98-
};
99-
100-
update_export_option("file-manifest");
83+
$('#get-bq-table').on('click',function(e){
84+
download_manifest('bq',$(this), e);
85+
});
10186

10287
var download_manifest = function(file_type, clicked_button, e) {
103-
let manifest_type = $('input[name="manifest-type"]:checked').val();
88+
let manifest_type = file_type === 'bq' ? 'bq-manifest' : 'file-manifest';
10489

10590
$('#unallowed-chars-alert').hide();
10691
$('#name-too-long-alert-modal').hide();
@@ -133,10 +118,7 @@ require([
133118
return false;
134119
}
135120

136-
$('#download-csv').attr('disabled','disabled');
137-
$('#download-tsv').attr('disabled','disabled');
138-
$('#download-json').attr('disabled','disabled');
139-
$('#get-bq-table').attr('disabled','disabled');
121+
$('.get-manifest').attr('disabled','disabled');
140122

141123
$('#manifest-in-progress').modal('show');
142124

@@ -148,17 +130,17 @@ require([
148130
}
149131

150132
var checked_fields = [];
151-
$('.field-checkbox').each(function() {
133+
clicked_button.parents('.tab-pane.manifest').find('.field-checkbox').each(function() {
152134
var cb = $(this)[0];
153135
if (cb.checked) {
154136
checked_fields.push(cb.value);
155137
}
156138
});
157139

158140
var checked_columns = [];
159-
$('.column-checkbox').each(function() {
141+
clicked_button.parents('.tab-pane.manifest').find('.column-checkbox').each(function() {
160142
var cb = $(this)[0];
161-
if (cb.checked && (manifest_type !== 'file-manifest' || ! $(this).hasClass('bq-only'))) {
143+
if (cb.checked) {
162144
checked_columns.push(cb.value);
163145
}
164146
});
@@ -167,7 +149,15 @@ require([
167149
$('input[name="header_fields"]').val(JSON.stringify(checked_fields));
168150
$('input[name="columns"]').val(JSON.stringify(checked_columns));
169151
$('input[name="downloadToken"]').val(downloadToken);
170-
$('input[name="include_header"]').val($('#include-header-checkbox').is(':checked') ? 'true': 'false');
152+
$('input[name="manifest-type"]').val(manifest_type);
153+
154+
if(file_type !== 'bq') {
155+
$('input[name="include_header"]').val($('#include-header-'
156+
// The conditional must be parenthesized: `+` binds tighter than `===`, and
// `===` tighter than `?:`, so without the parentheses the selector collapses
// to 'file-checkbox' and the header checkbox is never actually consulted.
+ (file_type === 's5cmd' ? 's5cmd' : 'file')
157+
+ '-checkbox').is(':checked') ? 'true': 'false');
158+
} else {
159+
$('input[name="include_header"]').val('false');
160+
}
171161

172162
var select_box_div = $('#file-part-select-box');
173163
var select_box = select_box_div.find('select');
@@ -211,17 +201,11 @@ require([
211201
}
212202
};
213203

214-
// The Cohort Details page button (single export at a time)
215-
$('#export-manifest').on('click',function(){
216-
$('#export-manifest-name').val(cohort_id + "_" + cohort_name.replaceAll(" ","_")+$('#export-manifest-name').data('name-base'));
217-
update_download_manifest_buttons();
218-
});
219-
220204
$('.column-checkbox').change(function() {
221205
update_download_manifest_buttons($(this));
222206
});
223207

224-
$("#export-manifest-name").change(function(){
208+
$('.manifest-file-name').find('input.form-control').change(function(){
225209
update_download_manifest_buttons();
226210
});
227211

@@ -233,8 +217,7 @@ require([
233217
var input_cohort_name_len = $('#export-manifest-name').val().length;
234218

235219
if (input_cohort_name_len == 0 || num_selected_column == 0 || (is_list && checked_cohorts == 0)) {
236-
$('.download-file').attr('disabled', 'disabled');
237-
$('#get-bq-table').attr('disabled', 'disabled');
220+
$('.get-manifest').attr('disabled', 'disabled');
238221
} else {
239222
if( is_list && checked_cohorts > 1 ) {
240223
$('.download-file,.file-manifest').attr('disabled', 'disabled');
@@ -295,10 +278,6 @@ require([
295278
}
296279
};
297280

298-
$('#get-bq-table').on('click',function(){
299-
download_manifest('',$(this));
300-
});
301-
302281
// The Cohort list page export button (a set of cohorts)
303282
$('#export-manifest-set').on('click',function(){
304283
var cohort_ids = $('input[name="id"]:checked').map(function () {
@@ -312,7 +291,7 @@ require([
312291

313292
$('input[name="ids"]').val(cohort_ids.join(","))
314293

315-
$('#export-manifest-name').val("cohorts_"+cohort_ids.join("_")+$('#export-manifest-name').data('name-base'));
294+
$('.manifest-name').find('input.form-control').val("cohorts_"+cohort_ids.join("_")+$('#export-manifest-name').data('name-base'));
316295
update_download_manifest_buttons();
317296
});
318297

0 commit comments

Comments
 (0)