-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun-workflow.sh
executable file
·143 lines (116 loc) · 4.05 KB
/
run-workflow.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
# run-workflow.sh takes exactly six positional arguments (listed in the usage
# text below). With any other argument count, print the usage text and stop.
# The text goes to stderr and the exit status is non-zero so that a calling
# script can tell a bad invocation apart from a run that completed.
if [ "$#" -ne 6 ]; then
  {
    echo "You must enter exactly 6 command line arguments"
    echo "Usage: ./run-workflow.sh [env folder location] [dataset size] [dataset] [driver] [workflow] [result destination]"
    echo "Example: ./run-workflow.sh ../env 1M movies monetdb 8862a0ca-295b-42b6-acb6-840a53128b62_movies_3_workflow_fixed results/test"
  } >&2
  exit 2
fi
# Uncomment the next line to trace every command while debugging.
#set -x

# Positional arguments, in the order they are given on the command line:
#   $1  folder of the virtual environment to use
#   $2  dataset size
#   $3  dataset to test
#   $4  database driver to test
#   $5  workflow to test
#   $6  where to move the result to
ENVIR_FOLDER="$1"
DATASET_SIZE="$2"
DATASET="$3"
DRIVER="$4"
WORKFLOW="$5"
RESULT_DESTINATION="$6"

# Folder where the DuckDB files are located.
DUCKDB_INSTALL_FOLDER='duckdb_install'

# A file named "stop_scripts" in the working directory asks the script to
# stop; honour it before doing any work.
if [[ -f stop_scripts ]]; then
  echo "stopping execution of run-workflow.sh"
  exit 0
fi
# Remove JSON results left behind by an earlier run. -f keeps rm quiet when
# the glob matches nothing (for example when results/ holds no JSON yet).
echo "clearing the results folder"
rm -f -- results/*.json
# Activate the virtual environment. The path is quoted so that a folder name
# containing spaces or glob characters is passed to `source` intact.
echo "activating environment"
source "${ENVIR_FOLDER}/bin/activate"
# Print the number of entries in results/ whose name ends in ".json".
# A glob is used instead of `ls results | grep ".*.json"`: that pattern is an
# unanchored regex, so it also counted names such as "foo.json.tmp" or "xjson".
count_result_files() {
  local entry total=0
  for entry in results/*.json; do
    # an unmatched glob is left as the literal pattern, so confirm it exists
    if [ -e "$entry" ]; then
      total=$((total + 1))
    fi
  done
  echo "$total"
}

# worst case, it will attempt to run the current workflow 20 times
for ATTEMPT_ID in {1..20}; do
  if [ -f "stop_scripts" ]; then
    echo "stopping execution of run-workflow.sh"
    exit 0
  fi
  echo "attempt ${ATTEMPT_ID}"

  # stop, then start, the database so each attempt begins with a restart
  echo "stopping database ${DRIVER}"
  "./setup/${DRIVER}/stop-database.sh"
  echo "starting database ${DRIVER}"
  "./setup/${DRIVER}/start-database.sh"

  # duckdb will end up in an inconsistent state if we continuously
  # reuse the same database. to play it safe with duckdb,
  # just refresh every time. Slow, but dependable.
  # UPDATE 10/05/2019: we really need this!!! do not comment out!
  if [ "${DRIVER}" = "duckdb" ] && [ "${ATTEMPT_ID}" -gt 1 ]; then
    echo "pip uninstall -y duckdb"
    pip uninstall -y duckdb
    echo "pip install --find-links $DUCKDB_INSTALL_FOLDER duckdb"
    pip install --find-links "$DUCKDB_INSTALL_FOLDER" duckdb
    # -f: either file may be missing, which is fine since the goal is removal
    echo "rm crossfilter-eval-db.duckdb.wal"
    rm -f crossfilter-eval-db.duckdb.wal
    echo "rm crossfilter-eval-db.duckdb"
    rm -f crossfilter-eval-db.duckdb
    echo "setup/${DRIVER}/./load_${DATASET_SIZE}.sh"
    "setup/${DRIVER}/./load_${DATASET_SIZE}.sh"
  fi

  # run IDEBench in the background and remember its pid
  echo "python idebench.py --settings-dataset $DATASET --settings-size 1GB --driver-name $DRIVER --run --settings-workflow $WORKFLOW &"
  python idebench.py --settings-dataset "$DATASET" --settings-size 1GB --driver-name "$DRIVER" --run --settings-workflow "$WORKFLOW" &
  pid=$!

  # poll until the benchmark exits, produces results, or a control file
  # ("stop_scripts" / "restart_job") appears in the working directory
  while true; do
    if [ -f "stop_scripts" ]; then
      echo "stopping execution of run-workflow.sh"
      kill "$pid"
      exit 0
    fi
    echo "checking status of job $pid"
    # kill -0 sends no signal; it only reports whether this exact pid still
    # exists (a text match on the job list would let 123 match 1234)
    if kill -0 "$pid" 2>/dev/null; then
      STILL_RUNNING=1
    else
      STILL_RUNNING=0
    fi
    # check the number of results produced
    TOTAL_RESULTS=$(count_result_files)
    if [ -f "restart_job" ]; then
      echo "restarting execution of run-workflow.sh"
      # abandon this attempt: stop the benchmark so it does not keep running
      # alongside the next attempt
      if [ "$STILL_RUNNING" -ne 0 ]; then
        kill "$pid"
      fi
      # pretend the job ended without results so the attempt is retried
      # NOTE(review): JSON files already in results/ are not removed here and
      # will be seen by the next attempt -- confirm that is intended
      STILL_RUNNING=0
      TOTAL_RESULTS=0
    fi
    echo "status of ${pid}: still running? ${STILL_RUNNING}. total results? ${TOTAL_RESULTS}"
    if [ "$STILL_RUNNING" -eq 0 ]; then
      # it's done already, move on
      break
    elif [ "$TOTAL_RESULTS" -gt 0 ]; then
      # to be sure, give it time to finish gracefully, then try to kill it
      sleep 10
      kill "$pid"
      break
    else
      # nothing yet: wait 10 seconds and poll again
      sleep 10
    fi
  done

  if [ "$TOTAL_RESULTS" -gt 0 ]; then
    echo "found results for attempt ${ATTEMPT_ID}. Continuing on."
    break
  else
    echo "no results found for attempt ${ATTEMPT_ID}. Waiting 10 seconds, then trying again..."
    # clear a pending restart request; -f because the file usually is absent
    rm -f restart_job
    sleep 10 # wait for a bit, then try again
  fi
done
# Move the JSON results to the requested destination. The destination is
# quoted so a path with spaces is passed as a single argument, and mv is
# skipped when no attempt produced a result (the glob would otherwise be
# handed to mv as the literal string "results/*.json").
echo "mv results/*.json ${RESULT_DESTINATION}"
result_files=(results/*.json)
if [ -e "${result_files[0]}" ]; then
  mv -- "${result_files[@]}" "${RESULT_DESTINATION}"
else
  echo "no results/*.json files to move" >&2
fi
# deactivate the environment activated earlier in the script
echo "deactivating environment"
deactivate