-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathrun_single.sh
More file actions
executable file
·122 lines (98 loc) · 6.76 KB
/
run_single.sh
File metadata and controls
executable file
·122 lines (98 loc) · 6.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/bin/bash
#
# Submits com.databricks.spark.sql.perf.RunTPCBenchmark (TPC-DS) through
# spark-submit for the requested queries and tees the run output to a log file.
#
# Positional arguments (six required, seventh optional):
#   $1 query names, comma separated (or a file whose name ends in ".run")
#   $2 iterations
#   $3 num-executors
#   $4 executor-cores
#   $5 executor-memory
#   $6 database name
#   $7 profiling switch: a value containing "n" enables nmon, a value
#      containing "o" enables operf (e.g. -n, -o, -no)
#
# Environment: WORKDIR must be set; the remaining settings are presumably
# provided by the sourced run.config -- verify against that file.

# Reject anything other than six or seven arguments. A usage error is a
# failure, so report it on stderr and exit non-zero.
if (( $# < 6 || $# > 7 )); then
  echo "Usage: $0 <query names comma sep> <iterations> <num-executors> <executor-cores> <executor-memory> <db_name> -o|-n|-no" >&2
  exit 1
fi
# WORKDIR locates the tpcds-setup tree; abort with a message when it is unset
# or empty. The ':' no-op only triggers the expansion check -- the value itself
# must not be executed as a command.
: "${WORKDIR:?Need to set WORKDIR env}"

# Load the run configuration. Without it none of the settings used below are
# defined, so a missing file is fatal rather than just a warning.
RUNCONF="${WORKDIR}/tpcds-setup/conf/run.config"
if [[ ! -f "${RUNCONF}" ]]; then
  echo "File : ${RUNCONF} not found!" >&2
  exit 1
fi
# shellcheck disable=SC1090  # path is only known at run time
. "${RUNCONF}"
# Capture the positional arguments under descriptive names.
query_name=$1
iteration=$2
num_executors=$3
executor_cores=$4
executor_memory=$5
databaseName=$6
enableOperf=$7

# Tunables taken from the environment / sourced configuration.
executor_memoryOverhead=$EXEC_MEM_OVERHEAD
# SHPART, when set and non-empty, overrides SHUFFLE_PARTITIONS.
sql_shuffle_partitions=${SHPART:-$SHUFFLE_PARTITIONS}
echo "sql_shuffle_partitions is set to ${sql_shuffle_partitions}"
gcThreads=$GC_THREADS

# Common prefix for every artifact produced by this run configuration.
PREFIX=${query_name}_single_${ARCH}_${num_executors}e_${executor_cores}c_${executor_memory}

# The sequence number is the count of existing "<PREFIX>_*.nohup" logs, so each
# new run gets the next free index. Count with a glob instead of parsing ls
# output; when nothing matches, the pattern stays literal and fails the
# existence test.
existing_logs=( "${LOG_DIR}/${PREFIX}"_*.nohup )
if [[ -e "${existing_logs[0]}" || -L "${existing_logs[0]}" ]]; then
  CNT=${#existing_logs[@]}
else
  CNT=0
fi
SEQ=$CNT
# For every non-comment line of the Hadoop slaves file, ssh to that host, flush
# dirty pages and write 3 to /proc/sys/vm/drop_caches.
# -I {} replaces the deprecated -i spelling and runs one ssh per input line.
grep -v '^#' "${HADOOP_HOME}/etc/hadoop/slaves" \
  | xargs -I {} ssh {} "sync && echo 3 | sudo tee /proc/sys/vm/drop_caches"
# Start nmon when the profiling switch contains "n". The directory is quoted so
# a path with whitespace reaches startnmon.sh as a single argument.
if [[ "${enableOperf}" == *"n"* ]]; then
  CUR_NMON_DIR="${LOG_DIR}/${PREFIX}_${SEQ}_nmon_logs"
  echo "Starting nmon and logs will be placed under ${CUR_NMON_DIR}"
  startnmon.sh "${CUR_NMON_DIR}"
fi
# Configure operf profiling when the profiling switch contains "o"; otherwise
# leave the extra spark-submit options empty.
# Sets: executor_extraJavaOptions, extraOptions (and exports OPERFLIB).
# NOTE(review): in the original file nmon is started before this point, and the
# early exits below do not stop it -- confirm whether that needs cleanup.
if [[ "${enableOperf}" == *"o"* ]]; then
  if ! command -v operf >/dev/null 2>&1; then
    echo "operf is not installed. Exiting." >&2
    exit 255
  fi

  export OPERFLIB="${WORKDIR}/oprofile/oprofile_install/lib"
  if [[ ! -d "${OPERFLIB}" ]]; then
    echo "OPERFLIB is not set properly" >&2
    echo "check OPERFLIB value in this script and continue." >&2
    exit 255
  fi

  oprofile_start.sh
  # JVMTI agent for the executors, plus library path and agent for the driver.
  executor_extraJavaOptions="-agentpath:${OPERFLIB}/oprofile/libjvmti_oprofile.so"
  extraOptions="--conf spark.executor.extraLibraryPath=${OPERFLIB} --driver-library-path ${OPERFLIB} --driver-java-options -agentpath:${OPERFLIB}/oprofile/libjvmti_oprofile.so"
else
  executor_extraJavaOptions=""
  extraOptions=""
fi
# Build the "-f <queries>" argument. When the first argument names a ".run"
# file, its lines are joined with commas to form the query list; otherwise the
# argument is used as given.
if [[ "${query_name}" == *".run" ]]; then
  # paste -s joins all lines of the file into one, -d, uses a comma separator.
  query_list=$(paste -s -d, -- "${query_name}")
  queryinput="-f ${query_list}"
else
  queryinput="-f ${query_name}"
fi
# Run the benchmark. Combined stdout/stderr is shown on the terminal and also
# written to run_log through tee. The announced path now includes the "/"
# separator so it matches the file that is actually written.
run_log="${LOG_DIR}/${PREFIX}_${SEQ}.nohup"
echo "Execution logs will be placed under : ${run_log} "
# Legacy spark-sql invocation, kept for reference:
# /usr/bin/time -v ${SPARK_HOME}/bin/spark-sql --master yarn-client --conf spark.kryo.referenceTracking=true --conf spark.shuffle.io.numConnectionsPerPeer=4 --conf spark.reducer.maxSizeInFlight=128m --conf spark.executor.extraJavaOptions="-Diop.version=4.1.0.0 -XX:ParallelGCThreads=${gcThreads} -XX:+AlwaysTenure" --conf spark.sql.shuffle.partitions=${sql_shuffle_partitions} --conf spark.yarn.driver.memoryOverhead=400 --conf spark.yarn.executor.memoryOverhead=${executor_memoryOverhead} --conf spark.shuffle.consolidateFiles=true --conf spark.reducer.maxSizeInFlight=128m --conf spark.sql.autoBroadcastJoinThreshold=67108864 --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --name ${query_name} --database ${databaseName} --driver-memory 12g --driver-cores 16 --num-executors ${num_executors} --executor-cores ${executor_cores} --executor-memory ${executor_memory} -f ${QUERIES_DIR}/${query_name}.sql > ${LOG_DIR}/${PREFIX}_${SEQ}.nohup 2>&1
#
# extraOptions and queryinput each carry several space-separated arguments, so
# they are intentionally left unquoted to be word-split; every single-valued
# expansion is quoted.
# NOTE: unless pipefail is enabled elsewhere, the status of this pipeline is
# tee's, not spark-submit's.
# shellcheck disable=SC2086
"${SPARK_HOME}/bin/spark-submit" \
  --class com.databricks.spark.sql.perf.RunTPCBenchmark \
  --conf spark.kryo.referenceTracking=true \
  --conf spark.kryoserializer.buffer.max=256m \
  --conf spark.shuffle.io.numConnectionsPerPeer=4 \
  --conf spark.reducer.maxSizeInFlight=128m \
  --conf spark.executor.extraJavaOptions="-Diop.version=4.1.0.0 -XX:ParallelGCThreads=${gcThreads} -XX:+AlwaysTenure ${executor_extraJavaOptions}" \
  ${extraOptions} \
  --conf "spark.sql.shuffle.partitions=${sql_shuffle_partitions}" \
  --conf spark.yarn.driver.memoryOverhead=400 \
  --conf "spark.yarn.executor.memoryOverhead=${executor_memoryOverhead}" \
  --conf spark.shuffle.consolidateFiles=true \
  --conf spark.sql.autoBroadcastJoinThreshold=67108864 \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --master yarn \
  --deploy-mode client \
  --name "${query_name}" \
  --driver-memory 22g \
  --driver-cores 10 \
  --num-executors "${num_executors}" \
  --executor-cores "${executor_cores}" \
  --executor-memory "${executor_memory}" \
  --verbose \
  "${SQLPERF_JAR}" \
  -b com.databricks.spark.sql.perf.tpcds.TPCDS -i "${iteration}" \
  ${queryinput} \
  -d "${databaseName}" 2>&1 | tee "${run_log}"
echo "Execution logs are placed under : ${run_log} "
# Stop whichever monitors the profiling switch selected: "n" -> nmon,
# "o" -> operf. The nmon directory is quoted so it is passed as one argument.
if [[ "${enableOperf}" == *"n"* ]]; then
  stopnmon.sh "${CUR_NMON_DIR}"
fi
if [[ "${enableOperf}" == *"o"* ]]; then
  oprofile_stop.sh
fi
# Archive the most recently modified "application*" entry from the Spark event
# log directory into LOG_DIR as <name>.tgz.
# The newest entry is found with -nt comparisons instead of parsing a column of
# `ls -l`, which breaks on names with spaces and on differing date layouts.
# The archive step is skipped when the directory is unset or cannot be entered,
# so nothing is ever archived from an unrelated directory.
if [[ -n "${SPARK_EVENT_LOG_PATH}" ]] && cd "${SPARK_EVENT_LOG_PATH}"; then
  newest_log=""
  for candidate in application*; do
    # An unmatched glob stays literal; skip it.
    [[ -e "${candidate}" ]] || continue
    if [[ -z "${newest_log}" || "${candidate}" -nt "${newest_log}" ]]; then
      newest_log=${candidate}
    fi
  done
  if [[ -n "${newest_log}" ]]; then
    tar czf "${LOG_DIR}/${newest_log}.tgz" "${newest_log}"
  fi
  cd -
else
  echo "Cannot enter SPARK_EVENT_LOG_PATH '${SPARK_EVENT_LOG_PATH}'; skipping event log archive." >&2
fi