-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_spark.sh
executable file
·49 lines (41 loc) · 1.06 KB
/
run_spark.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
#
# Submit the WordCountApp class from target/scala-2.11/spark-wc_2.11-1.0.jar
# through spark-submit, passing it an input path and an output directory name.
#
# Usage: ./run_spark.sh [input]
#   input  Optional input path handed to the application. When omitted, the
#          default path configured below is used.
#
# Environment:
#   SPARK_HOME  Spark installation whose bin/spark-submit is invoked.
#
# The output directory name is "wordcount_<YYYYmmdd_HHMMSS>", taken from the
# current time when the script starts.

set -euo pipefail

# At most one argument (the input path) is accepted.
if [[ $# -gt 1 ]]; then
  printf '%s: usage: ./run_spark.sh [input]\n' "$0" >&2
  exit 1
fi

# Fail early with a clear message instead of trying to run "/bin/spark-submit".
: "${SPARK_HOME:?SPARK_HOME must point to a Spark installation}"
printf '%s\n' "$SPARK_HOME"

# Other inputs that have been used with this script:
#   spark_read_me.txt
#   stack/math.stackexchange.com/Posts.xml.gz
#   stack/stackoverflow.stackexchange.com/Comments.xml.gz
default_input=stack/stackoverflow.stackexchange.com/Posts.xml.gz
input1=${1:-$default_input}
output_dir="wordcount_$(date +%Y%m%d_%H%M%S)"

printf 'Reading input from %s\n' "$input1"
printf 'Writing output to %s\n' "$output_dir"

# The jar and its two arguments are kept in an array so that each one reaches
# spark-submit as exactly one word, even if a path contains spaces or glob
# characters.
app=(
  target/scala-2.11/spark-wc_2.11-1.0.jar
  "$input1"
  "$output_dir"
)

# Part of the command line shared by every deployment target.
submit=("$SPARK_HOME/bin/spark-submit" --class WordCountApp)

# Deployment target: 0 = local, 1 = Spark standalone cluster, 2 = YARN cluster.
flag=2

case "$flag" in
  0)
    # Run application locally. The master URL is quoted so the shell does not
    # treat "[*]" as a filename pattern.
    "${submit[@]}" \
      --master 'local[*]' \
      "${app[@]}"
    ;;
  1)
    # Run on a Spark standalone cluster in client deploy mode
    "${submit[@]}" \
      --master spark://vmp741.vampire:7077 \
      "${app[@]}"
    ;;
  2)
    # Run on a YARN cluster
    "${submit[@]}" \
      --master yarn \
      --deploy-mode cluster \
      --num-executors 25 \
      --executor-cores 3 \
      "${app[@]}"
    ;;
  *)
    # An unrecognised target used to fall through silently; report it instead.
    printf '%s: unknown flag value: %s (expected 0, 1 or 2)\n' "$0" "$flag" >&2
    exit 1
    ;;
esac