
updated to spark 3.0.0
michaelkamprath committed Jun 23, 2020
1 parent 3b4e44c commit 069946f
Showing 4 changed files with 11 additions and 4 deletions.
spark-qfs-swarm/jupyter-server/start-jupyter.sh (2 changes: 1 addition & 1 deletion)
@@ -4,4 +4,4 @@ SHELL=/bin/bash \
 XDG_RUNTIME_DIR=/home/spark/jupyter/runtime \
 PYSPARK_DRIVER_PYTHON=jupyter \
 PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=7777 --notebook-dir=/home/spark/jupyter/notebooks --ip=0.0.0.0 --NotebookApp.password='' --NotebookApp.token=''" \
-$SPARK_HOME/bin/pyspark --packages graphframes:graphframes:0.8.0-spark2.4-s_2.11 --master spark://spark-master:7077
+$SPARK_HOME/bin/pyspark --packages graphframes:graphframes:0.8.0-spark3.0-s_2.12 --master spark://spark-master:7077
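The graphframes coordinate has to track both the Spark line (spark3.0) and the Scala build (s_2.12) of the rebuilt image. A minimal smoke test, assuming the Jupyter/pyspark session started by this script and only the standard GraphFrame API (the sample data below is made up for illustration):

    from graphframes import GraphFrame

    # Tiny vertex/edge DataFrames; `spark` is the session pyspark provides.
    vertices = spark.createDataFrame([("a", "Alice"), ("b", "Bob")], ["id", "name"])
    edges = spark.createDataFrame([("a", "b", "follows")], ["src", "dst", "relationship"])

    g = GraphFrame(vertices, edges)
    g.inDegrees.show()   # fails here (or at import) if the spark3.0/s_2.12 artifact did not resolve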
spark-qfs-swarm/qfs-master/qfs-conf/Metaserver.prp (4 changes: 4 additions & 0 deletions)
@@ -9,3 +9,7 @@ chunkServer.msgLogWriter.logLevel = NOTICE
 metaServer.rootDirMode = 0777
 metaServer.rootDirGroup = 1000
 metaServer.rootDirUser = 1000
+
+metaServer.rebalancingEnabled = 1
+metaServer.maxRebalanceSpaceUtilThreshold = 0.50
+metaServer.minRebalanceSpaceUtilThreshold = 0.45
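As I read the QFS metaserver settings, rebalancing moves chunks off chunk servers whose space utilization is above metaServer.maxRebalanceSpaceUtilThreshold and onto servers below metaServer.minRebalanceSpaceUtilThreshold, so the values above aim to keep every chunk server in roughly the 45-50% utilization band. A plain-Python illustration of that reading (not QFS code):

    # Illustration only: how the two thresholds partition chunk servers into
    # rebalance sources and targets, per the reading described above.
    MAX_UTIL = 0.50   # metaServer.maxRebalanceSpaceUtilThreshold
    MIN_UTIL = 0.45   # metaServer.minRebalanceSpaceUtilThreshold

    def rebalance_role(space_utilization: float) -> str:
        if space_utilization > MAX_UTIL:
            return "source"    # chunks are moved off this server
        if space_utilization < MIN_UTIL:
            return "target"    # this server can receive moved chunks
        return "balanced"      # inside the 0.45-0.50 band, left alone

    for util in (0.30, 0.47, 0.80):
        print(f"{util:.2f} -> {rebalance_role(util)}")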
spark-qfs-swarm/worker-node/Dockerfile (4 changes: 2 additions & 2 deletions)
@@ -16,10 +16,10 @@ MAINTAINER Michael Kamprath "https://github.com/michaelkamprath"
 #
 
 ARG QFS_VERSION=2.2.0
-ARG SPARK_VERSION=2.4.6
+ARG SPARK_VERSION=3.0.0
 ARG HADOOP_MINOR_VERSION=2.7
 ARG HADOOP_VERSION=2.7.2
-ARG SCALA_VERSION=2.11.12
+ARG SCALA_VERSION=2.12.11
 
 RUN apt-get update \
 && apt-get install -y locales \
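A quick way to confirm the version bumps took effect after rebuilding the image is to check them from a live pyspark session. A short sketch using standard PySpark plus one py4j call that is assumed to resolve as written:

    # Spark version reported by the running session; expect 3.0.0 after the rebuild.
    print(spark.version)

    # The Scala build matters because the graphframes artifact is suffixed with it
    # (s_2.12). Assumption: scala.util.Properties is reachable through py4j like this.
    print(spark.sparkContext._jvm.scala.util.Properties.versionString())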
spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf (5 changes: 4 additions & 1 deletion)
@@ -1,6 +1,7 @@
 # performance optimizations
 spark.serializer org.apache.spark.serializer.KryoSerializer
-spark.default.parallelism 100
+spark.default.parallelism 200
+spark.sql.shuffle.partitions 400
 
 # worker node / executor set up
 # expecting a worker with 12 cores and 56g of memory
@@ -13,6 +14,8 @@ spark.driver.cores 2
 
 # operational configurations
 spark.logConf true
+spark.worker.cleanup.enabled true
+spark.ui.reverseProxy true
 
 # This setting is to tell the class loaders in Spark that they
 # only need to load the QFS access libraries once
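spark.default.parallelism governs RDD-level operations, while spark.sql.shuffle.partitions controls how many partitions DataFrame/SQL shuffles produce; both defaults can be checked or overridden per session with the standard configuration APIs. A minimal sketch from a pyspark session running against this configuration:

    # Read the defaults picked up from spark-defaults.conf.
    print(spark.conf.get("spark.sql.shuffle.partitions"))   # expect "400"
    print(spark.sparkContext.defaultParallelism)             # driven by spark.default.parallelism

    # The SQL shuffle partition count can still be tuned per session
    # without editing spark-defaults.conf.
    spark.conf.set("spark.sql.shuffle.partitions", "200")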
