tuned spark's memory usage
michaelkamprath committed Jan 2, 2020
1 parent d021253 commit a62e216
Showing 4 changed files with 8 additions and 7 deletions.
spark-qfs-swarm/deploy-spark-qfs-swarm.yml (1 change: 0 additions & 1 deletion)
@@ -93,7 +93,6 @@ services:
       mode: global
       resources:
         limits:
-          cpus: "8.0"
           memory: 56g
 networks:
   cluster_network:
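Note: dropping the cpus limit presumably lets the worker container use every host core; the 56g memory cap is unchanged and lines up with the SPARK_WORKER_MEMORY value raised below (an inference from the diff, not stated in the commit).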
spark-qfs-swarm/worker-node/Dockerfile (6 changes: 4 additions & 2 deletions)
@@ -63,7 +63,6 @@ RUN curl -sL --retry 3 \
     | tar x -C /usr/ \
     && mv /usr/$QFS_PACKAGE $QFS_HOME \
     && chown -R root:root $QFS_HOME
-COPY ./qfs-conf/* $QFS_HOME/conf/
 ENV PATH $PATH:${QFS_HOME}/bin:${QFS_HOME}/bin/tools
 RUN mkdir -p /data/qfs/ \
     && chown spark -R /data/qfs
@@ -81,14 +80,17 @@ RUN curl -sL --retry 3 \
     | tar x -C /usr/ \
     && mv /usr/$SPARK_PACKAGE $SPARK_HOME \
     && chown -R root:root $SPARK_HOME
-COPY ./spark-conf/* $SPARK_HOME/conf/
 RUN mkdir -p /data/spark \
     && chown spark -R /data/spark
 
 # add python libraries useful in PySpark
 RUN python3 -mpip install matplotlib \
     && pip3 install pandas seaborn
 
+# copy QFS and Spark configurations
+COPY ./qfs-conf/* $QFS_HOME/conf/
+COPY ./spark-conf/* $SPARK_HOME/conf/
+
 # set up command
 COPY start-worker-node.sh /
 USER spark
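Note: consolidating both config COPY steps below the package installs and the pip layer plausibly improves Docker layer caching, since a config-only edit would then invalidate just the final image layers and rebuilds could skip the QFS and Spark downloads (an inference from the diff; the commit message does not say this).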
spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf (6 changes: 3 additions & 3 deletions)
@@ -3,12 +3,12 @@ spark.serializer org.apache.spark.serializer.KryoSerializer
 spark.default.parallelism 100
 
 # worker node / executor set up
-# expecting a worker with 10 cores and 52g of memory
-spark.executor.memory 24g
+# expecting a worker with 10 cores and 56g of memory
+spark.executor.memory 26g
 spark.executor.cores 6
 
 # driver configurations
-spark.driver.memory 8g
+spark.driver.memory 6g
 spark.driver.cores 2
 
 # operational configurations
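Note: one reading of the new numbers (an inference, not stated in the commit): with spark.executor.cores at 6 and SPARK_WORKER_CORES=12, each worker can host up to two executors, and 2 × 26g = 52g leaves 4g of the 56g worker memory as headroom, while the driver drops from 8g to 6g. The "10 cores" in the comment looks like a holdover, since spark-env.sh sets SPARK_WORKER_CORES=12.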
spark-qfs-swarm/worker-node/spark-conf/spark-env.sh (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
 # the total amount of memory a worker (node) can use
-SPARK_WORKER_MEMORY=52g
+SPARK_WORKER_MEMORY=56g
 
 # the total amount of cores a worker (node) can use
 SPARK_WORKER_CORES=12
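Note: the new 56g matches the container memory limit in deploy-spark-qfs-swarm.yml, so the memory the worker advertises to Spark now equals the swarm-enforced cap.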
