From a62e21614a450ea4ce647f04960a71f24f5e0c35 Mon Sep 17 00:00:00 2001
From: Michael Kamprath
Date: Wed, 1 Jan 2020 17:20:06 -0800
Subject: [PATCH] tuned spark's memory usage

---
 spark-qfs-swarm/deploy-spark-qfs-swarm.yml                 | 1 -
 spark-qfs-swarm/worker-node/Dockerfile                     | 6 ++++--
 spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf | 6 +++---
 spark-qfs-swarm/worker-node/spark-conf/spark-env.sh        | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/spark-qfs-swarm/deploy-spark-qfs-swarm.yml b/spark-qfs-swarm/deploy-spark-qfs-swarm.yml
index 8458025..1c92e82 100644
--- a/spark-qfs-swarm/deploy-spark-qfs-swarm.yml
+++ b/spark-qfs-swarm/deploy-spark-qfs-swarm.yml
@@ -93,7 +93,6 @@ services:
       mode: global
       resources:
         limits:
-          cpus: "8.0"
           memory: 56g
 networks:
   cluster_network:
diff --git a/spark-qfs-swarm/worker-node/Dockerfile b/spark-qfs-swarm/worker-node/Dockerfile
index 5a40c82..c97114b 100644
--- a/spark-qfs-swarm/worker-node/Dockerfile
+++ b/spark-qfs-swarm/worker-node/Dockerfile
@@ -63,7 +63,6 @@ RUN curl -sL --retry 3 \
     | tar x -C /usr/ \
     && mv /usr/$QFS_PACKAGE $QFS_HOME \
     && chown -R root:root $QFS_HOME
-COPY ./qfs-conf/* $QFS_HOME/conf/
 ENV PATH $PATH:${QFS_HOME}/bin:${QFS_HOME}/bin/tools
 RUN mkdir -p /data/qfs/ \
     && chown spark -R /data/qfs
@@ -81,7 +80,6 @@ RUN curl -sL --retry 3 \
     | tar x -C /usr/ \
     && mv /usr/$SPARK_PACKAGE $SPARK_HOME \
     && chown -R root:root $SPARK_HOME
-COPY ./spark-conf/* $SPARK_HOME/conf/
 
 RUN mkdir -p /data/spark \
     && chown spark -R /data/spark
@@ -89,6 +87,10 @@ RUN mkdir -p /data/spark \
 RUN python3 -mpip install matplotlib \
     && pip3 install pandas seaborn
 
+# copy QFS and Spark configurations
+COPY ./qfs-conf/* $QFS_HOME/conf/
+COPY ./spark-conf/* $SPARK_HOME/conf/
+
 # set up command
 COPY start-worker-node.sh /
 USER spark
diff --git a/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf b/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf
index 5085801..8b77f62 100644
--- a/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf
+++ b/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf
@@ -3,12 +3,12 @@ spark.serializer org.apache.spark.serializer.KryoSerializer
 spark.default.parallelism 100
 
 # worker node / executor set up
-# expecting a worker with 10 cores and 52g of memory
-spark.executor.memory 24g
+# expecting a worker with 10 cores and 56g of memory
+spark.executor.memory 26g
 spark.executor.cores 6
 
 # driver configurations
-spark.driver.memory 8g
+spark.driver.memory 6g
 spark.driver.cores 2
 
 # operational configurations
diff --git a/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh b/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh
index d95b1db..c61c333 100644
--- a/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh
+++ b/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh
@@ -1,5 +1,5 @@
 # the total amount of memory a worker (node) can use
-SPARK_WORKER_MEMORY=52g
+SPARK_WORKER_MEMORY=56g
 
 # the total amount of cores a worker (node) can use
 SPARK_WORKER_CORES=12
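
Not part of the commit itself, but a quick sanity check of the arithmetic behind the new numbers. The sketch below assumes Spark standalone scheduling, where a worker packs executors by whole cores and counts spark.executor.memory against SPARK_WORKER_MEMORY; the figures are taken from the hunks above, and the leftover gigabytes are assumed headroom for JVM overhead and the OS. Note the 56g swarm memory limit is kept equal to SPARK_WORKER_MEMORY, so the container cap matches what the worker advertises.

# sanity_check.py (illustrative, not part of the repo)
worker_cores = 12        # SPARK_WORKER_CORES in spark-env.sh
worker_memory_gb = 56    # SPARK_WORKER_MEMORY in spark-env.sh
executor_cores = 6       # spark.executor.cores in spark-defaults.conf
executor_memory_gb = 26  # spark.executor.memory in spark-defaults.conf

# executors per worker are limited by both the core and the memory budget
executors = min(worker_cores // executor_cores,
                worker_memory_gb // executor_memory_gb)
committed_gb = executors * executor_memory_gb

print(executors)                        # 2 executors per worker
print(committed_gb)                     # 52 of 56g committed to executors
print(worker_memory_gb - committed_gb)  # 4g headroom per worker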