From 882c37a147cb1a94e08a184dee32a7b7bfefa542 Mon Sep 17 00:00:00 2001 From: Michael Kamprath Date: Tue, 26 May 2020 01:20:00 -0700 Subject: [PATCH] minor tweaks --- spark-qfs-swarm/qfs-master/Dockerfile | 2 +- spark-qfs-swarm/worker-node/Dockerfile | 8 ++--- .../spark-conf/spark-defaults.conf | 36 +++++++++---------- .../worker-node/spark-conf/spark-env.sh | 4 +-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/spark-qfs-swarm/qfs-master/Dockerfile b/spark-qfs-swarm/qfs-master/Dockerfile index 5144e6b..47abc49 100644 --- a/spark-qfs-swarm/qfs-master/Dockerfile +++ b/spark-qfs-swarm/qfs-master/Dockerfile @@ -10,7 +10,7 @@ FROM worker-node:latest # need python 2 for webserver USER root RUN apt-get update \ - && apt-get install -y python2.7 less wget vim openssh-client \ + && apt-get install -y python2.7 wget vim openssh-client \ && ln -s /usr/bin/python2.7 /usr/bin/python2 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/spark-qfs-swarm/worker-node/Dockerfile b/spark-qfs-swarm/worker-node/Dockerfile index 201a55d..8b947b2 100644 --- a/spark-qfs-swarm/worker-node/Dockerfile +++ b/spark-qfs-swarm/worker-node/Dockerfile @@ -1,7 +1,7 @@ FROM debian:stretch MAINTAINER Michael Kamprath "https://github.com/michaelkamprath" # -# Base image for Apace Spak standalone cluster with QFS +# Base image for Apache Spark standalone cluster with QFS # # Inspired by https://hub.docker.com/r/gettyimages/spark/dockerfile # @@ -30,13 +30,13 @@ RUN apt-get update \ && locale-gen \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* - + ENV LANG en_US.UTF-8 ENV LANGUAGE en_US:en ENV LC_ALL en_US.UTF-8 RUN apt-get update \ - && apt-get install -y curl unzip procps \ + && apt-get install -y less curl unzip procps \ python3 python3-setuptools \ libboost-regex-dev \ && ln -s /usr/bin/python3 /usr/bin/python \ @@ -95,7 +95,7 @@ RUN echo "Downloading Spark from : ${SPARK_DOWNLOAD_URL}\n" \ && ln -s $SPARK_HOME /usr/local/spark RUN mkdir -p /data/spark \ && chown spark 
-R /data/spark - + # add python libraries useful in PySpark RUN python3 -mpip install matplotlib \ diff --git a/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf b/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf index 6cef406..8516515 100644 --- a/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf +++ b/spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf @@ -1,38 +1,38 @@ # performance optimizations -spark.serializer org.apache.spark.serializer.KryoSerializer -spark.default.parallelism 100 +spark.serializer org.apache.spark.serializer.KryoSerializer +spark.default.parallelism 100 # worker node / executor set up -# expecting a worker with 10 cores and 56g of memory -spark.executor.memory 26g -spark.executor.cores 6 +# expecting a worker with 12 cores and 56g of memory +spark.executor.memory 25g +spark.executor.cores 6 # driver configurations -spark.driver.memory 6g -spark.driver.cores 2 +spark.driver.memory 8g +spark.driver.cores 2 # operational configurations -spark.logConf true +spark.logConf true # This setting is to tell the class loaders in Spark that they -# only need to load the QFS access libraries once +# only need to load the QFS access libraries once spark.sql.hive.metastore.sharedPrefixes com.quantcast.qfs # Set up retention of Spark events to enable the history server. # The configured directory needs to be created prior to launching # Spark master. 
-spark.eventLog.enabled true -spark.eventLog.dir qfs:///history/spark-event/ -spark.history.fs.logDirectory qfs:///history/spark-event/ -spark.history.fs.cleaner.maxAge 30d +spark.eventLog.enabled true +spark.eventLog.dir qfs:///history/spark-event/ +spark.history.fs.logDirectory qfs:///history/spark-event/ +spark.history.fs.cleaner.maxAge 30d # Configure QFS here rather than in core-site.xml -spark.hadoop.fs.qfs.impl com.quantcast.qfs.hadoop.QuantcastFileSystem -spark.hadoop.fs.defaultFS qfs://qfs-master:20000 -spark.hadoop.fs.qfs.metaServerHost qfs-master -spark.hadoop.fs.qfs.metaServerPort 20000 +spark.hadoop.fs.qfs.impl com.quantcast.qfs.hadoop.QuantcastFileSystem +spark.hadoop.fs.defaultFS qfs://qfs-master:20000 +spark.hadoop.fs.qfs.metaServerHost qfs-master +spark.hadoop.fs.qfs.metaServerPort 20000 # this spark.hadoop.fs.qfs.createParams configure causes files written by Sark to # QFS to be 2x replicated rather than using Reed-Solomon encoding. If you have at # least 9 chunkservers, remove this configuration to instead use Reed-Solomon encoding. -spark.hadoop.fs.qfs.createParams 2 +spark.hadoop.fs.qfs.createParams 2 diff --git a/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh b/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh index c61c333..32132f2 100644 --- a/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh +++ b/spark-qfs-swarm/worker-node/spark-conf/spark-env.sh @@ -1,5 +1,5 @@ # the total amount of memory a worker (node) can use -SPARK_WORKER_MEMORY=56g +SPARK_WORKER_MEMORY=55g # the total amount of cores a worker (node) can use SPARK_WORKER_CORES=12 @@ -12,7 +12,7 @@ SPARK_WORKER_PORT=8881 SPARK_WORKER_WEBUI_PORT=8081 # which python the spark cluster should use for pyspark -PYSPARK_PYTHON=python3 +PYSPARK_PYTHON=python3 # hash seed so all node hash numbers consistently PYTHONHASHSEED=8675309