Skip to content

Commit

Permalink
minor tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelkamprath committed May 26, 2020
1 parent 5a319cb commit 882c37a
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion spark-qfs-swarm/qfs-master/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ FROM worker-node:latest
# need python 2 for webserver
USER root
RUN apt-get update \
&& apt-get install -y python2.7 less wget vim openssh-client \
&& apt-get install -y python2.7 wget vim openssh-client \
&& ln -s /usr/bin/python2.7 /usr/bin/python2 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
Expand Down
8 changes: 4 additions & 4 deletions spark-qfs-swarm/worker-node/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM debian:stretch
MAINTAINER Michael Kamprath "https://github.com/michaelkamprath"
#
# Base image for Apache Spark standalone cluster with QFS
# Base image for Apache Spark standalone cluster with QFS
#
# Inspired by https://hub.docker.com/r/gettyimages/spark/dockerfile
#
Expand Down Expand Up @@ -30,13 +30,13 @@ RUN apt-get update \
&& locale-gen \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8

RUN apt-get update \
&& apt-get install -y curl unzip procps \
&& apt-get install -y less curl unzip procps \
python3 python3-setuptools \
libboost-regex-dev \
&& ln -s /usr/bin/python3 /usr/bin/python \
Expand Down Expand Up @@ -95,7 +95,7 @@ RUN echo "Downloading Spark from : ${SPARK_DOWNLOAD_URL}\n" \
&& ln -s $SPARK_HOME /usr/local/spark
RUN mkdir -p /data/spark \
&& chown spark -R /data/spark


# add python libraries useful in PySpark
RUN python3 -mpip install matplotlib \
Expand Down
36 changes: 18 additions & 18 deletions spark-qfs-swarm/worker-node/spark-conf/spark-defaults.conf
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
# performance optimizations
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.default.parallelism 100
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.default.parallelism 100

# worker node / executor set up
# expecting a worker with 10 cores and 56g of memory
spark.executor.memory 26g
spark.executor.cores 6
# expecting a worker with 12 cores and 56g of memory
spark.executor.memory 25g
spark.executor.cores 6

# driver configurations
spark.driver.memory 6g
spark.driver.cores 2
spark.driver.memory 8g
spark.driver.cores 2

# operational configurations
spark.logConf true
spark.logConf true

# This setting is to tell the class loaders in Spark that they
# only need to load the QFS access libraries once
# only need to load the QFS access libraries once
spark.sql.hive.metastore.sharedPrefixes com.quantcast.qfs

# Set up retention of Spark events to enable the history server.
# The configured directory needs to be created prior to launching
# Spark master.
spark.eventLog.enabled true
spark.eventLog.dir qfs:///history/spark-event/
spark.history.fs.logDirectory qfs:///history/spark-event/
spark.history.fs.cleaner.maxAge 30d
spark.eventLog.enabled true
spark.eventLog.dir qfs:///history/spark-event/
spark.history.fs.logDirectory qfs:///history/spark-event/
spark.history.fs.cleaner.maxAge 30d

# Configure QFS here rather than in core-site.xml
spark.hadoop.fs.qfs.impl com.quantcast.qfs.hadoop.QuantcastFileSystem
spark.hadoop.fs.defaultFS qfs://qfs-master:20000
spark.hadoop.fs.qfs.metaServerHost qfs-master
spark.hadoop.fs.qfs.metaServerPort 20000
spark.hadoop.fs.qfs.impl com.quantcast.qfs.hadoop.QuantcastFileSystem
spark.hadoop.fs.defaultFS qfs://qfs-master:20000
spark.hadoop.fs.qfs.metaServerHost qfs-master
spark.hadoop.fs.qfs.metaServerPort 20000

# this spark.hadoop.fs.qfs.createParams configuration causes files written by Spark to
# QFS to be 2x replicated rather than using Reed-Solomon encoding. If you have at
# least 9 chunkservers, remove this configuration to instead use Reed-Solomon encoding.
spark.hadoop.fs.qfs.createParams 2
spark.hadoop.fs.qfs.createParams 2
4 changes: 2 additions & 2 deletions spark-qfs-swarm/worker-node/spark-conf/spark-env.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# the total amount of memory a worker (node) can use
SPARK_WORKER_MEMORY=56g
SPARK_WORKER_MEMORY=55g

# the total amount of cores a worker (node) can use
SPARK_WORKER_CORES=12
Expand All @@ -12,7 +12,7 @@ SPARK_WORKER_PORT=8881
SPARK_WORKER_WEBUI_PORT=8081

# which python the spark cluster should use for pyspark
PYSPARK_PYTHON=python3
PYSPARK_PYTHON=python3

# hash seed so all node hash numbers consistently
PYTHONHASHSEED=8675309
Expand Down

0 comments on commit 882c37a

Please sign in to comment.