From c7abfaed6f9efe6f55a48dc938e55790ff4cb0a1 Mon Sep 17 00:00:00 2001
From: Michael Kamprath
Date: Sun, 24 Nov 2019 02:04:47 -0800
Subject: [PATCH] added support for spark graphframes

---
 spark-qfs-swarm/jupyter-server/start-jupyter.sh | 2 +-
 spark-qfs-swarm/worker-node/Dockerfile          | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/spark-qfs-swarm/jupyter-server/start-jupyter.sh b/spark-qfs-swarm/jupyter-server/start-jupyter.sh
index 7060e6b..24ed0ea 100644
--- a/spark-qfs-swarm/jupyter-server/start-jupyter.sh
+++ b/spark-qfs-swarm/jupyter-server/start-jupyter.sh
@@ -1,3 +1,3 @@
 #!/bin/bash

-SHELL=/bin/bash XDG_RUNTIME_DIR=/home/spark/jupyter/runtime PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=7777 --notebook-dir=/home/spark/jupyter/notebooks --ip=* --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password=''" $SPARK_HOME/bin/pyspark --master spark://spark-master:7077
+SHELL=/bin/bash XDG_RUNTIME_DIR=/home/spark/jupyter/runtime PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=7777 --notebook-dir=/home/spark/jupyter/notebooks --ip=* --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password=''" $SPARK_HOME/bin/pyspark --packages graphframes:graphframes:0.7.0-spark2.4-s_2.11 --master spark://spark-master:7077
diff --git a/spark-qfs-swarm/worker-node/Dockerfile b/spark-qfs-swarm/worker-node/Dockerfile
index bcac2db..41a0ed5 100644
--- a/spark-qfs-swarm/worker-node/Dockerfile
+++ b/spark-qfs-swarm/worker-node/Dockerfile
@@ -38,10 +38,6 @@ RUN apt-get update \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

-# add python libraries useful in PySpark
-RUN python3 -mpip install matplotlib \
-    && pip3 install pandas
-
 ENV PYTHONIOENCODING UTF-8
 ENV PIP_DISABLE_PIP_VERSION_CHECK 1

@@ -89,6 +85,10 @@ COPY ./spark-conf/* $SPARK_HOME/conf/
 RUN mkdir -p /data/spark \
     && chown spark -R /data/spark

+# add python libraries useful in PySpark
+RUN python3 -mpip install matplotlib \
+    && pip3 install pandas
+
 # set up command
 COPY start-worker-node.sh /
 USER spark
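
With the `--packages graphframes:graphframes:0.7.0-spark2.4-s_2.11` flag added to the pyspark launch above, the driver resolves and fetches the GraphFrames jar (and its Python bindings) at session startup. A minimal smoke test from a notebook cell, assuming the `spark` SparkSession that pyspark provides automatically; the vertex and edge data are illustrative, not part of this patch:

    # Smoke test for the GraphFrames package pulled in via --packages.
    # Assumes the `spark` session object pyspark creates; sample data is made up.
    from graphframes import GraphFrame

    vertices = spark.createDataFrame(
        [("a", "Alice"), ("b", "Bob"), ("c", "Carol")],
        ["id", "name"],          # GraphFrames requires an "id" column
    )
    edges = spark.createDataFrame(
        [("a", "b"), ("b", "c"), ("c", "a")],
        ["src", "dst"],          # and "src"/"dst" columns for edges
    )

    g = GraphFrame(vertices, edges)
    g.inDegrees.show()           # touches the JVM side, so it fails fast if the jar did not resolve

The Dockerfile hunks only reorder the existing matplotlib/pandas install to a later build stage; no Python dependencies change.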