diff --git a/spark-qfs-swarm/jupyter-server/Dockerfile b/spark-qfs-swarm/jupyter-server/Dockerfile
index e891349..29967f0 100644
--- a/spark-qfs-swarm/jupyter-server/Dockerfile
+++ b/spark-qfs-swarm/jupyter-server/Dockerfile
@@ -6,13 +6,15 @@ FROM qfs-master:latest
 # /data/spark - Spark's data directory
 #
 
+ENV GRAPHFRAMES_VERSION 0.8.1-spark3.0-s_2.12
+
 USER root
 RUN apt-get install -y g++
 RUN pip3 install \
     notebook==5.7.9 \
     jupyter_nbextensions_configurator \
     jupyter_contrib_nbextensions \
-    yapf
+    yapf wget
 
 COPY start-jupyter.sh /
 
@@ -24,6 +26,7 @@ RUN jupyter nbextension enable codefolding/main
 RUN jupyter nbextension enable execute_time/ExecuteTime
 
 RUN mkdir -p /home/spark/jupyter/runtime \
-    && mkdir -p /home/spark/jupyter/notebooks
+    && mkdir -p /home/spark/jupyter/notebooks \
+    && mkdir -p /home/spark/jars/
 
 CMD ["/bin/bash", "/start-jupyter.sh"]
diff --git a/spark-qfs-swarm/jupyter-server/start-jupyter.sh b/spark-qfs-swarm/jupyter-server/start-jupyter.sh
index a5c6a90..2be5739 100644
--- a/spark-qfs-swarm/jupyter-server/start-jupyter.sh
+++ b/spark-qfs-swarm/jupyter-server/start-jupyter.sh
@@ -4,4 +4,7 @@ SHELL=/bin/bash \
 XDG_RUNTIME_DIR=/home/spark/jupyter/runtime \
 PYSPARK_DRIVER_PYTHON=jupyter \
 PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=7777 --notebook-dir=/home/spark/jupyter/notebooks --ip=0.0.0.0 --NotebookApp.password='' --NotebookApp.token=''" \
-    $SPARK_HOME/bin/pyspark --packages graphframes:graphframes:0.8.1-spark3.0-s_2.12 --master spark://spark-master:7077
+    $SPARK_HOME/bin/pyspark \
+        --packages graphframes:graphframes:$GRAPHFRAMES_VERSION \
+        --repositories https://repos.spark-packages.org/ \
+        --master spark://spark-master:7077