diff --git a/spark-qfs-swarm/README.md b/spark-qfs-swarm/README.md index 6735910..dc8d041 100644 --- a/spark-qfs-swarm/README.md +++ b/spark-qfs-swarm/README.md @@ -1,5 +1,5 @@ # Deploy Standalone Spark Cluster with QFS on Docker Swarm -This project deploys a standalone Spark Cluster onto a Docker Swarm. Includes the [Quantcast File System](https://github.com/quantcast/qfs) (QFS) as the clusters distributed file system. Why QFS? Why not. this configuration will also launch and make available a Jupyter PySpark notebook that is connected to the Spark cluster. The cluster has [`matplotlib`](https://matplotlib.org) and [`pandas`](https://pandas.pydata.org) preinstalled for your PySpark on Jupyter joys. +This project deploys a standalone Spark Cluster onto a Docker Swarm. Includes the [Quantcast File System](https://github.com/quantcast/qfs) (QFS) as the cluster's distributed file system. Why QFS? Why not. This configuration will also launch and make available a Jupyter PySpark notebook that is connected to the Spark cluster. The cluster has [`matplotlib`](https://matplotlib.org) and [`pandas`](https://pandas.pydata.org) preinstalled for your PySpark on Jupyter joys. 
## Usage First, edit the following items as needed for your swarm: diff --git a/spark-qfs-swarm/jupyter-server/Dockerfile b/spark-qfs-swarm/jupyter-server/Dockerfile index 1a68bb9..71fe9f1 100644 --- a/spark-qfs-swarm/jupyter-server/Dockerfile +++ b/spark-qfs-swarm/jupyter-server/Dockerfile @@ -1,4 +1,4 @@ -FROM worker-node:latest +FROM qfs-master:latest RUN apt-get install -y g++ RUN pip3 install jupyter diff --git a/spark-qfs-swarm/jupyter-server/start-jupyter.sh b/spark-qfs-swarm/jupyter-server/start-jupyter.sh index 5595578..aed9cd3 100644 --- a/spark-qfs-swarm/jupyter-server/start-jupyter.sh +++ b/spark-qfs-swarm/jupyter-server/start-jupyter.sh @@ -1,3 +1,3 @@ #!/bin/bash -XDG_RUNTIME_DIR=/home/jupyter/runtime PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=7777 --notebook-dir=/home/jupyter/notebooks --ip=* --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password=''" $SPARK_HOME/bin/pyspark --master spark://spark-master:7077 +SHELL=/bin/bash XDG_RUNTIME_DIR=/home/jupyter/runtime PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=7777 --notebook-dir=/home/jupyter/notebooks --ip=* --no-browser --allow-root --NotebookApp.token='' --NotebookApp.password=''" $SPARK_HOME/bin/pyspark --master spark://spark-master:7077 diff --git a/spark-qfs-swarm/qfs-master/Dockerfile b/spark-qfs-swarm/qfs-master/Dockerfile index 86273a0..b0b345e 100644 --- a/spark-qfs-swarm/qfs-master/Dockerfile +++ b/spark-qfs-swarm/qfs-master/Dockerfile @@ -19,7 +19,8 @@ RUN apt-get update \ COPY ./qfs-conf/* $QFS_HOME/conf/ # create some useful bash aliases for when at bash shell prompt of this image -RUN echo 'alias qfs="qfs -fs qfs://qfs-master:20000"' >> ~/.bashrc \ +RUN echo 'export PATH=$PATH:$QFS_HOME/bin/:$QFS_HOME/bin/tools/' >> ~/.bashrc \ + && echo 'alias qfs="qfs -fs qfs://qfs-master:20000"' >> ~/.bashrc \ && echo 'alias cptoqfs="cptoqfs -s qfs-master -p 20000"' >> ~/.bashrc \ && echo 'alias 
cpfromqfs="cpfromqfs -s qfs-master -p 20000"' >> ~/.bashrc \ && echo 'alias qfsshell="qfsshell -s qfs-master -p 20000"' >> ~/.bashrc diff --git a/spark-qfs-swarm/qfs-master/qfs-conf/qfs-client.prp b/spark-qfs-swarm/qfs-master/qfs-conf/qfs-client.prp deleted file mode 100644 index e69de29..0000000