From 61638216cc4dc359b46aa9e57bf985841f6df630 Mon Sep 17 00:00:00 2001
From: Michael Kamprath <michael@kamprath.net>
Date: Sun, 22 Sep 2019 13:37:14 -0700
Subject: [PATCH] rearranged directory and fixed typos

---
 .../README.md                                          | 10 +++-------
 .../build-images.sh                                    |  2 +-
 .../configured-spark-node/Dockerfile                   |  0
 .../spark-conf/spark-defaults.conf                     |  0
 .../configured-spark-node/spark-conf/spark-env.sh      |  0
 .../deploy-spark-swarm.yml                             |  2 +-
 .../spark-jupyter-notebook/Dockerfile                  |  0
 .../spark-jupyter-notebook/start-jupyter.sh            |  0
 8 files changed, 5 insertions(+), 9 deletions(-)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/README.md (86%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/build-images.sh (97%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/configured-spark-node/Dockerfile (100%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/configured-spark-node/spark-conf/spark-defaults.conf (100%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/configured-spark-node/spark-conf/spark-env.sh (100%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/deploy-spark-swarm.yml (98%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/spark-jupyter-notebook/Dockerfile (100%)
 rename {docker-swarm-spark-simple => simple-spark-swarm}/spark-jupyter-notebook/start-jupyter.sh (100%)

diff --git a/docker-swarm-spark-simple/README.md b/simple-spark-swarm/README.md
similarity index 86%
rename from docker-swarm-spark-simple/README.md
rename to simple-spark-swarm/README.md
index 10ce6f5..10c2a8d 100644
--- a/docker-swarm-spark-simple/README.md
+++ b/simple-spark-swarm/README.md
@@ -10,10 +10,10 @@ First, edit the following items as needed for your swarm:
 3. `build-images.sh`: Adjust the IP address for your local Docker registry that all nodes in your cluster can access. You can use a domain name if all nodes in your swarm can resolve it. This is needed as it allows all nodes in the swarm to pull the locally built Docker images.
 4. `spark-deploy.yml`: Adjust all image names for the updated local Docker registry address you used in the prior step. Also, adjust the resource limits for each of the services. Setting a `cpus` limit here that is smaller than the number of cores on your node has the effect of giving your process a fraction of each core's capacity. You might consider doing this if your swarm hosts other services or does not handle long term 100% CPU load well (e.g., overheats). Also adjust the `replicas` count for the `spark-worker` service to be equal to the number of nodes in your swarm (or less). 
 
-This set up depends on have a GlusterFS volume mounted at `/mnt/gfs` on all nodes and the directories exist on it:
+This setup depends on having a GlusterFS volume mounted at `/mnt/gfs` on all nodes, with the following directories existing on it:
 
-* `/mnt/gfs/jupyter-notbooks`
-* `/mnt/gfs/data`
+* `/mnt/gfs/jupyter-notebooks` - used to persist the Jupyter notebooks.
+* `/mnt/gfs/data` - used to hold the data to be analyzed with Spark.
 
 Then, to start up the Spark cluster in your Docker swarm, `cd` into this project's directory and:
 ```
@@ -23,9 +23,5 @@ docker stack deploy -c deploy-spark-swarm.yml spark
 
 Point your development computer's browser at `http://swarm-public-ip:7777/` to load the Jupyter notebook.
 
-## TODO
-This cluster is a work in progress. Currently, the following items are missing:
-* A distributed file system, such as HDFS or QFS. Currently there is no way to ingest data into the cluster except through network transfers, such as through `curl`, set up in a Jupyter notebook.
-
 ## Acknowledgements
 The docker configuration leverages the [`gettyimages/spark`](https://hub.docker.com/r/gettyimages/spark/) Docker image as a starting point. 
diff --git a/docker-swarm-spark-simple/build-images.sh b/simple-spark-swarm/build-images.sh
similarity index 97%
rename from docker-swarm-spark-simple/build-images.sh
rename to simple-spark-swarm/build-images.sh
index da0af03..40c25b6 100755
--- a/docker-swarm-spark-simple/build-images.sh
+++ b/simple-spark-swarm/build-images.sh
@@ -2,7 +2,7 @@
 
 set -e
 
-#build images
+# build images
 docker build -t configured-spark-node:latest ./configured-spark-node
 docker build -t spark-jupyter-notebook:latest ./spark-jupyter-notebook
 
diff --git a/docker-swarm-spark-simple/configured-spark-node/Dockerfile b/simple-spark-swarm/configured-spark-node/Dockerfile
similarity index 100%
rename from docker-swarm-spark-simple/configured-spark-node/Dockerfile
rename to simple-spark-swarm/configured-spark-node/Dockerfile
diff --git a/docker-swarm-spark-simple/configured-spark-node/spark-conf/spark-defaults.conf b/simple-spark-swarm/configured-spark-node/spark-conf/spark-defaults.conf
similarity index 100%
rename from docker-swarm-spark-simple/configured-spark-node/spark-conf/spark-defaults.conf
rename to simple-spark-swarm/configured-spark-node/spark-conf/spark-defaults.conf
diff --git a/docker-swarm-spark-simple/configured-spark-node/spark-conf/spark-env.sh b/simple-spark-swarm/configured-spark-node/spark-conf/spark-env.sh
similarity index 100%
rename from docker-swarm-spark-simple/configured-spark-node/spark-conf/spark-env.sh
rename to simple-spark-swarm/configured-spark-node/spark-conf/spark-env.sh
diff --git a/docker-swarm-spark-simple/deploy-spark-swarm.yml b/simple-spark-swarm/deploy-spark-swarm.yml
similarity index 98%
rename from docker-swarm-spark-simple/deploy-spark-swarm.yml
rename to simple-spark-swarm/deploy-spark-swarm.yml
index a5a9300..c800438 100644
--- a/docker-swarm-spark-simple/deploy-spark-swarm.yml
+++ b/simple-spark-swarm/deploy-spark-swarm.yml
@@ -83,7 +83,7 @@ services:
             - 4040:4040
         volumes:
             - type: bind
-              source: /mnt/gfs/jupyter-notbooks
+              source: /mnt/gfs/jupyter-notebooks
               target: /home/jupyter/notebooks
             - type: bind
               source: /mnt/gfs/data
diff --git a/docker-swarm-spark-simple/spark-jupyter-notebook/Dockerfile b/simple-spark-swarm/spark-jupyter-notebook/Dockerfile
similarity index 100%
rename from docker-swarm-spark-simple/spark-jupyter-notebook/Dockerfile
rename to simple-spark-swarm/spark-jupyter-notebook/Dockerfile
diff --git a/docker-swarm-spark-simple/spark-jupyter-notebook/start-jupyter.sh b/simple-spark-swarm/spark-jupyter-notebook/start-jupyter.sh
similarity index 100%
rename from docker-swarm-spark-simple/spark-jupyter-notebook/start-jupyter.sh
rename to simple-spark-swarm/spark-jupyter-notebook/start-jupyter.sh