added example jupyter notebook

DIYBigData · Oct 27, 2019 · dcd3e00 · dcd3e00
1 parent 1160ff1
commit dcd3e00
Show file tree

Hide file tree

Showing 2 changed files with 114 additions and 1 deletion.
diff --git a/jupyter-notebooks/prime-numbers/prime-numbers.ipynb b/jupyter-notebooks/prime-numbers/prime-numbers.ipynb
@@ -0,0 +1,113 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyspark.sql.functions as F\n",
+    "import pyspark.sql.types as T\n",
+    "from pyspark.sql import SparkSession\n",
+    "\n",
+    "# SparkSession is imported explicitly so this cell also runs on a fresh kernel\n",
+    "# that does not pre-populate the PySpark shell names.\n",
+    "# getOrCreate() reuses an already-active session, or starts a new one.\n",
+    "spark = SparkSession\\\n",
+    "        .builder\\\n",
+    "        .appName(\"CalculatePrimeNumbers\")\\\n",
+    "        .getOrCreate()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inclusive upper bound of the integer range that is tested for primality.\n",
+    "MAX_VALUE = 10 ** 9"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Algorithm reference:\n",
+    "#\thttps://en.wikipedia.org/wiki/Primality_test\n",
+    "\n",
+    "def isPrime(val):\n",
+    "    \"\"\"Return True if the integer ``val`` is prime, False otherwise.\n",
+    "\n",
+    "    Uses 6k +/- 1 trial division: values <= 3 are decided directly, multiples\n",
+    "    of 2 or 3 are rejected, and the remaining candidate divisors are tested\n",
+    "    in pairs (i, i + 2) starting at i = 5 and stepping by 6.\n",
+    "    \"\"\"\n",
+    "    if val <= 3:\n",
+    "        return val > 1\n",
+    "    if val % 2 == 0 or val % 3 == 0:\n",
+    "        return False\n",
+    "    i = 5\n",
+    "    # The bound must be inclusive: with a strict '<', squares of primes such as\n",
+    "    # 25 (5*5) or 121 (11*11) never reach their divisor and are reported prime.\n",
+    "    while i * i <= val:\n",
+    "        if val % i == 0 or val % (i + 2) == 0:\n",
+    "            return False\n",
+    "        i += 6\n",
+    "    return True\n",
+    "\n",
+    "# Distribute the integers 1..MAX_VALUE across 2000 partitions, pair each with\n",
+    "# its isPrime() result, and cache the resulting (value, is_prime) DataFrame.\n",
+    "values = (\n",
+    "    spark.sparkContext\n",
+    "    .parallelize(range(1, MAX_VALUE + 1), 2000)\n",
+    "    .map(lambda n: (n, isPrime(n)))\n",
+    "    .toDF()\n",
+    "    .withColumnRenamed('_1', 'value')\n",
+    "    .withColumnRenamed('_2', 'is_prime')\n",
+    "    .cache()\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "values.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Count the rows whose is_prime flag is set.\n",
+    "values.where(F.col('is_prime')).count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Largest value among the rows whose is_prime flag is set.\n",
+    "values.where(F.col('is_prime')).agg(F.max('value')).collect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/simple-spark-swarm/deploy-spark-swarm.yml b/simple-spark-swarm/deploy-spark-swarm.yml
@@ -60,7 +60,7 @@ services:
             replicas: 4
             resources:
                 limits:
-                    cpus: "8.0"
+                    cpus: "6.0"
                     memory: 52g
 
     spark-jupyter: