Skip to content

Commit

Permalink
added example jupyter notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelkamprath committed Oct 27, 2019
1 parent 1160ff1 commit dcd3e00
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 1 deletion.
113 changes: 113 additions & 0 deletions jupyter-notebooks/prime-numbers/prime-numbers.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pyspark.sql.functions as F\n",
"import pyspark.sql.types as T\n",
"from pyspark.sql import SparkSession\n",
"\n",
"# SparkSession is imported explicitly so this cell also works on a fresh\n",
"# kernel that was not started through a PySpark startup script.\n",
"# getOrCreate() returns the already-running session when there is one.\n",
"spark = (\n",
"    SparkSession.builder\n",
"    .appName(\"CalculatePrimeNumbers\")\n",
"    .getOrCreate()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Upper bound (inclusive) of the integers that get tested for primality.\n",
"MAX_VALUE = 10 ** 9"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Algorithm reference:\n",
"#\thttps://en.wikipedia.org/wiki/Primality_test\n",
"\n",
"def isPrime(val):\n",
"    \"\"\"Return True when the integer ``val`` is prime, False otherwise.\n",
"\n",
"    Uses 6k +/- 1 trial division: once multiples of 2 and 3 are ruled out,\n",
"    only divisors of the form 6k - 1 and 6k + 1 need to be tried, up to and\n",
"    including the square root of ``val``.\n",
"    \"\"\"\n",
"    if val <= 3:\n",
"        # 2 and 3 are prime; 1, 0 and negative numbers are not.\n",
"        return val > 1\n",
"    if val % 2 == 0 or val % 3 == 0:\n",
"        return False\n",
"    i = 5\n",
"    # The bound must be inclusive: with a strict '<' the squares of primes\n",
"    # such as 25 or 121 are never divided by their own root and would be\n",
"    # reported as prime.\n",
"    while i * i <= val:\n",
"        if val % i == 0 or val % (i + 2) == 0:\n",
"            return False\n",
"        i += 6\n",
"    return True\n",
"\n",
"# Pair every integer in 1..MAX_VALUE with its primality flag, spread over\n",
"# 2000 partitions, and keep the resulting two-column DataFrame cached for\n",
"# the queries in the following cells.\n",
"values = (\n",
"    spark.sparkContext\n",
"    .parallelize(range(1, MAX_VALUE + 1), 2000)\n",
"    .map(lambda n: (n, isPrime(n)))\n",
"    .toDF(['value', 'is_prime'])\n",
"    .cache()\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Count every row in `values` (primes and non-primes alike).\n",
"values.count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of tested integers that were flagged as prime.\n",
"values.where(F.col('is_prime')).count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Largest integer flagged as prime; collect() brings the single-row result\n",
"# back to the driver as a list of Row objects.\n",
"values.where(F.col('is_prime')).agg(F.max(F.col('value'))).collect()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion simple-spark-swarm/deploy-spark-swarm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ services:
replicas: 4
resources:
limits:
cpus: "8.0"
cpus: "6.0"
memory: 52g

spark-jupyter:
Expand Down

0 comments on commit dcd3e00

Please sign in to comment.