From 3d1988c5584013d8706b1db2423b00ed49d500b0 Mon Sep 17 00:00:00 2001 From: Michael Kamprath Date: Sun, 27 Oct 2019 12:23:43 -0700 Subject: [PATCH] added same code for QFS --- ...-numbers.ipynb => prime-numbers-qfs.ipynb} | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) rename jupyter-notebooks/prime-numbers/{prime-numbers.ipynb => prime-numbers-qfs.ipynb} (74%) diff --git a/jupyter-notebooks/prime-numbers/prime-numbers.ipynb b/jupyter-notebooks/prime-numbers/prime-numbers-qfs.ipynb similarity index 74% rename from jupyter-notebooks/prime-numbers/prime-numbers.ipynb rename to jupyter-notebooks/prime-numbers/prime-numbers-qfs.ipynb index 477340c..451e33a 100644 --- a/jupyter-notebooks/prime-numbers/prime-numbers.ipynb +++ b/jupyter-notebooks/prime-numbers/prime-numbers-qfs.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -17,16 +17,16 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "MAX_VALUE = 1000000000" + "MAX_VALUE = 10000000" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +69,34 @@ "metadata": {}, "outputs": [], "source": [ - "values.filter(F.col('is_prime')).count()" + "values.repartition(50).write.parquet('qfs:///test/prime-numbers', mode='overwrite')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = spark.read.parquet('qfs:///test/prime-numbers')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.filter(F.col('is_prime')).count()" ] }, { @@ -78,7 +105,7 @@ "metadata": {}, "outputs": [], "source": [ - "values.filter(F.col('is_prime')).agg(F.max('value')).collect()" + "df.filter(F.col('is_prime')).agg(F.max('value').alias('max_prime')).collect()[0].max_prime" ] }, {