diff --git a/Access_GitHub_Link.ipynb b/Access_GitHub_Link.ipynb
new file mode 100644
index 00000000..c6fa84ee
--- /dev/null
+++ b/Access_GitHub_Link.ipynb
@@ -0,0 +1,145 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SKG_3zy_AhjA"
+ },
+ "source": [
+ "# Setup\n",
+ "\n",
+ "Please ensure you have imported a Gemini API key from AI Studio.\n",
+ "You can do this directly in the Secrets tab on the left; the setup cell reads the secret named `GOOGLE_API_KEY`.\n",
+ "\n",
+ "After doing so, please run the setup cell below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "swM2wHGtAhjI"
+ },
+ "source": [
+ "# Install the Gemini SDK into the running kernel's environment.\n",
+ "# `google-genai` is the distribution that provides `from google import genai`;\n",
+ "# the separate `google` distribution is not needed by anything in this notebook.\n",
+ "%pip install -U -q google-genai\n",
+ "\n",
+ "import os\n",
+ "from google.colab import userdata\n",
+ "from google.colab import drive\n",
+ "\n",
+ "# The generated code reads GEMINI_API_KEY; the Colab secret is stored as GOOGLE_API_KEY.\n",
+ "os.environ[\"GEMINI_API_KEY\"] = userdata.get(\"GOOGLE_API_KEY\")\n",
+ "\n",
+ "drive.mount(\"/content/drive\")\n",
+ "# Please ensure that uploaded files are available in the AI Studio folder or change the working folder.\n",
+ "os.chdir(\"/content/drive/MyDrive/Google AI Studio\")"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DWbbJywhAhjL"
+ },
+ "source": [
+ "# Generated Code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "VywpA5h0AhjM"
+ },
+ "source": [
+ "# To run this code you need to install the following dependencies:\n",
+ "# pip install google-genai\n",
+ "\n",
+ "import base64\n",
+ "import mimetypes\n",
+ "import os\n",
+ "from google import genai\n",
+ "from google.genai import types\n",
+ "\n",
+ "\n",
+ "def save_binary_file(file_name, data):\n",
+ "    \"\"\"Write `data` to `file_name` in binary mode and print the destination.\n",
+ "\n",
+ "    Args:\n",
+ "        file_name: Path of the file to create; an existing file is overwritten.\n",
+ "        data: Bytes-like payload to write.\n",
+ "    \"\"\"\n",
+ "    # The context manager closes the handle even when the write raises.\n",
+ "    with open(file_name, \"wb\") as f:\n",
+ "        f.write(data)\n",
+ "    print(f\"File saved to to: {file_name}\")\n",
+ "\n",
+ "\n",
+ "def generate():\n",
+ "    \"\"\"Stream one Gemini response, saving image parts to disk and printing text parts.\n",
+ "\n",
+ "    The API key is read from the `GEMINI_API_KEY` environment variable. Each part\n",
+ "    carrying inline binary data is written through `save_binary_file` as\n",
+ "    `ENTER_FILE_NAME_<index><extension>`; text parts are printed.\n",
+ "    \"\"\"\n",
+ "    client = genai.Client(\n",
+ "        api_key=os.environ.get(\"GEMINI_API_KEY\"),\n",
+ "    )\n",
+ "\n",
+ "    model = \"gemini-2.5-flash-image\"\n",
+ "    contents = [\n",
+ "        types.Content(\n",
+ "            role=\"user\",\n",
+ "            parts=[\n",
+ "                types.Part.from_text(text=\"\"\"INSERT_INPUT_HERE\"\"\"),\n",
+ "            ],\n",
+ "        ),\n",
+ "    ]\n",
+ "    generate_content_config = types.GenerateContentConfig(\n",
+ "        response_modalities=[\n",
+ "            \"IMAGE\",\n",
+ "            \"TEXT\",\n",
+ "        ],\n",
+ "    )\n",
+ "\n",
+ "    file_index = 0\n",
+ "    for chunk in client.models.generate_content_stream(\n",
+ "        model=model,\n",
+ "        contents=contents,\n",
+ "        config=generate_content_config,\n",
+ "    ):\n",
+ "        # `not` also rejects empty lists, which would make the `[0]` lookup raise IndexError.\n",
+ "        if (\n",
+ "            not chunk.candidates\n",
+ "            or chunk.candidates[0].content is None\n",
+ "            or not chunk.candidates[0].content.parts\n",
+ "        ):\n",
+ "            continue\n",
+ "        # Walk every part so an image that follows a text part in the same chunk is not dropped.\n",
+ "        for part in chunk.candidates[0].content.parts:\n",
+ "            inline_data = part.inline_data\n",
+ "            if inline_data and inline_data.data:\n",
+ "                file_name = f\"ENTER_FILE_NAME_{file_index}\"\n",
+ "                file_index += 1\n",
+ "                # guess_extension() returns None for unknown MIME types; avoid a literal \"None\" suffix.\n",
+ "                file_extension = mimetypes.guess_extension(inline_data.mime_type or \"\") or \".bin\"\n",
+ "                save_binary_file(f\"{file_name}{file_extension}\", inline_data.data)\n",
+ "            elif part.text:\n",
+ "                print(part.text)\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ "    generate()\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/notebooks/accelerating_pandas_with_gpu.ipynb b/notebooks/accelerating_pandas_with_gpu.ipynb
new file mode 100644
index 00000000..99aa807d
--- /dev/null
+++ b/notebooks/accelerating_pandas_with_gpu.ipynb
@@ -0,0 +1,365 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Accelerating pandas with GPU: Sort the count of rows grouped on columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext cudf.pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic table of `n` rows and counts rows per (species, flower_color) pair.\n",
+ "import pandas as pd\n",
+ "import random\n",
+ "\n",
+ "# Define the species categories\n",
+ "species_categories = ['setosa', 'versicolor', 'virginica']\n",
+ "flower_color_categories = ['red','yellow','green']\n",
+ "\n",
+ "# Define the range for each attribute based on typical iris flower measurements\n",
+ "sepal_length_range = (4.0, 8.0)\n",
+ "\n",
+ "# Create data for 1,000,000 samples\n",
+ "# The `random` module is not seeded here, so the generated values differ between runs.\n",
+ "n = 1000000\n",
+ "data = {\n",
+ " 'sepal_length': [random.uniform(*sepal_length_range) for _ in range(n)],\n",
+ " 'flower_color': [random.choice(flower_color_categories) for _ in range(n)],\n",
+ " 'species': [random.choice(species_categories) for _ in range(n)]\n",
+ "}\n",
+ "\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Row count per (species, flower_color) group, largest first; shown as the cell's last expression.\n",
+ "df.groupby(['species','flower_color']).size().sort_values(ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Accelerating pandas with GPU: Merging / Joining dataframes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext cudf.pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Builds two synthetic tables that share ticket numbers and inner-joins them on that column.\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\",\n",
+ "              \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "# Generate random data for Dataset 1\n",
+ "data1 = {\n",
+ "    \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ "    \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Generate random data for Dataset 2\n",
+ "data2 = {\n",
+ "    \"Ticket Number\": np.random.choice(data1['Ticket Number'], size=num_rows),  # Reusing ticket numbers to ensure matches\n",
+ "    \"Violation Description\": np.random.choice(violations, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create DataFrames\n",
+ "df1 = pd.DataFrame(data1)\n",
+ "df2 = pd.DataFrame(data2)\n",
+ "\n",
+ "# Perform an inner join on 'Ticket Number'\n",
+ "merged_df = pd.merge(df1, df2, on=\"Ticket Number\", how=\"inner\")\n",
+ "\n",
+ "# Display some of the joined data\n",
+ "print(merged_df.head())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Accelerating pandas with GPU: Groupby aggregate on timeseries data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext cudf.pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic year of parking tickets and counts tickets per weekday.\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "# Define the possible values\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\", \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "start_date = \"2022-01-01\"\n",
+ "end_date = \"2022-12-31\"\n",
+ "# Create a date range\n",
+ "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+ "\n",
+ "# Generate random data\n",
+ "data = {\n",
+ "    \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ "    \"Violation Description\": np.random.choice(violations, size=num_rows),\n",
+ "    \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n",
+ "    \"Issue Date\": np.random.choice(dates, size=num_rows),\n",
+ "    \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create a DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Adding issue weekday based on the \"Issue Date\"\n",
+ "weekday_names = {\n",
+ "    0: \"Monday\",\n",
+ "    1: \"Tuesday\",\n",
+ "    2: \"Wednesday\",\n",
+ "    3: \"Thursday\",\n",
+ "    4: \"Friday\",\n",
+ "    5: \"Saturday\",\n",
+ "    6: \"Sunday\",\n",
+ "}\n",
+ "\n",
+ "# .dt.weekday numbers days 0 (Monday) through 6 (Sunday); map the number to its name\n",
+ "df[\"issue_weekday\"] = df[\"Issue Date\"].dt.weekday.map(weekday_names)\n",
+ "\n",
+ "# Grouping by issue_weekday and counting the Ticket Number, smallest count first\n",
+ "df.groupby([\"issue_weekday\"])[\"Ticket Number\"].count().sort_values()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Accelerating pandas with GPU: Count of values and GroupBy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext cudf.pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic parking-violation table and finds the most frequent violation per state.\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Randomly generated dataset of parking violations-\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n",
+ "              \"Fire Hydrant\", \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "# Create a date range\n",
+ "start_date = \"2022-01-01\"\n",
+ "end_date = \"2022-12-31\"\n",
+ "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+ "\n",
+ "# Generate random data\n",
+ "data = {\n",
+ "    \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ "    \"Violation Description\": np.random.choice(violations, size=num_rows),\n",
+ "    \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n",
+ "    \"Issue Date\": np.random.choice(dates, size=num_rows),\n",
+ "    \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create a DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Which parking violation is most commonly committed by vehicles from various U.S states?\n",
+ "\n",
+ "(df[[\"Registration State\", \"Violation Description\"]]  # get only these two columns\n",
+ " .value_counts()  # get the count of offences per state and per type of offence\n",
+ " .groupby(\"Registration State\")  # group by state\n",
+ " .head(1)  # get the first row in each group (the type of offence with the largest count)\n",
+ " .sort_index()  # sort by state name\n",
+ " .reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Accelerating pandas with GPU: Rolling Window Average"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext cudf.pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic parking-violation table and plots a 7-day rolling mean of daily counts per vehicle type.\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Randomly generated dataset of parking violations-\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n",
+ "              \"Fire Hydrant\", \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "# Create a date range\n",
+ "start_date = \"2022-01-01\"\n",
+ "end_date = \"2022-12-31\"\n",
+ "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+ "\n",
+ "# Generate random data\n",
+ "data = {\n",
+ "    \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ "    \"Violation Description\": np.random.choice(violations, size=num_rows),\n",
+ "    \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n",
+ "    \"Issue Date\": np.random.choice(dates, size=num_rows),\n",
+ "    \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create a DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# How does the parking violations change from day to day segmented by vehicle type\n",
+ "# Averaged using a 7-day rolling mean\n",
+ "\n",
+ "daily_counts = df.groupby(['Issue Date', 'Vehicle Body Type']\n",
+ "                          ).size().unstack(fill_value=0)\n",
+ "\n",
+ "# Calculate a 7-day rolling mean of daily violations for each vehicle type\n",
+ "rolling_means = daily_counts.rolling(window=7).mean()\n",
+ "\n",
+ "# Display the rolling means for each vehicle type over time\n",
+ "rolling_means.tail(100).plot(figsize=(14, 7),\n",
+ "                             title=\"7-Day Rolling Average of Parking Violations by Vehicle Type\")\n",
+ "plt.ylabel(\"Average Number of Violations\")\n",
+ "plt.xlabel(\"Date\")\n",
+ "plt.show()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/pandas_acceleration_with_gpu.ipynb b/pandas_acceleration_with_gpu.ipynb
new file mode 100644
index 00000000..6b149423
--- /dev/null
+++ b/pandas_acceleration_with_gpu.ipynb
@@ -0,0 +1,351 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "N_63kkzYSrX1"
+ },
+ "source": [
+ "# Accelerating pandas with GPU: Sort the count of rows grouped on columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "igTJBL0vUb4F"
+ },
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "8fuXnmHmSBzq"
+ },
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic table of `n` rows and counts rows per (species, flower_color) pair.\n",
+ "# The extension is loaded before pandas is imported, as the markdown cell above instructs.\n",
+ "%load_ext cudf.pandas\n",
+ "import pandas as pd\n",
+ "import random\n",
+ "\n",
+ "# Define the species categories\n",
+ "species_categories = ['setosa', 'versicolor', 'virginica']\n",
+ "flower_color_categories = ['red','yellow','green']\n",
+ "\n",
+ "# Define the range for each attribute based on typical iris flower measurements\n",
+ "sepal_length_range = (4.0, 8.0)\n",
+ "\n",
+ "# Create data for 1,000,000 samples\n",
+ "# The `random` module is not seeded here, so the generated values differ between runs.\n",
+ "n = 1000000\n",
+ "data = {\n",
+ " 'sepal_length': [random.uniform(*sepal_length_range) for _ in range(n)],\n",
+ " 'flower_color': [random.choice(flower_color_categories) for _ in range(n)],\n",
+ " 'species': [random.choice(species_categories) for _ in range(n)]\n",
+ "}\n",
+ "\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Row count per (species, flower_color) group, largest first; shown as the cell's last expression.\n",
+ "df.groupby(['species','flower_color']).size().sort_values(ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "t2DVoM7ifOAq"
+ },
+ "source": [
+ "# Accelerating pandas with GPU: Merging / Joining dataframes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "k6MWgjQRfZl-"
+ },
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "WW3bxTL3f5Pd"
+ },
+ "outputs": [],
+ "source": [
+ "# Builds two synthetic tables that share ticket numbers and inner-joins them on that column.\n",
+ "%load_ext cudf.pandas\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\",\n",
+ " \"Bus Stop\"]\n",
+ "# NOTE(review): vehicle_types is assigned but not read anywhere in this cell.\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "# Generate random data for Dataset 1\n",
+ "data1 = {\n",
+ " \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Generate random data for Dataset 2\n",
+ "# np.random.choice samples with replacement by default, so a ticket number can occur several times here.\n",
+ "data2 = {\n",
+ " \"Ticket Number\": np.random.choice(data1['Ticket Number'], size=num_rows), # Reusing ticket numbers to ensure matches\n",
+ " \"Violation Description\": np.random.choice(violations, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create DataFrames\n",
+ "df1 = pd.DataFrame(data1)\n",
+ "df2 = pd.DataFrame(data2)\n",
+ "\n",
+ "# Perform an inner join on 'Ticket Number'\n",
+ "merged_df = pd.merge(df1, df2, on=\"Ticket Number\", how=\"inner\")\n",
+ "\n",
+ "# Display some of the joined data\n",
+ "print(merged_df.head())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "41DVaHZHS571"
+ },
+ "source": [
+ "# Accelerating pandas with GPU: Groupby aggregate on timeseries data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5YDBGAMOWZoG"
+ },
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "zlhe0827S1J5"
+ },
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic year of parking tickets and counts tickets per weekday.\n",
+ "%load_ext cudf.pandas\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "# Define the possible values\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\", \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "start_date = \"2022-01-01\"\n",
+ "end_date = \"2022-12-31\"\n",
+ "# Create a date range\n",
+ "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+ "\n",
+ "# Generate random data\n",
+ "data = {\n",
+ "    \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ "    \"Violation Description\": np.random.choice(violations, size=num_rows),\n",
+ "    \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n",
+ "    \"Issue Date\": np.random.choice(dates, size=num_rows),\n",
+ "    \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create a DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Adding issue weekday based on the \"Issue Date\"\n",
+ "weekday_names = {\n",
+ "    0: \"Monday\",\n",
+ "    1: \"Tuesday\",\n",
+ "    2: \"Wednesday\",\n",
+ "    3: \"Thursday\",\n",
+ "    4: \"Friday\",\n",
+ "    5: \"Saturday\",\n",
+ "    6: \"Sunday\",\n",
+ "}\n",
+ "\n",
+ "# .dt.weekday numbers days 0 (Monday) through 6 (Sunday); map the number to its name\n",
+ "df[\"issue_weekday\"] = df[\"Issue Date\"].dt.weekday.map(weekday_names)\n",
+ "\n",
+ "# Grouping by issue_weekday and counting the Ticket Number, smallest count first\n",
+ "df.groupby([\"issue_weekday\"])[\"Ticket Number\"].count().sort_values()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Kr4Niz4tWmCW"
+ },
+ "source": [
+ "# Accelerating pandas with GPU: Count of values and GroupBy\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "l_OzcTpBezh9"
+ },
+ "source": [
+ "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "hYWROytuURFH"
+ },
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic parking-violation table and finds the most frequent violation per state.\n",
+ "%load_ext cudf.pandas\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# Randomly generated dataset of parking violations-\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n",
+ " \"Fire Hydrant\", \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "# Create a date range\n",
+ "start_date = \"2022-01-01\"\n",
+ "end_date = \"2022-12-31\"\n",
+ "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+ "\n",
+ "# Generate random data\n",
+ "data = {\n",
+ " \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ " \"Violation Description\": np.random.choice(violations, size=num_rows),\n",
+ " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n",
+ " \"Issue Date\": np.random.choice(dates, size=num_rows),\n",
+ " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create a DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# Which parking violation is most commonly committed by vehicles from various U.S states?\n",
+ "\n",
+ "# value_counts() sorts by count in descending order by default, which is what makes head(1) per group the top entry.\n",
+ "(df[[\"Registration State\", \"Violation Description\"]] # get only these two columns\n",
+ " .value_counts() # get the count of offences per state and per type of offence\n",
+ " .groupby(\"Registration State\") # group by state\n",
+ " .head(1) # get the first row in each group (the type of offence with the largest count)\n",
+ " .sort_index() # sort by state name\n",
+ " .reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4sWzpEvabvBW"
+ },
+ "source": [
+ "# Accelerating pandas with GPU: Rolling Window Average\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "qbKJGeVnb6Uj"
+ },
+ "outputs": [],
+ "source": [
+ "# Builds a synthetic parking-violation table and plots a 7-day rolling mean of daily counts per vehicle type.\n",
+ "%load_ext cudf.pandas\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Randomly generated dataset of parking violations-\n",
+ "# Define the number of rows\n",
+ "num_rows = 1000000\n",
+ "\n",
+ "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n",
+ "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n",
+ " \"Fire Hydrant\", \"Bus Stop\"]\n",
+ "vehicle_types = [\"SUBN\", \"SDN\"]\n",
+ "\n",
+ "# Create a date range\n",
+ "start_date = \"2022-01-01\"\n",
+ "end_date = \"2022-12-31\"\n",
+ "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n",
+ "\n",
+ "# Generate random data\n",
+ "data = {\n",
+ " \"Registration State\": np.random.choice(states, size=num_rows),\n",
+ " \"Violation Description\": np.random.choice(violations, size=num_rows),\n",
+ " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n",
+ " \"Issue Date\": np.random.choice(dates, size=num_rows),\n",
+ " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n",
+ "}\n",
+ "\n",
+ "# Create a DataFrame\n",
+ "df = pd.DataFrame(data)\n",
+ "\n",
+ "# How does the parking violations change from day to day segmented by vehicle type\n",
+ "# Averaged using a 7-day rolling mean\n",
+ "\n",
+ "# One row per issue date, one column per vehicle body type; missing combinations become 0.\n",
+ "daily_counts = df.groupby(['Issue Date', 'Vehicle Body Type']\n",
+ " ).size().unstack(fill_value=0)\n",
+ "\n",
+ "# Calculate a 7-day rolling mean of daily violations for each vehicle type\n",
+ "# With the default min_periods the first 6 rows are NaN because the window is not yet full.\n",
+ "rolling_means = daily_counts.rolling(window=7).mean()\n",
+ "\n",
+ "# Display the rolling means for each vehicle type over time\n",
+ "# Only the last 100 rows (dates) are plotted.\n",
+ "rolling_means.tail(100).plot(figsize=(14, 7),\n",
+ " title=\"7-Day Rolling Average of Parking Violations by Vehicle Type\")\n",
+ "plt.ylabel(\"Average Number of Violations\")\n",
+ "plt.xlabel(\"Date\")\n",
+ "plt.show()"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "private_outputs": true,
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/simulation/decai/simulation/simulate_imdb_perceptron.py b/simulation/decai/simulation/simulate_imdb_perceptron.py
index ec093c26..a503f5de 100644
--- a/simulation/decai/simulation/simulate_imdb_perceptron.py
+++ b/simulation/decai/simulation/simulate_imdb_perceptron.py
@@ -1,4 +1,5 @@
import os
+import re
import sys
from typing import Optional
@@ -82,5 +83,7 @@ def main():
# Run with `bokeh serve PATH`.
-if __name__.startswith('bk_script_'):
+# re.match anchors at the start of the string, so this is a prefix test on the module name.
+# NOTE(review): presumably `bokeh serve` loads this file under a module name starting with
+# `bk_script_` or `bokeh_app_` depending on the Bokeh version — confirm against the versions in use.
+if re.match('bk_script_|bokeh_app_', __name__):
 main()
+else:
+# Reached for every other module name (for example a plain import), so the message is printed then too.
+ print("`__name__` didn't match the pattern. Bokeh app will not run.")