diff --git a/Access_GitHub_Link.ipynb b/Access_GitHub_Link.ipynb new file mode 100644 index 00000000..c6fa84ee --- /dev/null +++ b/Access_GitHub_Link.ipynb @@ -0,0 +1,145 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SKG_3zy_AhjA" + }, + "source": [ + "# Setup\n", + "\n", + "Please ensure you have imported a Gemini API key from AI Studio.\n", + "You can do this directly in the Secrets tab on the left.\n", + "\n", + "After doing so, please run the setup cell below." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "swM2wHGtAhjI" + }, + "source": [ + "!pip install -U -q \"google\"\n", + "!pip install -U -q \"google.genai\"\n", + "\n", + "import os\n", + "from google.colab import userdata\n", + "from google.colab import drive\n", + "os.environ[\"GEMINI_API_KEY\"] = userdata.get(\"GOOGLE_API_KEY\")\n", + "\n", + "drive.mount(\"/content/drive\")\n", + "# Please ensure that uploaded files are available in the AI Studio folder or change the working folder.\n", + "os.chdir(\"/content/drive/MyDrive/Google AI Studio\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DWbbJywhAhjL" + }, + "source": [ + "# Generated Code" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "VywpA5h0AhjM" + }, + "source": [ + "# To run this code you need to install the following dependencies:\n", + "# pip install google-genai\n", + "\n", + "import base64\n", + "import mimetypes\n", + "import os\n", + "from google import genai\n", + "from google.genai import types\n", + "\n", + "\n", + "def 
save_binary_file(file_name, data):\n", + " f = open(file_name, \"wb\")\n", + " f.write(data)\n", + " f.close()\n", + " print(f\"File saved to: {file_name}\")\n", + "\n", + "\n", + "def generate():\n", + " client = genai.Client(\n", + " api_key=os.environ.get(\"GEMINI_API_KEY\"),\n", + " )\n", + "\n", + " model = \"gemini-2.5-flash-image\"\n", + " contents = [\n", + " types.Content(\n", + " role=\"user\",\n", + " parts=[\n", + " types.Part.from_text(text=\"\"\"INSERT_INPUT_HERE\"\"\"),\n", + " ],\n", + " ),\n", + " ]\n", + " generate_content_config = types.GenerateContentConfig(\n", + " response_modalities=[\n", + " \"IMAGE\",\n", + " \"TEXT\",\n", + " ],\n", + " )\n", + "\n", + " file_index = 0\n", + " for chunk in client.models.generate_content_stream(\n", + " model=model,\n", + " contents=contents,\n", + " config=generate_content_config,\n", + " ):\n", + " if (\n", + " chunk.candidates is None\n", + " or chunk.candidates[0].content is None\n", + " or chunk.candidates[0].content.parts is None\n", + " ):\n", + " continue\n", + " if chunk.candidates[0].content.parts[0].inline_data and chunk.candidates[0].content.parts[0].inline_data.data:\n", + " file_name = f\"ENTER_FILE_NAME_{file_index}\"\n", + " file_index += 1\n", + " inline_data = chunk.candidates[0].content.parts[0].inline_data\n", + " data_buffer = inline_data.data\n", + " file_extension = mimetypes.guess_extension(inline_data.mime_type)\n", + " save_binary_file(f\"{file_name}{file_extension}\", data_buffer)\n", + " else:\n", + " print(chunk.text)\n", + "\n", + "if __name__ == \"__main__\":\n", + " generate()\n" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/notebooks/accelerating_pandas_with_gpu.ipynb b/notebooks/accelerating_pandas_with_gpu.ipynb new file mode 100644 index 00000000..99aa807d --- /dev/null +++ b/notebooks/accelerating_pandas_with_gpu.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [
+ "# Accelerating pandas with GPU: Sort the count of rows grouped on columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext cudf.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import random\n", + "\n", + "# Define the species categories\n", + "species_categories = ['setosa', 'versicolor', 'virginica']\n", + "flower_color_categories = ['red','yellow','green']\n", + "\n", + "# Define the range for each attribute based on typical iris flower measurements\n", + "sepal_length_range = (4.0, 8.0)\n", + "\n", + "# Create data for 1,000,000 samples\n", + "n = 1000000\n", + "data = {\n", + " 'sepal_length': [random.uniform(*sepal_length_range) for _ in range(n)],\n", + " 'flower_color': [random.choice(flower_color_categories) for _ in range(n)],\n", + " 'species': [random.choice(species_categories) for _ in range(n)]\n", + "}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "df.groupby(['species','flower_color']).size().sort_values(ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Acceleration pandas with GPU: Merging / Joining dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext cudf.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# 
Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\",\n", + " \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Generate random data for Dataset 1\n", + "data1 = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Generate random data for Dataset 2\n", + "data2 = {\n", + " \"Ticket Number\": np.random.choice(data1['Ticket Number'], size=num_rows), # Reusing ticket numbers to ensure matches\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows)\n", + "}\n", + "\n", + "# Create DataFrames\n", + "df1 = pd.DataFrame(data1)\n", + "df2 = pd.DataFrame(data2)\n", + "\n", + "# Perform an inner join on 'Ticket Number'\n", + "merged_df = pd.merge(df1, df2, on=\"Ticket Number\", how=\"inner\")\n", + "\n", + "# Display some of the joined data\n", + "print(merged_df.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Accelerating pandas with GPU: Groupby aggregate on timeseries data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext cudf.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "# Define the possible values\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", 
\"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "# Create a date range\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Adding issue weekday based on the \"Issue Date\"\n", + "weekday_names = {\n", + " 0: \"Monday\",\n", + " 1: \"Tuesday\",\n", + " 2: \"Wednesday\",\n", + " 3: \"Thursday\",\n", + " 4: \"Friday\",\n", + " 5: \"Saturday\",\n", + " 6: \"Sunday\",\n", + "}\n", + "\n", + "df[\"issue_weekday\"] = df[\"Issue Date\"].dt.weekday.map(weekday_names)\n", + "\n", + "# Grouping by issue_weekday and counting the Summons Number\n", + "df.groupby([\"Issue Date\"])[\"Ticket Number\"\n", + "].count().sort_values()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Accelerating pandas with GPU: Count of values and GroupBy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext cudf.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Randomly generated dataset of parking violations-\n", + "# Define the number of rows\n", + "num_rows = 
1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n", + " \"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Create a date range\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Which parking violation is most commonly committed by vehicles from various U.S states?\n", + "\n", + "(df[[\"Registration State\", \"Violation Description\"]] # get only these two columns\n", + " .value_counts() # get the count of offences per state and per type of offence\n", + " .groupby(\"Registration State\") # group by state\n", + " .head(1) # get the first row in each group (the type of offence with the largest count)\n", + " .sort_index() # sort by state name\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Accelerating pandas with GPU: Rolling Window Average" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext cudf.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Randomly generated dataset of parking violations-\n", + "# Define the number of rows\n", + "num_rows = 
1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n", + " \"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Create a date range\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# How does the parking violations change from day to day segmented by vehicle type\n", + "# Averaged using a 7-day rolling mean\n", + "\n", + "daily_counts = df.groupby(['Issue Date', 'Vehicle Body Type']\n", + " ).size().unstack(fill_value=0)\n", + "\n", + "# Calculate a 7-day rolling mean of daily violations for each vehicle type\n", + "rolling_means = daily_counts.rolling(window=7).mean()\n", + "\n", + "# Display the rolling means for each vehicle type over time\n", + "rolling_means.tail(100).plot(figsize=(14, 7),\n", + " title=\"7-Day Rolling Average of Parking Violations by Vehicle Type\")\n", + "plt.ylabel(\"Average Number of Violations\")\n", + "plt.xlabel(\"Date\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + 
"nbformat_minor": 4 +} diff --git a/pandas_acceleration_with_gpu.ipynb b/pandas_acceleration_with_gpu.ipynb new file mode 100644 index 00000000..6b149423 --- /dev/null +++ b/pandas_acceleration_with_gpu.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N_63kkzYSrX1" + }, + "source": [ + "# Accelerating pandas with GPU: Sort the count of rows grouped on columns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "igTJBL0vUb4F" + }, + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8fuXnmHmSBzq" + }, + "outputs": [], + "source": [ + "%load_ext cudf.pandas\n", + "import pandas as pd\n", + "import random\n", + "\n", + "# Define the species categories\n", + "species_categories = ['setosa', 'versicolor', 'virginica']\n", + "flower_color_categories = ['red','yellow','green']\n", + "\n", + "# Define the range for each attribute based on typical iris flower measurements\n", + "sepal_length_range = (4.0, 8.0)\n", + "\n", + "# Create data for 1,000,000 samples\n", + "n = 1000000\n", + "data = {\n", + " 'sepal_length': [random.uniform(*sepal_length_range) for _ in range(n)],\n", + " 'flower_color': [random.choice(flower_color_categories) for _ in range(n)],\n", + " 'species': [random.choice(species_categories) for _ in range(n)]\n", + "}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "df.groupby(['species','flower_color']).size().sort_values(ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t2DVoM7ifOAq" + }, + "source": [ + "# Acceleration pandas with GPU: Merging / Joining dataframes-" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k6MWgjQRfZl-" + }, + "source": [ + "Add 
`%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WW3bxTL3f5Pd" + }, + "outputs": [], + "source": [ + "%load_ext cudf.pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\",\n", + " \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Generate random data for Dataset 1\n", + "data1 = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Generate random data for Dataset 2\n", + "data2 = {\n", + " \"Ticket Number\": np.random.choice(data1['Ticket Number'], size=num_rows), # Reusing ticket numbers to ensure matches\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows)\n", + "}\n", + "\n", + "# Create DataFrames\n", + "df1 = pd.DataFrame(data1)\n", + "df2 = pd.DataFrame(data2)\n", + "\n", + "# Perform an inner join on 'Ticket Number'\n", + "merged_df = pd.merge(df1, df2, on=\"Ticket Number\", how=\"inner\")\n", + "\n", + "# Display some of the joined data\n", + "print(merged_df.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "41DVaHZHS571" + }, + "source": [ + "# Accelerating pandas with GPU: Groupby aggregate on timeseries data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YDBGAMOWZoG" + }, + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zlhe0827S1J5" + }, + "outputs": [], + "source": [ + "%load_ext cudf.pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + 
"\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "# Define the possible values\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\", \"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "# Create a date range\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Adding issue weekday based on the \"Issue Date\"\n", + "weekday_names = {\n", + " 0: \"Monday\",\n", + " 1: \"Tuesday\",\n", + " 2: \"Wednesday\",\n", + " 3: \"Thursday\",\n", + " 4: \"Friday\",\n", + " 5: \"Saturday\",\n", + " 6: \"Sunday\",\n", + "}\n", + "\n", + "df[\"issue_weekday\"] = df[\"Issue Date\"].dt.weekday.map(weekday_names)\n", + "\n", + "# Grouping by issue_weekday and counting the Summons Number\n", + "df.groupby([\"Issue Date\"])[\"Ticket Number\"\n", + "].count().sort_values()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kr4Niz4tWmCW" + }, + "source": [ + "# Accelerating pandas with GPU: Count of values and GroupBy\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l_OzcTpBezh9" + }, + "source": [ + "Add `%load_ext cudf.pandas` before importing pandas to speed up operations using GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{ + "id": "hYWROytuURFH" + }, + "outputs": [], + "source": [ + "%load_ext cudf.pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Randomly generated dataset of parking violations-\n", + "# Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n", + " \"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Create a date range\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Which parking violation is most commonly committed by vehicles from various U.S states?\n", + "\n", + "(df[[\"Registration State\", \"Violation Description\"]] # get only these two columns\n", + " .value_counts() # get the count of offences per state and per type of offence\n", + " .groupby(\"Registration State\") # group by state\n", + " .head(1) # get the first row in each group (the type of offence with the largest count)\n", + " .sort_index() # sort by state name\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4sWzpEvabvBW" + }, + "source": [ + "# Accelerating pandas with GPU: Rolling Window Average\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qbKJGeVnb6Uj" + }, + "outputs": [], + "source": [ + 
"%load_ext cudf.pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Randomly generated dataset of parking violations-\n", + "# Define the number of rows\n", + "num_rows = 1000000\n", + "\n", + "states = [\"NY\", \"NJ\", \"CA\", \"TX\"]\n", + "violations = [\"Double Parking\", \"Expired Meter\", \"No Parking\",\n", + " \"Fire Hydrant\", \"Bus Stop\"]\n", + "vehicle_types = [\"SUBN\", \"SDN\"]\n", + "\n", + "# Create a date range\n", + "start_date = \"2022-01-01\"\n", + "end_date = \"2022-12-31\"\n", + "dates = pd.date_range(start=start_date, end=end_date, freq='D')\n", + "\n", + "# Generate random data\n", + "data = {\n", + " \"Registration State\": np.random.choice(states, size=num_rows),\n", + " \"Violation Description\": np.random.choice(violations, size=num_rows),\n", + " \"Vehicle Body Type\": np.random.choice(vehicle_types, size=num_rows),\n", + " \"Issue Date\": np.random.choice(dates, size=num_rows),\n", + " \"Ticket Number\": np.random.randint(1000000000, 9999999999, size=num_rows)\n", + "}\n", + "\n", + "# Create a DataFrame\n", + "df = pd.DataFrame(data)\n", + "\n", + "# How does the parking violations change from day to day segmented by vehicle type\n", + "# Averaged using a 7-day rolling mean\n", + "\n", + "daily_counts = df.groupby(['Issue Date', 'Vehicle Body Type']\n", + " ).size().unstack(fill_value=0)\n", + "\n", + "# Calculate a 7-day rolling mean of daily violations for each vehicle type\n", + "rolling_means = daily_counts.rolling(window=7).mean()\n", + "\n", + "# Display the rolling means for each vehicle type over time\n", + "rolling_means.tail(100).plot(figsize=(14, 7),\n", + " title=\"7-Day Rolling Average of Parking Violations by Vehicle Type\")\n", + "plt.ylabel(\"Average Number of Violations\")\n", + "plt.xlabel(\"Date\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "private_outputs": true, + "provenance": [], + "include_colab_link": true + }, + 
"language_info": { + "name": "python" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/simulation/decai/simulation/simulate_imdb_perceptron.py b/simulation/decai/simulation/simulate_imdb_perceptron.py index ec093c26..a503f5de 100644 --- a/simulation/decai/simulation/simulate_imdb_perceptron.py +++ b/simulation/decai/simulation/simulate_imdb_perceptron.py @@ -1,4 +1,5 @@ import os +import re import sys from typing import Optional @@ -82,5 +83,7 @@ def main(): # Run with `bokeh serve PATH`. -if __name__.startswith('bk_script_'): +if re.match('bk_script_|bokeh_app_', __name__): main() +else: + print("`__name__` didn't match the pattern. Bokeh app will not run.")