From c7cc70f1b9ae3b4db756dbe65a6a1ed92296f343 Mon Sep 17 00:00:00 2001 From: Josh Okrend Date: Fri, 3 Oct 2025 16:45:23 -0400 Subject: [PATCH 1/4] added notebook --- Okrend_ATMS523_Module3_Project.ipynb | 254 +++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 Okrend_ATMS523_Module3_Project.ipynb diff --git a/Okrend_ATMS523_Module3_Project.ipynb b/Okrend_ATMS523_Module3_Project.ipynb new file mode 100644 index 0000000..8ed4390 --- /dev/null +++ b/Okrend_ATMS523_Module3_Project.ipynb @@ -0,0 +1,254 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "bbc1470f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "017fbb12", + "metadata": {}, + "outputs": [], + "source": [ + "def fetchGHCNStationData(stationId):\n", + " df = pd.read_csv(\n", + " f\"http://noaa-ghcn-pds.s3.amazonaws.com/csv/by_station/{stationId}.csv\"\n", + " )\n", + "\n", + " df = df[[\"DATE\", \"ELEMENT\", \"DATA_VALUE\"]]\n", + " df = df[(df[\"DATE\"] >= 19910101) & (df[\"DATE\"] <= 20201231)]\n", + " df = df[df[\"ELEMENT\"].isin([\"TMAX\", \"TMIN\"])]\n", + " df[\"DATE\"] = pd.to_datetime(df[\"DATE\"], format=\"%Y%m%d\")\n", + "\n", + " pivot = df.pivot(index=\"DATE\", columns=\"ELEMENT\", values=\"DATA_VALUE\").reset_index()\n", + " pivot[\"TMAX\"] = pivot[\"TMAX\"] / 10\n", + " pivot[\"TMIN\"] = pivot[\"TMIN\"] / 10\n", + " pivot[\"md\"] = pivot[\"DATE\"].dt.strftime(\"%m-%d\")\n", + "\n", + " dailyagg = pivot.groupby([\"md\"], as_index=False).agg(\n", + " record_max_temp=(\"TMAX\", \"max\"),\n", + " record_min_temp=(\"TMIN\", \"min\"),\n", + " average_max_temp=(\"TMAX\", \"mean\"),\n", + " average_min_temp=(\"TMIN\", \"mean\"),\n", + " )\n", + "\n", + " return dailyagg" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ce336902", + "metadata": {}, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/8t/4dcpfx6j29d858grh_vp_d_m0000gn/T/ipykernel_75689/2318973544.py:2: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " df = pd.read_csv(\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mdrecord_max_temprecord_min_tempaverage_max_tempaverage_min_temp
001-0119.4-11.710.083333-1.400000
101-0219.4-13.29.016667-0.806667
201-0322.8-12.28.410000-1.306667
301-0421.7-11.19.426667-1.670000
401-0519.4-12.29.073333-1.653333
..................
36112-2721.7-8.910.300000-1.433333
36212-2819.4-9.410.130000-0.766667
36312-2921.7-8.311.4666670.190000
36412-3020.0-8.99.620000-1.226667
36512-3121.1-10.610.376667-0.673333
\n", + "

366 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " md record_max_temp record_min_temp average_max_temp \\\n", + "0 01-01 19.4 -11.7 10.083333 \n", + "1 01-02 19.4 -13.2 9.016667 \n", + "2 01-03 22.8 -12.2 8.410000 \n", + "3 01-04 21.7 -11.1 9.426667 \n", + "4 01-05 19.4 -12.2 9.073333 \n", + ".. ... ... ... ... \n", + "361 12-27 21.7 -8.9 10.300000 \n", + "362 12-28 19.4 -9.4 10.130000 \n", + "363 12-29 21.7 -8.3 11.466667 \n", + "364 12-30 20.0 -8.9 9.620000 \n", + "365 12-31 21.1 -10.6 10.376667 \n", + "\n", + " average_min_temp \n", + "0 -1.400000 \n", + "1 -0.806667 \n", + "2 -1.306667 \n", + "3 -1.670000 \n", + "4 -1.653333 \n", + ".. ... \n", + "361 -1.433333 \n", + "362 -0.766667 \n", + "363 0.190000 \n", + "364 -1.226667 \n", + "365 -0.673333 \n", + "\n", + "[366 rows x 5 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avl_stid = \"USW00003812\"\n", + "df = fetchGHCNStationData(avl_stid)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9ef46b8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5597bc36cf8939d54bdeaf82c36f92c98a574150 Mon Sep 17 00:00:00 2001 From: Josh Okrend Date: Sun, 5 Oct 2025 16:50:53 -0400 Subject: [PATCH 2/4] Created plot and added comments --- Okrend_ATMS523_Module3_Project.ipynb | 230 +++++++++++++++++++++++++-- 1 file changed, 221 insertions(+), 9 deletions(-) diff --git a/Okrend_ATMS523_Module3_Project.ipynb b/Okrend_ATMS523_Module3_Project.ipynb index 8ed4390..67c0e1f 100644 --- a/Okrend_ATMS523_Module3_Project.ipynb +++ 
b/Okrend_ATMS523_Module3_Project.ipynb @@ -20,20 +20,29 @@ "outputs": [], "source": [ "def fetchGHCNStationData(stationId):\n", + " \"\"\"Returns a DataFrame with record max temp, record min temp,\n", + " average max temp, and average min temp\n", + " from the 1991-2020 period from the provided GHCNd Station Id\n", + " \"\"\"\n", + "\n", + " # Reads a GHCNd csv file at selected Station Id from AWS S3 Bucket.\n", " df = pd.read_csv(\n", " f\"http://noaa-ghcn-pds.s3.amazonaws.com/csv/by_station/{stationId}.csv\"\n", " )\n", "\n", + " # Select date, element, and data value columns and select the 1991-2020 period.\n", " df = df[[\"DATE\", \"ELEMENT\", \"DATA_VALUE\"]]\n", " df = df[(df[\"DATE\"] >= 19910101) & (df[\"DATE\"] <= 20201231)]\n", " df = df[df[\"ELEMENT\"].isin([\"TMAX\", \"TMIN\"])]\n", " df[\"DATE\"] = pd.to_datetime(df[\"DATE\"], format=\"%Y%m%d\")\n", "\n", + " # Create pivot table from Element column and convert data to degrees Celsius.\n", " pivot = df.pivot(index=\"DATE\", columns=\"ELEMENT\", values=\"DATA_VALUE\").reset_index()\n", " pivot[\"TMAX\"] = pivot[\"TMAX\"] / 10\n", " pivot[\"TMIN\"] = pivot[\"TMIN\"] / 10\n", " pivot[\"md\"] = pivot[\"DATE\"].dt.strftime(\"%m-%d\")\n", "\n", + " # Compute record max temp, record min temp, average max temp, and average min temp\n", " dailyagg = pivot.groupby([\"md\"], as_index=False).agg(\n", " record_max_temp=(\"TMAX\", \"max\"),\n", " record_min_temp=(\"TMIN\", \"min\"),\n", @@ -41,7 +50,7 @@ " average_min_temp=(\"TMIN\", \"mean\"),\n", " )\n", "\n", - " return dailyagg" + " return pivot, dailyagg" ] }, { @@ -54,10 +63,155 @@ "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/8t/4dcpfx6j29d858grh_vp_d_m0000gn/T/ipykernel_75689/2318973544.py:2: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/var/folders/8t/4dcpfx6j29d858grh_vp_d_m0000gn/T/ipykernel_92128/1540462304.py:8: DtypeWarning: Columns (5) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", " df = pd.read_csv(\n" ] }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ELEMENTDATETMAXTMINmd
01991-01-015.0-3.901-01
11991-01-0213.92.201-02
21991-01-036.12.201-03
31991-01-047.83.901-04
41991-01-058.93.301-05
...............
109532020-12-2710.6-6.612-27
109542020-12-2811.1-1.012-28
109552020-12-2913.92.812-29
109562020-12-304.41.712-30
109572020-12-3118.96.712-31
\n", + "

10958 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + "ELEMENT DATE TMAX TMIN md\n", + "0 1991-01-01 5.0 -3.9 01-01\n", + "1 1991-01-02 13.9 2.2 01-02\n", + "2 1991-01-03 6.1 2.2 01-03\n", + "3 1991-01-04 7.8 3.9 01-04\n", + "4 1991-01-05 8.9 3.3 01-05\n", + "... ... ... ... ...\n", + "10953 2020-12-27 10.6 -6.6 12-27\n", + "10954 2020-12-28 11.1 -1.0 12-28\n", + "10955 2020-12-29 13.9 2.8 12-29\n", + "10956 2020-12-30 4.4 1.7 12-30\n", + "10957 2020-12-31 18.9 6.7 12-31\n", + "\n", + "[10958 rows x 4 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get Asheville, NC Airport Station Id and call the fetch function.\n", + "avl_stid = \"USW00003812\"\n", + "df, agg = fetchGHCNStationData(avl_stid)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "21b18077", + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -210,24 +364,82 @@ "[366 rows x 5 columns]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "avl_stid = \"USW00003812\"\n", - "df = fetchGHCNStationData(avl_stid)\n", - "df" + "agg" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e9ef46b8", + "execution_count": 5, + "id": "29d3b2ad", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Select a year from the Asheville Airport data\n", + "df = df[(df[\"DATE\"] >= \"2016-01-01\") & (df[\"DATE\"] <= \"2016-12-31\")]\n", + "\n", + "# Compute range of record, average, and actual max and min temperatures.\n", + "record_range = agg[\"record_max_temp\"] - agg[\"record_min_temp\"]\n", + "average_range = agg[\"average_max_temp\"] - agg[\"average_min_temp\"]\n", + "actual_range = df[\"TMAX\"] - df[\"TMIN\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ba7e80c6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot record, average, and actual max and min temperatures.\n", + "plt.figure(figsize=(12, 7))\n", + "\n", + "plt.bar(\n", + " df[\"DATE\"],\n", + " record_range,\n", + " bottom=agg[\"record_min_temp\"],\n", + " color=\"lightsteelblue\",\n", + " label=\"Record\",\n", + ")\n", + "\n", + "plt.bar(\n", + " df[\"DATE\"],\n", + " average_range,\n", + " bottom=agg[\"average_min_temp\"],\n", + " color=\"cornflowerblue\",\n", + " label=\"Average\",\n", + ")\n", + "\n", + "plt.bar(\n", + " df[\"DATE\"], actual_range, bottom=df[\"TMIN\"], color=\"navy\", label=\"Actual\", alpha=0.6\n", + ")\n", + "\n", + "plt.ylabel(\"Temperature (°C)\")\n", + "plt.xlabel(\"Day\")\n", + "plt.title(\n", + " \"Daily Record, Average, and Actual High and Low Temperatures for Asheville, NC in 2016\"\n", + ")\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "\n", + "plt.show()" + ] } ], "metadata": { From a0dcf974b024ae2a9930deb7d18dacd8a9fbbf4d Mon Sep 17 00:00:00 2001 From: Josh Okrend Date: Sun, 5 Oct 2025 17:01:21 -0400 Subject: [PATCH 3/4] updated readme --- Okrend_ATMS523_Module3_Project.ipynb | 4 ++-- README.md | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Okrend_ATMS523_Module3_Project.ipynb b/Okrend_ATMS523_Module3_Project.ipynb index 67c0e1f..bc60dbd 100644 --- a/Okrend_ATMS523_Module3_Project.ipynb +++ b/Okrend_ATMS523_Module3_Project.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "017fbb12", "metadata": {}, "outputs": [], @@ -22,7 +22,7 @@ "def fetchGHCNStationData(stationId):\n", " \"\"\"Returns a DataFrame with record max temp, record min temp,\n", " average max temp, and average min temp\n", - " from the 1991-2020 period from the provided GHCNd Station Id\n", + " from the 1991-2020 period from the provided GHCN-D Station Id\n", " \"\"\"\n", "\n", " # Reads a GHCNd csv file at selected Station Id from 
AWS S3 Bucket.\n", diff --git a/README.md b/README.md index 3dbf7ef..aa456b5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ -# ATMS 523 Module-3 +# ATMS-523-Module-2 Project 2 -Code, notebooks, and homework for ATMS 523 Module 3. +This project finds the record, average, and actual max and min temperatures +for a given year using Global Historical Climatology Network Daily (GHCN-D) data +from a given Station Id and creates a plot. + +## Dataset +- NOAA Global Historical Climatology Network Daily (GHCN-D) on AWS +- Store: `https://noaa-ghcn-pds.s3.amazonaws.com/index.html` \ No newline at end of file From 4f3c6c9ed50d92b866fee094b7374a72dda6044a Mon Sep 17 00:00:00 2001 From: Josh Okrend Date: Mon, 6 Oct 2025 16:02:28 -0400 Subject: [PATCH 4/4] updated readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index aa456b5..e6521c7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ATMS-523-Module-2 Project 2 +# ATMS-523-Module-3 Project This project finds the record, average, and actual max and min temperatures for a given year using Global Historical Climatology Network Daily (GHCN-D) data