-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Create R environment using 'renv' package. Created a basic snapshot using dplyr, magrittr and janitor. Started the process of replicating the Basel weather python notebook. * Updated the basel_weather.csv. Added the following analyses: Clean columns; Create various date variables; Plot the hourly and daily weather; Added spider plot for cyclical seasonality; Create moving averages 7, 14 and 365 days; Added kernel smoother with bandwidth of 30, 90. * Bandwidth = 2*sd for the ksmooth function. Adjusted the R script accordingly. * Updated the working directory path. Update the rlock file to include the "here" package. * Changed renv path to the root directory. * Finalised folder structure. Co-authored-by: Aaron Pickering <[email protected]>
- Loading branch information
Showing
8 changed files
with
835 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
source("renv/activate.R") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,4 +133,5 @@ dmypy.json | |
*.DS_Store | ||
|
||
#vscode | ||
.vscode | ||
.vscode | ||
.Rproj.user |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
source("renv/activate.R") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
library("dplyr") | ||
library("lubridate") | ||
library("magrittr") | ||
library("janitor") | ||
library("ggplot2") | ||
library("here") | ||
|
||
# Read the weather data.
# here::here() builds the path from the project root, so the script does not
# have to change the working directory to locate the data folder. This is the
# same file the previous "R/" + "../data/" combination pointed at.
weather <- read.csv(here::here("data", "basel_weather.csv"))
|
||
# Standardise the column names: janitor::clean_names() removes special
# characters, spaces and similar from the headers.
weather <- janitor::clean_names(weather)

# Give the second column a short, readable name.
names(weather)[2] <- "temperature"
|
||
# Create various date variables and add them to the data frame.
# The timestamp is first coerced to character; its first eight characters are
# then parsed as a YYYYMMDD date, and the calendar fields are derived from
# that date. mutate() evaluates its arguments in order, so later columns can
# use the ones defined just above them.
weather <- weather %>%
  mutate(
    timestamp = as.character(timestamp),
    date = as.Date(substr(timestamp, 1, 8), format = "%Y%m%d"),
    year = lubridate::year(date),
    month = lubridate::month(date),
    day = lubridate::day(date),
    day_of_year = lubridate::yday(date)
  )
|
||
# Pull the hour out of a timestamp string.
# Characters 10 and 11 of `timestamp` are taken as the hour and converted to
# a number. Works element-wise on a vector of timestamps.
extract_hour <- function(timestamp) {
  hour_chars <- substring(timestamp, first = 10, last = 11)
  as.numeric(hour_chars)
}
|
||
# Add the hour of day as its own column.
weather$hour <- extract_hour(weather$timestamp)
|
||
# Plot the hourly temperature over time.
# The x axis uses a full date-time built from the year/month/day/hour columns.
# Plotting against `date` alone would put every reading of a day at the same
# x position, so the line would not show the hourly progression.
weather %>%
  mutate(datetime = lubridate::make_datetime(year, month, day, hour)) %>%
  ggplot(aes(x = datetime, y = temperature)) +
  geom_line(colour = "blue") +
  labs(
    title = "Basel Temperature (Hourly)",
    x = "Date",
    y = "Temperature (deg C)"
  )
|
||
# Aggregate the readings to one row per date (daily mean temperature).
# `day` is part of the grouping so that it is carried into the result.
# summarise() only removes the last grouping level, so ungroup() is called to
# hand back a plain, ungrouped table for the later steps.
daily_df <- weather %>%
  group_by(date, day) %>%
  summarise(
    temperature = mean(temperature),
    day_of_year = first(day_of_year)
  ) %>%
  ungroup()
|
||
# Line chart of the daily temperature over time.
ggplot(daily_df, aes(x = date, y = temperature)) +
  geom_line(colour = "darkblue") +
  labs(
    title = "Basel Temperature (Daily)",
    x = "Date",
    y = "Temperature (deg C)"
  )
|
||
|
||
# Show the yearly seasonality: temperature against day of year, drawn in
# polar coordinates so the year wraps around into a spider plot.
ggplot(daily_df, aes(x = day_of_year, y = temperature)) +
  geom_line(colour = "blue") +
  coord_polar() +
  labs(title = "Yearly Seasonality")
|
||
# Trailing moving average.
#
# x: numeric vector to smooth.
# k: window length, i.e. the number of consecutive values averaged
#    (default 14). Must be a single number >= 1.
#
# Returns a vector of the same length as `x`. Element i holds the mean of the
# k values ending at position i; the first k - 1 elements are NA because no
# full window is available there. If `x` is shorter than `k`, every element
# is NA. An NA inside a window makes that window's average NA.
moving_average <- function(x, k = 14) {
  stopifnot(is.numeric(k), length(k) == 1L, k >= 1)

  n <- length(x)
  x_smooth <- rep(NA_real_, n)

  # Without a single full window there is nothing to average. Returning early
  # also avoids a descending k:n sequence in the loop below.
  if (n < k) {
    return(x_smooth)
  }

  for (i in k:n) {
    # (i - k + 1):i covers exactly k positions ending at i.
    x_smooth[i] <- mean(x[(i - k + 1):i])
  }
  x_smooth
}
|
||
# Add moving averages of the daily temperature for k = 7, 14 and 365.
daily_df[["temp_smooth_ma_7"]] <-
  moving_average(daily_df[["temperature"]], k = 7)
daily_df[["temp_smooth_ma_14"]] <-
  moving_average(daily_df[["temperature"]], k = 14)
daily_df[["temp_smooth_ma_365"]] <-
  moving_average(daily_df[["temperature"]], k = 365)
|
||
# Plot each moving-average series (dark red) on top of the daily
# temperature (blue), one chart per window setting.
ggplot(daily_df, aes(x = date)) +
  geom_line(aes(y = temperature), colour = "blue") +
  geom_line(aes(y = temp_smooth_ma_7), colour = "darkred")

ggplot(daily_df, aes(x = date)) +
  geom_line(aes(y = temperature), colour = "blue") +
  geom_line(aes(y = temp_smooth_ma_14), colour = "darkred")

ggplot(daily_df, aes(x = date)) +
  geom_line(aes(y = temperature), colour = "blue") +
  geom_line(aes(y = temp_smooth_ma_365), colour = "darkred")
|
||
|
||
|
||
# Apply Gaussian kernel smoothers to the daily series.
# x.points is passed explicitly so ksmooth() evaluates the fit at the observed
# dates. By default it evaluates on max(100, n) evenly spaced points, which
# only lines up with the rows when there are at least 100 days and no gaps.
# ksmooth() returns the fit in increasing x order, so this assumes the rows
# of daily_df are sorted by date.
# NOTE(review): ksmooth() scales the kernel so that its quartiles lie at
# +/- 0.25 * bandwidth; confirm that 60 and 180 give the intended 30- and
# 90-day smoothers.
daily_df$temp_gf_30 <- ksmooth(
  x = as.numeric(daily_df$date),
  y = daily_df$temperature,
  kernel = "normal",
  bandwidth = 60,
  x.points = as.numeric(daily_df$date)
)$y
daily_df$temp_gf_90 <- ksmooth(
  x = as.numeric(daily_df$date),
  y = daily_df$temperature,
  kernel = "normal",
  bandwidth = 180,
  x.points = as.numeric(daily_df$date)
)$y
|
||
|
||
# Plot each kernel-smoothed series (dark red) on top of the daily
# temperature (blue).
ggplot(daily_df, aes(x = date)) +
  geom_line(aes(y = temperature), colour = "blue") +
  geom_line(aes(y = temp_gf_30), colour = "darkred")

ggplot(daily_df, aes(x = date)) +
  geom_line(aes(y = temperature), colour = "blue") +
  geom_line(aes(y = temp_gf_90), colour = "darkred")
Oops, something went wrong.