Skip to content

Commit

Permalink
update vignettes
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Clark committed Jun 30, 2024
1 parent 2cd896c commit 253aa4b
Show file tree
Hide file tree
Showing 33 changed files with 3,995 additions and 4,195 deletions.
100 changes: 55 additions & 45 deletions doc/forecast_evaluation.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ knitr::opts_chunk$set(
eval = NOT_CRAN
)


## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
echo = TRUE,
dpi = 150,
dpi = 100,
fig.asp = 0.8,
fig.width = 6,
out.width = "60%",
Expand All @@ -19,39 +20,31 @@ library(mvgam)
library(ggplot2)
theme_set(theme_bw(base_size = 12, base_family = 'serif'))


## -----------------------------------------------------------------------------
set.seed(2345)
simdat <- sim_mvgam(T = 100,
n_series = 3,
trend_model = 'GP',
trend_model = GP(),
prop_trend = 0.75,
family = poisson(),
prop_missing = 0.10)


## -----------------------------------------------------------------------------
str(simdat)

## ----fig.alt = "Simulating data for dynamic GAM models in mvgam"--------------
plot(simdat$global_seasonality[1:12],
type = 'l', lwd = 2,
ylab = 'Relative effect',
xlab = 'Season',
bty = 'l')

## ----fig.alt = "Plotting time series features for GAM models in mvgam"--------
plot_mvgam_series(data = simdat$data_train,
series = 'all')


## ----fig.alt = "Plotting time series features for GAM models in mvgam"--------
plot_mvgam_series(data = simdat$data_train,
newdata = simdat$data_test,
series = 1)
plot_mvgam_series(data = simdat$data_train,
newdata = simdat$data_test,
series = 2)
plot_mvgam_series(data = simdat$data_train,
newdata = simdat$data_test,
series = 3)


## ----include=FALSE------------------------------------------------------------
mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
Expand All @@ -60,18 +53,23 @@ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
trend_model = 'None',
data = simdat$data_train)


## ----eval=FALSE---------------------------------------------------------------
# mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
# s(time, by = series, bs = 'cr', k = 20),
# knots = list(season = c(0.5, 12.5)),
# trend_model = 'None',
# data = simdat$data_train)
## mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
## s(time, by = series, bs = 'cr', k = 20),
## knots = list(season = c(0.5, 12.5)),
## trend_model = 'None',
## data = simdat$data_train,
## silent = 2)


## -----------------------------------------------------------------------------
summary(mod1, include_betas = FALSE)


## ----fig.alt = "Plotting GAM smooth functions using mvgam"--------------------
plot(mod1, type = 'smooths')
conditional_effects(mod1, type = 'link')


## ----include=FALSE------------------------------------------------------------
mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
Expand All @@ -80,51 +78,52 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
knots = list(season = c(0.5, 12.5)),
trend_model = 'None',
data = simdat$data_train,
adapt_delta = 0.98)
adapt_delta = 0.98,
silent = 2)


## ----eval=FALSE---------------------------------------------------------------
# mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
# gp(time, by = series, c = 5/4, k = 20,
# scale = FALSE),
# knots = list(season = c(0.5, 12.5)),
# trend_model = 'None',
# data = simdat$data_train)
## mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
## gp(time, by = series, c = 5/4, k = 20,
## scale = FALSE),
## knots = list(season = c(0.5, 12.5)),
## trend_model = 'None',
## data = simdat$data_train,
## silent = 2)


## -----------------------------------------------------------------------------
summary(mod2, include_betas = FALSE)


## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"------
mcmc_plot(mod2, variable = c('alpha_gp'), regex = TRUE, type = 'areas')


## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"------
mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas')

## ----fig.alt = "Plotting Gaussian Process effects in mvgam"-------------------
plot(mod2, type = 'smooths')

## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam and marginaleffects"----
require('ggplot2')
plot_predictions(mod2,
condition = c('time', 'series', 'series'),
type = 'link') +
theme(legend.position = 'none')
## ----fig.alt = "Plotting latent Gaussian Process effects in mvgam and marginaleffects"----
conditional_effects(mod2, type = 'link')


## -----------------------------------------------------------------------------
fc_mod1 <- forecast(mod1, newdata = simdat$data_test)
fc_mod2 <- forecast(mod2, newdata = simdat$data_test)


## -----------------------------------------------------------------------------
str(fc_mod1)


## -----------------------------------------------------------------------------
plot(fc_mod1, series = 1)
plot(fc_mod2, series = 1)

plot(fc_mod1, series = 2)
plot(fc_mod2, series = 2)

plot(fc_mod1, series = 3)
plot(fc_mod2, series = 3)

## ----include=FALSE------------------------------------------------------------
mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
Expand All @@ -134,43 +133,54 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
trend_model = 'None',
data = simdat$data_train,
newdata = simdat$data_test,
adapt_delta = 0.98)
adapt_delta = 0.98,
silent = 2)


## ----eval=FALSE---------------------------------------------------------------
# mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
# gp(time, by = series, c = 5/4, k = 20,
# scale = FALSE),
# knots = list(season = c(0.5, 12.5)),
# trend_model = 'None',
# data = simdat$data_train,
# newdata = simdat$data_test)
## mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
## gp(time, by = series, c = 5/4, k = 20,
## scale = FALSE),
## knots = list(season = c(0.5, 12.5)),
## trend_model = 'None',
## data = simdat$data_train,
## newdata = simdat$data_test,
## silent = 2)


## -----------------------------------------------------------------------------
fc_mod2 <- forecast(mod2)


## ----warning=FALSE, fig.alt = "Plotting posterior forecast distributions using mvgam and R"----
plot(fc_mod2, series = 1)


## ----warning=FALSE------------------------------------------------------------
crps_mod1 <- score(fc_mod1, score = 'crps')
str(crps_mod1)
crps_mod1$series_1


## ----warning=FALSE------------------------------------------------------------
crps_mod1 <- score(fc_mod1, score = 'crps', interval_width = 0.6)
crps_mod1$series_1


## -----------------------------------------------------------------------------
link_mod1 <- forecast(mod1, newdata = simdat$data_test, type = 'link')
score(link_mod1, score = 'elpd')$series_1


## -----------------------------------------------------------------------------
energy_mod2 <- score(fc_mod2, score = 'energy')
str(energy_mod2)


## -----------------------------------------------------------------------------
energy_mod2$all_series


## -----------------------------------------------------------------------------
crps_mod1 <- score(fc_mod1, score = 'crps')
crps_mod2 <- score(fc_mod2, score = 'crps')
Expand Down
63 changes: 21 additions & 42 deletions doc/forecast_evaluation.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ knitr::opts_chunk$set(
```{r setup, include=FALSE}
knitr::opts_chunk$set(
echo = TRUE,
dpi = 150,
dpi = 100,
fig.asp = 0.8,
fig.width = 6,
out.width = "60%",
Expand All @@ -36,12 +36,12 @@ theme_set(theme_bw(base_size = 12, base_family = 'serif'))
The purpose of this vignette is to show how the `mvgam` package can be used to produce probabilistic forecasts and to evaluate those forecasts using a variety of proper scoring rules.

## Simulating discrete time series
We begin by simulating some data to show how forecasts are computed and evaluated in `mvgam`. The `sim_mvgam()` function can be used to simulate series that come from a variety of response distributions as well as seasonal patterns and/or dynamic temporal patterns. Here we simulate a collection of three time count-valued series. These series all share the same seasonal pattern but have different temporal dynamics. By setting `trend_model = 'GP'` and `prop_trend = 0.75`, we are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing.
We begin by simulating some data to show how forecasts are computed and evaluated in `mvgam`. The `sim_mvgam()` function can be used to simulate series that come from a variety of response distributions as well as seasonal patterns and/or dynamic temporal patterns. Here we simulate a collection of three time count-valued series. These series all share the same seasonal pattern but have different temporal dynamics. By setting `trend_model = GP()` and `prop_trend = 0.75`, we are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing.
```{r}
set.seed(2345)
simdat <- sim_mvgam(T = 100,
n_series = 3,
trend_model = 'GP',
trend_model = GP(),
prop_trend = 0.75,
family = poisson(),
prop_missing = 0.10)
Expand All @@ -52,32 +52,17 @@ The returned object is a `list` containing training and testing data (`sim_mvgam
str(simdat)
```

Each series in this case has a shared seasonal pattern, which we can visualise:
```{r, fig.alt = "Simulating data for dynamic GAM models in mvgam"}
plot(simdat$global_seasonality[1:12],
type = 'l', lwd = 2,
ylab = 'Relative effect',
xlab = 'Season',
bty = 'l')
```

The resulting time series are similar to what we might encounter when dealing with count-valued data that can take small counts:
Each series in this case has a shared seasonal pattern. The resulting time series are similar to what we might encounter when dealing with count-valued data that can take small counts:
```{r, fig.alt = "Plotting time series features for GAM models in mvgam"}
plot_mvgam_series(data = simdat$data_train,
series = 'all')
```

For each individual series, we can plot the training and testing data, as well as some more specific features of the observed data:
For individual series, we can plot the training and testing data, as well as some more specific features of the observed data:
```{r, fig.alt = "Plotting time series features for GAM models in mvgam"}
plot_mvgam_series(data = simdat$data_train,
newdata = simdat$data_test,
series = 1)
plot_mvgam_series(data = simdat$data_train,
newdata = simdat$data_test,
series = 2)
plot_mvgam_series(data = simdat$data_train,
newdata = simdat$data_test,
series = 3)
```

### Modelling dynamics with splines
Expand All @@ -95,17 +80,18 @@ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
s(time, by = series, bs = 'cr', k = 20),
knots = list(season = c(0.5, 12.5)),
trend_model = 'None',
data = simdat$data_train)
data = simdat$data_train,
silent = 2)
```

The model fits without issue:
```{r}
summary(mod1, include_betas = FALSE)
```

And we can plot the partial effects of the splines to see that they are estimated to be highly nonlinear
And we can plot the conditional effects of the splines (on the link scale) to see that they are estimated to be highly nonlinear
```{r, fig.alt = "Plotting GAM smooth functions using mvgam"}
plot(mod1, type = 'smooths')
conditional_effects(mod1, type = 'link')
```

### Modelling dynamics with GPs
Expand All @@ -117,7 +103,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
knots = list(season = c(0.5, 12.5)),
trend_model = 'None',
data = simdat$data_train,
adapt_delta = 0.98)
adapt_delta = 0.98,
silent = 2)
```

```{r eval=FALSE}
Expand All @@ -126,7 +113,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
scale = FALSE),
knots = list(season = c(0.5, 12.5)),
trend_model = 'None',
data = simdat$data_train)
data = simdat$data_train,
silent = 2)
```

The summary for this model now contains information on the GP parameters for each time series:
Expand All @@ -144,17 +132,9 @@ And now the length scale ($\rho$) parameters:
mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas')
```

We can also plot the nonlinear effects as before:
```{r, fig.alt = "Plotting Gaussian Process effects in mvgam"}
plot(mod2, type = 'smooths')
```
These can also be plotted using `marginaleffects` utilities:
```{r, fig.alt = "Summarising latent Gaussian Process parameters in mvgam and marginaleffects"}
require('ggplot2')
plot_predictions(mod2,
condition = c('time', 'series', 'series'),
type = 'link') +
theme(legend.position = 'none')
We can again plot the nonlinear effects:
```{r, fig.alt = "Plotting latent Gaussian Process effects in mvgam and marginaleffects"}
conditional_effects(mod2, type = 'link')
```

The estimates for the temporal trends are fairly similar for the two models, but below we will see if they produce similar forecasts
Expand All @@ -171,16 +151,13 @@ The objects we have created are of class `mvgam_forecast`, which contain informa
str(fc_mod1)
```

We can plot the forecasts for each series from each model using the `S3 plot` method for objects of this class:
We can plot the forecasts for some series from each model using the `S3 plot` method for objects of this class:
```{r}
plot(fc_mod1, series = 1)
plot(fc_mod2, series = 1)
plot(fc_mod1, series = 2)
plot(fc_mod2, series = 2)
plot(fc_mod1, series = 3)
plot(fc_mod2, series = 3)
```

Clearly the two models do not produce equivalent forecasts. We will come back to scoring these forecasts in a moment.
Expand All @@ -195,7 +172,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
trend_model = 'None',
data = simdat$data_train,
newdata = simdat$data_test,
adapt_delta = 0.98)
adapt_delta = 0.98,
silent = 2)
```

```{r eval=FALSE}
Expand All @@ -205,7 +183,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) +
knots = list(season = c(0.5, 12.5)),
trend_model = 'None',
data = simdat$data_train,
newdata = simdat$data_test)
newdata = simdat$data_test,
silent = 2)
```

Because the model already contains a forecast distribution, we do not need to feed `newdata` to the `forecast()` function:
Expand Down
Loading

0 comments on commit 253aa4b

Please sign in to comment.