From 253aa4bbc7c2cb7766d3df0763b7a195785bbb41 Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Mon, 1 Jul 2024 08:19:07 +1000 Subject: [PATCH] update vignettes --- doc/forecast_evaluation.R | 100 +- doc/forecast_evaluation.Rmd | 63 +- doc/forecast_evaluation.html | 847 +++++++++-------- doc/shared_states.R | 142 +-- doc/shared_states.Rmd | 87 +- doc/shared_states.html | 748 ++++++++------- doc/time_varying_effects.R | 116 ++- doc/time_varying_effects.Rmd | 91 +- doc/time_varying_effects.html | 728 +++++++-------- doc/trend_formulas.R | 205 ++--- doc/trend_formulas.Rmd | 143 +-- doc/trend_formulas.html | 794 +++++++--------- inst/doc/data_in_mvgam.R | 115 +-- inst/doc/data_in_mvgam.Rmd | 2 +- inst/doc/data_in_mvgam.html | 194 ++-- inst/doc/forecast_evaluation.R | 81 +- inst/doc/forecast_evaluation.Rmd | 33 +- inst/doc/forecast_evaluation.html | 308 +++---- inst/doc/mvgam_overview.R | 228 +++-- inst/doc/mvgam_overview.Rmd | 96 +- inst/doc/mvgam_overview.html | 1378 +++++++++++++++------------- inst/doc/nmixtures.R | 192 ++-- inst/doc/nmixtures.Rmd | 2 +- inst/doc/nmixtures.html | 286 +++--- inst/doc/shared_states.R | 84 +- inst/doc/shared_states.Rmd | 40 +- inst/doc/shared_states.html | 401 ++++---- inst/doc/time_varying_effects.R | 115 ++- inst/doc/time_varying_effects.Rmd | 47 +- inst/doc/time_varying_effects.html | 219 ++--- inst/doc/trend_formulas.R | 98 +- inst/doc/trend_formulas.Rmd | 22 +- inst/doc/trend_formulas.html | 185 ++-- 33 files changed, 3995 insertions(+), 4195 deletions(-) diff --git a/doc/forecast_evaluation.R b/doc/forecast_evaluation.R index 28942b03..8412f550 100644 --- a/doc/forecast_evaluation.R +++ b/doc/forecast_evaluation.R @@ -7,10 +7,11 @@ knitr::opts_chunk$set( eval = NOT_CRAN ) + ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -19,39 +20,31 @@ library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) + ## ----------------------------------------------------------------------------- set.seed(2345) simdat <- sim_mvgam(T = 100, n_series = 3, - trend_model = 'GP', + trend_model = GP(), prop_trend = 0.75, family = poisson(), prop_missing = 0.10) + ## ----------------------------------------------------------------------------- str(simdat) -## ----fig.alt = "Simulating data for dynamic GAM models in mvgam"-------------- -plot(simdat$global_seasonality[1:12], - type = 'l', lwd = 2, - ylab = 'Relative effect', - xlab = 'Season', - bty = 'l') ## ----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, series = 'all') + ## ----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, newdata = simdat$data_test, series = 1) -plot_mvgam_series(data = simdat$data_train, - newdata = simdat$data_test, - series = 2) -plot_mvgam_series(data = simdat$data_train, - newdata = simdat$data_test, - series = 3) + ## ----include=FALSE------------------------------------------------------------ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + @@ -60,18 +53,23 @@ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + trend_model = 'None', data = simdat$data_train) + ## ----eval=FALSE--------------------------------------------------------------- -# mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + -# s(time, by = series, bs = 'cr', k = 20), -# knots = list(season = c(0.5, 12.5)), -# trend_model 
= 'None', -# data = simdat$data_train) +## mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + +## s(time, by = series, bs = 'cr', k = 20), +## knots = list(season = c(0.5, 12.5)), +## trend_model = 'None', +## data = simdat$data_train, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(mod1, include_betas = FALSE) + ## ----fig.alt = "Plotting GAM smooth functions using mvgam"-------------------- -plot(mod1, type = 'smooths') +conditional_effects(mod1, type = 'link') + ## ----include=FALSE------------------------------------------------------------ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + @@ -80,42 +78,45 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + -# gp(time, by = series, c = 5/4, k = 20, -# scale = FALSE), -# knots = list(season = c(0.5, 12.5)), -# trend_model = 'None', -# data = simdat$data_train) +## mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + +## gp(time, by = series, c = 5/4, k = 20, +## scale = FALSE), +## knots = list(season = c(0.5, 12.5)), +## trend_model = 'None', +## data = simdat$data_train, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(mod2, include_betas = FALSE) + ## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"------ mcmc_plot(mod2, variable = c('alpha_gp'), regex = TRUE, type = 'areas') + ## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"------ mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas') -## ----fig.alt = "Plotting Gaussian Process effects in mvgam"------------------- -plot(mod2, type = 'smooths') -## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam and marginaleffects"---- -require('ggplot2') -plot_predictions(mod2, - condition = c('time', 'series', 'series'), - type = 'link') + - theme(legend.position = 'none') +## ----fig.alt = "Plotting latent Gaussian Process effects in mvgam and marginaleffects"---- +conditional_effects(mod2, type = 'link') + ## ----------------------------------------------------------------------------- fc_mod1 <- forecast(mod1, newdata = simdat$data_test) fc_mod2 <- forecast(mod2, newdata = simdat$data_test) + ## ----------------------------------------------------------------------------- str(fc_mod1) + ## ----------------------------------------------------------------------------- plot(fc_mod1, series = 1) plot(fc_mod2, series = 1) @@ -123,8 +124,6 @@ plot(fc_mod2, series = 1) plot(fc_mod1, series = 2) plot(fc_mod2, series = 2) -plot(fc_mod1, series = 3) -plot(fc_mod2, series = 3) ## ----include=FALSE------------------------------------------------------------ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + @@ -134,43 +133,54 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + trend_model = 'None', data = simdat$data_train, newdata = simdat$data_test, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + -# gp(time, by = series, c = 5/4, k = 20, -# scale = FALSE), -# knots = list(season = c(0.5, 12.5)), -# trend_model = 'None', -# data = simdat$data_train, -# newdata = simdat$data_test) +## mod2 <- mvgam(y ~ 
s(season, bs = 'cc', k = 8) + +## gp(time, by = series, c = 5/4, k = 20, +## scale = FALSE), +## knots = list(season = c(0.5, 12.5)), +## trend_model = 'None', +## data = simdat$data_train, +## newdata = simdat$data_test, +## silent = 2) + ## ----------------------------------------------------------------------------- fc_mod2 <- forecast(mod2) + ## ----warning=FALSE, fig.alt = "Plotting posterior forecast distributions using mvgam and R"---- plot(fc_mod2, series = 1) + ## ----warning=FALSE------------------------------------------------------------ crps_mod1 <- score(fc_mod1, score = 'crps') str(crps_mod1) crps_mod1$series_1 + ## ----warning=FALSE------------------------------------------------------------ crps_mod1 <- score(fc_mod1, score = 'crps', interval_width = 0.6) crps_mod1$series_1 + ## ----------------------------------------------------------------------------- link_mod1 <- forecast(mod1, newdata = simdat$data_test, type = 'link') score(link_mod1, score = 'elpd')$series_1 + ## ----------------------------------------------------------------------------- energy_mod2 <- score(fc_mod2, score = 'energy') str(energy_mod2) + ## ----------------------------------------------------------------------------- energy_mod2$all_series + ## ----------------------------------------------------------------------------- crps_mod1 <- score(fc_mod1, score = 'crps') crps_mod2 <- score(fc_mod2, score = 'crps') diff --git a/doc/forecast_evaluation.Rmd b/doc/forecast_evaluation.Rmd index 36ea6227..8979593d 100644 --- a/doc/forecast_evaluation.Rmd +++ b/doc/forecast_evaluation.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -36,12 +36,12 @@ theme_set(theme_bw(base_size = 12, base_family = 'serif')) The purpose of this vignette is to show how the `mvgam` package can be used to produce probabilistic forecasts and to evaluate those forecasts using a variety of proper scoring rules. ## Simulating discrete time series -We begin by simulating some data to show how forecasts are computed and evaluated in `mvgam`. The `sim_mvgam()` function can be used to simulate series that come from a variety of response distributions as well as seasonal patterns and/or dynamic temporal patterns. Here we simulate a collection of three time count-valued series. These series all share the same seasonal pattern but have different temporal dynamics. By setting `trend_model = 'GP'` and `prop_trend = 0.75`, we are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing. +We begin by simulating some data to show how forecasts are computed and evaluated in `mvgam`. The `sim_mvgam()` function can be used to simulate series that come from a variety of response distributions as well as seasonal patterns and/or dynamic temporal patterns. Here we simulate a collection of three time count-valued series. These series all share the same seasonal pattern but have different temporal dynamics. By setting `trend_model = GP()` and `prop_trend = 0.75`, we are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing. 
```{r} set.seed(2345) simdat <- sim_mvgam(T = 100, n_series = 3, - trend_model = 'GP', + trend_model = GP(), prop_trend = 0.75, family = poisson(), prop_missing = 0.10) @@ -52,32 +52,17 @@ The returned object is a `list` containing training and testing data (`sim_mvgam str(simdat) ``` -Each series in this case has a shared seasonal pattern, which we can visualise: -```{r, fig.alt = "Simulating data for dynamic GAM models in mvgam"} -plot(simdat$global_seasonality[1:12], - type = 'l', lwd = 2, - ylab = 'Relative effect', - xlab = 'Season', - bty = 'l') -``` - -The resulting time series are similar to what we might encounter when dealing with count-valued data that can take small counts: +Each series in this case has a shared seasonal pattern. The resulting time series are similar to what we might encounter when dealing with count-valued data that can take small counts: ```{r, fig.alt = "Plotting time series features for GAM models in mvgam"} plot_mvgam_series(data = simdat$data_train, series = 'all') ``` -For each individual series, we can plot the training and testing data, as well as some more specific features of the observed data: +For individual series, we can plot the training and testing data, as well as some more specific features of the observed data: ```{r, fig.alt = "Plotting time series features for GAM models in mvgam"} plot_mvgam_series(data = simdat$data_train, newdata = simdat$data_test, series = 1) -plot_mvgam_series(data = simdat$data_train, - newdata = simdat$data_test, - series = 2) -plot_mvgam_series(data = simdat$data_train, - newdata = simdat$data_test, - series = 3) ``` ### Modelling dynamics with splines @@ -95,7 +80,8 @@ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + s(time, by = series, bs = 'cr', k = 20), knots = list(season = c(0.5, 12.5)), trend_model = 'None', - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` The model fits without issue: @@ -103,9 +89,9 @@ The model fits without issue: summary(mod1, include_betas = FALSE) ``` -And we can plot the partial effects of the splines to see that they are estimated to be highly nonlinear +And we can plot the conditional effects of the splines (on the link scale) to see that they are estimated to be highly nonlinear ```{r, fig.alt = "Plotting GAM smooth functions using mvgam"} -plot(mod1, type = 'smooths') +conditional_effects(mod1, type = 'link') ``` ### Modelling dynamics with GPs @@ -117,7 +103,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) ``` ```{r eval=FALSE} @@ -126,7 +113,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + scale = FALSE), knots = list(season = c(0.5, 12.5)), trend_model = 'None', - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` The summary for this model now contains information on the GP parameters for each time series: @@ -144,17 +132,9 @@ And now the length scale ($\rho$) parameters: mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas') ``` -We can also plot the nonlinear effects as before: -```{r, fig.alt = "Plotting Gaussian Process effects in mvgam"} -plot(mod2, type = 'smooths') -``` -These can also be plotted using `marginaleffects` utilities: -```{r, fig.alt = "Summarising latent Gaussian Process parameters in mvgam and marginaleffects"} -require('ggplot2') -plot_predictions(mod2, - condition = c('time', 'series', 'series'), - type = 'link') + - theme(legend.position = 
'none') +We can again plot the nonlinear effects: +```{r, fig.alt = "Plotting latent Gaussian Process effects in mvgam and marginaleffects"} +conditional_effects(mod2, type = 'link') ``` The estimates for the temporal trends are fairly similar for the two models, but below we will see if they produce similar forecasts @@ -171,16 +151,13 @@ The objects we have created are of class `mvgam_forecast`, which contain informa str(fc_mod1) ``` -We can plot the forecasts for each series from each model using the `S3 plot` method for objects of this class: +We can plot the forecasts for some series from each model using the `S3 plot` method for objects of this class: ```{r} plot(fc_mod1, series = 1) plot(fc_mod2, series = 1) plot(fc_mod1, series = 2) plot(fc_mod2, series = 2) - -plot(fc_mod1, series = 3) -plot(fc_mod2, series = 3) ``` Clearly the two models do not produce equivalent forecasts. We will come back to scoring these forecasts in a moment. @@ -195,7 +172,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + trend_model = 'None', data = simdat$data_train, newdata = simdat$data_test, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) ``` ```{r eval=FALSE} @@ -205,7 +183,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - newdata = simdat$data_test) + newdata = simdat$data_test, + silent = 2) ``` Because the model already contains a forecast distribution, we do not need to feed `newdata` to the `forecast()` function: diff --git a/doc/forecast_evaluation.html b/doc/forecast_evaluation.html index 570bab86..ae761ae5 100644 --- a/doc/forecast_evaluation.html +++ b/doc/forecast_evaluation.html @@ -12,7 +12,7 @@ - + Forecasting and forecast evaluation in mvgam @@ -341,7 +341,7 @@

Forecasting and forecast evaluation in mvgam

Nicholas J Clark

-

2024-04-18

+

2024-07-01

@@ -378,7 +378,7 @@

Simulating discrete time series

temporal patterns. Here we simulate a collection of three count-valued time series. These series all share the same seasonal pattern but have different temporal dynamics. By setting
-trend_model = 'GP' and prop_trend = 0.75, we
+trend_model = GP() and prop_trend = 0.75, we
are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with a squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing.


set.seed(2345)
 simdat <- sim_mvgam(T = 100, 
                     n_series = 3, 
-                    trend_model = 'GP',
+                    trend_model = GP(),
                     prop_trend = 0.75,
                     family = poisson(),
                     prop_missing = 0.10)
@@ -414,33 +414,18 @@


#> $ trend_params :List of 2
#>  ..$ alpha: num [1:3] 0.767 0.988 0.897
#>  ..$ rho  : num [1:3] 6.02 6.94 5.04
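
The training and testing split produced by sim_mvgam() can also be inspected directly; a quick sketch:

# the simulated data are split into training (timepoints 1-75)
# and testing (timepoints 76-100) sets
range(simdat$data_train$time)
range(simdat$data_test$time)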
-

-Each series in this case has a shared seasonal pattern, which we can visualise:

-
plot(simdat$global_seasonality[1:12], 
-     type = 'l', lwd = 2,
-     ylab = 'Relative effect',
-     xlab = 'Season',
-     bty = 'l')
-

Simulating data for dynamic GAM models in mvgam

-

-The resulting time series are similar to what we might encounter when dealing with count-valued data that can take small counts:

+

+Each series in this case has a shared seasonal pattern. The resulting time series are similar to what we might encounter when dealing with count-valued data that can take small counts:

+
plot_mvgam_series(data = simdat$data_train, 
+                  series = 'all')
+

Plotting time series features for GAM models in mvgam

+

+For individual series, we can plot the training and testing data, as well as some more specific features of the observed data:

plot_mvgam_series(data = simdat$data_train, 
-                  series = 'all')
-

Plotting time series features for GAM models in mvgam

-

-For each individual series, we can plot the training and testing data, as well as some more specific features of the observed data:

-
plot_mvgam_series(data = simdat$data_train, 
-                  newdata = simdat$data_test,
-                  series = 1)
-

Plotting time series features for GAM models in mvgam

-
plot_mvgam_series(data = simdat$data_train, 
-                  newdata = simdat$data_test,
-                  series = 2)
-

Plotting time series features for GAM models in mvgam

-
plot_mvgam_series(data = simdat$data_train, 
-                  newdata = simdat$data_test,
-                  series = 3)
-

Plotting time series features for GAM models in mvgam

+                  newdata = simdat$data_test,
+                  series = 1)
+

Plotting time series features for GAM models in mvgam

Modelling dynamics with splines

The first model we will fit uses a shared cyclic spline to capture the repeated seasonality, as well as series-specific splines of time to
capture the long-term dynamics. We allow the temporal splines to be fairly complex so they can capture as much of the temporal variation as possible:

-
mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + 
-                s(time, by = series, bs = 'cr', k = 20),
-              knots = list(season = c(0.5, 12.5)),
-              trend_model = 'None',
-              data = simdat$data_train)
+
mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + 
+                s(time, by = series, bs = 'cr', k = 20),
+              knots = list(season = c(0.5, 12.5)),
+              trend_model = 'None',
+              data = simdat$data_train,
+              silent = 2)

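
Before inspecting the fit, it can help to visualise the cyclic basis implied by s(season, bs = 'cc', k = 8), whose boundary knots at 0.5 and 12.5 force the smooth to join seamlessly across the season boundary. A minimal sketch using mgcv's cSplineDes() (illustrative only; mvgam sets this basis up internally):

library(mgcv)
# evaluate the cyclic cubic spline basis over the full seasonal cycle
season_grid <- seq(0.5, 12.5, length.out = 200)
cyc_basis <- cSplineDes(season_grid, knots = seq(0.5, 12.5, length.out = 8))
# each basis function takes identical values at the two boundary knots
matplot(season_grid, cyc_basis, type = 'l', lty = 1,
        xlab = 'Season', ylab = 'Basis function value')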
The model fits without issue:

-
summary(mod1, include_betas = FALSE)
+
summary(mod1, include_betas = FALSE)
+#> GAM formula:
+#> y ~ s(season, bs = "cc", k = 8) + s(time, by = series, bs = "cr", 
+#>     k = 20)
+#> <environment: 0x000001b67206d110>
+#> 
+#> Family:
+#> poisson
+#> 
+#> Link function:
+#> log
+#> 
+#> Trend model:
+#> None
+#> 
+#> N series:
+#> 3 
+#> 
+#> N timepoints:
+#> 75 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> GAM coefficient (beta) estimates:
+#>              2.5%   50%  97.5% Rhat n_eff
+#> (Intercept) -0.41 -0.21 -0.052    1   855
+#> 
+#> Approximate significance of GAM smooths:
+#>                         edf Ref.df Chi.sq p-value   
+#> s(season)              3.82      6   19.6  0.0037 **
+#> s(time):seriesseries_1 7.25     19   13.2  0.7969   
+#> s(time):seriesseries_2 9.81     19  173.3  0.0019 **
+#> s(time):seriesseries_3 6.05     19   19.4  0.7931   
+#> ---
+#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 0 of 2000 iterations ended with a divergence (0%)
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:26:51 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)
+

+And we can plot the conditional effects of the splines (on the link scale) to see that they are estimated to be highly nonlinear

+
conditional_effects(mod1, type = 'link')
+

Plotting GAM smooth functions using mvgam

+
+
+

Modelling dynamics with GPs

+

Before showing how to produce and evaluate forecasts, we will fit a +second model to these data so the two models can be compared. This model +is equivalent to the above, except we now use Gaussian Processes to +model series-specific dynamics. This makes use of the gp() +function from brms, which can fit Hilbert space approximate +GPs. See ?brms::gp for more details.

+
mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + 
+                gp(time, by = series, c = 5/4, k = 20,
+                   scale = FALSE),
+              knots = list(season = c(0.5, 12.5)),
+              trend_model = 'None',
+              data = simdat$data_train,
+              silent = 2)
+
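
To build intuition for what the Hilbert space approximation is doing, the sketch below simulates one draw from an approximate squared exponential GP using k sine basis functions whose weights are scaled by the kernel's spectral density. This is a minimal illustration under stated assumptions, not mvgam's internal code:

set.seed(1)
x <- seq(-1, 1, length.out = 200)        # a centred, scaled covariate
L <- 5/4 * max(abs(x))                   # boundary factor, mirroring c = 5/4
k <- 20                                  # number of basis functions
alpha <- 1; rho <- 0.3                   # marginal deviation and length scale
m <- 1:k
# eigenfunctions of the Laplacian on [-L, L]
phi <- sapply(m, function(j) sqrt(1 / L) * sin(pi * j * (x + L) / (2 * L)))
# squared exponential spectral density evaluated at the sqrt-eigenvalues
spd <- alpha^2 * sqrt(2 * pi) * rho * exp(-0.5 * (rho * pi * m / (2 * L))^2)
f_draw <- phi %*% rnorm(k, mean = 0, sd = sqrt(spd))
plot(x, f_draw, type = 'l', bty = 'l', ylab = 'Approximate GP draw')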

The summary for this model now contains information on the GP +parameters for each time series:

+
summary(mod2, include_betas = FALSE)
 #> GAM formula:
-#> y ~ s(season, bs = "cc", k = 8) + s(time, by = series, bs = "cr", 
-#>     k = 20)
-#> <environment: 0x0000029cbf2b3570>
+#> y ~ s(season, bs = "cc", k = 8) + gp(time, by = series, c = 5/4, 
+#>     k = 20, scale = FALSE)
+#> <environment: 0x000001b67206d110>
 #> 
 #> Family:
 #> poisson
@@ -482,129 +541,49 @@ 

Modelling dynamics with splines

#> #> #> GAM coefficient (beta) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -0.41 -0.21 -0.039 1 813 +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept) -1.1 -0.51 0.34 1 694 #> -#> Approximate significance of GAM smooths: -#> edf Ref.df Chi.sq p-value -#> s(season) 3.77 6 21.8 0.004 ** -#> s(time):seriesseries_1 6.50 19 15.3 0.848 -#> s(time):seriesseries_2 9.49 19 226.0 <2e-16 *** -#> s(time):seriesseries_3 5.93 19 18.3 0.867 -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> GAM gp term marginal deviation (alpha) and length scale (rho) estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> alpha_gp(time):seriesseries_1 0.21 0.78 2.2 1 819 +#> alpha_gp(time):seriesseries_2 0.73 1.30 2.9 1 761 +#> alpha_gp(time):seriesseries_3 0.46 1.10 2.8 1 1262 +#> rho_gp(time):seriesseries_1 1.30 5.40 22.0 1 682 +#> rho_gp(time):seriesseries_2 2.10 10.00 17.0 1 450 +#> rho_gp(time):seriesseries_3 1.50 8.80 24.0 1 769 #> -#> Stan MCMC diagnostics: -#> n_eff / iter looks reasonable for all parameters -#> Rhat looks reasonable for all parameters -#> 0 of 2000 iterations ended with a divergence (0%) -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) -#> E-FMI indicated no pathological behavior -#> -#> Samples were drawn using NUTS(diag_e) at Thu Apr 18 8:31:33 PM 2024. -#> For each parameter, n_eff is a crude measure of effective sample size, -#> and Rhat is the potential scale reduction factor on split MCMC chains -#> (at convergence, Rhat = 1)
-

-And we can plot the partial effects of the splines to see that they are estimated to be highly nonlinear

-
plot(mod1, type = 'smooths')
-

Plotting GAM smooth functions using mvgam

-
-
-

Modelling dynamics with GPs

-

-Before showing how to produce and evaluate forecasts, we will fit a second model to these data so the two models can be compared. This model is equivalent to the above, except we now use Gaussian Processes to model series-specific dynamics. This makes use of the gp() function from brms, which can fit Hilbert space approximate GPs. See ?brms::gp for more details.

-
mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + 
-                gp(time, by = series, c = 5/4, k = 20,
-                   scale = FALSE),
-              knots = list(season = c(0.5, 12.5)),
-              trend_model = 'None',
-              data = simdat$data_train)
-

The summary for this model now contains information on the GP -parameters for each time series:

-
summary(mod2, include_betas = FALSE)
-#> GAM formula:
-#> y ~ s(season, bs = "cc", k = 8) + gp(time, by = series, c = 5/4, 
-#>     k = 20, scale = FALSE)
-#> <environment: 0x0000029cbf2b3570>
-#> 
-#> Family:
-#> poisson
-#> 
-#> Link function:
-#> log
-#> 
-#> Trend model:
-#> None
-#> 
-#> N series:
-#> 3 
-#> 
-#> N timepoints:
-#> 75 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> GAM coefficient (beta) estimates:
-#>             2.5%   50% 97.5% Rhat n_eff
-#> (Intercept) -1.1 -0.52  0.31    1   768
-#> 
-#> GAM gp term marginal deviation (alpha) and length scale (rho) estimates:
-#>                               2.5%  50% 97.5% Rhat n_eff
-#> alpha_gp(time):seriesseries_1 0.21  0.8   2.1 1.01   763
-#> alpha_gp(time):seriesseries_2 0.74  1.4   2.9 1.00  1028
-#> alpha_gp(time):seriesseries_3 0.50  1.1   2.8 1.00  1026
-#> rho_gp(time):seriesseries_1   1.20  5.1  23.0 1.00   681
-#> rho_gp(time):seriesseries_2   2.20 10.0  17.0 1.00   644
-#> rho_gp(time):seriesseries_3   1.50  8.8  23.0 1.00   819
-#> 
-#> Approximate significance of GAM smooths:
-#>            edf Ref.df Chi.sq p-value    
-#> s(season) 4.12      6   25.9 0.00053 ***
-#> ---
-#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhat looks reasonable for all parameters
-#> 4 of 2000 iterations ended with a divergence (0.2%)
-#>  *Try running with larger adapt_delta to remove the divergences
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Thu Apr 18 8:33:03 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+#> Approximate significance of GAM smooths: +#> edf Ref.df Chi.sq p-value +#> s(season) 3.36 6 21.1 0.0093 ** +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> +#> Stan MCMC diagnostics: +#> n_eff / iter looks reasonable for all parameters +#> Rhat looks reasonable for all parameters +#> 1 of 2000 iterations ended with a divergence (0.05%) +#> *Try running with larger adapt_delta to remove the divergences +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) +#> E-FMI indicated no pathological behavior +#> +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:27:28 AM 2024. +#> For each parameter, n_eff is a crude measure of effective sample size, +#> and Rhat is the potential scale reduction factor on split MCMC chains +#> (at convergence, Rhat = 1)

We can plot the posteriors for these parameters, and for any other parameter for that matter, using bayesplot routines. First the marginal deviation (\(\alpha\)) parameters:

-
mcmc_plot(mod2, variable = c('alpha_gp'), regex = TRUE, type = 'areas')
-

Summarising latent Gaussian Process parameters in mvgam

+
mcmc_plot(mod2, variable = c('alpha_gp'), regex = TRUE, type = 'areas')
+

Summarising latent Gaussian Process parameters in mvgam

And now the length scale (\(\rho\)) parameters:

-
mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas')
-

Summarising latent Gaussian Process parameters in mvgam

-

We can also plot the nonlinear effects as before:

-
plot(mod2, type = 'smooths')
-

Plotting Gaussian Process effects in mvgam
-These can also be plotted using marginaleffects utilities:

-
require('ggplot2')
-plot_predictions(mod2, 
-                 condition = c('time', 'series', 'series'),
-                 type = 'link') +
-  theme(legend.position = 'none')
-

Summarising latent Gaussian Process parameters in mvgam and marginaleffects

+
mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas')
+

Summarising latent Gaussian Process parameters in mvgam

+

We can again plot the nonlinear effects:

+
conditional_effects(mod2, type = 'link')
+

Plotting latent Gaussian Process effects in mvgam and marginaleffects

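
These effects can also be plotted using marginaleffects utilities, for example:

require('ggplot2')
plot_predictions(mod2, 
                 condition = c('time', 'series', 'series'),
                 type = 'link') +
  theme(legend.position = 'none')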
The estimates for the temporal trends are fairly similar for the two models, but below we will see if they produce similar forecasts

@@ -627,80 +606,71 @@

Forecasting with the forecast() function

?forecast.mvgam for details). We will use the default and produce forecasts on the response scale, which is the most common way to evaluate forecast distributions

-
fc_mod1 <- forecast(mod1, newdata = simdat$data_test)
-fc_mod2 <- forecast(mod2, newdata = simdat$data_test)
+
fc_mod1 <- forecast(mod1, newdata = simdat$data_test)
+fc_mod2 <- forecast(mod2, newdata = simdat$data_test)

The objects we have created are of class mvgam_forecast, which contain information on hindcast distributions, forecast distributions and true observations for each series in the data:

-
str(fc_mod1)
-#> List of 16
-#>  $ call              :Class 'formula'  language y ~ s(season, bs = "cc", k = 8) + s(time, by = series, bs = "cr", k = 20)
-#>   .. ..- attr(*, ".Environment")=<environment: 0x0000029cbf2b3570> 
-#>  $ trend_call        : NULL
-#>  $ family            : chr "poisson"
-#>  $ family_pars       : NULL
-#>  $ trend_model       : chr "None"
-#>  $ drift             : logi FALSE
-#>  $ use_lv            : logi FALSE
-#>  $ fit_engine        : chr "stan"
-#>  $ type              : chr "response"
-#>  $ series_names      : Factor w/ 3 levels "series_1","series_2",..: 1 2 3
-#>  $ train_observations:List of 3
-#>   ..$ series_1: int [1:75] 0 0 1 1 0 0 0 0 0 0 ...
-#>   ..$ series_2: int [1:75] 1 0 0 1 1 0 1 0 1 2 ...
-#>   ..$ series_3: int [1:75] 3 0 3 NA 2 1 1 1 1 3 ...
-#>  $ train_times       : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
-#>  $ test_observations :List of 3
-#>   ..$ series_1: int [1:25] 0 0 2 NA 0 2 2 1 1 1 ...
-#>   ..$ series_2: int [1:25] 1 0 2 1 1 3 0 1 0 NA ...
-#>   ..$ series_3: int [1:25] 1 0 0 1 0 0 1 0 1 0 ...
-#>  $ test_times        : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
-#>  $ hindcasts         :List of 3
-#>   ..$ series_1: num [1:2000, 1:75] 1 1 0 0 0 1 1 1 0 0 ...
-#>   .. ..- attr(*, "dimnames")=List of 2
-#>   .. .. ..$ : NULL
-#>   .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#>   ..$ series_2: num [1:2000, 1:75] 0 0 0 0 0 0 0 1 0 0 ...
-#>   .. ..- attr(*, "dimnames")=List of 2
-#>   .. .. ..$ : NULL
-#>   .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#>   ..$ series_3: num [1:2000, 1:75] 3 0 2 1 0 1 2 1 5 1 ...
-#>   .. ..- attr(*, "dimnames")=List of 2
-#>   .. .. ..$ : NULL
-#>   .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
-#>  $ forecasts         :List of 3
-#>   ..$ series_1: num [1:2000, 1:25] 1 3 2 1 0 0 1 1 0 0 ...
-#>   ..$ series_2: num [1:2000, 1:25] 6 0 0 0 0 2 0 0 0 0 ...
-#>   ..$ series_3: num [1:2000, 1:25] 0 1 1 3 3 1 3 2 4 2 ...
-#>  - attr(*, "class")= chr "mvgam_forecast"
-

-We can plot the forecasts for each series from each model using the S3 plot method for objects of this class:
+

str(fc_mod1)
+#> List of 16
+#>  $ call              :Class 'formula'  language y ~ s(season, bs = "cc", k = 8) + s(time, by = series, bs = "cr", k = 20)
+#>   .. ..- attr(*, ".Environment")=<environment: 0x000001b67206d110> 
+#>  $ trend_call        : NULL
+#>  $ family            : chr "poisson"
+#>  $ family_pars       : NULL
+#>  $ trend_model       : chr "None"
+#>  $ drift             : logi FALSE
+#>  $ use_lv            : logi FALSE
+#>  $ fit_engine        : chr "stan"
+#>  $ type              : chr "response"
+#>  $ series_names      : Factor w/ 3 levels "series_1","series_2",..: 1 2 3
+#>  $ train_observations:List of 3
+#>   ..$ series_1: int [1:75] 0 0 1 1 0 0 0 0 0 0 ...
+#>   ..$ series_2: int [1:75] 1 0 0 1 1 0 1 0 1 2 ...
+#>   ..$ series_3: int [1:75] 3 0 3 NA 2 1 1 1 1 3 ...
+#>  $ train_times       : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
+#>  $ test_observations :List of 3
+#>   ..$ series_1: int [1:25] 0 0 2 NA 0 2 2 1 1 1 ...
+#>   ..$ series_2: int [1:25] 1 0 2 1 1 3 0 1 0 NA ...
+#>   ..$ series_3: int [1:25] 1 0 0 1 0 0 1 0 1 0 ...
+#>  $ test_times        : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
+#>  $ hindcasts         :List of 3
+#>   ..$ series_1: num [1:2000, 1:75] 1 1 0 0 0 0 0 0 0 0 ...
+#>   .. ..- attr(*, "dimnames")=List of 2
+#>   .. .. ..$ : NULL
+#>   .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
+#>   ..$ series_2: num [1:2000, 1:75] 0 0 0 0 0 0 0 0 0 0 ...
+#>   .. ..- attr(*, "dimnames")=List of 2
+#>   .. .. ..$ : NULL
+#>   .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
+#>   ..$ series_3: num [1:2000, 1:75] 1 4 0 4 4 1 1 6 3 1 ...
+#>   .. ..- attr(*, "dimnames")=List of 2
+#>   .. .. ..$ : NULL
+#>   .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
+#>  $ forecasts         :List of 3
+#>   ..$ series_1: num [1:2000, 1:25] 0 1 0 0 0 1 1 0 0 3 ...
+#>   ..$ series_2: num [1:2000, 1:25] 0 2 0 1 0 2 1 0 1 0 ...
+#>   ..$ series_3: num [1:2000, 1:25] 1 8 4 2 2 1 2 0 2 0 ...
+#>  - attr(*, "class")= chr "mvgam_forecast"
+

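
Because each element of the forecasts slot is a matrix of posterior draws (rows) by forecast horizons (columns), the draws can also be summarised directly; a minimal sketch:

# empirical 10%, 50% and 90% forecast quantiles for series 1
# across the 25 out of sample horizons
apply(fc_mod1$forecasts$series_1, 2,
      quantile, probs = c(0.1, 0.5, 0.9))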
We can plot the forecasts for some series from each model using the S3 plot method for objects of this class:

-
plot(fc_mod1, series = 1)
-

-
#> Out of sample CRPS:
-#> [1] 14.62964
-plot(fc_mod2, series = 1)
-

-
#> Out of sample DRPS:
-#> [1] 10.92516
-
-plot(fc_mod1, series = 2)
-

-
#> Out of sample CRPS:
-#> [1] 84201962708
-plot(fc_mod2, series = 2)
-

-
#> Out of sample DRPS:
-#> [1] 14.31152
-
-plot(fc_mod1, series = 3)
-

-
#> Out of sample CRPS:
-#> [1] 32.44136
-plot(fc_mod2, series = 3)
-

-
#> Out of sample DRPS:
-#> [1] 15.44332
+
plot(fc_mod1, series = 1)
+#> Out of sample CRPS:
+#> 14.89051875
+

+
plot(fc_mod2, series = 1)
+#> Out of sample DRPS:
+#> 10.84228725
+

+

+plot(fc_mod1, series = 2)
+#> Out of sample CRPS:
+#> 495050222726067
+

+
plot(fc_mod2, series = 2)
+#> Out of sample CRPS:
+#> 14.7121945
+

Clearly the two models do not produce equivalent forecasts. We will come back to scoring these forecasts in a moment.

@@ -714,23 +684,24 @@

Forecasting with newdata in mvgam()

block in Stan. As an example, we can refit mod2 but include the testing data for automatic forecasts:

-
mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + 
-                gp(time, by = series, c = 5/4, k = 20,
-                   scale = FALSE),
-              knots = list(season = c(0.5, 12.5)),
-              trend_model = 'None',
-              data = simdat$data_train,
-              newdata = simdat$data_test)
+
mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + 
+                gp(time, by = series, c = 5/4, k = 20,
+                   scale = FALSE),
+              knots = list(season = c(0.5, 12.5)),
+              trend_model = 'None',
+              data = simdat$data_train,
+              newdata = simdat$data_test,
+              silent = 2)

Because the model already contains a forecast distribution, we do not need to feed newdata to the forecast() function:

-
fc_mod2 <- forecast(mod2)
+
fc_mod2 <- forecast(mod2)

The forecasts will be nearly identical to those calculated previously:

-
plot(fc_mod2, series = 1)
-

Plotting posterior forecast distributions using mvgam and R

-
#> Out of sample DRPS:
-#> [1] 10.85762
+
plot(fc_mod2, series = 1)
+#> Out of sample DRPS:
+#> 10.78167525
+

Plotting posterior forecast distributions using mvgam and R

Scoring forecast distributions

@@ -741,58 +712,58 @@

Scoring forecast distributions

that are available. A useful scoring metric is the Continuous Ranked Probability Score (CRPS). A CRPS value is similar to what we might get if we calculated a weighted absolute error using the full forecast distribution.
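
To see roughly what this score is doing, here is a minimal sketch of a sample-based CRPS estimator for a single observation (the helper name is made up for illustration; score() computes this, and more, for us):

# mean absolute error of the draws, minus a correction
# for the spread of the forecast distribution itself
crps_sample <- function(draws, y) {
  mean(abs(draws - y)) - 0.5 * mean(abs(outer(draws, draws, '-')))
}
# e.g. for series 1 at the first forecast horizon
crps_sample(fc_mod1$forecasts$series_1[, 1],
            fc_mod1$test_observations$series_1[1])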

-
crps_mod1 <- score(fc_mod1, score = 'crps')
-str(crps_mod1)
-#> List of 4
-#>  $ series_1  :'data.frame':  25 obs. of  5 variables:
-#>   ..$ score         : num [1:25] 0.1938 0.1366 1.355 NA 0.0348 ...
-#>   ..$ in_interval   : num [1:25] 1 1 1 NA 1 1 1 1 1 1 ...
-#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
-#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>   ..$ score_type    : chr [1:25] "crps" "crps" "crps" "crps" ...
-#>  $ series_2  :'data.frame':  25 obs. of  5 variables:
-#>   ..$ score         : num [1:25] 0.379 0.306 0.941 0.5 0.573 ...
-#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 NA ...
-#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
-#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>   ..$ score_type    : chr [1:25] "crps" "crps" "crps" "crps" ...
-#>  $ series_3  :'data.frame':  25 obs. of  5 variables:
-#>   ..$ score         : num [1:25] 0.32 0.556 0.379 0.362 0.219 ...
-#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 1 ...
-#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
-#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>   ..$ score_type    : chr [1:25] "crps" "crps" "crps" "crps" ...
-#>  $ all_series:'data.frame':  25 obs. of  3 variables:
-#>   ..$ score       : num [1:25] 0.892 0.999 2.675 NA 0.827 ...
-#>   ..$ eval_horizon: int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>   ..$ score_type  : chr [1:25] "sum_crps" "sum_crps" "sum_crps" "sum_crps" ...
-crps_mod1$series_1
-#>         score in_interval interval_width eval_horizon score_type
-#> 1  0.19375525           1            0.9            1       crps
-#> 2  0.13663925           1            0.9            2       crps
-#> 3  1.35502175           1            0.9            3       crps
-#> 4          NA          NA            0.9            4       crps
-#> 5  0.03482775           1            0.9            5       crps
-#> 6  1.55416700           1            0.9            6       crps
-#> 7  1.51028900           1            0.9            7       crps
-#> 8  0.62121225           1            0.9            8       crps
-#> 9  0.62630125           1            0.9            9       crps
-#> 10 0.59853100           1            0.9           10       crps
-#> 11 1.30998625           1            0.9           11       crps
-#> 12 2.04829775           1            0.9           12       crps
-#> 13 0.61251800           1            0.9           13       crps
-#> 14 0.14052300           1            0.9           14       crps
-#> 15 0.65110800           1            0.9           15       crps
-#> 16 0.07973125           1            0.9           16       crps
-#> 17 0.07675600           1            0.9           17       crps
-#> 18 0.09382375           1            0.9           18       crps
-#> 19 0.12356725           1            0.9           19       crps
-#> 20         NA          NA            0.9           20       crps
-#> 21 0.20173600           1            0.9           21       crps
-#> 22 0.84066825           1            0.9           22       crps
-#> 23         NA          NA            0.9           23       crps
-#> 24 1.06489225           1            0.9           24       crps
-#> 25 0.75528825           1            0.9           25       crps
+
crps_mod1 <- score(fc_mod1, score = 'crps')
+str(crps_mod1)
+#> List of 4
+#>  $ series_1  :'data.frame':  25 obs. of  5 variables:
+#>   ..$ score         : num [1:25] 0.186 0.129 1.372 NA 0.037 ...
+#>   ..$ in_interval   : num [1:25] 1 1 1 NA 1 1 1 1 1 1 ...
+#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
+#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>   ..$ score_type    : chr [1:25] "crps" "crps" "crps" "crps" ...
+#>  $ series_2  :'data.frame':  25 obs. of  5 variables:
+#>   ..$ score         : num [1:25] 0.354 0.334 0.947 0.492 0.542 ...
+#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 NA ...
+#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
+#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>   ..$ score_type    : chr [1:25] "crps" "crps" "crps" "crps" ...
+#>  $ series_3  :'data.frame':  25 obs. of  5 variables:
+#>   ..$ score         : num [1:25] 0.31 0.616 0.4 0.349 0.215 ...
+#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 1 ...
+#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
+#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>   ..$ score_type    : chr [1:25] "crps" "crps" "crps" "crps" ...
+#>  $ all_series:'data.frame':  25 obs. of  3 variables:
+#>   ..$ score       : num [1:25] 0.85 1.079 2.719 NA 0.794 ...
+#>   ..$ eval_horizon: int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>   ..$ score_type  : chr [1:25] "sum_crps" "sum_crps" "sum_crps" "sum_crps" ...
+crps_mod1$series_1
+#>         score in_interval interval_width eval_horizon score_type
+#> 1  0.18582425           1            0.9            1       crps
+#> 2  0.12933350           1            0.9            2       crps
+#> 3  1.37181050           1            0.9            3       crps
+#> 4          NA          NA            0.9            4       crps
+#> 5  0.03698600           1            0.9            5       crps
+#> 6  1.53997900           1            0.9            6       crps
+#> 7  1.50467675           1            0.9            7       crps
+#> 8  0.63460725           1            0.9            8       crps
+#> 9  0.61682725           1            0.9            9       crps
+#> 10 0.62428875           1            0.9           10       crps
+#> 11 1.33824700           1            0.9           11       crps
+#> 12 2.06378300           1            0.9           12       crps
+#> 13 0.59247200           1            0.9           13       crps
+#> 14 0.13560025           1            0.9           14       crps
+#> 15 0.66512975           1            0.9           15       crps
+#> 16 0.08238525           1            0.9           16       crps
+#> 17 0.08152900           1            0.9           17       crps
+#> 18 0.09446425           1            0.9           18       crps
+#> 19 0.12084700           1            0.9           19       crps
+#> 20         NA          NA            0.9           20       crps
+#> 21 0.21286925           1            0.9           21       crps
+#> 22 0.85799700           1            0.9           22       crps
+#> 23         NA          NA            0.9           23       crps
+#> 24 1.14954750           1            0.9           24       crps
+#> 25 0.85131425           1            0.9           25       crps

The returned list contains a data.frame for each series in the data that shows the CRPS score for each evaluation in the testing data, along with some other useful information about the fit of the @@ -803,34 +774,34 @@

Scoring forecast distributions

in_interval column take a 1 approximately 90% of the time. This value can be changed if you wish to compute different coverages, say using a 60% interval:

-
crps_mod1 <- score(fc_mod1, score = 'crps', interval_width = 0.6)
-crps_mod1$series_1
-#>         score in_interval interval_width eval_horizon score_type
-#> 1  0.19375525           1            0.6            1       crps
-#> 2  0.13663925           1            0.6            2       crps
-#> 3  1.35502175           0            0.6            3       crps
-#> 4          NA          NA            0.6            4       crps
-#> 5  0.03482775           1            0.6            5       crps
-#> 6  1.55416700           0            0.6            6       crps
-#> 7  1.51028900           0            0.6            7       crps
-#> 8  0.62121225           1            0.6            8       crps
-#> 9  0.62630125           1            0.6            9       crps
-#> 10 0.59853100           1            0.6           10       crps
-#> 11 1.30998625           0            0.6           11       crps
-#> 12 2.04829775           0            0.6           12       crps
-#> 13 0.61251800           1            0.6           13       crps
-#> 14 0.14052300           1            0.6           14       crps
-#> 15 0.65110800           1            0.6           15       crps
-#> 16 0.07973125           1            0.6           16       crps
-#> 17 0.07675600           1            0.6           17       crps
-#> 18 0.09382375           1            0.6           18       crps
-#> 19 0.12356725           1            0.6           19       crps
-#> 20         NA          NA            0.6           20       crps
-#> 21 0.20173600           1            0.6           21       crps
-#> 22 0.84066825           1            0.6           22       crps
-#> 23         NA          NA            0.6           23       crps
-#> 24 1.06489225           1            0.6           24       crps
-#> 25 0.75528825           1            0.6           25       crps
+
crps_mod1 <- score(fc_mod1, score = 'crps', interval_width = 0.6)
+crps_mod1$series_1
+#>         score in_interval interval_width eval_horizon score_type
+#> 1  0.18582425           1            0.6            1       crps
+#> 2  0.12933350           1            0.6            2       crps
+#> 3  1.37181050           0            0.6            3       crps
+#> 4          NA          NA            0.6            4       crps
+#> 5  0.03698600           1            0.6            5       crps
+#> 6  1.53997900           0            0.6            6       crps
+#> 7  1.50467675           0            0.6            7       crps
+#> 8  0.63460725           1            0.6            8       crps
+#> 9  0.61682725           1            0.6            9       crps
+#> 10 0.62428875           1            0.6           10       crps
+#> 11 1.33824700           0            0.6           11       crps
+#> 12 2.06378300           0            0.6           12       crps
+#> 13 0.59247200           1            0.6           13       crps
+#> 14 0.13560025           1            0.6           14       crps
+#> 15 0.66512975           1            0.6           15       crps
+#> 16 0.08238525           1            0.6           16       crps
+#> 17 0.08152900           1            0.6           17       crps
+#> 18 0.09446425           1            0.6           18       crps
+#> 19 0.12084700           1            0.6           19       crps
+#> 20         NA          NA            0.6           20       crps
+#> 21 0.21286925           1            0.6           21       crps
+#> 22 0.85799700           1            0.6           22       crps
+#> 23         NA          NA            0.6           23       crps
+#> 24 1.14954750           1            0.6           24       crps
+#> 25 0.85131425           1            0.6           25       crps
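
As a quick calibration check, the empirical coverage of these intervals can be computed directly from the returned data.frame; a minimal sketch:

# proportion of non-missing out of sample observations inside the
# 60% intervals; values near 0.6 suggest reasonable calibration
mean(crps_mod1$series_1$in_interval, na.rm = TRUE)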

We can also compare forecasts against out of sample observations using the Expected Log Predictive Density (ELPD; also known as the log score). The ELPD is a strictly proper scoring rule that can be applied to any distributional forecast, but it requires
predictions on the link scale rather than on the outcome scale. This is where it is advantageous to change the type of prediction we can get using the forecast() function:

-
link_mod1 <- forecast(mod1, newdata = simdat$data_test, type = 'link')
-score(link_mod1, score = 'elpd')$series_1
-#>         score eval_horizon score_type
-#> 1  -0.5304414            1       elpd
-#> 2  -0.4298955            2       elpd
-#> 3  -2.9617583            3       elpd
-#> 4          NA            4       elpd
-#> 5  -0.2007644            5       elpd
-#> 6  -3.3781408            6       elpd
-#> 7  -3.2729088            7       elpd
-#> 8  -2.0363750            8       elpd
-#> 9  -2.0670612            9       elpd
-#> 10 -2.0844818           10       elpd
-#> 11 -3.0576463           11       elpd
-#> 12 -3.6291058           12       elpd
-#> 13 -2.1692669           13       elpd
-#> 14 -0.2960899           14       elpd
-#> 15 -2.3738851           15       elpd
-#> 16 -0.2160804           16       elpd
-#> 17 -0.2036782           17       elpd
-#> 18 -0.2115539           18       elpd
-#> 19 -0.2235072           19       elpd
-#> 20         NA           20       elpd
-#> 21 -0.2413680           21       elpd
-#> 22 -2.6791984           22       elpd
-#> 23         NA           23       elpd
-#> 24 -2.6851981           24       elpd
-#> 25 -0.2836901           25       elpd
+
link_mod1 <- forecast(mod1, newdata = simdat$data_test, type = 'link')
+score(link_mod1, score = 'elpd')$series_1
+#>         score eval_horizon score_type
+#> 1  -0.5343784            1       elpd
+#> 2  -0.4326190            2       elpd
+#> 3  -2.9699450            3       elpd
+#> 4          NA            4       elpd
+#> 5  -0.1998425            5       elpd
+#> 6  -3.3976729            6       elpd
+#> 7  -3.2989297            7       elpd
+#> 8  -2.0490633            8       elpd
+#> 9  -2.0690163            9       elpd
+#> 10 -2.0822051           10       elpd
+#> 11 -3.1101639           11       elpd
+#> 12 -3.7240924           12       elpd
+#> 13 -2.1578701           13       elpd
+#> 14 -0.2899481           14       elpd
+#> 15 -2.3811862           15       elpd
+#> 16 -0.2085375           16       elpd
+#> 17 -0.1960501           17       elpd
+#> 18 -0.2036978           18       elpd
+#> 19 -0.2154374           19       elpd
+#> 20         NA           20       elpd
+#> 21 -0.2341597           21       elpd
+#> 22 -2.6552948           22       elpd
+#> 23         NA           23       elpd
+#> 24 -2.6652717           24       elpd
+#> 25 -0.2759126           25       elpd
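
The computation behind these values can be mimicked by averaging the Poisson predictive density over posterior draws before taking the log; a minimal sketch (the helper is illustrative, not part of mvgam):

# log of the posterior-averaged predictive density for one observation,
# exponentiating the link-scale draws to get Poisson rates
log_score <- function(link_draws, y) {
  log(mean(dpois(y, lambda = exp(link_draws))))
}
# e.g. for series 1 at the first forecast horizon
log_score(link_mod1$forecasts$series_1[, 1],
          link_mod1$test_observations$series_1[1])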

Finally, when we have multiple time series it may also make sense to use a multivariate proper scoring rule. mvgam offers two such options: the Energy score and the Variogram score. The first penalizes forecasts that are less well calibrated against
the truth, while the second penalizes forecasts that do not capture the observed true correlation structure. Which score to use depends on your goals, but both are very easy to compute:

-
energy_mod2 <- score(fc_mod2, score = 'energy')
-str(energy_mod2)
-#> List of 4
-#>  $ series_1  :'data.frame':  25 obs. of  3 variables:
-#>   ..$ in_interval   : num [1:25] 1 1 1 NA 1 1 1 1 1 1 ...
-#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
-#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>  $ series_2  :'data.frame':  25 obs. of  3 variables:
-#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 NA ...
-#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
-#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>  $ series_3  :'data.frame':  25 obs. of  3 variables:
-#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 1 ...
-#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
-#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>  $ all_series:'data.frame':  25 obs. of  3 variables:
-#>   ..$ score       : num [1:25] 0.773 1.147 1.226 NA 0.458 ...
-#>   ..$ eval_horizon: int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
-#>   ..$ score_type  : chr [1:25] "energy" "energy" "energy" "energy" ...
+
energy_mod2 <- score(fc_mod2, score = 'energy')
+str(energy_mod2)
+#> List of 4
+#>  $ series_1  :'data.frame':  25 obs. of  3 variables:
+#>   ..$ in_interval   : num [1:25] 1 1 1 NA 1 1 1 1 1 1 ...
+#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
+#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>  $ series_2  :'data.frame':  25 obs. of  3 variables:
+#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 NA ...
+#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
+#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>  $ series_3  :'data.frame':  25 obs. of  3 variables:
+#>   ..$ in_interval   : num [1:25] 1 1 1 1 1 1 1 1 1 1 ...
+#>   ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ...
+#>   ..$ eval_horizon  : int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>  $ all_series:'data.frame':  25 obs. of  3 variables:
+#>   ..$ score       : num [1:25] 0.771 1.133 1.26 NA 0.443 ...
+#>   ..$ eval_horizon: int [1:25] 1 2 3 4 5 6 7 8 9 10 ...
+#>   ..$ score_type  : chr [1:25] "energy" "energy" "energy" "energy" ...

The returned object still provides information on interval coverage for each individual series, but there is only a single score per horizon now (which is provided in the all_series slot):

-
energy_mod2$all_series
-#>        score eval_horizon score_type
-#> 1  0.7728517            1     energy
-#> 2  1.1469836            2     energy
-#> 3  1.2258781            3     energy
-#> 4         NA            4     energy
-#> 5  0.4577536            5     energy
-#> 6  1.8094487            6     energy
-#> 7  1.4887317            7     energy
-#> 8  0.7651593            8     energy
-#> 9  1.1180634            9     energy
-#> 10        NA           10     energy
-#> 11 1.5008324           11     energy
-#> 12 3.2142460           12     energy
-#> 13 1.6129732           13     energy
-#> 14 1.2704438           14     energy
-#> 15 1.1335958           15     energy
-#> 16 1.8717420           16     energy
-#> 17        NA           17     energy
-#> 18 0.7953392           18     energy
-#> 19 0.9919119           19     energy
-#> 20        NA           20     energy
-#> 21 1.2461964           21     energy
-#> 22 1.5170615           22     energy
-#> 23        NA           23     energy
-#> 24 2.3824552           24     energy
-#> 25 1.5314557           25     energy
+
energy_mod2$all_series
+#>        score eval_horizon score_type
+#> 1  0.7705198            1     energy
+#> 2  1.1330328            2     energy
+#> 3  1.2600785            3     energy
+#> 4         NA            4     energy
+#> 5  0.4427578            5     energy
+#> 6  1.8848308            6     energy
+#> 7  1.4186997            7     energy
+#> 8  0.7280518            8     energy
+#> 9  1.0467755            9     energy
+#> 10        NA           10     energy
+#> 11 1.4172423           11     energy
+#> 12 3.2326925           12     energy
+#> 13 1.5987732           13     energy
+#> 14 1.1798872           14     energy
+#> 15 1.0311968           15     energy
+#> 16 1.8261356           16     energy
+#> 17        NA           17     energy
+#> 18 0.7170961           18     energy
+#> 19 0.8927311           19     energy
+#> 20        NA           20     energy
+#> 21 1.0544501           21     energy
+#> 22 1.3280321           22     energy
+#> 23        NA           23     energy
+#> 24 2.1843621           24     energy
+#> 25 1.2352041           25     energy
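
For intuition, the energy score at a single horizon can also be estimated directly from the forecast draws stacked across series; a minimal sketch (illustrative only):

# expected distance from the draws to the truth, minus half the
# expected distance between independent pairs of draws
energy_sample <- function(draws, y) {
  term1 <- mean(sqrt(rowSums(sweep(draws, 2, y)^2)))
  term2 <- sum(dist(draws)) / choose(nrow(draws), 2)
  term1 - 0.5 * term2
}
h <- 1
draws_h <- cbind(fc_mod2$forecasts$series_1[, h],
                 fc_mod2$forecasts$series_2[, h],
                 fc_mod2$forecasts$series_3[, h])
y_h <- c(fc_mod2$test_observations$series_1[h],
         fc_mod2$test_observations$series_2[h],
         fc_mod2$test_observations$series_3[h])
energy_sample(draws_h, y_h)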

You can use your score(s) of choice to compare different models. For example, we can compute and plot the difference in CRPS scores for each series in the data. Here, a negative value means the Gaussian Process model (mod2) is better, while a positive value means the spline model (mod1) is better.

-
crps_mod1 <- score(fc_mod1, score = 'crps')
-crps_mod2 <- score(fc_mod2, score = 'crps')
-
-diff_scores <- crps_mod2$series_1$score -
-  crps_mod1$series_1$score
-plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
-     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
-              max(abs(diff_scores), na.rm = TRUE)),
-     bty = 'l',
-     xlab = 'Forecast horizon',
-     ylab = expression(CRPS[GP]~-~CRPS[spline]))
-abline(h = 0, lty = 'dashed', lwd = 2)
-gp_better <- length(which(diff_scores < 0))
-title(main = paste0('GP better in ', gp_better, ' of 25 evaluations',
-                    '\nMean difference = ', 
-                    round(mean(diff_scores, na.rm = TRUE), 2)))
-

-

-
-diff_scores <- crps_mod2$series_2$score -
-  crps_mod1$series_2$score
-plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
-     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
-              max(abs(diff_scores), na.rm = TRUE)),
-     bty = 'l',
-     xlab = 'Forecast horizon',
-     ylab = expression(CRPS[GP]~-~CRPS[spline]))
-abline(h = 0, lty = 'dashed', lwd = 2)
-gp_better <- length(which(diff_scores < 0))
-title(main = paste0('GP better in ', gp_better, ' of 25 evaluations',
-                    '\nMean difference = ', 
-                    round(mean(diff_scores, na.rm = TRUE), 2)))
-

-

-diff_scores <- crps_mod2$series_3$score -
-  crps_mod1$series_3$score
-plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
-     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
-              max(abs(diff_scores), na.rm = TRUE)),
-     bty = 'l',
-     xlab = 'Forecast horizon',
-     ylab = expression(CRPS[GP]~-~CRPS[spline]))
-abline(h = 0, lty = 'dashed', lwd = 2)
-gp_better <- length(which(diff_scores < 0))
-title(main = paste0('GP better in ', gp_better, ' of 25 evaluations',
-                    '\nMean difference = ', 
-                    round(mean(diff_scores, na.rm = TRUE), 2)))
-

+
crps_mod1 <- score(fc_mod1, score = 'crps')
+crps_mod2 <- score(fc_mod2, score = 'crps')
+
+diff_scores <- crps_mod2$series_1$score -
+  crps_mod1$series_1$score
+plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
+     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
+              max(abs(diff_scores), na.rm = TRUE)),
+     bty = 'l',
+     xlab = 'Forecast horizon',
+     ylab = expression(CRPS[GP]~-~CRPS[spline]))
+abline(h = 0, lty = 'dashed', lwd = 2)
+gp_better <- length(which(diff_scores < 0))
+title(main = paste0('GP better in ', gp_better, ' of 25 evaluations',
+                    '\nMean difference = ', 
+                    round(mean(diff_scores, na.rm = TRUE), 2)))
+

+

+
+diff_scores <- crps_mod2$series_2$score -
+  crps_mod1$series_2$score
+plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
+     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
+              max(abs(diff_scores), na.rm = TRUE)),
+     bty = 'l',
+     xlab = 'Forecast horizon',
+     ylab = expression(CRPS[GP]~-~CRPS[spline]))
+abline(h = 0, lty = 'dashed', lwd = 2)
+gp_better <- length(which(diff_scores < 0))
+title(main = paste0('GP better in ', gp_better, ' of 25 evaluations',
+                    '\nMean difference = ', 
+                    round(mean(diff_scores, na.rm = TRUE), 2)))
+

+

+diff_scores <- crps_mod2$series_3$score -
+  crps_mod1$series_3$score
+plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
+     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
+              max(abs(diff_scores), na.rm = TRUE)),
+     bty = 'l',
+     xlab = 'Forecast horizon',
+     ylab = expression(CRPS[GP]~-~CRPS[spline]))
+abline(h = 0, lty = 'dashed', lwd = 2)
+gp_better <- length(which(diff_scores < 0))
+title(main = paste0('GP better in ', gp_better, ' of 25 evaluations',
+                    '\nMean difference = ', 
+                    round(mean(diff_scores, na.rm = TRUE), 2)))
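Because this per-series comparison code is repeated verbatim for each series, it can be wrapped in a small helper. An illustrative sketch only; `plot_crps_diff` is a hypothetical convenience function, not part of mvgam:

```r
# Hypothetical helper that reuses the plotting code above;
# assumes crps_mod1 and crps_mod2 exist as computed earlier
plot_crps_diff <- function(series_name){
  diff_scores <- crps_mod2[[series_name]]$score -
    crps_mod1[[series_name]]$score
  plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred',
       ylim = c(-1, 1) * max(abs(diff_scores), na.rm = TRUE),
       bty = 'l',
       xlab = 'Forecast horizon',
       ylab = expression(CRPS[GP]~-~CRPS[spline]))
  abline(h = 0, lty = 'dashed', lwd = 2)
  gp_better <- sum(diff_scores < 0, na.rm = TRUE)
  title(main = paste0('GP better in ', gp_better, ' of ',
                      length(diff_scores), ' evaluations',
                      '\nMean difference = ',
                      round(mean(diff_scores, na.rm = TRUE), 2)))
}
plot_crps_diff('series_1')
plot_crps_diff('series_2')
plot_crps_diff('series_3')
```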
+

The GP model consistently gives better forecasts, and the difference between scores grows quickly as the forecast horizon increases. This is not unexpected given the way that splines linearly extrapolate outside diff --git a/doc/shared_states.R b/doc/shared_states.R index 4eabc053..3619bf06 100644 --- a/doc/shared_states.R +++ b/doc/shared_states.R @@ -7,10 +7,11 @@ knitr::opts_chunk$set( eval = NOT_CRAN ) + ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -19,9 +20,10 @@ library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) + ## ----------------------------------------------------------------------------- set.seed(122) -simdat <- sim_mvgam(trend_model = 'AR1', +simdat <- sim_mvgam(trend_model = AR(), prop_trend = 0.6, mu = c(0, 1, 2), family = poisson()) @@ -29,9 +31,11 @@ trend_map <- data.frame(series = unique(simdat$data_train$series), trend = c(1, 1, 2)) trend_map + ## ----------------------------------------------------------------------------- all.equal(levels(trend_map$series), levels(simdat$data_train$series)) + ## ----------------------------------------------------------------------------- fake_mod <- mvgam(y ~ # observation model formula, which has a @@ -43,8 +47,10 @@ fake_mod <- mvgam(y ~ trend_formula = ~ s(season, bs = 'cc', k = 6), # AR1 dynamics (each latent process model has DIFFERENT) - # dynamics - trend_model = 'AR1', + # dynamics; processes are estimated using the noncentred + # parameterisation for improved efficiency + trend_model = AR(), + noncentred = TRUE, # supplied trend_map trend_map = trend_map, @@ -54,48 +60,51 @@ fake_mod <- mvgam(y ~ data = simdat$data_train, run_model = FALSE) + ## ----------------------------------------------------------------------------- code(fake_mod) + ## ----------------------------------------------------------------------------- fake_mod$model_data$Z + ## ----full_mod, include = FALSE, results='hide'-------------------------------- full_mod <- mvgam(y ~ series - 1, trend_formula = ~ s(season, bs = 'cc', k = 6), - trend_model = 'AR1', + trend_model = AR(), + noncentred = TRUE, trend_map = trend_map, family = poisson(), - data = simdat$data_train) + data = simdat$data_train, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# full_mod <- mvgam(y ~ series - 1, -# trend_formula = ~ s(season, bs = 'cc', k = 6), -# trend_model = 'AR1', -# trend_map = trend_map, -# family = poisson(), -# data = simdat$data_train) +## full_mod <- mvgam(y ~ series - 1, +## trend_formula = ~ s(season, bs = 'cc', k = 6), +## trend_model = AR(), +## noncentred = TRUE, +## trend_map = trend_map, +## family = poisson(), +## data = simdat$data_train, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(full_mod) -## ----------------------------------------------------------------------------- -plot(conditional_effects(full_mod, type = 'link'), ask = FALSE) ## ----------------------------------------------------------------------------- plot(full_mod, type = 'trend', series = 1) plot(full_mod, type = 'trend', series = 2) plot(full_mod, type = 'trend', series = 3) -## ----------------------------------------------------------------------------- -plot(full_mod, type = 'forecast', series = 1) -plot(full_mod, type = 'forecast', series = 2) -plot(full_mod, type = 'forecast', series = 3) 
## ----------------------------------------------------------------------------- -set.seed(543210) +set.seed(0) # simulate a nonlinear relationship using the mgcv function gamSim -signal_dat <- gamSim(n = 100, eg = 1, scale = 1) +signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1) # productivity is one of the variables in the simulated data productivity <- signal_dat$x2 @@ -106,22 +115,17 @@ productivity <- signal_dat$x2 true_signal <- as.vector(scale(signal_dat$y) + arima.sim(100, model = list(ar = 0.8, sd = 0.1))) + ## ----------------------------------------------------------------------------- plot(true_signal, type = 'l', bty = 'l', lwd = 2, ylab = 'True signal', xlab = 'Time') -## ----------------------------------------------------------------------------- -plot(true_signal ~ productivity, - pch = 16, bty = 'l', - ylab = 'True signal', - xlab = 'Productivity') ## ----------------------------------------------------------------------------- -set.seed(543210) sim_series = function(n_series = 3, true_signal){ - temp_effects <- gamSim(n = 100, eg = 7, scale = 0.1) + temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1) temperature <- temp_effects$y alphas <- rnorm(n_series, sd = 2) @@ -141,10 +145,12 @@ sim_series = function(n_series = 3, true_signal){ model_dat <- sim_series(true_signal = true_signal) %>% dplyr::mutate(series = factor(series)) + ## ----------------------------------------------------------------------------- plot_mvgam_series(data = model_dat, y = 'observed', series = 'all') + ## ----------------------------------------------------------------------------- plot(observed ~ temperature, data = model_dat %>% dplyr::filter(series == 'sensor_1'), @@ -162,6 +168,7 @@ plot_mvgam_series(data = model_dat, y = 'observed', ylab = 'Sensor 3', xlab = 'Temperature') + ## ----sensor_mod, include = FALSE, results='hide'------------------------------ mod <- mvgam(formula = # formula for observations, allowing for different @@ -178,7 +185,8 @@ mod <- mvgam(formula = trend_model = # in addition to productivity effects, the signal is # assumed to exhibit temporal autocorrelation - 'AR1', + AR(), + noncentred = TRUE, trend_map = # trend_map forces all sensors to track the same @@ -195,61 +203,61 @@ mod <- mvgam(formula = family = gaussian(), burnin = 600, adapt_delta = 0.95, - data = model_dat) + data = model_dat, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# mod <- mvgam(formula = -# # formula for observations, allowing for different -# # intercepts and hierarchical smooth effects of temperature -# observed ~ series + -# s(temperature, k = 10) + -# s(series, temperature, bs = 'sz', k = 8), -# -# trend_formula = -# # formula for the latent signal, which can depend -# # nonlinearly on productivity -# ~ s(productivity, k = 8), -# -# trend_model = -# # in addition to productivity effects, the signal is -# # assumed to exhibit temporal autocorrelation -# 'AR1', -# -# trend_map = -# # trend_map forces all sensors to track the same -# # latent signal -# data.frame(series = unique(model_dat$series), -# trend = c(1, 1, 1)), -# -# # informative priors on process error -# # and observation error will help with convergence -# priors = c(prior(normal(2, 0.5), class = sigma), -# prior(normal(1, 0.5), class = sigma_obs)), -# -# # Gaussian observations -# family = gaussian(), -# data = model_dat) +## mod <- mvgam(formula = +## # formula for observations, allowing for different +## # intercepts and hierarchical smooth effects of temperature +## 
observed ~ series + +## s(temperature, k = 10) + +## s(series, temperature, bs = 'sz', k = 8), +## +## trend_formula = +## # formula for the latent signal, which can depend +## # nonlinearly on productivity +## ~ s(productivity, k = 8), +## +## trend_model = +## # in addition to productivity effects, the signal is +## # assumed to exhibit temporal autocorrelation +## AR(), +## noncentred = TRUE, +## +## trend_map = +## # trend_map forces all sensors to track the same +## # latent signal +## data.frame(series = unique(model_dat$series), +## trend = c(1, 1, 1)), +## +## # informative priors on process error +## # and observation error will help with convergence +## priors = c(prior(normal(2, 0.5), class = sigma), +## prior(normal(1, 0.5), class = sigma_obs)), +## +## # Gaussian observations +## family = gaussian(), +## data = model_dat, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(mod, include_betas = FALSE) -## ----------------------------------------------------------------------------- -plot(mod, type = 'smooths', trend_effects = TRUE) ## ----------------------------------------------------------------------------- -plot(mod, type = 'smooths') +conditional_effects(mod, type = 'link') -## ----------------------------------------------------------------------------- -plot(conditional_effects(mod, type = 'link'), ask = FALSE) ## ----------------------------------------------------------------------------- +require(marginaleffects) plot_predictions(mod, condition = c('temperature', 'series', 'series'), points = 0.5) + theme(legend.position = 'none') -## ----------------------------------------------------------------------------- -pairs(mod, variable = c('sigma[1]', 'sigma_obs[1]')) ## ----------------------------------------------------------------------------- plot(mod, type = 'trend') diff --git a/doc/shared_states.Rmd b/doc/shared_states.Rmd index e0f36689..bc79aa71 100644 --- a/doc/shared_states.Rmd +++ b/doc/shared_states.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -39,7 +39,7 @@ This vignette gives an example of how `mvgam` can be used to estimate models whe The `trend_map` argument in the `mvgam()` function is an optional `data.frame` that can be used to specify which series should depend on which latent process models (called "trends" in `mvgam`). This can be particularly useful if we wish to force multiple observed time series to depend on the same latent trend process, but with different observation processes. If this argument is supplied, a latent factor model is set up by setting `use_lv = TRUE` and using the supplied `trend_map` to set up the shared trends. Users familiar with the `MARSS` family of packages will recognize this as a way of specifying the $Z$ matrix. This `data.frame` needs to have column names `series` and `trend`, with integer values in the `trend` column to state which trend each series should depend on. The `series` column should have a single unique entry for each time series in the data, with names that perfectly match the factor levels of the `series` variable in `data`). 
For example, if we were to simulate a collection of three integer-valued time series (using `sim_mvgam`), the following `trend_map` would force the first two series to share the same latent trend process: ```{r} set.seed(122) -simdat <- sim_mvgam(trend_model = 'AR1', +simdat <- sim_mvgam(trend_model = AR(), prop_trend = 0.6, mu = c(0, 1, 2), family = poisson()) @@ -66,8 +66,10 @@ fake_mod <- mvgam(y ~ trend_formula = ~ s(season, bs = 'cc', k = 6), # AR1 dynamics (each latent process model has DIFFERENT) - # dynamics - trend_model = 'AR1', + # dynamics; processes are estimated using the noncentred + # parameterisation for improved efficiency + trend_model = AR(), + noncentred = TRUE, # supplied trend_map trend_map = trend_map, @@ -93,19 +95,23 @@ Though this model doesn't perfectly match the data-generating process (which all ```{r full_mod, include = FALSE, results='hide'} full_mod <- mvgam(y ~ series - 1, trend_formula = ~ s(season, bs = 'cc', k = 6), - trend_model = 'AR1', + trend_model = AR(), + noncentred = TRUE, trend_map = trend_map, family = poisson(), - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` ```{r eval=FALSE} full_mod <- mvgam(y ~ series - 1, trend_formula = ~ s(season, bs = 'cc', k = 6), - trend_model = 'AR1', + trend_model = AR(), + noncentred = TRUE, trend_map = trend_map, family = poisson(), - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` The summary of this model is informative as it shows that only two latent process models have been estimated, even though we have three observed time series. The model converges well @@ -113,31 +119,21 @@ The summary of this model is informative as it shows that only two latent proces summary(full_mod) ``` -Quick plots of all main effects can be made using `conditional_effects()`: -```{r} -plot(conditional_effects(full_mod, type = 'link'), ask = FALSE) -``` - -Even more informative are the plots of the latent processes. Both series 1 and 2 share the exact same estimates, while the estimates for series 3 are different: +Both series 1 and 2 share the exact same latent process estimates, while the estimates for series 3 are different: ```{r} plot(full_mod, type = 'trend', series = 1) plot(full_mod, type = 'trend', series = 2) plot(full_mod, type = 'trend', series = 3) ``` -However, the forecasts for series' 1 and 2 differ because they have different intercepts in the observation model -```{r} -plot(full_mod, type = 'forecast', series = 1) -plot(full_mod, type = 'forecast', series = 2) -plot(full_mod, type = 'forecast', series = 3) -``` +However, forecasts for series' 1 and 2 will differ because they have different intercepts in the observation model ## Example: signal detection Now we will explore a more complicated example. Here we simulate a true hidden signal that we are trying to track. This signal depends nonlinearly on some covariate (called `productivity`, which represents a measure of how productive the landscape is). 
The signal also demonstrates a fairly large amount of temporal autocorrelation: ```{r} -set.seed(543210) +set.seed(0) # simulate a nonlinear relationship using the mgcv function gamSim -signal_dat <- gamSim(n = 100, eg = 1, scale = 1) +signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1) # productivity is one of the variables in the simulated data productivity <- signal_dat$x2 @@ -157,19 +153,10 @@ plot(true_signal, type = 'l', xlab = 'Time') ``` -And plot the relationship between the signal and the `productivity` covariate: -```{r} -plot(true_signal ~ productivity, - pch = 16, bty = 'l', - ylab = 'True signal', - xlab = 'Productivity') -``` - Next we simulate three sensors that are trying to track the same hidden signal. All of these sensors have different observation errors that can depend nonlinearly on a second external covariate, called `temperature` in this example. Again this makes use of `gamSim` ```{r} -set.seed(543210) sim_series = function(n_series = 3, true_signal){ - temp_effects <- gamSim(n = 100, eg = 7, scale = 0.1) + temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1) temperature <- temp_effects$y alphas <- rnorm(n_series, sd = 2) @@ -233,7 +220,8 @@ mod <- mvgam(formula = trend_model = # in addition to productivity effects, the signal is # assumed to exhibit temporal autocorrelation - 'AR1', + AR(), + noncentred = TRUE, trend_map = # trend_map forces all sensors to track the same @@ -250,7 +238,8 @@ mod <- mvgam(formula = family = gaussian(), burnin = 600, adapt_delta = 0.95, - data = model_dat) + data = model_dat, + silent = 2) ``` ```{r eval=FALSE} @@ -269,7 +258,8 @@ mod <- mvgam(formula = trend_model = # in addition to productivity effects, the signal is # assumed to exhibit temporal autocorrelation - 'AR1', + AR(), + noncentred = TRUE, trend_map = # trend_map forces all sensors to track the same @@ -284,7 +274,8 @@ mod <- mvgam(formula = # Gaussian observations family = gaussian(), - data = model_dat) + data = model_dat, + silent = 2) ``` View a reduced version of the model summary because there will be many spline coefficients in this model @@ -293,35 +284,21 @@ summary(mod, include_betas = FALSE) ``` ### Inspecting effects on both process and observation models -Don't pay much attention to the approximate *p*-values of the smooth terms. The calculation for these values is incredibly sensitive to the estimates for the smoothing parameters so I don't tend to find them to be very meaningful. What are meaningful, however, are prediction-based plots of the smooth functions. For example, here is the estimated response of the underlying signal to `productivity`: -```{r} -plot(mod, type = 'smooths', trend_effects = TRUE) -``` - -And here are the estimated relationships between the sensor observations and the `temperature` covariate: -```{r} -plot(mod, type = 'smooths') -``` - -All main effects can be quickly plotted with `conditional_effects`: +Don't pay much attention to the approximate *p*-values of the smooth terms. The calculation for these values is incredibly sensitive to the estimates for the smoothing parameters so I don't tend to find them to be very meaningful. What are meaningful, however, are prediction-based plots of the smooth functions. All main effects can be quickly plotted with `conditional_effects`: ```{r} -plot(conditional_effects(mod, type = 'link'), ask = FALSE) +conditional_effects(mod, type = 'link') ``` `conditional_effects` is simply a wrapper to the more flexible `plot_predictions` function from the `marginaleffects` package. 
We can get more useful plots of these effects using this function for further customisation: ```{r} +require(marginaleffects) plot_predictions(mod, condition = c('temperature', 'series', 'series'), points = 0.5) + theme(legend.position = 'none') ``` -We have successfully estimated effects, some of them nonlinear, that impact the hidden process AND the observations. All in a single joint model. But there can always be challenges with these models, particularly when estimating both process and observation error at the same time. For example, a `pairs` plot for the observation error for sensor 1 and the hidden process error shows some strong correlations that we might want to deal with by using a more structured prior: -```{r} -pairs(mod, variable = c('sigma[1]', 'sigma_obs[1]')) -``` - -But we will leave the model as-is for this example +We have successfully estimated effects, some of them nonlinear, that impact the hidden process AND the observations. All in a single joint model. But there can always be challenges with these models, particularly when estimating both process and observation error at the same time. ### Recovering the hidden signal A final but very key question is whether we can successfully recover the true hidden signal. The `trend` slot in the returned model parameters has the estimates for this signal, which we can easily plot using the `mvgam` S3 method for `plot`. We can also overlay the true values for the hidden signal, which shows that our model has done a good job of recovering it: @@ -336,7 +313,7 @@ points(true_signal, pch = 16, cex = 0.8) ## Further reading The following papers and resources offer a lot of useful material about other types of State-Space models and how they can be applied in practice: -Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. "[MARSS: multivariate autoregressive state-space models for analyzing time-series data.](https://d1wqtxts1xzle7.cloudfront.net/30588864/rjournal_2012-1_holmes_et_al-libre.pdf?1391843792=&response-content-disposition=inline%3B+filename%3DMARSS_Multivariate_Autoregressive_State.pdf&Expires=1695861526&Signature=TCRXULs0mUKRM4m1pmvZxwE10bUqS6vzLcuKeUBCj57YIjx23iTxS1fEgBpV0fs2wb5XAw7ZkG84XyMaoS~vjiqZ-DpheQDHwAHpIWG-TcckHQjEjPWTNajFvAemToUdCiHnDa~yrhW9HRgXjgncdalkjzjvjT3HLSW8mcjBDhQN-WJ3MKQFSXtxoBpWfcuPYbf-HC1E1oSl7957y~w0I1gcIVdu6LHjP~CEKXa0BQzS4xuarL2nz~tHD2MverbNJYMrDGrAxIi-MX6i~lfHWuwV6UKRdoOZ0pXIcMYWBTv9V5xYey76aMKTICiJ~0NqXLZdXO5qlS4~~2nFEO7b7w__&Key-Pair-Id=APKAJLOHF5GGSLRBV4ZA)" *R Journal*. 4.1 (2012): 11. +Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. "[MARSS: multivariate autoregressive state-space models for analyzing time-series data.](https://journal.r-project.org/archive/2012/RJ-2012-002/index.html)" *R Journal*. 4.1 (2012): 11. Ward, Eric J., et al. "[Inferring spatial structure from time‐series data: using multivariate state‐space models to detect metapopulation structure of California sea lions in the Gulf of California, Mexico.](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/j.1365-2664.2009.01745.x)" *Journal of Applied Ecology* 47.1 (2010): 47-56. diff --git a/doc/shared_states.html b/doc/shared_states.html index 77372995..db2dac3b 100644 --- a/doc/shared_states.html +++ b/doc/shared_states.html @@ -12,7 +12,7 @@ - + Shared latent states in mvgam @@ -340,7 +340,7 @@

Shared latent states in mvgam

Nicholas J Clark

-

2024-04-16

+

2024-07-01

@@ -393,7 +393,7 @@

The trend_map argument

sim_mvgam), the following trend_map would force the first two series to share the same latent trend process:

set.seed(122)
-simdat <- sim_mvgam(trend_model = 'AR1',
+simdat <- sim_mvgam(trend_model = AR(),
                     prop_trend = 0.6,
                     mu = c(0, 1, 2),
                     family = poisson())
@@ -432,16 +432,18 @@ 

Checking trend_map with trend_formula = ~ s(season, bs = 'cc', k = 6), # AR1 dynamics (each latent process model has DIFFERENT) - # dynamics - trend_model = 'AR1', - - # supplied trend_map - trend_map = trend_map, - - # data and observation family - family = poisson(), - data = simdat$data_train, - run_model = FALSE)

+ # dynamics; processes are estimated using the noncentred + # parameterisation for improved efficiency + trend_model = AR(), + noncentred = TRUE, + + # supplied trend_map + trend_map = trend_map, + + # data and observation family + family = poisson(), + data = simdat$data_train, + run_model = FALSE)

Inspecting the Stan code shows how this model is a dynamic factor model in which the loadings are constructed to reflect the supplied trend_map:

@@ -479,96 +481,99 @@

Checking trend_map with #> vector<lower=0>[n_lv] sigma; #> #> // latent state AR1 terms -#> vector<lower=-1.5, upper=1.5>[n_lv] ar1; +#> vector<lower=-1, upper=1>[n_lv] ar1; #> -#> // latent states -#> matrix[n, n_lv] LV; +#> // raw latent states +#> matrix[n, n_lv] LV_raw; #> #> // smoothing parameters #> vector<lower=0>[n_sp_trend] lambda_trend; #> } #> transformed parameters { -#> // latent states and loading matrix +#> // raw latent states #> vector[n * n_lv] trend_mus; #> matrix[n, n_series] trend; #> #> // basis coefficients #> vector[num_basis] b; -#> vector[num_basis_trend] b_trend; -#> -#> // observation model basis coefficients -#> b[1 : num_basis] = b_raw[1 : num_basis]; +#> +#> // latent states +#> matrix[n, n_lv] LV; +#> vector[num_basis_trend] b_trend; #> -#> // process model basis coefficients -#> b_trend[1 : num_basis_trend] = b_raw_trend[1 : num_basis_trend]; +#> // observation model basis coefficients +#> b[1 : num_basis] = b_raw[1 : num_basis]; #> -#> // latent process linear predictors -#> trend_mus = X_trend * b_trend; +#> // process model basis coefficients +#> b_trend[1 : num_basis_trend] = b_raw_trend[1 : num_basis_trend]; #> -#> // derived latent states -#> for (i in 1 : n) { -#> for (s in 1 : n_series) { -#> trend[i, s] = dot_product(Z[s, : ], LV[i, : ]); -#> } -#> } -#> } -#> model { -#> // prior for seriesseries_1... -#> b_raw[1] ~ student_t(3, 0, 2); +#> // latent process linear predictors +#> trend_mus = X_trend * b_trend; +#> LV = LV_raw .* rep_matrix(sigma', rows(LV_raw)); +#> for (j in 1 : n_lv) { +#> LV[1, j] += trend_mus[ytimes_trend[1, j]]; +#> for (i in 2 : n) { +#> LV[i, j] += trend_mus[ytimes_trend[i, j]] +#> + ar1[j] * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]); +#> } +#> } #> -#> // prior for seriesseries_2... -#> b_raw[2] ~ student_t(3, 0, 2); -#> -#> // prior for seriesseries_3... -#> b_raw[3] ~ student_t(3, 0, 2); -#> -#> // priors for AR parameters -#> ar1 ~ std_normal(); -#> -#> // priors for latent state SD parameters -#> sigma ~ student_t(3, 0, 2.5); -#> -#> // dynamic process models +#> // derived latent states +#> for (i in 1 : n) { +#> for (s in 1 : n_series) { +#> trend[i, s] = dot_product(Z[s, : ], LV[i, : ]); +#> } +#> } +#> } +#> model { +#> // prior for seriesseries_1... +#> b_raw[1] ~ student_t(3, 0, 2); +#> +#> // prior for seriesseries_2... +#> b_raw[2] ~ student_t(3, 0, 2); #> -#> // prior for s(season)_trend... -#> b_raw_trend[1 : 4] ~ multi_normal_prec(zero_trend[1 : 4], -#> S_trend1[1 : 4, 1 : 4] -#> * lambda_trend[1]); -#> lambda_trend ~ normal(5, 30); -#> for (j in 1 : n_lv) { -#> LV[1, j] ~ normal(trend_mus[ytimes_trend[1, j]], sigma[j]); -#> for (i in 2 : n) { -#> LV[i, j] ~ normal(trend_mus[ytimes_trend[i, j]] -#> + ar1[j] -#> * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]), -#> sigma[j]); -#> } -#> } -#> { -#> // likelihood functions -#> vector[n_nonmissing] flat_trends; -#> flat_trends = to_vector(trend)[obs_ind]; -#> flat_ys ~ poisson_log_glm(append_col(flat_xs, flat_trends), 0.0, -#> append_row(b, 1.0)); -#> } -#> } -#> generated quantities { -#> vector[total_obs] eta; -#> matrix[n, n_series] mus; -#> vector[n_sp_trend] rho_trend; -#> vector[n_lv] penalty; -#> array[n, n_series] int ypred; -#> penalty = 1.0 / (sigma .* sigma); -#> rho_trend = log(lambda_trend); -#> -#> matrix[n_series, n_lv] lv_coefs = Z; -#> // posterior predictions -#> eta = X * b; -#> for (s in 1 : n_series) { -#> mus[1 : n, s] = eta[ytimes[1 : n, s]] + trend[1 : n, s]; -#> ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]); -#> } -#> }

+#> // prior for seriesseries_3... +#> b_raw[3] ~ student_t(3, 0, 2); +#> +#> // priors for AR parameters +#> ar1 ~ std_normal(); +#> +#> // priors for latent state SD parameters +#> sigma ~ student_t(3, 0, 2.5); +#> to_vector(LV_raw) ~ std_normal(); +#> +#> // dynamic process models +#> +#> // prior for s(season)_trend... +#> b_raw_trend[1 : 4] ~ multi_normal_prec(zero_trend[1 : 4], +#> S_trend1[1 : 4, 1 : 4] +#> * lambda_trend[1]); +#> lambda_trend ~ normal(5, 30); +#> { +#> // likelihood functions +#> vector[n_nonmissing] flat_trends; +#> flat_trends = to_vector(trend)[obs_ind]; +#> flat_ys ~ poisson_log_glm(append_col(flat_xs, flat_trends), 0.0, +#> append_row(b, 1.0)); +#> } +#> } +#> generated quantities { +#> vector[total_obs] eta; +#> matrix[n, n_series] mus; +#> vector[n_sp_trend] rho_trend; +#> vector[n_lv] penalty; +#> array[n, n_series] int ypred; +#> penalty = 1.0 / (sigma .* sigma); +#> rho_trend = log(lambda_trend); +#> +#> matrix[n_series, n_lv] lv_coefs = Z; +#> // posterior predictions +#> eta = X * b; +#> for (s in 1 : n_series) { +#> mus[1 : n, s] = eta[ytimes[1 : n, s]] + trend[1 : n, s]; +#> ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]); +#> } +#> }
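To make the loading algebra concrete, here is a standalone toy illustration (the values are invented for demonstration and are not drawn from the fitted object): each series' trend is the dot product of its row of the \(Z\) matrix with the latent states, so series 1 and 2 inherit process 1 while series 3 inherits process 2.

```r
# Toy illustration of the Z-matrix loadings implied by the trend_map
Z <- matrix(c(1, 0,
              1, 0,
              0, 1),
            nrow = 3, byrow = TRUE)
LV_t <- c(0.25, -1.1)   # hypothetical latent state values at one timepoint
as.vector(Z %*% LV_t)   # per-series trend values at that timepoint
```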

Notice the line that states “lv_coefs = Z;”. This uses the supplied \(Z\) matrix to construct the loading coefficients. The supplied matrix now looks exactly like what you’d use @@ -587,103 +592,96 @@

Fitting and inspecting the model

can still fit it to show what the resulting inferences look like:

full_mod <- mvgam(y ~ series - 1,
                   trend_formula = ~ s(season, bs = 'cc', k = 6),
-                  trend_model = 'AR1',
-                  trend_map = trend_map,
-                  family = poisson(),
-                  data = simdat$data_train)
+ trend_model = AR(), + noncentred = TRUE, + trend_map = trend_map, + family = poisson(), + data = simdat$data_train, + silent = 2)

The summary of this model is informative as it shows that only two latent process models have been estimated, even though we have three observed time series. The model converges well

summary(full_mod)
 #> GAM observation formula:
 #> y ~ series - 1
-#> 
-#> GAM process formula:
-#> ~s(season, bs = "cc", k = 6)
-#> 
-#> Family:
-#> poisson
-#> 
-#> Link function:
-#> log
-#> 
-#> Trend model:
-#> AR1
-#> 
-#> N process models:
-#> 2 
-#> 
-#> N series:
-#> 3 
-#> 
-#> N timepoints:
-#> 75 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> GAM observation model coefficient (beta) estimates:
-#>                 2.5%   50% 97.5% Rhat n_eff
-#> seriesseries_1 -0.14 0.087  0.31    1  1303
-#> seriesseries_2  0.91 1.100  1.20    1  1076
-#> seriesseries_3  1.90 2.100  2.30    1   456
-#> 
-#> Process model AR parameter estimates:
-#>         2.5%     50% 97.5% Rhat n_eff
-#> ar1[1] -0.72 -0.4300 -0.04 1.00   572
-#> ar1[2] -0.28 -0.0074  0.26 1.01  1838
-#> 
-#> Process error parameter estimates:
-#>          2.5%  50% 97.5% Rhat n_eff
-#> sigma[1] 0.33 0.48  0.68    1   343
-#> sigma[2] 0.59 0.73  0.90    1  1452
-#> 
-#> GAM process model coefficient (beta) estimates:
-#>                    2.5%     50% 97.5% Rhat n_eff
-#> s(season).1_trend -0.21 -0.0076  0.20    1  1917
-#> s(season).2_trend -0.27 -0.0470  0.18    1  1682
-#> s(season).3_trend -0.15  0.0670  0.29    1  1462
-#> s(season).4_trend -0.15  0.0630  0.27    1  1574
-#> 
-#> Approximate significance of GAM process smooths:
-#>            edf Ref.df    F p-value
-#> s(season) 2.49      4 0.09    0.93
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhat looks reasonable for all parameters
-#> 0 of 2000 iterations ended with a divergence (0%)
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:09:57 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
-

Quick plots of all main effects can be made using -conditional_effects():

-
plot(conditional_effects(full_mod, type = 'link'), ask = FALSE)
-

-

Even more informative are the plots of the latent processes. Both -series 1 and 2 share the exact same estimates, while the estimates for -series 3 are different:

-
plot(full_mod, type = 'trend', series = 1)
-

-
plot(full_mod, type = 'trend', series = 2)
-

-
plot(full_mod, type = 'trend', series = 3)
-

-

However, the forecasts for series’ 1 and 2 differ because they have +#> <environment: 0x000001f52b9e3130> +#> +#> GAM process formula: +#> ~s(season, bs = "cc", k = 6) +#> <environment: 0x000001f52b9e3130> +#> +#> Family: +#> poisson +#> +#> Link function: +#> log +#> +#> Trend model: +#> AR() +#> +#> N process models: +#> 2 +#> +#> N series: +#> 3 +#> +#> N timepoints: +#> 75 +#> +#> Status: +#> Fitted using Stan +#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 +#> Total post-warmup draws = 2000 +#> +#> +#> GAM observation model coefficient (beta) estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> seriesseries_1 -0.14 0.084 0.29 1.00 1720 +#> seriesseries_2 0.91 1.100 1.20 1.00 1374 +#> seriesseries_3 1.90 2.100 2.30 1.01 447 +#> +#> Process model AR parameter estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] -0.72 -0.430 -0.037 1.01 560 +#> ar1[2] -0.30 -0.017 0.270 1.01 286 +#> +#> Process error parameter estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> sigma[1] 0.34 0.49 0.65 1 819 +#> sigma[2] 0.59 0.73 0.90 1 573 +#> +#> GAM process model coefficient (beta) estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> s(season).1_trend -0.20 -0.003 0.21 1.01 857 +#> s(season).2_trend -0.27 -0.046 0.17 1.00 918 +#> s(season).3_trend -0.15 0.068 0.28 1.00 850 +#> s(season).4_trend -0.14 0.064 0.27 1.00 972 +#> +#> Approximate significance of GAM process smooths: +#> edf Ref.df Chi.sq p-value +#> s(season) 2.33 4 0.38 0.93 +#> +#> Stan MCMC diagnostics: +#> n_eff / iter looks reasonable for all parameters +#> Rhat looks reasonable for all parameters +#> 0 of 2000 iterations ended with a divergence (0%) +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) +#> E-FMI indicated no pathological behavior +#> +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:31:17 AM 2024. +#> For each parameter, n_eff is a crude measure of effective sample size, +#> and Rhat is the potential scale reduction factor on split MCMC chains +#> (at convergence, Rhat = 1) +

Both series 1 and 2 share the exact same latent process estimates, +while the estimates for series 3 are different:

+
plot(full_mod, type = 'trend', series = 1)
+

+
plot(full_mod, type = 'trend', series = 2)
+

+
plot(full_mod, type = 'trend', series = 3)
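If you would rather verify this sharing numerically than visually, one option is to compare the trend hindcast draws for the two series directly. A sketch, assuming `hindcast()` accepts `type = 'trend'` in the same way the `forecast()` method does:

```r
# The trend hindcast draws for series 1 and 2 should be identical
hc <- hindcast(full_mod, type = 'trend')
all.equal(hc$hindcasts$series_1, hc$hindcasts$series_2)
```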
+

+

However, forecasts for series 1 and 2 will differ because they have

-
plot(full_mod, type = 'forecast', series = 1)
-

-
plot(full_mod, type = 'forecast', series = 2)
-

-
plot(full_mod, type = 'forecast', series = 3)
-

@@ -693,83 +691,75 @@

Example: signal detection

nonlinearly on some covariate (called productivity, which represents a measure of how productive the landscape is). The signal also demonstrates a fairly large amount of temporal autocorrelation:

-
set.seed(543210)
-# simulate a nonlinear relationship using the mgcv function gamSim
-signal_dat <- gamSim(n = 100, eg = 1, scale = 1)
-#> Gu & Wahba 4 term additive model
-
-# productivity is one of the variables in the simulated data
-productivity <- signal_dat$x2
-
-# simulate the true signal, which already has a nonlinear relationship
-# with productivity; we will add in a fairly strong AR1 process to 
-# contribute to the signal
-true_signal <- as.vector(scale(signal_dat$y) +
-                         arima.sim(100, model = list(ar = 0.8, sd = 0.1)))
+
set.seed(0)
+# simulate a nonlinear relationship using the mgcv function gamSim
+signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1)
+#> Gu & Wahba 4 term additive model
+
+# productivity is one of the variables in the simulated data
+productivity <- signal_dat$x2
+
+# simulate the true signal, which already has a nonlinear relationship
+# with productivity; we will add in a fairly strong AR1 process to 
+# contribute to the signal
+true_signal <- as.vector(scale(signal_dat$y) +
+                         arima.sim(100, model = list(ar = 0.8, sd = 0.1)))

Plot the signal to inspect its evolution over time

-
plot(true_signal, type = 'l',
-     bty = 'l', lwd = 2,
-     ylab = 'True signal',
-     xlab = 'Time')
-

-

And plot the relationship between the signal and the -productivity covariate:

-
plot(true_signal ~ productivity,
-     pch = 16, bty = 'l',
-     ylab = 'True signal',
-     xlab = 'Productivity')
-

+
plot(true_signal, type = 'l',
+     bty = 'l', lwd = 2,
+     ylab = 'True signal',
+     xlab = 'Time')
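Since the simulation adds a strong AR1 component, a quick `acf()` check makes the claimed temporal autocorrelation concrete:

```r
# The AR1 component should show up as slowly decaying autocorrelation
acf(true_signal, main = '')
```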
+

Next we simulate three sensors that are trying to track the same hidden signal. All of these sensors have different observation errors that can depend nonlinearly on a second external covariate, called temperature in this example. Again, this makes use of gamSim

-
set.seed(543210)
-sim_series = function(n_series = 3, true_signal){
-  temp_effects <- gamSim(n = 100, eg = 7, scale = 0.1)
-  temperature <- temp_effects$y
-  alphas <- rnorm(n_series, sd = 2)
-
-  do.call(rbind, lapply(seq_len(n_series), function(series){
-    data.frame(observed = rnorm(length(true_signal),
-                                mean = alphas[series] +
-                                       1.5*as.vector(scale(temp_effects[, series + 1])) +
-                                       true_signal,
-                                sd = runif(1, 1, 2)),
-               series = paste0('sensor_', series),
-               time = 1:length(true_signal),
-               temperature = temperature,
-               productivity = productivity,
-               true_signal = true_signal)
-   }))
-  }
-model_dat <- sim_series(true_signal = true_signal) %>%
-  dplyr::mutate(series = factor(series))
-#> Gu & Wahba 4 term additive model, correlated predictors
+
sim_series = function(n_series = 3, true_signal){
+  temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1)
+  temperature <- temp_effects$y
+  alphas <- rnorm(n_series, sd = 2)
+
+  do.call(rbind, lapply(seq_len(n_series), function(series){
+    data.frame(observed = rnorm(length(true_signal),
+                                mean = alphas[series] +
+                                       1.5*as.vector(scale(temp_effects[, series + 1])) +
+                                       true_signal,
+                                sd = runif(1, 1, 2)),
+               series = paste0('sensor_', series),
+               time = 1:length(true_signal),
+               temperature = temperature,
+               productivity = productivity,
+               true_signal = true_signal)
+   }))
+  }
+model_dat <- sim_series(true_signal = true_signal) %>%
+  dplyr::mutate(series = factor(series))
+#> Gu & Wahba 4 term additive model, correlated predictors

Plot the sensor observations

-
plot_mvgam_series(data = model_dat, y = 'observed',
-                  series = 'all')
-

+
plot_mvgam_series(data = model_dat, y = 'observed',
+                  series = 'all')
+

And now plot the observed relationships between the three sensors and the temperature covariate

-
 plot(observed ~ temperature, data = model_dat %>%
-   dplyr::filter(series == 'sensor_1'),
-   pch = 16, bty = 'l',
-   ylab = 'Sensor 1',
-   xlab = 'Temperature')
-

-
 plot(observed ~ temperature, data = model_dat %>%
-   dplyr::filter(series == 'sensor_2'),
-   pch = 16, bty = 'l',
-   ylab = 'Sensor 2',
-   xlab = 'Temperature')
-

-
 plot(observed ~ temperature, data = model_dat %>%
-   dplyr::filter(series == 'sensor_3'),
-   pch = 16, bty = 'l',
-   ylab = 'Sensor 3',
-   xlab = 'Temperature')
-

+
 plot(observed ~ temperature, data = model_dat %>%
+   dplyr::filter(series == 'sensor_1'),
+   pch = 16, bty = 'l',
+   ylab = 'Sensor 1',
+   xlab = 'Temperature')
+

+
 plot(observed ~ temperature, data = model_dat %>%
+   dplyr::filter(series == 'sensor_2'),
+   pch = 16, bty = 'l',
+   ylab = 'Sensor 2',
+   xlab = 'Temperature')
+

+
 plot(observed ~ temperature, data = model_dat %>%
+   dplyr::filter(series == 'sensor_3'),
+   pch = 16, bty = 'l',
+   ylab = 'Sensor 3',
+   xlab = 'Temperature')
+

The shared signal model

Now we can formulate and fit a model that allows each sensor’s @@ -782,116 +772,117 @@

The shared signal model

variance components (process error and observation error), which reflect our prior belief that the observation error is smaller overall than the true process error

-
mod <- mvgam(formula =
-               # formula for observations, allowing for different
-               # intercepts and hierarchical smooth effects of temperature
-               observed ~ series + 
-               s(temperature, k = 10) +
-               s(series, temperature, bs = 'sz', k = 8),
-             
-             trend_formula =
-               # formula for the latent signal, which can depend
-               # nonlinearly on productivity
-               ~ s(productivity, k = 8),
-             
-             trend_model =
-               # in addition to productivity effects, the signal is
-               # assumed to exhibit temporal autocorrelation
-               'AR1',
-             
-             trend_map =
-               # trend_map forces all sensors to track the same
-               # latent signal
-               data.frame(series = unique(model_dat$series),
-                          trend = c(1, 1, 1)),
-             
-             # informative priors on process error
-             # and observation error will help with convergence
-             priors = c(prior(normal(2, 0.5), class = sigma),
-                        prior(normal(1, 0.5), class = sigma_obs)),
-             
-             # Gaussian observations
-             family = gaussian(),
-             data = model_dat)
+
mod <- mvgam(formula =
+               # formula for observations, allowing for different
+               # intercepts and hierarchical smooth effects of temperature
+               observed ~ series + 
+               s(temperature, k = 10) +
+               s(series, temperature, bs = 'sz', k = 8),
+             
+             trend_formula =
+               # formula for the latent signal, which can depend
+               # nonlinearly on productivity
+               ~ s(productivity, k = 8),
+             
+             trend_model =
+               # in addition to productivity effects, the signal is
+               # assumed to exhibit temporal autocorrelation
+               AR(),
+             noncentred = TRUE,
+             
+             trend_map =
+               # trend_map forces all sensors to track the same
+               # latent signal
+               data.frame(series = unique(model_dat$series),
+                          trend = c(1, 1, 1)),
+             
+             # informative priors on process error
+             # and observation error will help with convergence
+             priors = c(prior(normal(2, 0.5), class = sigma),
+                        prior(normal(1, 0.5), class = sigma_obs)),
+             
+             # Gaussian observations
+             family = gaussian(),
+             data = model_dat,
+             silent = 2)

View a reduced version of the model summary because there will be many spline coefficients in this model

-
summary(mod, include_betas = FALSE)
-#> GAM observation formula:
-#> observed ~ series + s(temperature, k = 10) + s(series, temperature, 
-#>     bs = "sz", k = 8)
-#> 
-#> GAM process formula:
-#> ~s(productivity, k = 8)
-#> 
-#> Family:
-#> gaussian
-#> 
-#> Link function:
-#> identity
-#> 
-#> Trend model:
-#> AR1
-#> 
-#> N process models:
-#> 1 
-#> 
-#> N series:
-#> 3 
-#> 
-#> N timepoints:
-#> 100 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1100; warmup = 600; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> Observation error parameter estimates:
-#>              2.5% 50% 97.5% Rhat n_eff
-#> sigma_obs[1]  1.6 1.9   2.2    1  1757
-#> sigma_obs[2]  1.4 1.7   2.0    1  1090
-#> sigma_obs[3]  1.3 1.5   1.8    1  1339
-#> 
-#> GAM observation model coefficient (beta) estimates:
-#>                 2.5%   50% 97.5% Rhat n_eff
-#> (Intercept)     0.72  1.70  2.50 1.01   360
-#> seriessensor_2 -2.10 -0.96  0.32 1.00  1068
-#> seriessensor_3 -3.40 -2.00 -0.39 1.00  1154
-#> 
-#> Approximate significance of GAM observation smooths:
-#>                        edf Ref.df    F p-value    
-#> s(temperature)        1.22      9 12.7  <2e-16 ***
-#> s(series,temperature) 1.92     16  1.0   0.011 *  
-#> ---
-#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-#> 
-#> Process model AR parameter estimates:
-#>        2.5%  50% 97.5% Rhat n_eff
-#> ar1[1] 0.33 0.59  0.83    1   492
-#> 
-#> Process error parameter estimates:
-#>          2.5% 50% 97.5% Rhat n_eff
-#> sigma[1] 0.72   1   1.3 1.01   392
-#> 
-#> Approximate significance of GAM process smooths:
-#>                 edf Ref.df    F p-value    
-#> s(productivity) 3.6      7 9.34 0.00036 ***
-#> ---
-#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhats above 1.05 found for 28 parameters
-#>  *Diagnose further to investigate why the chains have not mixed
-#> 0 of 2000 iterations ended with a divergence (0%)
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:11:39 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+
summary(mod, include_betas = FALSE)
+#> GAM observation formula:
+#> observed ~ series + s(temperature, k = 10) + s(series, temperature, 
+#>     bs = "sz", k = 8)
+#> <environment: 0x000001f52b9e3130>
+#> 
+#> GAM process formula:
+#> ~s(productivity, k = 8)
+#> <environment: 0x000001f52b9e3130>
+#> 
+#> Family:
+#> gaussian
+#> 
+#> Link function:
+#> identity
+#> 
+#> Trend model:
+#> AR()
+#> 
+#> N process models:
+#> 1 
+#> 
+#> N series:
+#> 3 
+#> 
+#> N timepoints:
+#> 100 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1100; warmup = 600; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> Observation error parameter estimates:
+#>              2.5% 50% 97.5% Rhat n_eff
+#> sigma_obs[1]  1.4 1.7   2.1    1  1298
+#> sigma_obs[2]  1.7 2.0   2.3    1  1946
+#> sigma_obs[3]  2.0 2.3   2.7    1  2569
+#> 
+#> GAM observation model coefficient (beta) estimates:
+#>                 2.5%  50% 97.5% Rhat n_eff
+#> (Intercept)    -3.40 -2.1 -0.69    1  1067
+#> seriessensor_2 -2.80 -1.4 -0.14    1  1169
+#> seriessensor_3  0.63  3.1  4.80    1  1055
+#> 
+#> Approximate significance of GAM observation smooths:
+#>                        edf Ref.df Chi.sq p-value    
+#> s(temperature)        1.39      9   0.11       1    
+#> s(series,temperature) 2.78     16 107.40 5.4e-05 ***
+#> ---
+#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#> 
+#> Process model AR parameter estimates:
+#>        2.5% 50% 97.5% Rhat n_eff
+#> ar1[1] 0.37 0.6   0.8 1.01   616
+#> 
+#> Process error parameter estimates:
+#>          2.5% 50% 97.5% Rhat n_eff
+#> sigma[1]  1.5 1.8   2.2 1.01   649
+#> 
+#> Approximate significance of GAM process smooths:
+#>                   edf Ref.df Chi.sq p-value
+#> s(productivity) 0.926      7   5.45       1
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 0 of 2000 iterations ended with a divergence (0%)
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:32:12 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

Inspecting effects on both process and observation models

@@ -899,39 +890,26 @@

Inspecting effects on both process and observation models

smooth terms. The calculation for these values is incredibly sensitive to the estimates for the smoothing parameters so I don’t tend to find them to be very meaningful. What are meaningful, however, are -prediction-based plots of the smooth functions. For example, here is the -estimated response of the underlying signal to -productivity:

-
plot(mod, type = 'smooths', trend_effects = TRUE)
-

-

And here are the estimated relationships between the sensor -observations and the temperature covariate:

-
plot(mod, type = 'smooths')
-

-

All main effects can be quickly plotted with -conditional_effects:

-
plot(conditional_effects(mod, type = 'link'), ask = FALSE)
-

+prediction-based plots of the smooth functions. All main effects can be +quickly plotted with conditional_effects:

+
conditional_effects(mod, type = 'link')
+

conditional_effects is simply a wrapper for the more flexible plot_predictions function from the marginaleffects package. We can get more useful plots of these effects using this function for further customisation:

-
plot_predictions(mod, 
-                 condition = c('temperature', 'series', 'series'),
-                 points = 0.5) +
-  theme(legend.position = 'none')
-

+
require(marginaleffects)
+#> Loading required package: marginaleffects
+plot_predictions(mod, 
+                 condition = c('temperature', 'series', 'series'),
+                 points = 0.5) +
+  theme(legend.position = 'none')
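The same marginaleffects machinery offers other views of these effects. For example, a sketch of how the conditional slope of `temperature` varies across sensors (assuming the marginaleffects slope methods apply to mvgam objects as the prediction methods do):

```r
# How the instantaneous effect of temperature changes across sensors
plot_slopes(mod,
            variables = 'temperature',
            condition = c('temperature', 'series')) +
  theme(legend.position = 'none')
```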
+

We have successfully estimated effects, some of them nonlinear, that impact the hidden process AND the observations. All in a single joint model. But there can always be challenges with these models, particularly when estimating both process and observation error at the -same time. For example, a pairs plot for the observation -error for sensor 1 and the hidden process error shows some strong -correlations that we might want to deal with by using a more structured -prior:

-
pairs(mod, variable = c('sigma[1]', 'sigma_obs[1]'))
-

-

But we will leave the model as-is for this example

+same time.

Recovering the hidden signal

@@ -941,12 +919,12 @@

Recovering the hidden signal

plot using the mvgam S3 method for plot. We can also overlay the true values for the hidden signal, which shows that our model has done a good job of recovering it:

-
plot(mod, type = 'trend')
-
-# Overlay the true simulated signal
-points(true_signal, pch = 16, cex = 1, col = 'white')
-points(true_signal, pch = 16, cex = 0.8)
-

+
plot(mod, type = 'trend')
+
+# Overlay the true simulated signal
+points(true_signal, pch = 16, cex = 1, col = 'white')
+points(true_signal, pch = 16, cex = 0.8)
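As a further check that the model is not leaving structure unexplained, randomized quantile residuals for each sensor's observation model are available through the same S3 `plot` method:

```r
# Residual diagnostics for the first sensor
plot(mod, type = 'residuals', series = 1)
```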
+

@@ -954,7 +932,7 @@

Further reading

The following papers and resources offer a lot of useful material about other types of State-Space models and how they can be applied in practice:

-

Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. “MARSS: +

Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. “MARSS: multivariate autoregressive state-space models for analyzing time-series data.R Journal. 4.1 (2012): 11.

Ward, Eric J., et al. “Inferring diff --git a/doc/time_varying_effects.R b/doc/time_varying_effects.R index f5415f2b..18caf805 100644 --- a/doc/time_varying_effects.R +++ b/doc/time_varying_effects.R @@ -7,10 +7,11 @@ knitr::opts_chunk$set( eval = NOT_CRAN ) + ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -19,6 +20,7 @@ library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) + ## ----------------------------------------------------------------------------- set.seed(1111) N <- 200 @@ -27,15 +29,18 @@ beta_temp <- mvgam:::sim_gp(rnorm(1), rho_gp = 10, h = N) + 0.5 + ## ----fig.alt = "Simulating time-varying effects in mvgam and R"--------------- plot(beta_temp, type = 'l', lwd = 3, bty = 'l', xlab = 'Time', ylab = 'Coefficient', col = 'darkred') box(bty = 'l', lwd = 2) + ## ----------------------------------------------------------------------------- temp <- rnorm(N, sd = 1) + ## ----fig.alt = "Simulating time-varying effects in mvgam and R"--------------- out <- rnorm(N, mean = 4 + beta_temp * temp, sd = 0.25) @@ -45,29 +50,30 @@ plot(out, type = 'l', lwd = 3, col = 'darkred') box(bty = 'l', lwd = 2) + ## ----------------------------------------------------------------------------- data <- data.frame(out, temp, time) data_train <- data[1:190,] data_test <- data[191:200,] -## ----------------------------------------------------------------------------- -plot_mvgam_series(data = data_train, newdata = data_test, y = 'out') ## ----include=FALSE------------------------------------------------------------ mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), -# family = gaussian(), -# data = data_train) +## mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), +## family = gaussian(), +## data = data_train, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(mod, include_betas = FALSE) -## ----------------------------------------------------------------------------- -plot(mod, type = 'smooths') ## ----------------------------------------------------------------------------- plot_mvgam_smooth(mod, smooth = 1, newdata = data) @@ -75,7 +81,9 @@ abline(v = 190, lty = 'dashed', lwd = 2) lines(beta_temp, lwd = 2.5, col = 'white') lines(beta_temp, lwd = 2) + ## ----------------------------------------------------------------------------- +require(marginaleffects) range_round = function(x){ round(range(x, na.rm = TRUE), 2) } @@ -85,44 +93,42 @@ plot_predictions(mod, by = c('time', 'temp', 'temp'), type = 'link') + ## ----------------------------------------------------------------------------- fc <- forecast(mod, newdata = data_test) plot(fc) + ## ----include=FALSE------------------------------------------------------------ mod <- mvgam(out ~ dynamic(temp, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# mod <- mvgam(out ~ dynamic(temp, k = 40), -# family = gaussian(), -# data = data_train) +## mod <- mvgam(out ~ dynamic(temp, k = 40), +## family = gaussian(), +## data = data_train, +## 
silent = 2) + ## ----------------------------------------------------------------------------- summary(mod, include_betas = FALSE) + ## ----------------------------------------------------------------------------- plot_mvgam_smooth(mod, smooth = 1, newdata = data) abline(v = 190, lty = 'dashed', lwd = 2) lines(beta_temp, lwd = 2.5, col = 'white') lines(beta_temp, lwd = 2) -## ----------------------------------------------------------------------------- -plot_predictions(mod, - newdata = datagrid(time = unique, - temp = range_round), - by = c('time', 'temp', 'temp'), - type = 'link') - -## ----------------------------------------------------------------------------- -fc <- forecast(mod, newdata = data_test) -plot(fc) ## ----------------------------------------------------------------------------- load(url('https://github.com/atsa-es/MARSS/raw/master/data/SalmonSurvCUI.rda')) dplyr::glimpse(SalmonSurvCUI) + ## ----------------------------------------------------------------------------- SalmonSurvCUI %>% # create a time variable @@ -137,57 +143,74 @@ SalmonSurvCUI %>% # convert logit-transformed survival back to proportional dplyr::mutate(survival = plogis(logit.s)) -> model_data + ## ----------------------------------------------------------------------------- dplyr::glimpse(model_data) + ## ----------------------------------------------------------------------------- plot_mvgam_series(data = model_data, y = 'survival') + ## ----include = FALSE---------------------------------------------------------- mod0 <- mvgam(formula = survival ~ 1, - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) + ## ----eval = FALSE------------------------------------------------------------- -# mod0 <- mvgam(formula = survival ~ 1, -# trend_model = 'RW', -# family = betar(), -# data = model_data) +## mod0 <- mvgam(formula = survival ~ 1, +## trend_model = AR(), +## noncentred = TRUE, +## family = betar(), +## data = model_data, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(mod0) + ## ----------------------------------------------------------------------------- plot(mod0, type = 'trend') -## ----------------------------------------------------------------------------- -plot(mod0, type = 'forecast') ## ----include=FALSE------------------------------------------------------------ mod1 <- mvgam(formula = survival ~ 1, trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), data = model_data, - adapt_delta = 0.99) + adapt_delta = 0.99, + silent = 2) + ## ----eval=FALSE--------------------------------------------------------------- -# mod1 <- mvgam(formula = survival ~ 1, -# trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), -# trend_model = 'RW', -# family = betar(), -# data = model_data) +## mod1 <- mvgam(formula = survival ~ 1, +## trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), +## trend_model = AR(), +## noncentred = TRUE, +## family = betar(), +## data = model_data, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(mod1, include_betas = FALSE) + ## ----------------------------------------------------------------------------- plot(mod1, type = 'trend') + ## ----------------------------------------------------------------------------- plot(mod1, type = 'forecast') + ## 
----------------------------------------------------------------------------- # Extract estimates of the process error 'sigma' for each model mod0_sigma <- as.data.frame(mod0, variable = 'sigma', regex = TRUE) %>% @@ -197,34 +220,35 @@ mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>% sigmas <- rbind(mod0_sigma, mod1_sigma) # Plot using ggplot2 -library(ggplot2) +require(ggplot2) ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) + geom_density(alpha = 0.3, colour = NA) + coord_flip() -## ----------------------------------------------------------------------------- -plot(mod1, type = 'smooth', trend_effects = TRUE) ## ----------------------------------------------------------------------------- -plot_predictions(mod1, newdata = datagrid(CUI.apr = range_round, - time = unique), - by = c('time', 'CUI.apr', 'CUI.apr')) +plot(mod1, type = 'smooths', trend_effects = TRUE) + ## ----------------------------------------------------------------------------- loo_compare(mod0, mod1) + ## ----include=FALSE------------------------------------------------------------ lfo_mod0 <- lfo_cv(mod0, min_t = 30) lfo_mod1 <- lfo_cv(mod1, min_t = 30) + ## ----eval=FALSE--------------------------------------------------------------- -# lfo_mod0 <- lfo_cv(mod0, min_t = 30) -# lfo_mod1 <- lfo_cv(mod1, min_t = 30) +## lfo_mod0 <- lfo_cv(mod0, min_t = 30) +## lfo_mod1 <- lfo_cv(mod1, min_t = 30) + ## ----------------------------------------------------------------------------- sum(lfo_mod0$elpds) sum(lfo_mod1$elpds) + ## ----fig.alt = "Comparing forecast skill for dynamic beta regression models in mvgam and R"---- plot(x = 1:length(lfo_mod0$elpds) + 30, y = lfo_mod0$elpds - lfo_mod1$elpds, diff --git a/doc/time_varying_effects.Rmd b/doc/time_varying_effects.Rmd index 271a907d..ec19a933 100644 --- a/doc/time_varying_effects.Rmd +++ b/doc/time_varying_effects.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -80,24 +80,20 @@ data_train <- data[1:190,] data_test <- data[191:200,] ``` -Plot the series -```{r} -plot_mvgam_series(data = data_train, newdata = data_test, y = 'out') -``` - - ### The `dynamic()` function Time-varying coefficients can be fairly easily set up using the `s()` or `gp()` wrapper functions in `mvgam` formulae by fitting a nonlinear effect of `time` and using the covariate of interest as the numeric `by` variable (see `?mgcv::s` or `?brms::gp` for more details). The `dynamic()` formula wrapper offers a way to automate this process, and will eventually allow for a broader variety of time-varying effects (such as random walk or AR processes). Depending on the arguments that are specified to `dynamic`, it will either set up a low-rank GP smooth function using `s()` with `bs = 'gp'` and a fixed value of the length scale parameter $\rho$, or it will set up a Hilbert space approximate GP using the `gp()` function with `c=5/4` so that $\rho$ is estimated (see `?dynamic` for more details). In this first example we will use the `s()` option, and will mis-specify the $\rho$ parameter here as, in practice, it is never known. 
This call to `dynamic()` will set up the following smooth: `s(time, by = temp, bs = "gp", m = c(-2, 8, 2), k = 40)` ```{r, include=FALSE} mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` ```{r, eval=FALSE} mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` Inspect the model summary, which shows how the `dynamic()` wrapper was used to construct a low-rank Gaussian Process smooth function: @@ -105,12 +101,7 @@ Inspect the model summary, which shows how the `dynamic()` wrapper was used to c summary(mod, include_betas = FALSE) ``` -Because this model used a spline with a `gp` basis, it's smooths can be visualised just like any other `gam`. Plot the estimated time-varying coefficient for the in-sample training period -```{r} -plot(mod, type = 'smooths') -``` - -We can also plot the estimates for the in-sample and out-of-sample periods to see how the Gaussian Process function produces sensible smooth forecasts. Here we supply the full dataset to the `newdata` argument in `plot_mvgam_smooth` to inspect posterior forecasts of the time-varying smooth function. Overlay the true simulated function to see that the model has adequately estimated it's dynamics in both the training and testing data partitions +Because this model used a spline with a `gp` basis, it's smooths can be visualised just like any other `gam`. We can plot the estimates for the in-sample and out-of-sample periods to see how the Gaussian Process function produces sensible smooth forecasts. Here we supply the full dataset to the `newdata` argument in `plot_mvgam_smooth` to inspect posterior forecasts of the time-varying smooth function. 
Overlay the true simulated function to see that the model has adequately estimated it's dynamics in both the training and testing data partitions ```{r} plot_mvgam_smooth(mod, smooth = 1, newdata = data) abline(v = 190, lty = 'dashed', lwd = 2) @@ -120,6 +111,7 @@ lines(beta_temp, lwd = 2) We can also use `plot_predictions` from the `marginaleffects` package to visualise the time-varying coefficient for what the effect would be estimated to be at different values of $temperature$: ```{r} +require(marginaleffects) range_round = function(x){ round(range(x, na.rm = TRUE), 2) } @@ -140,13 +132,15 @@ The syntax is very similar if we wish to estimate the parameters of the underlyi ```{r include=FALSE} mod <- mvgam(out ~ dynamic(temp, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` ```{r eval=FALSE} mod <- mvgam(out ~ dynamic(temp, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` This model summary now contains estimates for the marginal deviation and length scale parameters of the underlying Gaussian Process function: @@ -162,21 +156,6 @@ lines(beta_temp, lwd = 2.5, col = 'white') lines(beta_temp, lwd = 2) ``` -Both the above plot and the below `plot_predictions()` call show that the effect in this case is similar to what we estimated in the approximate GP smooth model above: -```{r} -plot_predictions(mod, - newdata = datagrid(time = unique, - temp = range_round), - by = c('time', 'temp', 'temp'), - type = 'link') -``` - -Forecasts are also similar: -```{r} -fc <- forecast(mod, newdata = data_test) -plot(fc) -``` - ## Salmon survival example Here we will use openly available data on marine survival of Chinook salmon to illustrate how time-varying effects can be used to improve ecological time series models. [Scheuerell and Williams (2005)](https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1365-2419.2005.00346.x) used a dynamic linear model to examine the relationship between marine survival of Chinook salmon and an index of ocean upwelling strength along the west coast of the USA. The authors hypothesized that stronger upwelling in April should create better growing conditions for phytoplankton, which would then translate into more zooplankton and provide better foraging opportunities for juvenile salmon entering the ocean. The data on survival is measured as a proportional variable over 42 years (1964–2005) and is available in the `MARSS` package: ```{r} @@ -212,19 +191,23 @@ plot_mvgam_series(data = model_data, y = 'survival') ### A State-Space Beta regression -`mvgam` can easily handle data that are bounded at 0 and 1 with a Beta observation model (using the `mgcv` function `betar()`, see `?mgcv::betar` for details). First we will fit a simple State-Space model that uses a Random Walk dynamic process model with no predictors and a Beta observation model: +`mvgam` can easily handle data that are bounded at 0 and 1 with a Beta observation model (using the `mgcv` function `betar()`, see `?mgcv::betar` for details). 
First we will fit a simple State-Space model that uses an AR1 dynamic process model with no predictors and a Beta observation model: ```{r include = FALSE} mod0 <- mvgam(formula = survival ~ 1, - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) ``` ```{r eval = FALSE} mod0 <- mvgam(formula = survival ~ 1, - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) ``` The summary of this model shows good behaviour of the Hamiltonian Monte Carlo sampler and provides useful summaries on the Beta observation model parameters: @@ -237,29 +220,27 @@ A plot of the underlying dynamic component shows how it has easily handled the t plot(mod0, type = 'trend') ``` -Posterior hindcasts are also good and will automatically respect the observational data bounding at 0 and 1: -```{r} -plot(mod0, type = 'forecast') -``` - - ### Including time-varying upwelling effects Now we can increase the complexity of our model by constructing and fitting a State-Space model with a time-varying effect of the coastal upwelling index in addition to the autoregressive dynamics. We again use a Beta observation model to capture the restrictions of our proportional observations, but this time will include a `dynamic()` effect of `CUI.apr` in the latent process model. We do not specify the $\rho$ parameter, instead opting to estimate it using a Hilbert space approximate GP: ```{r include=FALSE} mod1 <- mvgam(formula = survival ~ 1, trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), data = model_data, - adapt_delta = 0.99) + adapt_delta = 0.99, + silent = 2) ``` ```{r eval=FALSE} mod1 <- mvgam(formula = survival ~ 1, trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) ``` The summary for this model now includes estimates for the time-varying GP parameters: @@ -286,25 +267,17 @@ mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>% sigmas <- rbind(mod0_sigma, mod1_sigma) # Plot using ggplot2 -library(ggplot2) +require(ggplot2) ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) + geom_density(alpha = 0.3, colour = NA) + coord_flip() ``` -Why does the process error not need to be as flexible in the second model? Because the estimates of this dynamic process are now informed partly by the time-varying effect of upwelling, which we can visualise on the link scale using `plot()` with `trend_effects = TRUE`: +Why does the process error not need to be as flexible in the second model? Because the estimates of this dynamic process are now informed partly by the time-varying effect of upwelling, which we can visualise on the link scale using `plot()`: ```{r} -plot(mod1, type = 'smooth', trend_effects = TRUE) +plot(mod1, type = 'smooths', trend_effects = TRUE) ``` -Or on the outcome scale, at a range of possible `CUI.apr` values, using `plot_predictions()`: -```{r} -plot_predictions(mod1, newdata = datagrid(CUI.apr = range_round, - time = unique), - by = c('time', 'CUI.apr', 'CUI.apr')) -``` - - ### Comparing model predictive performances A key question when fitting multiple time series models is whether one of them provides better predictions than the other. 
There are several options in `mvgam` for exploring this quantitatively. First, we can compare models based on in-sample approximate leave-one-out cross-validation as implemented in the popular `loo` package: ```{r} @@ -340,7 +313,7 @@ plot(x = 1:length(lfo_mod0$elpds) + 30, abline(h = 0, lty = 'dashed') ``` -A useful exercise to further expand this model would be to think about what kinds of predictors might impact measurement error, which could easily be implemented into the observation formula in `mvgam`. But for now, we will leave the model as-is. +A useful exercise to further expand this model would be to think about what kinds of predictors might impact measurement error, which could easily be implemented into the observation formula in `mvgam()`. But for now, we will leave the model as-is. ## Further reading The following papers and resources offer a lot of useful material about dynamic linear models and how they can be applied / evaluated in practice: diff --git a/doc/time_varying_effects.html b/doc/time_varying_effects.html index 8d85f76b..341e817b 100644 --- a/doc/time_varying_effects.html +++ b/doc/time_varying_effects.html @@ -12,7 +12,7 @@ - + Time-varying effects in mvgam @@ -340,7 +340,7 @@

Time-varying effects in mvgam

Nicholas J Clark

-

2024-04-18

+

2024-07-01

@@ -407,7 +407,7 @@

Simulating time-varying effects

bty = 'l', xlab = 'Time', ylab = 'Coefficient', col = 'darkred') box(bty = 'l', lwd = 2)
-

Simulating time-varying effects in mvgam and R

+

Simulating time-varying effects in mvgam and R

Next we need to simulate the values of the covariate, which we will call temp (to represent \(temperature\)). In this case we just use a standard normal distribution to simulate this covariate:

@@ -422,15 +422,12 @@

Simulating time-varying effects

bty = 'l', xlab = 'Time', ylab = 'Outcome', col = 'darkred') box(bty = 'l', lwd = 2)
-

Simulating time-varying effects in mvgam and R

+

Simulating time-varying effects in mvgam and R

Gather the data into a data.frame for fitting models, and split the data into training and testing folds.

data <- data.frame(out, temp, time)
 data_train <- data[1:190,]
 data_test <- data[191:200,]
-

Plot the series

-
plot_mvgam_series(data = data_train, newdata = data_test, y = 'out')
-

The dynamic() function

@@ -454,184 +451,168 @@

The dynamic() function

practice, it is never known. This call to dynamic() will set up the following smooth: s(time, by = temp, bs = "gp", m = c(-2, 8, 2), k = 40)

-
mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40),
-             family = gaussian(),
-             data = data_train)
+
mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40),
+             family = gaussian(),
+             data = data_train,
+             silent = 2)
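Equivalently, the same model can be written out by hand without the `dynamic()` wrapper. This is only a sketch, assuming the smooth expansion quoted above; `mod_manual` is a hypothetical name:

```{r, eval=FALSE}
# Hand-written equivalent of the dynamic() call above (sketch only);
# bs = 'gp' with m = c(-2, 8, 2) fixes rho at 8 for a stationary GP smooth
mod_manual <- mvgam(out ~ s(time, by = temp, bs = 'gp',
                            m = c(-2, 8, 2), k = 40),
                    family = gaussian(),
                    data = data_train,
                    silent = 2)
```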

Inspect the model summary, which shows how the dynamic() wrapper was used to construct a low-rank Gaussian Process smooth function:

-
summary(mod, include_betas = FALSE)
-#> GAM formula:
-#> out ~ s(time, by = temp, bs = "gp", m = c(-2, 8, 2), k = 40)
-#> <environment: 0x000001d9c44e71e0>
-#> 
-#> Family:
-#> gaussian
-#> 
-#> Link function:
-#> identity
-#> 
-#> Trend model:
-#> None
-#> 
-#> N series:
-#> 1 
-#> 
-#> N timepoints:
-#> 190 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> Observation error parameter estimates:
-#>              2.5%  50% 97.5% Rhat n_eff
-#> sigma_obs[1] 0.23 0.25  0.28    1  2222
-#> 
-#> GAM coefficient (beta) estimates:
-#>             2.5% 50% 97.5% Rhat n_eff
-#> (Intercept)    4   4   4.1    1  2893
-#> 
-#> Approximate significance of GAM smooths:
-#>              edf Ref.df    F p-value    
-#> s(time):temp  14     40 72.4  <2e-16 ***
-#> ---
-#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhat looks reasonable for all parameters
-#> 0 of 2000 iterations ended with a divergence (0%)
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Thu Apr 18 8:39:49 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+
summary(mod, include_betas = FALSE)
+#> GAM formula:
+#> out ~ s(time, by = temp, bs = "gp", m = c(-2, 8, 2), k = 40)
+#> <environment: 0x0000014d37f0f110>
+#> 
+#> Family:
+#> gaussian
+#> 
+#> Link function:
+#> identity
+#> 
+#> Trend model:
+#> None
+#> 
+#> N series:
+#> 1 
+#> 
+#> N timepoints:
+#> 190 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> Observation error parameter estimates:
+#>              2.5%  50% 97.5% Rhat n_eff
+#> sigma_obs[1] 0.23 0.25  0.28    1  2026
+#> 
+#> GAM coefficient (beta) estimates:
+#>             2.5% 50% 97.5% Rhat n_eff
+#> (Intercept)    4   4   4.1    1  2640
+#> 
+#> Approximate significance of GAM smooths:
+#>               edf Ref.df Chi.sq p-value    
+#> s(time):temp 15.4     40    173  <2e-16 ***
+#> ---
+#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 0 of 2000 iterations ended with a divergence (0%)
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:35:21 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

Because this model used a spline with a gp basis, its
-smooths can be visualised just like any other gam. Plot the
-estimated time-varying coefficient for the in-sample training period

-
plot(mod, type = 'smooths')
-

-

We can also plot the estimates for the in-sample and out-of-sample -periods to see how the Gaussian Process function produces sensible -smooth forecasts. Here we supply the full dataset to the -newdata argument in plot_mvgam_smooth to -inspect posterior forecasts of the time-varying smooth function. Overlay -the true simulated function to see that the model has adequately -estimated it’s dynamics in both the training and testing data -partitions

-
plot_mvgam_smooth(mod, smooth = 1, newdata = data)
-abline(v = 190, lty = 'dashed', lwd = 2)
-lines(beta_temp, lwd = 2.5, col = 'white')
-lines(beta_temp, lwd = 2)
-

+smooths can be visualised just like any other gam. We can
+plot the estimates for the in-sample and out-of-sample periods to see
+how the Gaussian Process function produces sensible smooth forecasts.
+Here we supply the full dataset to the newdata argument in
+plot_mvgam_smooth to inspect posterior forecasts of the
+time-varying smooth function. Overlay the true simulated function to see
+that the model has adequately estimated its dynamics in both the
+training and testing data partitions

+
plot_mvgam_smooth(mod, smooth = 1, newdata = data)
+abline(v = 190, lty = 'dashed', lwd = 2)
+lines(beta_temp, lwd = 2.5, col = 'white')
+lines(beta_temp, lwd = 2)
+

We can also use plot_predictions from the
marginaleffects package to visualise the time-varying
coefficient, showing what the effect would be estimated to be at
different values of \(temperature\):

-
range_round = function(x){
-  round(range(x, na.rm = TRUE), 2)
-}
-plot_predictions(mod, 
-                 newdata = datagrid(time = unique,
-                                    temp = range_round),
-                 by = c('time', 'temp', 'temp'),
-                 type = 'link')
-

+
require(marginaleffects)
+#> Loading required package: marginaleffects
+range_round = function(x){
+  round(range(x, na.rm = TRUE), 2)
+}
+plot_predictions(mod, 
+                 newdata = datagrid(time = unique,
+                                    temp = range_round),
+                 by = c('time', 'temp', 'temp'),
+                 type = 'link')
+

This results in sensible forecasts of the observations as well

-
fc <- forecast(mod, newdata = data_test)
-plot(fc)
-

-
#> Out of sample CRPS:
-#> [1] 1.280347
+
fc <- forecast(mod, newdata = data_test)
+plot(fc)
+#> Out of sample CRPS:
+#> 1.30674285292277
+

The syntax is very similar if we wish to estimate the parameters of
the underlying Gaussian Process, this time using a Hilbert space
approximation. We simply omit the rho argument in
dynamic to make this happen. This will set up a call similar
to gp(time, by = temp, c = 5/4, k = 40).

-
mod <- mvgam(out ~ dynamic(temp, k = 40),
-             family = gaussian(),
-             data = data_train)
+
mod <- mvgam(out ~ dynamic(temp, k = 40),
+             family = gaussian(),
+             data = data_train,
+             silent = 2)
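As a sketch (assuming the expansion reported by `summary()` below), the same model could also be specified explicitly with `gp()`; `mod_gp` is a hypothetical name:

```{r, eval=FALSE}
# Explicit Hilbert space GP version of dynamic(temp, k = 40) (sketch only);
# c = 5/4 sets the boundary factor and rho is now estimated
mod_gp <- mvgam(out ~ gp(time, by = temp, c = 5/4, k = 40, scale = TRUE),
                family = gaussian(),
                data = data_train,
                silent = 2)
```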

This model summary now contains estimates for the marginal deviation and length scale parameters of the underlying Gaussian Process function:

-
summary(mod, include_betas = FALSE)
-#> GAM formula:
-#> out ~ gp(time, by = temp, c = 5/4, k = 40, scale = TRUE)
-#> <environment: 0x000001d9c44e71e0>
-#> 
-#> Family:
-#> gaussian
-#> 
-#> Link function:
-#> identity
-#> 
-#> Trend model:
-#> None
-#> 
-#> N series:
-#> 1 
-#> 
-#> N timepoints:
-#> 190 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> Observation error parameter estimates:
-#>              2.5%  50% 97.5% Rhat n_eff
-#> sigma_obs[1] 0.24 0.26  0.29    1  2151
-#> 
-#> GAM coefficient (beta) estimates:
-#>             2.5% 50% 97.5% Rhat n_eff
-#> (Intercept)    4   4   4.1    1  2989
-#> 
-#> GAM gp term marginal deviation (alpha) and length scale (rho) estimates:
-#>                      2.5%   50% 97.5% Rhat n_eff
-#> alpha_gp(time):temp 0.640 0.890 1.400 1.01   745
-#> rho_gp(time):temp   0.028 0.053 0.069 1.00   888
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhat looks reasonable for all parameters
-#> 1 of 2000 iterations ended with a divergence (0.05%)
-#>  *Try running with larger adapt_delta to remove the divergences
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Thu Apr 18 8:41:07 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+
summary(mod, include_betas = FALSE)
+#> GAM formula:
+#> out ~ gp(time, by = temp, c = 5/4, k = 40, scale = TRUE)
+#> <environment: 0x0000014d37f0f110>
+#> 
+#> Family:
+#> gaussian
+#> 
+#> Link function:
+#> identity
+#> 
+#> Trend model:
+#> None
+#> 
+#> N series:
+#> 1 
+#> 
+#> N timepoints:
+#> 190 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> Observation error parameter estimates:
+#>              2.5%  50% 97.5% Rhat n_eff
+#> sigma_obs[1] 0.24 0.26   0.3    1  2183
+#> 
+#> GAM coefficient (beta) estimates:
+#>             2.5% 50% 97.5% Rhat n_eff
+#> (Intercept)    4   4   4.1    1  2733
+#> 
+#> GAM gp term marginal deviation (alpha) and length scale (rho) estimates:
+#>                      2.5%   50% 97.5% Rhat n_eff
+#> alpha_gp(time):temp 0.620 0.890 1.400 1.01   539
+#> rho_gp(time):temp   0.026 0.053 0.069 1.00   628
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 0 of 2000 iterations ended with a divergence (0%)
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:36:09 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

Effects for gp() terms can also be plotted as smooths:

-
plot_mvgam_smooth(mod, smooth = 1, newdata = data)
-abline(v = 190, lty = 'dashed', lwd = 2)
-lines(beta_temp, lwd = 2.5, col = 'white')
-lines(beta_temp, lwd = 2)
-

-

Both the above plot and the below plot_predictions() -call show that the effect in this case is similar to what we estimated -in the approximate GP smooth model above:

-
plot_predictions(mod, 
-                 newdata = datagrid(time = unique,
-                                    temp = range_round),
-                 by = c('time', 'temp', 'temp'),
-                 type = 'link')
-

-

Forecasts are also similar:

-
fc <- forecast(mod, newdata = data_test)
-plot(fc)
-

-
#> Out of sample CRPS:
-#> [1] 1.667521
+
plot_mvgam_smooth(mod, smooth = 1, newdata = data)
+abline(v = 190, lty = 'dashed', lwd = 2)
+lines(beta_temp, lwd = 2.5, col = 'white')
+lines(beta_temp, lwd = 2)
+

@@ -648,13 +629,13 @@

Salmon survival example

salmon entering the ocean. The data on survival are measured as a
proportional variable over 42 years (1964–2005) and are available in the
MARSS package:

-
load(url('https://github.com/atsa-es/MARSS/raw/master/data/SalmonSurvCUI.rda'))
-dplyr::glimpse(SalmonSurvCUI)
-#> Rows: 42
-#> Columns: 3
-#> $ year    <int> 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 19…
-#> $ logit.s <dbl> -3.46, -3.32, -3.58, -3.03, -3.61, -3.35, -3.93, -4.19, -4.82,…
-#> $ CUI.apr <int> 57, 5, 43, 11, 47, -21, 25, -2, -1, 43, 2, 35, 0, 1, -1, 6, -7…
+
load(url('https://github.com/atsa-es/MARSS/raw/master/data/SalmonSurvCUI.rda'))
+dplyr::glimpse(SalmonSurvCUI)
+#> Rows: 42
+#> Columns: 3
+#> $ year    <int> 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 19…
+#> $ logit.s <dbl> -3.46, -3.32, -3.58, -3.03, -3.61, -3.35, -3.93, -4.19, -4.82,…
+#> $ CUI.apr <int> 57, 5, 43, 11, 47, -21, 25, -2, -1, 43, 2, 35, 0, 1, -1, 6, -7…

First we need to prepare the data for modelling. The variable CUI.apr will be standardized to make it easier for the sampler to estimate underlying GP parameters for the time-varying @@ -663,104 +644,103 @@

Salmon survival example

time series packages cannot handle proportional data). As usual, we also need to create a time indicator and a series indicator for working in mvgam:

-
SalmonSurvCUI %>%
-  # create a time variable
-  dplyr::mutate(time = dplyr::row_number()) %>%
-
-  # create a series variable
-  dplyr::mutate(series = as.factor('salmon')) %>%
-
-  # z-score the covariate CUI.apr
-  dplyr::mutate(CUI.apr = as.vector(scale(CUI.apr))) %>%
-
-  # convert logit-transformed survival back to proportional
-  dplyr::mutate(survival = plogis(logit.s)) -> model_data
+
SalmonSurvCUI %>%
+  # create a time variable
+  dplyr::mutate(time = dplyr::row_number()) %>%
+
+  # create a series variable
+  dplyr::mutate(series = as.factor('salmon')) %>%
+
+  # z-score the covariate CUI.apr
+  dplyr::mutate(CUI.apr = as.vector(scale(CUI.apr))) %>%
+
+  # convert logit-transformed survival back to proportional
+  dplyr::mutate(survival = plogis(logit.s)) -> model_data

Inspect the data

-
dplyr::glimpse(model_data)
-#> Rows: 42
-#> Columns: 6
-#> $ year     <int> 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1…
-#> $ logit.s  <dbl> -3.46, -3.32, -3.58, -3.03, -3.61, -3.35, -3.93, -4.19, -4.82…
-#> $ CUI.apr  <dbl> 2.37949804, 0.03330223, 1.74782994, 0.30401713, 1.92830654, -…
-#> $ time     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
-#> $ series   <fct> salmon, salmon, salmon, salmon, salmon, salmon, salmon, salmo…
-#> $ survival <dbl> 0.030472033, 0.034891409, 0.027119717, 0.046088827, 0.0263393…
+
dplyr::glimpse(model_data)
+#> Rows: 42
+#> Columns: 6
+#> $ year     <int> 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1…
+#> $ logit.s  <dbl> -3.46, -3.32, -3.58, -3.03, -3.61, -3.35, -3.93, -4.19, -4.82…
+#> $ CUI.apr  <dbl> 2.37949804, 0.03330223, 1.74782994, 0.30401713, 1.92830654, -…
+#> $ time     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
+#> $ series   <fct> salmon, salmon, salmon, salmon, salmon, salmon, salmon, salmo…
+#> $ survival <dbl> 0.030472033, 0.034891409, 0.027119717, 0.046088827, 0.0263393…

Plot features of the outcome variable, which shows that it is a proportional variable with particular restrictions that we want to model:

-
plot_mvgam_series(data = model_data, y = 'survival')
-

+
plot_mvgam_series(data = model_data, y = 'survival')
+

A State-Space Beta regression

mvgam can easily handle data that are bounded at 0 and 1 with a Beta observation model (using the mgcv function betar(), see ?mgcv::betar for details). First -we will fit a simple State-Space model that uses a Random Walk dynamic -process model with no predictors and a Beta observation model:

-
mod0 <- mvgam(formula = survival ~ 1,
-             trend_model = 'RW',
-             family = betar(),
-             data = model_data)
+we will fit a simple State-Space model that uses an AR1 dynamic process +model with no predictors and a Beta observation model:

+
mod0 <- mvgam(formula = survival ~ 1,
+             trend_model = AR(),
+             noncentred = TRUE,
+             family = betar(),
+             data = model_data,
+             silent = 2)
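Note that the default priors `mvgam` would assign to a configuration like this can be inspected before fitting; a minimal sketch using `get_mvgam_priors()` with the same model arguments:

```{r, eval=FALSE}
# Sketch: inspect the default priors for this State-Space Beta model
# before fitting (same formula, trend model and family as mod0)
def_priors <- get_mvgam_priors(formula = survival ~ 1,
                               trend_model = AR(),
                               family = betar(),
                               data = model_data)
def_priors
```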

The summary of this model shows good behaviour of the Hamiltonian Monte Carlo sampler and provides useful summaries on the Beta observation model parameters:

-
summary(mod0)
-#> GAM formula:
-#> survival ~ 1
-#> <environment: 0x000001d9c44e71e0>
-#> 
-#> Family:
-#> beta
-#> 
-#> Link function:
-#> logit
-#> 
-#> Trend model:
-#> RW
-#> 
-#> N series:
-#> 1 
-#> 
-#> N timepoints:
-#> 42 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> Observation precision parameter estimates:
-#>        2.5% 50% 97.5% Rhat n_eff
-#> phi[1]  160 310   580 1.01   612
-#> 
-#> GAM coefficient (beta) estimates:
-#>             2.5%  50% 97.5% Rhat n_eff
-#> (Intercept) -4.2 -3.4  -2.4 1.02   125
-#> 
-#> Latent trend variance estimates:
-#>          2.5%  50% 97.5% Rhat n_eff
-#> sigma[1] 0.18 0.33  0.55 1.02   276
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhat looks reasonable for all parameters
-#> 0 of 2000 iterations ended with a divergence (0%)
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Thu Apr 18 8:42:35 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+
summary(mod0)
+#> GAM formula:
+#> survival ~ 1
+#> <environment: 0x0000014d37f0f110>
+#> 
+#> Family:
+#> beta
+#> 
+#> Link function:
+#> logit
+#> 
+#> Trend model:
+#> AR()
+#> 
+#> N series:
+#> 1 
+#> 
+#> N timepoints:
+#> 42 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> Observation precision parameter estimates:
+#>        2.5% 50% 97.5% Rhat n_eff
+#> phi[1]   95 280   630 1.02   271
+#> 
+#> GAM coefficient (beta) estimates:
+#>             2.5%  50% 97.5% Rhat n_eff
+#> (Intercept) -4.7 -4.4    -4    1   625
+#> 
+#> Latent trend parameter AR estimates:
+#>            2.5%  50% 97.5% Rhat n_eff
+#> ar1[1]   -0.230 0.67  0.98 1.01   415
+#> sigma[1]  0.073 0.47  0.72 1.02   213
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 0 of 2000 iterations ended with a divergence (0%)
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:36:57 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

A plot of the underlying dynamic component shows how it has easily handled the temporal evolution of the time series:

-
plot(mod0, type = 'trend')
-

-

Posterior hindcasts are also good and will automatically respect the -observational data bounding at 0 and 1:

-
plot(mod0, type = 'forecast')
-

+
plot(mod0, type = 'trend')
+

Including time-varying upwelling effects

@@ -772,110 +752,108 @@

Including time-varying upwelling effects

of CUI.apr in the latent process model. We do not specify the \(\rho\) parameter, instead opting to estimate it using a Hilbert space approximate GP:

-
mod1 <- mvgam(formula = survival ~ 1,
-              trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE),
-              trend_model = 'RW',
-              family = betar(),
-              data = model_data)
+
mod1 <- mvgam(formula = survival ~ 1,
+              trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE),
+              trend_model = AR(),
+              noncentred = TRUE,
+              family = betar(),
+              data = model_data,
+              silent = 2)

The summary for this model now includes estimates for the time-varying GP parameters:

-
summary(mod1, include_betas = FALSE)
-#> GAM observation formula:
-#> survival ~ 1
-#> <environment: 0x000001d9c44e71e0>
-#> 
-#> GAM process formula:
-#> ~dynamic(CUI.apr, k = 25, scale = FALSE)
-#> <environment: 0x000001d9c44e71e0>
-#> 
-#> Family:
-#> beta
-#> 
-#> Link function:
-#> logit
-#> 
-#> Trend model:
-#> RW
-#> 
-#> N process models:
-#> 1 
-#> 
-#> N series:
-#> 1 
-#> 
-#> N timepoints:
-#> 42 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> Observation precision parameter estimates:
-#>        2.5% 50% 97.5% Rhat n_eff
-#> phi[1]  190 360   670    1   858
-#> 
-#> GAM observation model coefficient (beta) estimates:
-#>             2.5%  50% 97.5% Rhat n_eff
-#> (Intercept) -4.1 -3.2  -2.2 1.07    64
-#> 
-#> Process error parameter estimates:
-#>          2.5%  50% 97.5% Rhat n_eff
-#> sigma[1] 0.18 0.31  0.51 1.02   274
-#> 
-#> GAM process model gp term marginal deviation (alpha) and length scale (rho) estimates:
-#>                                2.5%  50% 97.5% Rhat n_eff
-#> alpha_gp_time_byCUI_apr_trend 0.028 0.32   1.5 1.02   205
-#> rho_gp_time_byCUI_apr_trend   1.400 6.50  40.0 1.02   236
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhats above 1.05 found for 30 parameters
-#>  *Diagnose further to investigate why the chains have not mixed
-#> 89 of 2000 iterations ended with a divergence (4.45%)
-#>  *Try running with larger adapt_delta to remove the divergences
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#> 
-#> Samples were drawn using NUTS(diag_e) at Thu Apr 18 8:44:05 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+
summary(mod1, include_betas = FALSE)
+#> GAM observation formula:
+#> survival ~ 1
+#> <environment: 0x0000014d37f0f110>
+#> 
+#> GAM process formula:
+#> ~dynamic(CUI.apr, k = 25, scale = FALSE)
+#> <environment: 0x0000014d37f0f110>
+#> 
+#> Family:
+#> beta
+#> 
+#> Link function:
+#> logit
+#> 
+#> Trend model:
+#> AR()
+#> 
+#> N process models:
+#> 1 
+#> 
+#> N series:
+#> 1 
+#> 
+#> N timepoints:
+#> 42 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> Observation precision parameter estimates:
+#>        2.5% 50% 97.5% Rhat n_eff
+#> phi[1]  160 350   690    1   557
+#> 
+#> GAM observation model coefficient (beta) estimates:
+#>             2.5% 50% 97.5% Rhat n_eff
+#> (Intercept) -4.7  -4  -2.6    1   331
+#> 
+#> Process model AR parameter estimates:
+#>        2.5%  50% 97.5% Rhat n_eff
+#> ar1[1] 0.46 0.89  0.99 1.01   364
+#> 
+#> Process error parameter estimates:
+#>          2.5%  50% 97.5% Rhat n_eff
+#> sigma[1] 0.18 0.35  0.58    1   596
+#> 
+#> GAM process model gp term marginal deviation (alpha) and length scale (rho) estimates:
+#>                               2.5% 50% 97.5% Rhat n_eff
+#> alpha_gp_time_byCUI_apr_trend 0.02 0.3   1.2    1   760
+#> rho_gp_time_byCUI_apr_trend   1.30 5.5  28.0    1   674
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 79 of 2000 iterations ended with a divergence (3.95%)
+#>  *Try running with larger adapt_delta to remove the divergences
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:37:32 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

The estimates for the underlying dynamic process, and for the hindcasts, haven’t changed much:

-
plot(mod1, type = 'trend')
-

-
plot(mod1, type = 'forecast')
-

+
plot(mod1, type = 'trend')
+

+
plot(mod1, type = 'forecast')
+

But the process error parameter \(\sigma\) is slightly smaller for this model than for the first model:

-
# Extract estimates of the process error 'sigma' for each model
-mod0_sigma <- as.data.frame(mod0, variable = 'sigma', regex = TRUE) %>%
-  dplyr::mutate(model = 'Mod0')
-mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>%
-  dplyr::mutate(model = 'Mod1')
-sigmas <- rbind(mod0_sigma, mod1_sigma)
-
-# Plot using ggplot2
-library(ggplot2)
-ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) +
-  geom_density(alpha = 0.3, colour = NA) +
-  coord_flip()
-

+
# Extract estimates of the process error 'sigma' for each model
+mod0_sigma <- as.data.frame(mod0, variable = 'sigma', regex = TRUE) %>%
+  dplyr::mutate(model = 'Mod0')
+mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>%
+  dplyr::mutate(model = 'Mod1')
+sigmas <- rbind(mod0_sigma, mod1_sigma)
+
+# Plot using ggplot2
+require(ggplot2)
+ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) +
+  geom_density(alpha = 0.3, colour = NA) +
+  coord_flip()
+

Why does the process error not need to be as flexible in the second model? Because the estimates of this dynamic process are now informed partly by the time-varying effect of upwelling, which we can visualise -on the link scale using plot() with -trend_effects = TRUE:

-
plot(mod1, type = 'smooth', trend_effects = TRUE)
-

-

Or on the outcome scale, at a range of possible CUI.apr -values, using plot_predictions():

-
plot_predictions(mod1, newdata = datagrid(CUI.apr = range_round,
-                                          time = unique),
-                 by = c('time', 'CUI.apr', 'CUI.apr'))
-

+on the link scale using plot():

+
plot(mod1, type = 'smooths', trend_effects = TRUE)
+

Comparing model predictive performances

@@ -885,13 +863,13 @@

Comparing model predictive performances

First, we can compare models based on in-sample approximate leave-one-out cross-validation as implemented in the popular loo package:

-
loo_compare(mod0, mod1)
-#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
-
-#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
-#>      elpd_diff se_diff
-#> mod1  0.0       0.0   
-#> mod0 -2.3       1.6
+
loo_compare(mod0, mod1)
+#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
+
+#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
+#>      elpd_diff se_diff
+#> mod1  0.0       0.0   
+#> mod0 -6.5       2.7

The second model has the larger Expected Log Predictive Density (ELPD), meaning that it is slightly favoured over the simpler model that did not include the time-varying upwelling effect. However, the two @@ -906,30 +884,30 @@

Comparing model predictive performances

sampling to reweight posterior predictions, acting as a kind of particle filter so that we don’t need to refit the model too often (you can read more about how this process works in Bürkner et al. 2020).

-
lfo_mod0 <- lfo_cv(mod0, min_t = 30)
-lfo_mod1 <- lfo_cv(mod1, min_t = 30)
+
lfo_mod0 <- lfo_cv(mod0, min_t = 30)
+lfo_mod1 <- lfo_cv(mod1, min_t = 30)

The model with the time-varying upwelling effect tends to provide
better 1-step ahead forecasts, with a higher total forecast ELPD

-
sum(lfo_mod0$elpds)
-#> [1] 35.11835
-sum(lfo_mod1$elpds)
-#> [1] 36.77461
+
sum(lfo_mod0$elpds)
+#> [1] 39.52656
+sum(lfo_mod1$elpds)
+#> [1] 40.81327

We can also plot the ELPDs for each model as a contrast. Here, values less than zero suggest the time-varying predictor model (Mod1) gives better 1-step ahead forecasts:

-
plot(x = 1:length(lfo_mod0$elpds) + 30,
-     y = lfo_mod0$elpds - lfo_mod1$elpds,
-     ylab = 'ELPDmod0 - ELPDmod1',
-     xlab = 'Evaluation time point',
-     pch = 16,
-     col = 'darkred',
-     bty = 'l')
-abline(h = 0, lty = 'dashed')
-

Comparing forecast skill for dynamic beta regression models in mvgam and R

+
plot(x = 1:length(lfo_mod0$elpds) + 30,
+     y = lfo_mod0$elpds - lfo_mod1$elpds,
+     ylab = 'ELPDmod0 - ELPDmod1',
+     xlab = 'Evaluation time point',
+     pch = 16,
+     col = 'darkred',
+     bty = 'l')
+abline(h = 0, lty = 'dashed')
+

Comparing forecast skill for dynamic beta regression models in mvgam and R

A useful exercise to further expand this model would be to think about what kinds of predictors might impact measurement error, which could easily be implemented into the observation formula in -mvgam. But for now, we will leave the model as-is.

+mvgam(). But for now, we will leave the model as-is.
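One way such an extension could look, as a hedged sketch only: here `flow` is a hypothetical covariate (not part of `SalmonSurvCUI`) thought to influence measurement error, added to the observation formula while the latent process model is left unchanged:

```{r, eval=FALSE}
# Hypothetical sketch: 'flow' stands in for any covariate suspected to
# affect measurement error; the process model is identical to mod1
mod2 <- mvgam(formula = survival ~ s(flow, k = 5),
              trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE),
              trend_model = AR(),
              noncentred = TRUE,
              family = betar(),
              data = model_data,
              silent = 2)
```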

diff --git a/doc/trend_formulas.R b/doc/trend_formulas.R index 4739e2e9..21ec225e 100644 --- a/doc/trend_formulas.R +++ b/doc/trend_formulas.R @@ -7,10 +7,11 @@ knitr::opts_chunk$set( eval = NOT_CRAN ) + ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -19,12 +20,15 @@ library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) + ## ----------------------------------------------------------------------------- load(url('https://github.com/atsa-es/MARSS/raw/master/data/lakeWAplankton.rda')) + ## ----------------------------------------------------------------------------- outcomes <- c('Greens', 'Bluegreens', 'Diatoms', 'Unicells', 'Other.algae') + ## ----------------------------------------------------------------------------- # loop across each plankton group to create the long datframe plankton_data <- do.call(rbind, lapply(outcomes, function(x){ @@ -52,20 +56,18 @@ plankton_data <- do.call(rbind, lapply(outcomes, function(x){ dplyr::mutate(time = dplyr::row_number()) %>% dplyr::ungroup() + ## ----------------------------------------------------------------------------- head(plankton_data) + ## ----------------------------------------------------------------------------- dplyr::glimpse(plankton_data) + ## ----------------------------------------------------------------------------- plot_mvgam_series(data = plankton_data, series = 'all') -## ----------------------------------------------------------------------------- -image(is.na(t(plankton_data)), axes = F, - col = c('grey80', 'darkred')) -axis(3, at = seq(0,1, len = NCOL(plankton_data)), - labels = colnames(plankton_data)) ## ----------------------------------------------------------------------------- plankton_data %>% @@ -80,6 +82,7 @@ plankton_data %>% xlab('Time') + ggtitle('Temperature (black) vs Other algae (red)') + ## ----------------------------------------------------------------------------- plankton_data %>% dplyr::filter(series == 'Diatoms') %>% @@ -93,18 +96,6 @@ plankton_data %>% xlab('Time') + ggtitle('Temperature (black) vs Diatoms (red)') -## ----------------------------------------------------------------------------- -plankton_data %>% - dplyr::filter(series == 'Greens') %>% - ggplot(aes(x = time, y = temp)) + - geom_line(size = 1.1) + - geom_line(aes(y = y), col = 'white', - size = 1.3) + - geom_line(aes(y = y), col = 'darkred', - size = 1.1) + - ylab('z-score') + - xlab('Time') + - ggtitle('Temperature (black) vs Greens (red)') ## ----------------------------------------------------------------------------- plankton_train <- plankton_data %>% @@ -112,6 +103,7 @@ plankton_train <- plankton_data %>% plankton_test <- plankton_data %>% dplyr::filter(time > 112) + ## ----notrend_mod, include = FALSE, results='hide'----------------------------- notrend_mod <- mvgam(y ~ te(temp, month, k = c(4, 4)) + @@ -121,67 +113,57 @@ notrend_mod <- mvgam(y ~ newdata = plankton_test, trend_model = 'None') + ## ----eval=FALSE--------------------------------------------------------------- -# notrend_mod <- mvgam(y ~ -# # tensor of temp and month to capture -# # "global" seasonality -# te(temp, month, k = c(4, 4)) + -# -# # series-specific deviation tensor products -# te(temp, month, k = c(4, 4), by = series), -# family = gaussian(), -# data = plankton_train, -# newdata = plankton_test, -# trend_model = 'None') -# +## notrend_mod <- mvgam(y ~ +## # tensor of temp 
and month to capture +## # "global" seasonality +## te(temp, month, k = c(4, 4)) + +## +## # series-specific deviation tensor products +## te(temp, month, k = c(4, 4), by = series), +## family = gaussian(), +## data = plankton_train, +## newdata = plankton_test, +## trend_model = 'None') +## + ## ----------------------------------------------------------------------------- plot_mvgam_smooth(notrend_mod, smooth = 1) -## ----------------------------------------------------------------------------- -plot_mvgam_smooth(notrend_mod, smooth = 2) ## ----------------------------------------------------------------------------- -plot_mvgam_smooth(notrend_mod, smooth = 3) +plot_mvgam_smooth(notrend_mod, smooth = 2) -## ----------------------------------------------------------------------------- -plot_mvgam_smooth(notrend_mod, smooth = 4) ## ----------------------------------------------------------------------------- -plot_mvgam_smooth(notrend_mod, smooth = 5) +plot_mvgam_smooth(notrend_mod, smooth = 3) -## ----------------------------------------------------------------------------- -plot_mvgam_smooth(notrend_mod, smooth = 6) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'forecast', series = 1) + ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'forecast', series = 2) + ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'forecast', series = 3) -## ----------------------------------------------------------------------------- -plot(notrend_mod, type = 'forecast', series = 4) - -## ----------------------------------------------------------------------------- -plot(notrend_mod, type = 'forecast', series = 5) ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'residuals', series = 1) + ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'residuals', series = 2) + ## ----------------------------------------------------------------------------- plot(notrend_mod, type = 'residuals', series = 3) -## ----------------------------------------------------------------------------- -plot(notrend_mod, type = 'residuals', series = 4) - -## ----------------------------------------------------------------------------- -plot(notrend_mod, type = 'residuals', series = 5) ## ----------------------------------------------------------------------------- priors <- get_mvgam_priors( @@ -193,20 +175,24 @@ priors <- get_mvgam_priors( te(temp, month, k = c(4, 4), by = trend), # VAR1 model with uncorrelated process errors - trend_model = 'VAR1', + trend_model = VAR(), family = gaussian(), data = plankton_train) + ## ----------------------------------------------------------------------------- priors[, 3] + ## ----------------------------------------------------------------------------- priors[, 4] + ## ----------------------------------------------------------------------------- priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), prior(normal(0.5, 0.25), class = sigma)) + ## ----var_mod, include = FALSE, results='hide'--------------------------------- var_mod <- mvgam(y ~ -1, trend_formula = ~ @@ -219,36 +205,39 @@ var_mod <- mvgam(y ~ -1, family = gaussian(), data = plankton_train, newdata = plankton_test, - trend_model = 'VAR1', + trend_model = VAR(), priors = priors, + adapt_delta = 0.99, burnin = 1000) + ## 
----eval=FALSE--------------------------------------------------------------- -# var_mod <- mvgam( -# # observation formula, which is empty -# y ~ -1, -# -# # process model formula, which includes the smooth functions -# trend_formula = ~ te(temp, month, k = c(4, 4)) + -# te(temp, month, k = c(4, 4), by = trend), -# -# # VAR1 model with uncorrelated process errors -# trend_model = 'VAR1', -# family = gaussian(), -# data = plankton_train, -# newdata = plankton_test, -# -# # include the updated priors -# priors = priors) +## var_mod <- mvgam( +## # observation formula, which is empty +## y ~ -1, +## +## # process model formula, which includes the smooth functions +## trend_formula = ~ te(temp, month, k = c(4, 4)) + +## te(temp, month, k = c(4, 4), by = trend), +## +## # VAR1 model with uncorrelated process errors +## trend_model = VAR(), +## family = gaussian(), +## data = plankton_train, +## newdata = plankton_test, +## +## # include the updated priors +## priors = priors, +## silent = 2) + ## ----------------------------------------------------------------------------- summary(var_mod, include_betas = FALSE) + ## ----------------------------------------------------------------------------- plot(var_mod, 'smooths', trend_effects = TRUE) -## ----warning=FALSE, message=FALSE--------------------------------------------- -mcmc_plot(var_mod, variable = 'A', regex = TRUE, type = 'hist') ## ----warning=FALSE, message=FALSE--------------------------------------------- A_pars <- matrix(NA, nrow = 5, ncol = 5) @@ -261,11 +250,6 @@ mcmc_plot(var_mod, variable = as.vector(t(A_pars)), type = 'hist') -## ----------------------------------------------------------------------------- -plot(var_mod, type = 'trend', series = 1) - -## ----------------------------------------------------------------------------- -plot(var_mod, type = 'trend', series = 3) ## ----warning=FALSE, message=FALSE--------------------------------------------- Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) @@ -278,13 +262,16 @@ mcmc_plot(var_mod, variable = as.vector(t(Sigma_pars)), type = 'hist') + ## ----warning=FALSE, message=FALSE--------------------------------------------- mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist') + ## ----------------------------------------------------------------------------- priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), prior(normal(0.5, 0.25), class = sigma)) + ## ----varcor_mod, include = FALSE, results='hide'------------------------------ varcor_mod <- mvgam(y ~ -1, trend_formula = ~ @@ -297,33 +284,31 @@ varcor_mod <- mvgam(y ~ -1, family = gaussian(), data = plankton_train, newdata = plankton_test, - trend_model = 'VAR1cor', + trend_model = VAR(cor = TRUE), burnin = 1000, + adapt_delta = 0.99, priors = priors) + ## ----eval=FALSE--------------------------------------------------------------- -# varcor_mod <- mvgam( -# # observation formula, which remains empty -# y ~ -1, -# -# # process model formula, which includes the smooth functions -# trend_formula = ~ te(temp, month, k = c(4, 4)) + -# te(temp, month, k = c(4, 4), by = trend), -# -# # VAR1 model with correlated process errors -# trend_model = 'VAR1cor', -# family = gaussian(), -# data = plankton_train, -# newdata = plankton_test, -# -# # include the updated priors -# priors = priors) +## varcor_mod <- mvgam( +## # observation formula, which remains empty +## y ~ -1, +## +## # process model formula, which includes the smooth functions +## trend_formula = ~ te(temp, month, k = c(4, 4)) + +## te(temp, 
month, k = c(4, 4), by = trend), +## +## # VAR1 model with correlated process errors +## trend_model = VAR(cor = TRUE), +## family = gaussian(), +## data = plankton_train, +## newdata = plankton_test, +## +## # include the updated priors +## priors = priors, +## silent = 2) -## ----warning=FALSE, message=FALSE--------------------------------------------- -mcmc_plot(varcor_mod, type = 'rhat') + - labs(title = 'VAR1cor') -mcmc_plot(var_mod, type = 'rhat') + - labs(title = 'VAR1') ## ----warning=FALSE, message=FALSE--------------------------------------------- Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) @@ -336,6 +321,7 @@ mcmc_plot(varcor_mod, variable = as.vector(t(Sigma_pars)), type = 'hist') + ## ----------------------------------------------------------------------------- Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE) median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median), @@ -344,34 +330,6 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto round(median_correlations, 2) -## ----warning=FALSE, message=FALSE--------------------------------------------- -A_pars <- matrix(NA, nrow = 5, ncol = 5) -for(i in 1:5){ - for(j in 1:5){ - A_pars[i, j] <- paste0('A[', i, ',', j, ']') - } -} -mcmc_plot(varcor_mod, - variable = as.vector(t(A_pars)), - type = 'hist') - -## ----------------------------------------------------------------------------- -plot(var_mod, type = 'forecast', series = 1, newdata = plankton_test) - -## ----------------------------------------------------------------------------- -plot(varcor_mod, type = 'forecast', series = 1, newdata = plankton_test) - -## ----------------------------------------------------------------------------- -plot(var_mod, type = 'forecast', series = 2, newdata = plankton_test) - -## ----------------------------------------------------------------------------- -plot(varcor_mod, type = 'forecast', series = 2, newdata = plankton_test) - -## ----------------------------------------------------------------------------- -plot(var_mod, type = 'forecast', series = 3, newdata = plankton_test) - -## ----------------------------------------------------------------------------- -plot(varcor_mod, type = 'forecast', series = 3, newdata = plankton_test) ## ----------------------------------------------------------------------------- # create forecast objects for each model @@ -389,6 +347,7 @@ plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', ylab = expression(variogram[VAR1cor]~-~variogram[VAR1])) abline(h = 0, lty = 'dashed') + ## ----------------------------------------------------------------------------- # plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better diff_scores <- score(fcvarcor, score = 'energy')$all_series$score - diff --git a/doc/trend_formulas.Rmd b/doc/trend_formulas.Rmd index e65a4ce7..387a8708 100644 --- a/doc/trend_formulas.Rmd +++ b/doc/trend_formulas.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, - dpi = 150, + dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -97,14 +97,6 @@ Note that we have z-scored the counts in this example as that will make it easie plot_mvgam_series(data = plankton_data, series = 'all') ``` -It is always helpful to check the data for `NA`s before attempting any models: -```{r} -image(is.na(t(plankton_data)), axes = F, - col = c('grey80', 'darkred')) -axis(3, at = seq(0,1, len = 
NCOL(plankton_data)), - labels = colnames(plankton_data)) -``` - We have some missing observations, but this isn't an issue for modelling in `mvgam`. A useful property to understand about these counts is that they tend to be highly seasonal. Below are some plots of z-scored counts against the z-scored temperature measurements in the lake for each month: ```{r} plankton_data %>% @@ -135,20 +127,6 @@ plankton_data %>% ggtitle('Temperature (black) vs Diatoms (red)') ``` -```{r} -plankton_data %>% - dplyr::filter(series == 'Greens') %>% - ggplot(aes(x = time, y = temp)) + - geom_line(size = 1.1) + - geom_line(aes(y = y), col = 'white', - size = 1.3) + - geom_line(aes(y = y), col = 'darkred', - size = 1.1) + - ylab('z-score') + - xlab('Time') + - ggtitle('Temperature (black) vs Greens (red)') -``` - We will have to try and capture this seasonality in our process model, which should be easy to do given the flexibility of GAMs. Next we will split the data into training and testing splits: ```{r} plankton_train <- plankton_data %>% @@ -192,7 +170,7 @@ The "global" tensor product smooth function can be quickly visualized: plot_mvgam_smooth(notrend_mod, smooth = 1) ``` -On this plot, red indicates below-average linear predictors and white indicates above-average. We can then plot the deviation smooths for each algal group to see how they vary from the "global" pattern: +On this plot, red indicates below-average linear predictors and white indicates above-average. We can then plot the deviation smooths for a few algal groups to see how they vary from the "global" pattern: ```{r} plot_mvgam_smooth(notrend_mod, smooth = 2) ``` @@ -201,18 +179,6 @@ plot_mvgam_smooth(notrend_mod, smooth = 2) plot_mvgam_smooth(notrend_mod, smooth = 3) ``` -```{r} -plot_mvgam_smooth(notrend_mod, smooth = 4) -``` - -```{r} -plot_mvgam_smooth(notrend_mod, smooth = 5) -``` - -```{r} -plot_mvgam_smooth(notrend_mod, smooth = 6) -``` - These multidimensional smooths have done a good job of capturing the seasonal variation in our observations: ```{r} plot(notrend_mod, type = 'forecast', series = 1) @@ -226,15 +192,7 @@ plot(notrend_mod, type = 'forecast', series = 2) plot(notrend_mod, type = 'forecast', series = 3) ``` -```{r} -plot(notrend_mod, type = 'forecast', series = 4) -``` - -```{r} -plot(notrend_mod, type = 'forecast', series = 5) -``` - -This basic model gives us confidence that we can capture the seasonal variation in the observations. But the model has not captured the remaining temporal dynamics, which is obvious when we inspect Dunn-Smyth residuals for each series: +This basic model gives us confidence that we can capture the seasonal variation in the observations. But the model has not captured the remaining temporal dynamics, which is obvious when we inspect Dunn-Smyth residuals for a few series: ```{r} plot(notrend_mod, type = 'residuals', series = 1) ``` @@ -247,13 +205,6 @@ plot(notrend_mod, type = 'residuals', series = 2) plot(notrend_mod, type = 'residuals', series = 3) ``` -```{r} -plot(notrend_mod, type = 'residuals', series = 4) -``` - -```{r} -plot(notrend_mod, type = 'residuals', series = 5) -``` ### Multiseries dynamics Now it is time to get into multivariate State-Space models. We will fit two models that can both incorporate lagged cross-dependencies in the latent process models. The first model assumes that the process errors operate independently from one another, while the second assumes that there may be contemporaneous correlations in the process errors. 
Both models include a Vector Autoregressive component for the process means, and so both can model complex community dynamics. The models can be described mathematically as follows: @@ -280,7 +231,7 @@ priors <- get_mvgam_priors( te(temp, month, k = c(4, 4), by = trend), # VAR1 model with uncorrelated process errors - trend_model = 'VAR1', + trend_model = VAR(), family = gaussian(), data = plankton_train) ``` @@ -314,8 +265,9 @@ var_mod <- mvgam(y ~ -1, family = gaussian(), data = plankton_train, newdata = plankton_test, - trend_model = 'VAR1', + trend_model = VAR(), priors = priors, + adapt_delta = 0.99, burnin = 1000) ``` @@ -329,13 +281,14 @@ var_mod <- mvgam( te(temp, month, k = c(4, 4), by = trend), # VAR1 model with uncorrelated process errors - trend_model = 'VAR1', + trend_model = VAR(), family = gaussian(), data = plankton_train, newdata = plankton_test, # include the updated priors - priors = priors) + priors = priors, + silent = 2) ``` ### Inspecting SS models @@ -349,12 +302,7 @@ The convergence of this model isn't fabulous (more on this in a moment). But we plot(var_mod, 'smooths', trend_effects = TRUE) ``` -The VAR matrix is of particular interest here, as it captures lagged dependencies and cross-dependencies in the latent process model: -```{r warning=FALSE, message=FALSE} -mcmc_plot(var_mod, variable = 'A', regex = TRUE, type = 'hist') -``` - -Unfortunately `bayesplot` doesn't know this is a matrix of parameters so what we see is actually the transpose of the VAR matrix. A little bit of wrangling gives us these histograms in the correct order: +The VAR matrix is of particular interest here, as it captures lagged dependencies and cross-dependencies in the latent process model. Unfortunately `bayesplot` doesn't know this is a matrix of parameters so what we see is actually the transpose of the VAR matrix. A little bit of wrangling gives us these histograms in the correct order: ```{r warning=FALSE, message=FALSE} A_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ @@ -367,15 +315,8 @@ mcmc_plot(var_mod, type = 'hist') ``` -There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in the next timestep. So for example, the effect in cell [1,3], which is quite strongly negative, means that an *increase* in the process for series 3 (Greens) at time $t$ is expected to lead to a subsequent *decrease* in the process for series 1 (Bluegreens) at time $t+1$. The latent process model is now capturing these effects and the smooth seasonal effects, so the trend plot shows our best estimate of what the *true* count should have been at each time point: -```{r} -plot(var_mod, type = 'trend', series = 1) -``` - -```{r} -plot(var_mod, type = 'trend', series = 3) -``` - +There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in the next timestep. So for example, the effect in cell [1,3] shows how an *increase* in the process for series 3 (Greens) at time $t$ is expected to impact the process for series 1 (Bluegreens) at time $t+1$. The latent process model is now capturing these effects and the smooth seasonal effects. + The process error $(\Sigma)$ captures unmodelled variation in the process models. 
Again, we fixed the off-diagonals to 0, so the histograms for these will look like flat boxes: ```{r warning=FALSE, message=FALSE} Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) @@ -417,8 +358,9 @@ varcor_mod <- mvgam(y ~ -1, family = gaussian(), data = plankton_train, newdata = plankton_test, - trend_model = 'VAR1cor', + trend_model = VAR(cor = TRUE), burnin = 1000, + adapt_delta = 0.99, priors = priors) ``` @@ -432,21 +374,14 @@ varcor_mod <- mvgam( te(temp, month, k = c(4, 4), by = trend), # VAR1 model with correlated process errors - trend_model = 'VAR1cor', + trend_model = VAR(cor = TRUE), family = gaussian(), data = plankton_train, newdata = plankton_test, # include the updated priors - priors = priors) -``` - -Plot convergence diagnostics for the two models, which shows that both models display similar levels of convergence: -```{r warning=FALSE, message=FALSE} -mcmc_plot(varcor_mod, type = 'rhat') + - labs(title = 'VAR1cor') -mcmc_plot(var_mod, type = 'rhat') + - labs(title = 'VAR1') + priors = priors, + silent = 2) ``` The $(\Sigma)$ matrix now captures any evidence of contemporaneously correlated process error: @@ -462,7 +397,7 @@ mcmc_plot(varcor_mod, type = 'hist') ``` -This symmetric matrix tells us there is support for correlated process errors. For example, series 1 and 3 (Bluegreens and Greens) show negatively correlated process errors, while series 1 and 4 (Bluegreens and Other.algae) show positively correlated errors. But it is easier to interpret these estimates if we convert the covariance matrix to a correlation matrix. Here we compute the posterior median process error correlations: +This symmetric matrix tells us there is support for correlated process errors, as several of the off-diagonal entries are strongly non-zero. But it is easier to interpret these estimates if we convert the covariance matrix to a correlation matrix. Here we compute the posterior median process error correlations: ```{r} Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE) median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median), @@ -472,45 +407,7 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto round(median_correlations, 2) ``` -Because this model is able to capture correlated errors, the VAR matrix has changed slightly: -```{r warning=FALSE, message=FALSE} -A_pars <- matrix(NA, nrow = 5, ncol = 5) -for(i in 1:5){ - for(j in 1:5){ - A_pars[i, j] <- paste0('A[', i, ',', j, ']') - } -} -mcmc_plot(varcor_mod, - variable = as.vector(t(A_pars)), - type = 'hist') -``` - -We still have some evidence of lagged cross-dependence, but some of these interactions have now been pulled more toward zero. But which model is better? 
Forecasts don't appear to differ very much, at least qualitatively (here are forecasts for three of the series, for each model): -```{r} -plot(var_mod, type = 'forecast', series = 1, newdata = plankton_test) -``` - -```{r} -plot(varcor_mod, type = 'forecast', series = 1, newdata = plankton_test) -``` - -```{r} -plot(var_mod, type = 'forecast', series = 2, newdata = plankton_test) -``` - -```{r} -plot(varcor_mod, type = 'forecast', series = 2, newdata = plankton_test) -``` - -```{r} -plot(var_mod, type = 'forecast', series = 3, newdata = plankton_test) -``` - -```{r} -plot(varcor_mod, type = 'forecast', series = 3, newdata = plankton_test) -``` - -We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set: +But which model is better? We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set: ```{r} # create forecast objects for each model fcvar <- forecast(var_mod) @@ -551,7 +448,7 @@ Heaps, Sarah E. "[Enforcing stationarity through the prior in vector autoregress Hannaford, Naomi E., et al. "[A sparse Bayesian hierarchical vector autoregressive model for microbial dynamics in a wastewater treatment plant.](https://www.sciencedirect.com/science/article/pii/S0167947322002390)" *Computational Statistics & Data Analysis* 179 (2023): 107659. -Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. "[MARSS: multivariate autoregressive state-space models for analyzing time-series data.](https://d1wqtxts1xzle7.cloudfront.net/30588864/rjournal_2012-1_holmes_et_al-libre.pdf?1391843792=&response-content-disposition=inline%3B+filename%3DMARSS_Multivariate_Autoregressive_State.pdf&Expires=1695861526&Signature=TCRXULs0mUKRM4m1pmvZxwE10bUqS6vzLcuKeUBCj57YIjx23iTxS1fEgBpV0fs2wb5XAw7ZkG84XyMaoS~vjiqZ-DpheQDHwAHpIWG-TcckHQjEjPWTNajFvAemToUdCiHnDa~yrhW9HRgXjgncdalkjzjvjT3HLSW8mcjBDhQN-WJ3MKQFSXtxoBpWfcuPYbf-HC1E1oSl7957y~w0I1gcIVdu6LHjP~CEKXa0BQzS4xuarL2nz~tHD2MverbNJYMrDGrAxIi-MX6i~lfHWuwV6UKRdoOZ0pXIcMYWBTv9V5xYey76aMKTICiJ~0NqXLZdXO5qlS4~~2nFEO7b7w__&Key-Pair-Id=APKAJLOHF5GGSLRBV4ZA)" *R Journal*. 4.1 (2012): 11. +Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. "[MARSS: multivariate autoregressive state-space models for analyzing time-series data.](https://journal.r-project.org/archive/2012/RJ-2012-002/index.html)" *R Journal*. 4.1 (2012): 11. Ward, Eric J., et al. "[Inferring spatial structure from time‐series data: using multivariate state‐space models to detect metapopulation structure of California sea lions in the Gulf of California, Mexico.](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/j.1365-2664.2009.01745.x)" *Journal of Applied Ecology* 47.1 (2010): 47-56. diff --git a/doc/trend_formulas.html b/doc/trend_formulas.html index e6e67bc0..5e2a8efc 100644 --- a/doc/trend_formulas.html +++ b/doc/trend_formulas.html @@ -12,7 +12,7 @@ - + State-Space models in mvgam @@ -340,7 +340,7 @@

State-Space models in mvgam

Nicholas J Clark

-

2024-04-16

+

2024-07-01

@@ -453,21 +453,31 @@

Lake Washington plankton data

necessary; it is often better to build a model that respects the properties of the actual outcome variables)

plot_mvgam_series(data = plankton_data, series = 'all')
-

-

It is always helpful to check the data for NAs before -attempting any models:

-
image(is.na(t(plankton_data)), axes = F,
-      col = c('grey80', 'darkred'))
-axis(3, at = seq(0,1, len = NCOL(plankton_data)), 
-     labels = colnames(plankton_data))
-

+

We have some missing observations, but this isn’t an issue for modelling in <code>mvgam</code>. A useful property of these counts is that they tend to be highly seasonal. Below are some plots of z-scored counts against the z-scored temperature measurements in the lake for each month:</p>
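(A quick way to tabulate that missingness — an editorial sketch, not part of the original vignette, using the same `dplyr` conventions as the rest of the code:)

```r
# count the missing observations per series; assumes the plankton_data
# object loaded above, with outcomes stored in column 'y'
plankton_data %>%
  dplyr::group_by(series) %>%
  dplyr::summarise(n_missing = sum(is.na(y)))
```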

+
plankton_data %>%
+  dplyr::filter(series == 'Other.algae') %>%
+  ggplot(aes(x = time, y = temp)) +
+  geom_line(size = 1.1) +
+  geom_line(aes(y = y), col = 'white',
+            size = 1.3) +
+  geom_line(aes(y = y), col = 'darkred',
+            size = 1.1) +
+  ylab('z-score') +
+  xlab('Time') +
+  ggtitle('Temperature (black) vs Other algae (red)')
+#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
+#> ℹ Please use `linewidth` instead.
+#> This warning is displayed once every 8 hours.
+#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
+#> generated.
+

plankton_data %>%
-  dplyr::filter(series == 'Other.algae') %>%
+  dplyr::filter(series == 'Diatoms') %>%
   ggplot(aes(x = time, y = temp)) +
   geom_line(size = 1.1) +
   geom_line(aes(y = y), col = 'white',
@@ -476,44 +486,15 @@ 

Lake Washington plankton data

size = 1.1) + ylab('z-score') + xlab('Time') + - ggtitle('Temperature (black) vs Other algae (red)') -#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. -#> ℹ Please use `linewidth` instead. -#> This warning is displayed once every 8 hours. -#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was -#> generated.
-

-
plankton_data %>%
-  dplyr::filter(series == 'Diatoms') %>%
-  ggplot(aes(x = time, y = temp)) +
-  geom_line(size = 1.1) +
-  geom_line(aes(y = y), col = 'white',
-            size = 1.3) +
-  geom_line(aes(y = y), col = 'darkred',
-            size = 1.1) +
-  ylab('z-score') +
-  xlab('Time') +
-  ggtitle('Temperature (black) vs Diatoms (red)')
-

-
plankton_data %>%
-  dplyr::filter(series == 'Greens') %>%
-  ggplot(aes(x = time, y = temp)) +
-  geom_line(size = 1.1) +
-  geom_line(aes(y = y), col = 'white',
-            size = 1.3) +
-  geom_line(aes(y = y), col = 'darkred',
-            size = 1.1) +
-  ylab('z-score') +
-  xlab('Time') +
-  ggtitle('Temperature (black) vs Greens (red)')
-

+ ggtitle('Temperature (black) vs Diatoms (red)')
+

We will have to try to capture this seasonality in our process model, which should be easy to do given the flexibility of GAMs. Next we will split the data into training and testing sets:</p>

-
plankton_train <- plankton_data %>%
-  dplyr::filter(time <= 112)
-plankton_test <- plankton_data %>%
-  dplyr::filter(time > 112)
+
plankton_train <- plankton_data %>%
+  dplyr::filter(time <= 112)
+plankton_test <- plankton_data %>%
+  dplyr::filter(time > 112)
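(A quick sanity check on the split — an editorial sketch using the two objects just created:)

```r
# the training window should run up to timepoint 112, with the
# test set holding out all later timepoints
range(plankton_train$time)
range(plankton_test$time)
```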

Now time to fit some models. This requires a bit of thinking about how we can best tackle the seasonal variation and the likely dependence structure in the data. These algae are interacting as part of a complex @@ -541,70 +522,52 @@

Capturing seasonality

overall “global” seasonality. Note that we do not include series-specific intercepts in this model because each series was z-scored to have a mean of 0.

-
notrend_mod <- mvgam(y ~ 
-                       # tensor of temp and month to capture
-                       # "global" seasonality
-                       te(temp, month, k = c(4, 4)) +
-                       
-                       # series-specific deviation tensor products
-                       te(temp, month, k = c(4, 4), by = series),
-                     family = gaussian(),
-                     data = plankton_train,
-                     newdata = plankton_test,
-                     trend_model = 'None')
+
notrend_mod <- mvgam(y ~ 
+                       # tensor of temp and month to capture
+                       # "global" seasonality
+                       te(temp, month, k = c(4, 4)) +
+                       
+                       # series-specific deviation tensor products
+                       te(temp, month, k = c(4, 4), by = series),
+                     family = gaussian(),
+                     data = plankton_train,
+                     newdata = plankton_test,
+                     trend_model = 'None')

The “global” tensor product smooth function can be quickly visualized:

-
plot_mvgam_smooth(notrend_mod, smooth = 1)
-

+
plot_mvgam_smooth(notrend_mod, smooth = 1)
+

On this plot, red indicates below-average linear predictors and white -indicates above-average. We can then plot the deviation smooths for each -algal group to see how they vary from the “global” pattern:

-
plot_mvgam_smooth(notrend_mod, smooth = 2)
-

-
plot_mvgam_smooth(notrend_mod, smooth = 3)
-

-
plot_mvgam_smooth(notrend_mod, smooth = 4)
-

-
plot_mvgam_smooth(notrend_mod, smooth = 5)
-

-
plot_mvgam_smooth(notrend_mod, smooth = 6)
-

+indicates above-average. We can then plot the deviation smooths for a +few algal groups to see how they vary from the “global” pattern:

+
plot_mvgam_smooth(notrend_mod, smooth = 2)
+

+
plot_mvgam_smooth(notrend_mod, smooth = 3)
+

These multidimensional smooths have done a good job of capturing the seasonal variation in our observations:

-
plot(notrend_mod, type = 'forecast', series = 1)
-

-
#> Out of sample CRPS:
-#> [1] 6.795543
-
plot(notrend_mod, type = 'forecast', series = 2)
-

-
#> Out of sample CRPS:
-#> [1] 6.841293
-
plot(notrend_mod, type = 'forecast', series = 3)
-

-
#> Out of sample CRPS:
-#> [1] 4.109977
-
plot(notrend_mod, type = 'forecast', series = 4)
-

-
#> Out of sample CRPS:
-#> [1] 3.533645
-
plot(notrend_mod, type = 'forecast', series = 5)
-

-
#> Out of sample CRPS:
-#> [1] 2.859834
+
plot(notrend_mod, type = 'forecast', series = 1)
+#> Out of sample CRPS:
+#> 6.77172874237756
+

+
plot(notrend_mod, type = 'forecast', series = 2)
+#> Out of sample CRPS:
+#> 6.75657325046048
+

+
plot(notrend_mod, type = 'forecast', series = 3)
+#> Out of sample CRPS:
+#> 4.09992574037549
+

This basic model gives us confidence that we can capture the seasonal variation in the observations. But the model has not captured the remaining temporal dynamics, which is obvious when we inspect Dunn-Smyth -residuals for each series:

-
plot(notrend_mod, type = 'residuals', series = 1)
-

-
plot(notrend_mod, type = 'residuals', series = 2)
-

-
plot(notrend_mod, type = 'residuals', series = 3)
-

-
plot(notrend_mod, type = 'residuals', series = 4)
-

-
plot(notrend_mod, type = 'residuals', series = 5)
-

+residuals for a few series:

+
plot(notrend_mod, type = 'residuals', series = 1)
+

+
plot(notrend_mod, type = 'residuals', series = 2)
+

+
plot(notrend_mod, type = 'residuals', series = 3)
+

Multiseries dynamics

@@ -667,52 +630,52 @@

Multiseries dynamics

So let’s update the priors for these parameters. In doing so, you will get to see how the formula for the latent process (i.e. trend) model is used in mvgam:

-
priors <- get_mvgam_priors(
-  # observation formula, which has no terms in it
-  y ~ -1,
-  
-  # process model formula, which includes the smooth functions
-  trend_formula = ~ te(temp, month, k = c(4, 4)) +
-    te(temp, month, k = c(4, 4), by = trend),
-  
-  # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
-  family = gaussian(),
-  data = plankton_train)
+
priors <- get_mvgam_priors(
+  # observation formula, which has no terms in it
+  y ~ -1,
+  
+  # process model formula, which includes the smooth functions
+  trend_formula = ~ te(temp, month, k = c(4, 4)) +
+    te(temp, month, k = c(4, 4), by = trend),
+  
+  # VAR1 model with uncorrelated process errors
+  trend_model = VAR(),
+  family = gaussian(),
+  data = plankton_train)

Get names of all parameters whose priors can be modified:

-
priors[, 3]
-#>  [1] "(Intercept)"                                                                                                                                                                                                                                                           
-#>  [2] "process error sd"                                                                                                                                                                                                                                                      
-#>  [3] "diagonal autocorrelation population mean"                                                                                                                                                                                                                              
-#>  [4] "off-diagonal autocorrelation population mean"                                                                                                                                                                                                                          
-#>  [5] "diagonal autocorrelation population variance"                                                                                                                                                                                                                          
-#>  [6] "off-diagonal autocorrelation population variance"                                                                                                                                                                                                                      
-#>  [7] "shape1 for diagonal autocorrelation precision"                                                                                                                                                                                                                         
-#>  [8] "shape1 for off-diagonal autocorrelation precision"                                                                                                                                                                                                                     
-#>  [9] "shape2 for diagonal autocorrelation precision"                                                                                                                                                                                                                         
-#> [10] "shape2 for off-diagonal autocorrelation precision"                                                                                                                                                                                                                     
-#> [11] "observation error sd"                                                                                                                                                                                                                                                  
-#> [12] "te(temp,month) smooth parameters, te(temp,month):trendtrend1 smooth parameters, te(temp,month):trendtrend2 smooth parameters, te(temp,month):trendtrend3 smooth parameters, te(temp,month):trendtrend4 smooth parameters, te(temp,month):trendtrend5 smooth parameters"
+
priors[, 3]
+#>  [1] "(Intercept)"                                                                                                                                                                                                                                                           
+#>  [2] "process error sd"                                                                                                                                                                                                                                                      
+#>  [3] "diagonal autocorrelation population mean"                                                                                                                                                                                                                              
+#>  [4] "off-diagonal autocorrelation population mean"                                                                                                                                                                                                                          
+#>  [5] "diagonal autocorrelation population variance"                                                                                                                                                                                                                          
+#>  [6] "off-diagonal autocorrelation population variance"                                                                                                                                                                                                                      
+#>  [7] "shape1 for diagonal autocorrelation precision"                                                                                                                                                                                                                         
+#>  [8] "shape1 for off-diagonal autocorrelation precision"                                                                                                                                                                                                                     
+#>  [9] "shape2 for diagonal autocorrelation precision"                                                                                                                                                                                                                         
+#> [10] "shape2 for off-diagonal autocorrelation precision"                                                                                                                                                                                                                     
+#> [11] "observation error sd"                                                                                                                                                                                                                                                  
+#> [12] "te(temp,month) smooth parameters, te(temp,month):trendtrend1 smooth parameters, te(temp,month):trendtrend2 smooth parameters, te(temp,month):trendtrend3 smooth parameters, te(temp,month):trendtrend4 smooth parameters, te(temp,month):trendtrend5 smooth parameters"

And their default prior distributions:

-
priors[, 4]
-#>  [1] "(Intercept) ~ student_t(3, -0.1, 2.5);"
-#>  [2] "sigma ~ student_t(3, 0, 2.5);"         
-#>  [3] "es[1] = 0;"                            
-#>  [4] "es[2] = 0;"                            
-#>  [5] "fs[1] = sqrt(0.455);"                  
-#>  [6] "fs[2] = sqrt(0.455);"                  
-#>  [7] "gs[1] = 1.365;"                        
-#>  [8] "gs[2] = 1.365;"                        
-#>  [9] "hs[1] = 0.071175;"                     
-#> [10] "hs[2] = 0.071175;"                     
-#> [11] "sigma_obs ~ student_t(3, 0, 2.5);"     
-#> [12] "lambda_trend ~ normal(5, 30);"
+
priors[, 4]
+#>  [1] "(Intercept) ~ student_t(3, -0.1, 2.5);"
+#>  [2] "sigma ~ student_t(3, 0, 2.5);"         
+#>  [3] "es[1] = 0;"                            
+#>  [4] "es[2] = 0;"                            
+#>  [5] "fs[1] = sqrt(0.455);"                  
+#>  [6] "fs[2] = sqrt(0.455);"                  
+#>  [7] "gs[1] = 1.365;"                        
+#>  [8] "gs[2] = 1.365;"                        
+#>  [9] "hs[1] = 0.071175;"                     
+#> [10] "hs[2] = 0.071175;"                     
+#> [11] "sigma_obs ~ student_t(3, 0, 2.5);"     
+#> [12] "lambda_trend ~ normal(5, 30);"

Setting priors is easy in mvgam as you can use brms routines. Here we use more informative Normal priors for both error components, but we impose a lower bound of 0.2 for the observation errors:

-
priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
-            prior(normal(0.5, 0.25), class = sigma))
+
priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
+            prior(normal(0.5, 0.25), class = sigma))

You may have noticed something else unique about this model: there is no intercept term in the observation formula. This is because a shared intercept parameter can sometimes be unidentifiable with respect to the @@ -722,22 +685,23 @@

Multiseries dynamics

this parameter. <code>mvgam</code> accomplishes this by fixing the coefficient for the intercept to zero. Now we can fit the first model, which assumes that process errors are contemporaneously uncorrelated:</p>

-
var_mod <- mvgam(  
-  # observation formula, which is empty
-  y ~ -1,
-  
-  # process model formula, which includes the smooth functions
-  trend_formula = ~ te(temp, month, k = c(4, 4)) +
-    te(temp, month, k = c(4, 4), by = trend),
-  
-  # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
-  family = gaussian(),
-  data = plankton_train,
-  newdata = plankton_test,
-  
-  # include the updated priors
-  priors = priors)
+
var_mod <- mvgam(  
+  # observation formula, which is empty
+  y ~ -1,
+  
+  # process model formula, which includes the smooth functions
+  trend_formula = ~ te(temp, month, k = c(4, 4)) +
+    te(temp, month, k = c(4, 4), by = trend),
+  
+  # VAR1 model with uncorrelated process errors
+  trend_model = VAR(),
+  family = gaussian(),
+  data = plankton_train,
+  newdata = plankton_test,
+  
+  # include the updated priors
+  priors = priors,
+  silent = 2)

Inspecting SS models

@@ -752,187 +716,177 @@

Inspecting SS models

include_betas = FALSE to stop the summary from printing output for all of the spline coefficients, which can be dense and hard to interpret:

-
summary(var_mod, include_betas = FALSE)
-#> GAM observation formula:
-#> y ~ 1
-#> 
-#> GAM process formula:
-#> ~te(temp, month, k = c(4, 4)) + te(temp, month, k = c(4, 4), 
-#>     by = trend)
-#> 
-#> Family:
-#> gaussian
-#> 
-#> Link function:
-#> identity
-#> 
-#> Trend model:
-#> VAR1
-#> 
-#> N process models:
-#> 5 
-#> 
-#> N series:
-#> 5 
-#> 
-#> N timepoints:
-#> 112 
-#> 
-#> Status:
-#> Fitted using Stan 
-#> 4 chains, each with iter = 1500; warmup = 1000; thin = 1 
-#> Total post-warmup draws = 2000
-#> 
-#> 
-#> Observation error parameter estimates:
-#>              2.5%  50% 97.5% Rhat n_eff
-#> sigma_obs[1] 0.21 0.26  0.35 1.01   376
-#> sigma_obs[2] 0.24 0.39  0.53 1.03   152
-#> sigma_obs[3] 0.43 0.64  0.83 1.10    43
-#> sigma_obs[4] 0.24 0.38  0.50 1.01   219
-#> sigma_obs[5] 0.29 0.42  0.54 1.03   173
-#> 
-#> GAM observation model coefficient (beta) estimates:
-#>             2.5% 50% 97.5% Rhat n_eff
-#> (Intercept)    0   0     0  NaN   NaN
-#> 
-#> Process model VAR parameter estimates:
-#>          2.5%    50% 97.5% Rhat n_eff
-#> A[1,1] -0.055  0.480 0.900 1.07    64
-#> A[1,2] -0.370 -0.039 0.210 1.01   295
-#> A[1,3] -0.500 -0.053 0.330 1.01   188
-#> A[1,4] -0.270  0.023 0.410 1.01   488
-#> A[1,5] -0.092  0.140 0.580 1.04   135
-#> A[2,1] -0.170  0.012 0.270 1.00   514
-#> A[2,2]  0.610  0.800 0.920 1.01   231
-#> A[2,3] -0.390 -0.120 0.048 1.01   223
-#> A[2,4] -0.044  0.100 0.340 1.01   241
-#> A[2,5] -0.063  0.061 0.210 1.01   371
-#> A[3,1] -0.300  0.014 0.820 1.06    54
-#> A[3,2] -0.540 -0.210 0.016 1.02   137
-#> A[3,3]  0.072  0.410 0.700 1.02   221
-#> A[3,4] -0.015  0.230 0.650 1.01   151
-#> A[3,5] -0.073  0.130 0.420 1.02   139
-#> A[4,1] -0.160  0.065 0.560 1.04    83
-#> A[4,2] -0.130  0.058 0.260 1.03   165
-#> A[4,3] -0.440 -0.120 0.120 1.03   143
-#> A[4,4]  0.480  0.730 0.960 1.03   144
-#> A[4,5] -0.230 -0.035 0.130 1.02   426
-#> A[5,1] -0.230  0.082 0.900 1.06    56
-#> A[5,2] -0.420 -0.120 0.079 1.02   128
-#> A[5,3] -0.650 -0.200 0.120 1.03    90
-#> A[5,4] -0.061  0.180 0.580 1.01   153
-#> A[5,5]  0.510  0.740 0.980 1.02   156
-#> 
-#> Process error parameter estimates:
-#>             2.5%  50% 97.5% Rhat n_eff
-#> Sigma[1,1] 0.019 0.28  0.65 1.11    32
-#> Sigma[1,2] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[1,3] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[1,4] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[1,5] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[2,1] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[2,2] 0.063 0.11  0.18 1.01   371
-#> Sigma[2,3] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[2,4] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[2,5] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[3,1] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[3,2] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[3,3] 0.056 0.16  0.31 1.03   106
-#> Sigma[3,4] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[3,5] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[4,1] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[4,2] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[4,3] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[4,4] 0.048 0.14  0.27 1.03   111
-#> Sigma[4,5] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[5,1] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[5,2] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[5,3] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[5,4] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[5,5] 0.098 0.20  0.36 1.02   131
-#> 
-#> Approximate significance of GAM process smooths:
-#>                              edf Ref.df    F p-value
-#> te(temp,month)              3.93     15 2.73    0.25
-#> te(temp,month):seriestrend1 1.22     15 0.16    1.00
-#> te(temp,month):seriestrend2 1.25     15 0.48    1.00
-#> te(temp,month):seriestrend3 3.98     15 3.53    0.24
-#> te(temp,month):seriestrend4 1.60     15 1.02    0.95
-#> te(temp,month):seriestrend5 1.97     15 0.32    1.00
-#> 
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhats above 1.05 found for 24 parameters
-#>  *Diagnose further to investigate why the chains have not mixed
-#> 0 of 2000 iterations ended with a divergence (0%)
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> Chain 1: E-FMI = 0.179
-#> Chain 3: E-FMI = 0.1616
-#> Chain 4: E-FMI = 0.1994
-#>  *E-FMI below 0.2 indicates you may need to reparameterize your model
-#> 
-#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:19:23 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+
summary(var_mod, include_betas = FALSE)
+#> GAM observation formula:
+#> y ~ 1
+#> <environment: 0x00000241693f91f0>
+#> 
+#> GAM process formula:
+#> ~te(temp, month, k = c(4, 4)) + te(temp, month, k = c(4, 4), 
+#>     by = trend)
+#> <environment: 0x00000241693f91f0>
+#> 
+#> Family:
+#> gaussian
+#> 
+#> Link function:
+#> identity
+#> 
+#> Trend model:
+#> VAR()
+#> 
+#> N process models:
+#> 5 
+#> 
+#> N series:
+#> 5 
+#> 
+#> N timepoints:
+#> 120 
+#> 
+#> Status:
+#> Fitted using Stan 
+#> 4 chains, each with iter = 1500; warmup = 1000; thin = 1 
+#> Total post-warmup draws = 2000
+#> 
+#> 
+#> Observation error parameter estimates:
+#>              2.5%  50% 97.5% Rhat n_eff
+#> sigma_obs[1] 0.20 0.25  0.34 1.01   508
+#> sigma_obs[2] 0.27 0.40  0.54 1.03   179
+#> sigma_obs[3] 0.43 0.64  0.82 1.13    20
+#> sigma_obs[4] 0.25 0.37  0.50 1.00   378
+#> sigma_obs[5] 0.30 0.43  0.54 1.03   229
+#> 
+#> GAM observation model coefficient (beta) estimates:
+#>             2.5% 50% 97.5% Rhat n_eff
+#> (Intercept)    0   0     0  NaN   NaN
+#> 
+#> Process model VAR parameter estimates:
+#>          2.5%    50% 97.5% Rhat n_eff
+#> A[1,1]  0.038  0.520 0.870 1.08    32
+#> A[1,2] -0.350 -0.030 0.200 1.00   497
+#> A[1,3] -0.530 -0.044 0.330 1.02   261
+#> A[1,4] -0.280  0.038 0.420 1.00   392
+#> A[1,5] -0.100  0.120 0.510 1.04   141
+#> A[2,1] -0.160  0.011 0.200 1.00  1043
+#> A[2,2]  0.620  0.790 0.910 1.01   418
+#> A[2,3] -0.400 -0.130 0.045 1.03   291
+#> A[2,4] -0.034  0.110 0.360 1.02   274
+#> A[2,5] -0.048  0.061 0.200 1.01   585
+#> A[3,1] -0.260  0.025 0.560 1.10    28
+#> A[3,2] -0.530 -0.200 0.027 1.02   167
+#> A[3,3]  0.069  0.430 0.740 1.01   256
+#> A[3,4] -0.022  0.230 0.660 1.02   162
+#> A[3,5] -0.094  0.120 0.390 1.02   208
+#> A[4,1] -0.150  0.058 0.360 1.03   137
+#> A[4,2] -0.110  0.063 0.270 1.01   360
+#> A[4,3] -0.430 -0.110 0.140 1.01   312
+#> A[4,4]  0.470  0.730 0.950 1.02   278
+#> A[4,5] -0.200 -0.036 0.130 1.01   548
+#> A[5,1] -0.190  0.083 0.650 1.08    41
+#> A[5,2] -0.460 -0.120 0.076 1.04   135
+#> A[5,3] -0.620 -0.180 0.130 1.04   153
+#> A[5,4] -0.062  0.190 0.660 1.04   140
+#> A[5,5]  0.510  0.740 0.930 1.00   437
+#> 
+#> Process error parameter estimates:
+#>             2.5%  50% 97.5% Rhat n_eff
+#> Sigma[1,1] 0.033 0.27  0.64 1.20     9
+#> Sigma[1,2] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[1,3] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[1,4] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[1,5] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[2,1] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[2,2] 0.066 0.12  0.18 1.01   541
+#> Sigma[2,3] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[2,4] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[2,5] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[3,1] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[3,2] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[3,3] 0.051 0.16  0.29 1.04   163
+#> Sigma[3,4] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[3,5] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[4,1] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[4,2] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[4,3] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[4,4] 0.054 0.14  0.28 1.03   182
+#> Sigma[4,5] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[5,1] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[5,2] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[5,3] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[5,4] 0.000 0.00  0.00  NaN   NaN
+#> Sigma[5,5] 0.100 0.21  0.35 1.01   343
+#> 
+#> Approximate significance of GAM process smooths:
+#>                               edf Ref.df Chi.sq p-value
+#> te(temp,month)              2.902     15  43.54    0.44
+#> te(temp,month):seriestrend1 2.001     15   1.66    1.00
+#> te(temp,month):seriestrend2 0.943     15   7.03    1.00
+#> te(temp,month):seriestrend3 5.867     15  45.04    0.21
+#> te(temp,month):seriestrend4 2.984     15   9.12    0.98
+#> te(temp,month):seriestrend5 1.986     15   4.66    1.00
+#> 
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhats above 1.05 found for 33 parameters
+#>  *Diagnose further to investigate why the chains have not mixed
+#> 0 of 2000 iterations ended with a divergence (0%)
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#> 
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:43:45 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

The convergence of this model isn’t fabulous (more on this in a moment). But we can again plot the smooth functions, which this time operate on the process model. We can see the same plot using trend_effects = TRUE in the plotting functions:

-
plot(var_mod, 'smooths', trend_effects = TRUE)
-

+
plot(var_mod, 'smooths', trend_effects = TRUE)
+

The VAR matrix is of particular interest here, as it captures lagged -dependencies and cross-dependencies in the latent process model:

-
mcmc_plot(var_mod, variable = 'A', regex = TRUE, type = 'hist')
-

-

Unfortunately bayesplot doesn’t know this is a matrix of +dependencies and cross-dependencies in the latent process model. +Unfortunately bayesplot doesn’t know this is a matrix of parameters so what we see is actually the transpose of the VAR matrix. A little bit of wrangling gives us these histograms in the correct order:

-
A_pars <- matrix(NA, nrow = 5, ncol = 5)
-for(i in 1:5){
-  for(j in 1:5){
-    A_pars[i, j] <- paste0('A[', i, ',', j, ']')
-  }
-}
-mcmc_plot(var_mod, 
-          variable = as.vector(t(A_pars)), 
-          type = 'hist')
-

+
A_pars <- matrix(NA, nrow = 5, ncol = 5)
+for(i in 1:5){
+  for(j in 1:5){
+    A_pars[i, j] <- paste0('A[', i, ',', j, ']')
+  }
+}
+mcmc_plot(var_mod, 
+          variable = as.vector(t(A_pars)), 
+          type = 'hist')
+

There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in -the next timestep. So for example, the effect in cell [1,3], which is -quite strongly negative, means that an increase in the process -for series 3 (Greens) at time \(t\) is -expected to lead to a subsequent decrease in the process for +the next timestep. So for example, the effect in cell [1,3] shows how an +increase in the process for series 3 (Greens) at time \(t\) is expected to impact the process for series 1 (Bluegreens) at time \(t+1\). The latent process model is now capturing these effects and the smooth -seasonal effects, so the trend plot shows our best estimate of what the -true count should have been at each time point:

-
plot(var_mod, type = 'trend', series = 1)
-

-
plot(var_mod, type = 'trend', series = 3)
-

+seasonal effects.
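(One compact way to gauge which of these lagged effects have a consistent sign — an editorial sketch, reusing the draw-extraction pattern applied to `Sigma` below:)

```r
# posterior draws of the VAR coefficients; columns are stored
# column-major (A[1,1], A[2,1], ...), matching the matrix() fill below
A_post <- as.matrix(var_mod, variable = 'A', regex = TRUE)

# proportion of draws below zero for each cell; values near 0 or 1
# flag effects whose sign the model is confident about
p_negative <- matrix(colMeans(A_post < 0), nrow = 5, ncol = 5)
round(p_negative, 2)
```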

The process error \((\Sigma)\) captures unmodelled variation in the process models. Again, we fixed the off-diagonals to 0, so the histograms for these will look like flat boxes:

-
Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
-for(i in 1:5){
-  for(j in 1:5){
-    Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']')
-  }
-}
-mcmc_plot(var_mod, 
-          variable = as.vector(t(Sigma_pars)), 
-          type = 'hist')
-

+
Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
+for(i in 1:5){
+  for(j in 1:5){
+    Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']')
+  }
+}
+mcmc_plot(var_mod, 
+          variable = as.vector(t(Sigma_pars)), 
+          type = 'hist')
+

The observation error estimates \((\sigma_{obs})\) represent how much the model thinks we might miss the true count when we take our imperfect measurements:

-
mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')
-

+
mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')
+

These are still a bit hard to identify overall, especially when trying to estimate both process and observation error. Often we need to make some strong assumptions about which of these is more important for @@ -943,137 +897,87 @@

Correlated process errors

Let’s see if these estimates improve when we allow the process errors to be correlated. Once again, we need to first update the priors for the observation errors:

-
priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
-            prior(normal(0.5, 0.25), class = sigma))
+
priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
+            prior(normal(0.5, 0.25), class = sigma))

And now we can fit the correlated process error model

-
varcor_mod <- mvgam(  
-  # observation formula, which remains empty
-  y ~ -1,
-  
-  # process model formula, which includes the smooth functions
-  trend_formula = ~ te(temp, month, k = c(4, 4)) +
-    te(temp, month, k = c(4, 4), by = trend),
-  
-  # VAR1 model with correlated process errors
-  trend_model = 'VAR1cor',
-  family = gaussian(),
-  data = plankton_train,
-  newdata = plankton_test,
-  
-  # include the updated priors
-  priors = priors)
-

Plot convergence diagnostics for the two models, which shows that -both models display similar levels of convergence:

-
mcmc_plot(varcor_mod, type = 'rhat') +
-  labs(title = 'VAR1cor')
-

-
mcmc_plot(var_mod, type = 'rhat') +
-  labs(title = 'VAR1')
-

+
varcor_mod <- mvgam(  
+  # observation formula, which remains empty
+  y ~ -1,
+  
+  # process model formula, which includes the smooth functions
+  trend_formula = ~ te(temp, month, k = c(4, 4)) +
+    te(temp, month, k = c(4, 4), by = trend),
+  
+  # VAR1 model with correlated process errors
+  trend_model = VAR(cor = TRUE),
+  family = gaussian(),
+  data = plankton_train,
+  newdata = plankton_test,
+  
+  # include the updated priors
+  priors = priors,
+  silent = 2)

The \((\Sigma)\) matrix now captures any evidence of contemporaneously correlated process error:

-
Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
-for(i in 1:5){
-  for(j in 1:5){
-    Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']')
-  }
-}
-mcmc_plot(varcor_mod, 
-          variable = as.vector(t(Sigma_pars)), 
-          type = 'hist')
-

+
Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
+for(i in 1:5){
+  for(j in 1:5){
+    Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']')
+  }
+}
+mcmc_plot(varcor_mod, 
+          variable = as.vector(t(Sigma_pars)), 
+          type = 'hist')
+

This symmetric matrix tells us there is support for correlated -process errors. For example, series 1 and 3 (Bluegreens and Greens) show -negatively correlated process errors, while series 1 and 4 (Bluegreens -and Other.algae) show positively correlated errors. But it is easier to -interpret these estimates if we convert the covariance matrix to a -correlation matrix. Here we compute the posterior median process error -correlations:

-
Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE)
-median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median),
-                                      nrow = 5, ncol = 5))
-rownames(median_correlations) <- colnames(median_correlations) <- levels(plankton_train$series)
-
-round(median_correlations, 2)
-#>             Bluegreens Diatoms Greens Other.algae Unicells
-#> Bluegreens        1.00   -0.03   0.15       -0.05     0.32
-#> Diatoms          -0.03    1.00  -0.20        0.48     0.17
-#> Greens            0.15   -0.20   1.00        0.18     0.46
-#> Other.algae      -0.05    0.48   0.18        1.00     0.28
-#> Unicells          0.32    0.17   0.46        0.28     1.00
-

Because this model is able to capture correlated errors, the VAR -matrix has changed slightly:

-
A_pars <- matrix(NA, nrow = 5, ncol = 5)
-for(i in 1:5){
-  for(j in 1:5){
-    A_pars[i, j] <- paste0('A[', i, ',', j, ']')
-  }
-}
-mcmc_plot(varcor_mod, 
-          variable = as.vector(t(A_pars)), 
-          type = 'hist')
-

-

We still have some evidence of lagged cross-dependence, but some of -these interactions have now been pulled more toward zero. But which -model is better? Forecasts don’t appear to differ very much, at least -qualitatively (here are forecasts for three of the series, for each -model):

-
plot(var_mod, type = 'forecast', series = 1, newdata = plankton_test)
-

-
#> Out of sample CRPS:
-#> [1] 2.954187
-
plot(varcor_mod, type = 'forecast', series = 1, newdata = plankton_test)
-

-
#> Out of sample CRPS:
-#> [1] 3.114382
-
plot(var_mod, type = 'forecast', series = 2, newdata = plankton_test)
-

-
#> Out of sample CRPS:
-#> [1] 5.827133
-
plot(varcor_mod, type = 'forecast', series = 2, newdata = plankton_test)
-

-
#> Out of sample CRPS:
-#> [1] 5.503073
-
plot(var_mod, type = 'forecast', series = 3, newdata = plankton_test)
-

-
#> Out of sample CRPS:
-#> [1] 4.04051
-
plot(varcor_mod, type = 'forecast', series = 3, newdata = plankton_test)
-

-
#> Out of sample CRPS:
-#> [1] 4.007712
-

We can compute the variogram score for out of sample forecasts to get -a sense of which model does a better job of capturing the dependence -structure in the true evaluation set:

-
# create forecast objects for each model
-fcvar <- forecast(var_mod)
-fcvarcor <- forecast(varcor_mod)
-
-# plot the difference in variogram scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
-diff_scores <- score(fcvarcor, score = 'variogram')$all_series$score -
-  score(fcvar, score = 'variogram')$all_series$score
-plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
-     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
-              max(abs(diff_scores), na.rm = TRUE)),
-     bty = 'l',
-     xlab = 'Forecast horizon',
-     ylab = expression(variogram[VAR1cor]~-~variogram[VAR1]))
-abline(h = 0, lty = 'dashed')
-

+process errors, as several of the off-diagonal entries are strongly +non-zero. But it is easier to interpret these estimates if we convert +the covariance matrix to a correlation matrix. Here we compute the +posterior median process error correlations:

+
Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE)
+median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median),
+                                      nrow = 5, ncol = 5))
+rownames(median_correlations) <- colnames(median_correlations) <- levels(plankton_train$series)
+
+round(median_correlations, 2)
+#>             Bluegreens Diatoms Greens Other.algae Unicells
+#> Bluegreens        1.00   -0.04   0.16       -0.05     0.29
+#> Diatoms          -0.04    1.00  -0.21        0.48     0.17
+#> Greens            0.16   -0.21   1.00        0.17     0.46
+#> Other.algae      -0.05    0.48   0.17        1.00     0.28
+#> Unicells          0.29    0.17   0.46        0.28     1.00
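(The posterior medians hide uncertainty; as an editorial sketch, the same `Sigma_post` draws yield a credible interval for any single correlation — here Diatoms vs Other.algae, i.e. entry [2,4]:)

```r
# posterior distribution of one process error correlation
cor_draws <- apply(Sigma_post, 1, function(draw)
  cov2cor(matrix(draw, nrow = 5, ncol = 5))[2, 4])
quantile(cor_draws, probs = c(0.05, 0.5, 0.95))
```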
+

But which model is better? We can compute the variogram score for out +of sample forecasts to get a sense of which model does a better job of +capturing the dependence structure in the true evaluation set:

+
# create forecast objects for each model
+fcvar <- forecast(var_mod)
+fcvarcor <- forecast(varcor_mod)
+
+# plot the difference in variogram scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
+diff_scores <- score(fcvarcor, score = 'variogram')$all_series$score -
+  score(fcvar, score = 'variogram')$all_series$score
+plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
+     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
+              max(abs(diff_scores), na.rm = TRUE)),
+     bty = 'l',
+     xlab = 'Forecast horizon',
+     ylab = expression(variogram[VAR1cor]~-~variogram[VAR1]))
+abline(h = 0, lty = 'dashed')
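(To reduce this horizon-by-horizon plot to a headline number — an editorial sketch using the `diff_scores` vector just computed:)

```r
# negative values favour the correlated-error (VAR1cor) model
mean(diff_scores, na.rm = TRUE)      # average difference across horizons
mean(diff_scores < 0, na.rm = TRUE)  # share of horizons favouring VAR1cor
```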
+

And we can also compute the energy score for out of sample forecasts to get a sense of which model provides forecasts that are better calibrated:

-
# plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
-diff_scores <- score(fcvarcor, score = 'energy')$all_series$score -
-  score(fcvar, score = 'energy')$all_series$score
-plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
-     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
-              max(abs(diff_scores), na.rm = TRUE)),
-     bty = 'l',
-     xlab = 'Forecast horizon',
-     ylab = expression(energy[VAR1cor]~-~energy[VAR1]))
-abline(h = 0, lty = 'dashed')
-

+
# plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
+diff_scores <- score(fcvarcor, score = 'energy')$all_series$score -
+  score(fcvar, score = 'energy')$all_series$score
+plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', 
+     ylim = c(-1*max(abs(diff_scores), na.rm = TRUE),
+              max(abs(diff_scores), na.rm = TRUE)),
+     bty = 'l',
+     xlab = 'Forecast horizon',
+     ylab = expression(energy[VAR1cor]~-~energy[VAR1]))
+abline(h = 0, lty = 'dashed')
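(And the corresponding single-number comparison for the energy score — an editorial sketch reusing the forecast objects created above; lower totals are better:)

```r
# total out of sample energy score for each model
sum(score(fcvar, score = 'energy')$all_series$score, na.rm = TRUE)
sum(score(fcvarcor, score = 'energy')$all_series$score, na.rm = TRUE)
```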
+

The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we @@ -1094,7 +998,7 @@

Further reading

sparse Bayesian hierarchical vector autoregressive model for microbial dynamics in a wastewater treatment plant.” Computational Statistics & Data Analysis 179 (2023): 107659.

-

Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. “MARSS: +

Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. “MARSS: multivariate autoregressive state-space models for analyzing time-series data.R Journal. 4.1 (2012): 11.

Ward, Eric J., et al. “Inferring diff --git a/inst/doc/data_in_mvgam.R b/inst/doc/data_in_mvgam.R index bd016f61..29efbeba 100644 --- a/inst/doc/data_in_mvgam.R +++ b/inst/doc/data_in_mvgam.R @@ -1,4 +1,4 @@ -## ----echo = FALSE------------------------------------------------------ +## ----echo = FALSE------------------------------------------------------------- NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") knitr::opts_chunk$set( collapse = TRUE, @@ -7,11 +7,10 @@ knitr::opts_chunk$set( eval = NOT_CRAN ) - -## ----setup, include=FALSE---------------------------------------------- +## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, - dpi = 100, + dpi = 150, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -20,54 +19,45 @@ library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- simdat <- sim_mvgam(n_series = 4, T = 24, prop_missing = 0.2) head(simdat$data_train, 16) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- class(simdat$data_train$series) levels(simdat$data_train$series) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- all(levels(simdat$data_train$series) %in% unique(simdat$data_train$series)) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(glm(y ~ series + time, data = simdat$data_train, family = poisson())) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(gam(y ~ series + s(time, by = series), data = simdat$data_train, family = poisson())) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- gauss_dat <- data.frame(outcome = rnorm(10), series = factor('series1', levels = 'series1'), time = 1:10) gauss_dat - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- gam(outcome ~ time, family = betar(), data = gauss_dat) - -## ----error=TRUE-------------------------------------------------------- +## ----error=TRUE--------------------------------------------------------------- mvgam(outcome ~ time, family = betar(), data = gauss_dat) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- # A function to ensure all timepoints within a sequence are identical all_times_avail = function(time, min_time, max_time){ identical(as.numeric(sort(time)), @@ -91,21 +81,18 @@ if(any(checked_times$all_there == FALSE)){ cat('All series have observations at all timepoints :)') } - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- bad_times <- data.frame(time = seq(1, 16, by = 2), series = factor('series_1'), outcome = rnorm(8)) bad_times - -## ----error = 
TRUE------------------------------------------------------ +## ----error = TRUE------------------------------------------------------------- get_mvgam_priors(outcome ~ 1, data = bad_times, family = gaussian()) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- bad_times %>% dplyr::right_join(expand.grid(time = seq(min(bad_times$time), max(bad_times$time)), @@ -114,14 +101,12 @@ bad_times %>% dplyr::arrange(time) -> good_times good_times - -## ----error = TRUE------------------------------------------------------ +## ----error = TRUE------------------------------------------------------------- get_mvgam_priors(outcome ~ 1, data = good_times, family = gaussian()) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- bad_levels <- data.frame(time = 1:8, series = factor('series_1', levels = c('series_1', @@ -130,30 +115,25 @@ bad_levels <- data.frame(time = 1:8, levels(bad_levels$series) - -## ----error = TRUE------------------------------------------------------ +## ----error = TRUE------------------------------------------------------------- get_mvgam_priors(outcome ~ 1, data = bad_levels, family = gaussian()) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- setdiff(levels(bad_levels$series), unique(bad_levels$series)) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- bad_levels %>% dplyr::mutate(series = droplevels(series)) -> good_levels levels(good_levels$series) - -## ----error = TRUE------------------------------------------------------ +## ----error = TRUE------------------------------------------------------------- get_mvgam_priors(outcome ~ 1, data = good_levels, family = gaussian()) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- miss_dat <- data.frame(outcome = rnorm(10), cov = c(NA, rnorm(9)), series = factor('series1', @@ -161,14 +141,12 @@ miss_dat <- data.frame(outcome = rnorm(10), time = 1:10) miss_dat - -## ----error = TRUE------------------------------------------------------ +## ----error = TRUE------------------------------------------------------------- get_mvgam_priors(outcome ~ cov, data = miss_dat, family = gaussian()) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- miss_dat <- list(outcome = rnorm(10), series = factor('series1', levels = 'series1'), @@ -176,45 +154,38 @@ miss_dat <- list(outcome = rnorm(10), miss_dat$cov <- matrix(rnorm(50), ncol = 5, nrow = 10) miss_dat$cov[2,3] <- NA - -## ----error=TRUE-------------------------------------------------------- +## ----error=TRUE--------------------------------------------------------------- get_mvgam_priors(outcome ~ cov, data = miss_dat, family = gaussian()) - -## ----fig.alt = "Plotting time series features for GAM models in mvgam"---- +## ----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, y = 'y', series = 'all') - -## ----fig.alt = "Plotting time series features for GAM models in mvgam"---- +## 
----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, y = 'y', series = 1) - -## ----fig.alt = "Plotting time series features for GAM models in mvgam"---- +## ----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, newdata = simdat$data_test, y = 'y', series = 1) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- data("all_neon_tick_data") str(dplyr::ungroup(all_neon_tick_data)) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plotIDs <- c('SCBI_013','SCBI_002', 'SERC_001','SERC_005', 'SERC_006','SERC_012', 'BLAN_012','BLAN_005') - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- model_dat <- all_neon_tick_data %>% dplyr::ungroup() %>% dplyr::mutate(target = ixodes_scapularis) %>% @@ -222,8 +193,7 @@ model_dat <- all_neon_tick_data %>% dplyr::select(Year, epiWeek, plotID, target) %>% dplyr::mutate(epiWeek = as.numeric(epiWeek)) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- model_dat %>% # Create all possible combos of plotID, Year and epiWeek; # missing outcomes will be filled in as NA @@ -237,8 +207,7 @@ model_dat %>% dplyr::select(siteID, plotID) %>% dplyr::distinct()) -> model_dat - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- model_dat %>% dplyr::mutate(series = plotID, y = target) %>% @@ -247,8 +216,7 @@ model_dat %>% dplyr::select(-target, -plotID) %>% dplyr::arrange(Year, epiWeek, series) -> model_dat - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- model_dat %>% dplyr::ungroup() %>% dplyr::group_by(series) %>% @@ -256,18 +224,15 @@ model_dat %>% dplyr::mutate(time = seq(1, dplyr::n())) %>% dplyr::ungroup() -> model_dat - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- levels(model_dat$series) - -## ----error=TRUE-------------------------------------------------------- +## ----error=TRUE--------------------------------------------------------------- get_mvgam_priors(y ~ 1, data = model_dat, family = poisson()) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- testmod <- mvgam(y ~ s(epiWeek, by = series, bs = 'cc') + s(series, bs = 're'), trend_model = 'AR1', @@ -275,11 +240,9 @@ testmod <- mvgam(y ~ s(epiWeek, by = series, bs = 'cc') + backend = 'cmdstanr', run_model = FALSE) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- str(testmod$model_data) - -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- code(testmod) diff --git a/inst/doc/data_in_mvgam.Rmd 
b/inst/doc/data_in_mvgam.Rmd index a704cb38..b8240edd 100644 --- a/inst/doc/data_in_mvgam.Rmd +++ b/inst/doc/data_in_mvgam.Rmd @@ -24,7 +24,7 @@ knitr::opts_chunk$set( ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, - dpi = 100, + dpi = 150, fig.asp = 0.8, fig.width = 6, out.width = "60%", diff --git a/inst/doc/data_in_mvgam.html b/inst/doc/data_in_mvgam.html index 92dde520..369d4844 100644 --- a/inst/doc/data_in_mvgam.html +++ b/inst/doc/data_in_mvgam.html @@ -12,7 +12,7 @@ - + Formatting data for use in mvgam @@ -340,7 +340,7 @@

Formatting data for use in mvgam

Nicholas J Clark

-

2024-05-09

+

2024-04-16

@@ -389,21 +389,21 @@

Required long data format

simdat <- sim_mvgam(n_series = 4, T = 24, prop_missing = 0.2)
 head(simdat$data_train, 16)
 #>     y season year   series time
-#> 1   1      1    1 series_1    1
-#> 2   2      1    1 series_2    1
-#> 3   2      1    1 series_3    1
-#> 4   1      1    1 series_4    1
-#> 5  NA      2    1 series_1    2
-#> 6   0      2    1 series_2    2
-#> 7  NA      2    1 series_3    2
-#> 8   0      2    1 series_4    2
+#> 1  NA      1    1 series_1    1
+#> 2   0      1    1 series_2    1
+#> 3   0      1    1 series_3    1
+#> 4   0      1    1 series_4    1
+#> 5   0      2    1 series_1    2
+#> 6   1      2    1 series_2    2
+#> 7   1      2    1 series_3    2
+#> 8   1      2    1 series_4    2
 #> 9   0      3    1 series_1    3
-#> 10  2      3    1 series_2    3
+#> 10 NA      3    1 series_2    3
 #> 11  0      3    1 series_3    3
-#> 12  1      3    1 series_4    3
-#> 13 NA      4    1 series_1    4
-#> 14 NA      4    1 series_2    4
-#> 15  1      4    1 series_3    4
+#> 12 NA      3    1 series_4    3
+#> 13  1      4    1 series_1    4
+#> 14  0      4    1 series_2    4
+#> 15  0      4    1 series_3    4
 #> 16  2      4    1 series_4    4
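
If your own data instead arrive in wide format (one column per series), they can be reshaped into this long format before modelling. A minimal sketch, assuming the `tidyr` and `dplyr` packages are available (`wide_dat` and its column names are hypothetical):

```r
library(tidyr)
library(dplyr)

# Hypothetical wide data: one column per series
wide_dat <- data.frame(time = 1:24,
                       series_1 = rpois(24, 3),
                       series_2 = rpois(24, 3))

# Reshape to the required long format (one row per series x time combination)
wide_dat %>%
  pivot_longer(cols = starts_with('series'),
               names_to = 'series',
               values_to = 'y') %>%
  mutate(series = as.factor(series)) %>%
  arrange(time, series) -> long_dat
```
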

series as a factor variable

@@ -448,23 +448,23 @@

A single outcome variable

#> glm(formula = y ~ series + time, family = poisson(), data = simdat$data_train) #> #> Coefficients: -#> Estimate Std. Error z value Pr(>|z|) -#> (Intercept) 0.66272 0.23921 2.770 0.0056 ** -#> seriesseries_2 0.22551 0.23720 0.951 0.3417 -#> seriesseries_3 0.03122 0.24810 0.126 0.8998 -#> seriesseries_4 0.23036 0.23745 0.970 0.3320 -#> time 0.00939 0.01576 0.596 0.5512 +#> Estimate Std. Error z value Pr(>|z|) +#> (Intercept) -0.05275 0.38870 -0.136 0.8920 +#> seriesseries_2 -0.80716 0.45417 -1.777 0.0755 . +#> seriesseries_3 -1.21614 0.51290 -2.371 0.0177 * +#> seriesseries_4 0.55084 0.31854 1.729 0.0838 . +#> time 0.01725 0.02701 0.639 0.5229 #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> (Dispersion parameter for poisson family taken to be 1) #> -#> Null deviance: 115.79 on 59 degrees of freedom -#> Residual deviance: 113.87 on 55 degrees of freedom -#> (12 observations deleted due to missingness) -#> AIC: 261.2 +#> Null deviance: 120.029 on 56 degrees of freedom +#> Residual deviance: 96.641 on 52 degrees of freedom +#> (15 observations deleted due to missingness) +#> AIC: 166.83 #> -#> Number of Fisher Scoring iterations: 5
+#> Number of Fisher Scoring iterations: 6
summary(gam(y ~ series + s(time, by = series),
             data = simdat$data_train,
             family = poisson()))
@@ -476,25 +476,23 @@ 

A single outcome variable

#> y ~ series + s(time, by = series) #> #> Parametric coefficients: -#> Estimate Std. Error z value Pr(>|z|) -#> (Intercept) 0.1701 0.3241 0.525 0.5997 -#> seriesseries_2 0.6108 0.3783 1.615 0.1063 -#> seriesseries_3 0.5974 0.3698 1.616 0.1062 -#> seriesseries_4 0.6933 0.3695 1.876 0.0606 . -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -#> -#> Approximate significance of smooth terms: -#> edf Ref.df Chi.sq p-value -#> s(time):seriesseries_1 8.004 8.712 18.052 0.0311 * -#> s(time):seriesseries_2 6.822 7.906 13.501 0.0919 . -#> s(time):seriesseries_3 1.000 1.000 0.911 0.3400 -#> s(time):seriesseries_4 3.498 4.333 12.185 0.0212 * -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -#> -#> R-sq.(adj) = 0.323 Deviance explained = 53.4% -#> UBRE = 0.67586 Scale est. = 1 n = 60
+#> Estimate Std. Error z value Pr(>|z|) +#> (Intercept) -4.293 5.500 -0.781 0.435 +#> seriesseries_2 3.001 5.533 0.542 0.588 +#> seriesseries_3 3.193 5.518 0.579 0.563 +#> seriesseries_4 4.795 5.505 0.871 0.384 +#> +#> Approximate significance of smooth terms: +#> edf Ref.df Chi.sq p-value +#> s(time):seriesseries_1 7.737 8.181 6.541 0.5585 +#> s(time):seriesseries_2 3.444 4.213 4.739 0.3415 +#> s(time):seriesseries_3 1.000 1.000 0.006 0.9365 +#> s(time):seriesseries_4 3.958 4.832 11.636 0.0363 * +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> +#> R-sq.(adj) = 0.605 Deviance explained = 66.2% +#> UBRE = 0.4193 Scale est. = 1 n = 57

Depending on the observation families you plan to use when building models, there may be some restrictions that need to be satisfied within the outcome variable. For example, a Beta regression can only handle @@ -516,16 +514,16 @@

A single outcome variable

time = 1:10) gauss_dat #> outcome series time -#> 1 1.47861412 series1 1 -#> 2 0.66514221 series1 2 -#> 3 -1.28876390 series1 3 -#> 4 -0.66525713 series1 4 -#> 5 -0.33403782 series1 5 -#> 6 -0.53151573 series1 6 -#> 7 -0.02939463 series1 7 -#> 8 0.67704160 series1 8 -#> 9 0.53028655 series1 9 -#> 10 -1.52837062 series1 10 +#> 1 -1.51807964 series1 1 +#> 2 -0.12895041 series1 2 +#> 3 0.91902592 series1 3 +#> 4 -0.78329254 series1 4 +#> 5 0.28469724 series1 5 +#> 6 0.07481887 series1 6 +#> 7 0.03770728 series1 7 +#> 8 -0.37485636 series1 8 +#> 9 0.23694172 series1 9 +#> 10 -0.53988302 series1 10

A call to gam using the mgcv package leads to a model that actually fits (though it does give an unhelpful warning message):

@@ -535,14 +533,14 @@

A single outcome variable

#> Warning in family$saturated.ll(y, prior.weights, theta): saturated likelihood #> may be inaccurate #> -#> Family: Beta regression(0.135) +#> Family: Beta regression(0.44) #> Link function: logit #> #> Formula: #> outcome ~ time #> Total model degrees of freedom 2 #> -#> REML score: -177.4533 +#> REML score: -127.2706

But the same call to mvgam gives us something more useful:

mvgam(outcome ~ time,
@@ -630,14 +628,14 @@ 

Checking data with get_mvgam_priors

outcome = rnorm(8)) bad_times #> time series outcome -#> 1 1 series_1 -0.4313989 -#> 2 3 series_1 -1.5537678 -#> 3 5 series_1 1.0315560 -#> 4 7 series_1 0.8438992 -#> 5 9 series_1 -0.3758622 -#> 6 11 series_1 1.6854502 -#> 7 13 series_1 -1.0580686 -#> 8 15 series_1 -0.7285234
+#> 1 1 series_1 1.4681068 +#> 2 3 series_1 0.1796627 +#> 3 5 series_1 -0.4204020 +#> 4 7 series_1 -1.0729359 +#> 5 9 series_1 -0.1738239 +#> 6 11 series_1 -0.5463268 +#> 7 13 series_1 0.8275198 +#> 8 15 series_1 2.2085085

Next we call get_mvgam_priors by simply specifying an intercept-only model, which is enough to trigger all the checks:

get_mvgam_priors(outcome ~ 1,
@@ -657,21 +655,21 @@ 

Checking data with get_mvgam_priors

#> Joining with `by = join_by(time, series)` good_times #> time series outcome -#> 1 1 series_1 -0.4313989 +#> 1 1 series_1 1.4681068 #> 2 2 series_1 NA -#> 3 3 series_1 -1.5537678 +#> 3 3 series_1 0.1796627 #> 4 4 series_1 NA -#> 5 5 series_1 1.0315560 +#> 5 5 series_1 -0.4204020 #> 6 6 series_1 NA -#> 7 7 series_1 0.8438992 +#> 7 7 series_1 -1.0729359 #> 8 8 series_1 NA -#> 9 9 series_1 -0.3758622 +#> 9 9 series_1 -0.1738239 #> 10 10 series_1 NA -#> 11 11 series_1 1.6854502 +#> 11 11 series_1 -0.5463268 #> 12 12 series_1 NA -#> 13 13 series_1 -1.0580686 +#> 13 13 series_1 0.8275198 #> 14 14 series_1 NA -#> 15 15 series_1 -0.7285234
+#> 15 15 series_1 2.2085085
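
As an aside, the same gap-filling can be written more compactly with `tidyr::complete()`; a minimal sketch, assuming the `tidyr` package is installed and using the `bad_times` object from above:

```r
library(tidyr)

# Insert rows for the missing timepoints; 'outcome' is left as NA at the
# new rows (equivalent to the expand.grid / right_join approach above)
bad_times %>%
  complete(time = full_seq(time, period = 1), series) %>%
  dplyr::arrange(time) -> good_times_tidy
```
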

Now the call to get_mvgam_priors, using our filled-in data, should work:

get_mvgam_priors(outcome ~ 1,
@@ -680,9 +678,9 @@ 

Checking data with get_mvgam_priors

#> param_name param_length param_info #> 1 (Intercept) 1 (Intercept) #> 2 vector<lower=0>[n_series] sigma_obs; 1 observation error sd -#> prior example_change -#> 1 (Intercept) ~ student_t(3, -0.4, 2.5); (Intercept) ~ normal(0, 1); -#> 2 sigma_obs ~ student_t(3, 0, 2.5); sigma_obs ~ normal(-0.25, 0.9); +#> prior example_change +#> 1 (Intercept) ~ student_t(3, 0, 2.5); (Intercept) ~ normal(0, 1); +#> 2 sigma_obs ~ student_t(3, 0, 2.5); sigma_obs ~ normal(-0.22, 0.33); #> new_lowerbound new_upperbound #> 1 NA NA #> 2 NA NA
@@ -725,9 +723,9 @@

Checking data with get_mvgam_priors

#> param_name param_length param_info #> 1 (Intercept) 1 (Intercept) #> 2 vector<lower=0>[n_series] sigma_obs; 1 observation error sd -#> prior example_change -#> 1 (Intercept) ~ student_t(3, 0.5, 2.5); (Intercept) ~ normal(0, 1); -#> 2 sigma_obs ~ student_t(3, 0, 2.5); sigma_obs ~ normal(-0.59, 0.61); +#> prior example_change +#> 1 (Intercept) ~ student_t(3, -1, 2.5); (Intercept) ~ normal(0, 1); +#> 2 sigma_obs ~ student_t(3, 0, 2.5); sigma_obs ~ normal(0.98, 0.91); #> new_lowerbound new_upperbound #> 1 NA NA #> 2 NA NA @@ -748,22 +746,22 @@

Covariates with no NAs

levels = 'series1'), time = 1:10) miss_dat -#> outcome cov series time -#> 1 -0.3926197 NA series1 1 -#> 2 -0.1127277 1.3171518 series1 2 -#> 3 -1.5022493 -0.5112159 series1 3 -#> 4 1.2718119 -1.5898549 series1 4 -#> 5 0.3792046 -1.5860549 series1 5 -#> 6 1.0253017 -1.4026850 series1 6 -#> 7 -1.1546050 -0.6485981 series1 7 -#> 8 -0.1498811 -0.7392203 series1 8 -#> 9 2.0924367 0.5155418 series1 9 -#> 10 -0.5269962 -1.2443386 series1 10 +#> outcome cov series time +#> 1 0.77436859 NA series1 1 +#> 2 0.33222199 -0.2653819 series1 2 +#> 3 0.50385503 0.6658354 series1 3 +#> 4 -0.99577591 0.3541730 series1 4 +#> 5 -1.09812817 -2.3125954 series1 5 +#> 6 -0.49687774 -1.0778578 series1 6 +#> 7 -1.26666072 -0.1973507 series1 7 +#> 8 -0.11638041 -3.0585179 series1 8 +#> 9 0.08890432 1.7964928 series1 9 +#> 10 -0.64375459 0.7894733 series1 10
get_mvgam_priors(outcome ~ cov,
                  data = miss_dat,
                  family = gaussian())
 #> Error: Missing values found in data predictors:
-#>  Error in na.fail.default(structure(list(outcome = c(-0.392619741341104, : missing values in object
+#> Error in na.fail.default(structure(list(outcome = c(0.774368589907313, : missing values in object

Just like with the mgcv package, mvgam can also accept data as a list object. This is useful if you want to set up linear @@ -781,7 +779,7 @@

Covariates with no NAs

data = miss_dat, family = gaussian()) #> Error: Missing values found in data predictors: -#> Error in na.fail.default(structure(list(outcome = c(-0.494366970739628, : missing values in object +#> Error in na.fail.default(structure(list(outcome = c(-0.708736388395862, : missing values in object
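
For completeness, the same list structure passes the checks once the matrix covariate contains no `NA`s; a minimal sketch mirroring the objects above:

```r
# The same list-format data with a fully observed matrix covariate
# sails through the missingness checks
ok_dat <- list(outcome = rnorm(10),
               series = factor('series1', levels = 'series1'),
               time = 1:10)
ok_dat$cov <- matrix(rnorm(50), ncol = 5, nrow = 10)  # no NAs this time

get_mvgam_priors(outcome ~ cov, data = ok_dat, family = gaussian())
```
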
@@ -797,20 +795,20 @@

Plotting with plot_mvgam_series

plot_mvgam_series(data = simdat$data_train, 
                   y = 'y', 
                   series = 'all')
-

Plotting time series features for GAM models in mvgam

+

Plotting time series features for GAM models in mvgam

Or we can look more closely at the distribution for the first time series:

plot_mvgam_series(data = simdat$data_train, 
                   y = 'y', 
                   series = 1)
-

Plotting time series features for GAM models in mvgam

+

Plotting time series features for GAM models in mvgam

If you have split your data into training and testing folds (i.e. for forecast evaluation), you can include the test data in your plots:

plot_mvgam_series(data = simdat$data_train,
                   newdata = simdat$data_test,
                   y = 'y', 
                   series = 1)
-

Plotting time series features for GAM models in mvgam

+

Plotting time series features for GAM models in mvgam

Example with NEON tick data

@@ -967,12 +965,12 @@

Example with NEON tick data

#> $ S6 : num [1:8, 1:8] 1.037 -0.416 0.419 0.117 0.188 ... #> $ S7 : num [1:8, 1:8] 1.037 -0.416 0.419 0.117 0.188 ... #> $ S8 : num [1:8, 1:8] 1.037 -0.416 0.419 0.117 0.188 ... -#> $ p_coefs : Named num 0.775 +#> $ p_coefs : Named num 0.806 #> ..- attr(*, "names")= chr "(Intercept)" -#> $ p_taus : num 3285 +#> $ p_taus : num 301 #> $ ytimes : int [1:416, 1:8] 1 9 17 25 33 41 49 57 65 73 ... #> $ n_series : int 8 -#> $ sp : Named num [1:9] 5.46 8 28.79 5 2.65 ... +#> $ sp : Named num [1:9] 4.68 59.57 1.11 2.73 6.5 ... #> ..- attr(*, "names")= chr [1:9] "s(epiWeek):seriesBLAN_005" "s(epiWeek):seriesBLAN_012" "s(epiWeek):seriesSCBI_002" "s(epiWeek):seriesSCBI_013" ... #> $ y_observed : num [1:416, 1:8] 0 0 0 0 0 0 0 0 0 0 ... #> $ total_obs : int 3328 diff --git a/inst/doc/forecast_evaluation.R b/inst/doc/forecast_evaluation.R index 5515579a..8412f550 100644 --- a/inst/doc/forecast_evaluation.R +++ b/inst/doc/forecast_evaluation.R @@ -1,4 +1,4 @@ -## ----echo = FALSE------------------------------------------------------ +## ----echo = FALSE------------------------------------------------------------- NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") knitr::opts_chunk$set( collapse = TRUE, @@ -8,7 +8,7 @@ knitr::opts_chunk$set( ) -## ----setup, include=FALSE---------------------------------------------- +## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, dpi = 100, @@ -21,32 +21,32 @@ library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- set.seed(2345) simdat <- sim_mvgam(T = 100, n_series = 3, - trend_model = 'GP', + trend_model = GP(), prop_trend = 0.75, family = poisson(), prop_missing = 0.10) -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- str(simdat) -## ----fig.alt = "Plotting time series features for GAM models in mvgam"---- +## ----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, series = 'all') -## ----fig.alt = "Plotting time series features for GAM models in mvgam"---- +## ----fig.alt = "Plotting time series features for GAM models in mvgam"-------- plot_mvgam_series(data = simdat$data_train, newdata = simdat$data_test, series = 1) -## ----include=FALSE----------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + s(time, by = series, bs = 'cr', k = 20), knots = list(season = c(0.5, 12.5)), @@ -54,71 +54,70 @@ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + data = simdat$data_train) -## ----eval=FALSE-------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + ## s(time, by = series, bs = 'cr', k = 20), ## knots = list(season = c(0.5, 12.5)), ## trend_model = 'None', -## data = simdat$data_train) +## data = simdat$data_train, +## silent = 2) -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(mod1, include_betas = FALSE) -## ----fig.alt = "Plotting GAM smooth functions using mvgam"------------- 
-plot(mod1, type = 'smooths') +## ----fig.alt = "Plotting GAM smooth functions using mvgam"-------------------- +conditional_effects(mod1, type = 'link') -## ----include=FALSE----------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + gp(time, by = series, c = 5/4, k = 20, scale = FALSE), knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) -## ----eval=FALSE-------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + ## gp(time, by = series, c = 5/4, k = 20, ## scale = FALSE), ## knots = list(season = c(0.5, 12.5)), ## trend_model = 'None', -## data = simdat$data_train) +## data = simdat$data_train, +## silent = 2) -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(mod2, include_betas = FALSE) -## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"---- +## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"------ mcmc_plot(mod2, variable = c('alpha_gp'), regex = TRUE, type = 'areas') -## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"---- +## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam"------ mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas') -## ----fig.alt = "Summarising latent Gaussian Process parameters in mvgam and marginaleffects"---- -require('ggplot2') -plot_predictions(mod2, - condition = c('time', 'series', 'series'), - type = 'link') + - theme(legend.position = 'none') +## ----fig.alt = "Plotting latent Gaussian Process effects in mvgam and marginaleffects"---- +conditional_effects(mod2, type = 'link') -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- fc_mod1 <- forecast(mod1, newdata = simdat$data_test) fc_mod2 <- forecast(mod2, newdata = simdat$data_test) -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- str(fc_mod1) -## ---------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot(fc_mod1, series = 1) plot(fc_mod2, series = 1) @@ -126,7 +125,7 @@ plot(fc_mod1, series = 2) plot(fc_mod2, series = 2) -## ----include=FALSE----------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + gp(time, by = series, c = 5/4, k = 20, scale = FALSE), @@ -134,20 +133,22 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + trend_model = 'None', data = simdat$data_train, newdata = simdat$data_test, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) -## ----eval=FALSE-------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + ## gp(time, by = series, c = 5/4, k = 20, ## scale = FALSE), ## knots = list(season = c(0.5, 12.5)), ## trend_model = 'None', ## data = simdat$data_train, -## 
newdata = simdat$data_test)
+## newdata = simdat$data_test,
+## silent = 2)


-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
fc_mod2 <- forecast(mod2)


@@ -155,32 +156,32 @@ fc_mod2 <- forecast(mod2)
plot(fc_mod2, series = 1)


-## ----warning=FALSE-----------------------------------------------------
+## ----warning=FALSE------------------------------------------------------------
crps_mod1 <- score(fc_mod1, score = 'crps')
str(crps_mod1)
crps_mod1$series_1


-## ----warning=FALSE-----------------------------------------------------
+## ----warning=FALSE------------------------------------------------------------
crps_mod1 <- score(fc_mod1, score = 'crps', interval_width = 0.6)
crps_mod1$series_1


-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
link_mod1 <- forecast(mod1, newdata = simdat$data_test, type = 'link')
score(link_mod1, score = 'elpd')$series_1


-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
energy_mod2 <- score(fc_mod2, score = 'energy')
str(energy_mod2)


-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
energy_mod2$all_series


-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
crps_mod1 <- score(fc_mod1, score = 'crps')
crps_mod2 <- score(fc_mod2, score = 'crps')

diff --git a/inst/doc/forecast_evaluation.Rmd b/inst/doc/forecast_evaluation.Rmd
index 8b6a9ec5..8979593d 100644
--- a/inst/doc/forecast_evaluation.Rmd
+++ b/inst/doc/forecast_evaluation.Rmd
@@ -36,12 +36,12 @@ theme_set(theme_bw(base_size = 12, base_family = 'serif'))
The purpose of this vignette is to show how the `mvgam` package can be used to produce probabilistic forecasts and to evaluate those forecasts using a variety of proper scoring rules.

## Simulating discrete time series
-We begin by simulating some data to show how forecasts are computed and evaluated in `mvgam`. The `sim_mvgam()` function can be used to simulate series that come from a variety of response distributions as well as seasonal patterns and/or dynamic temporal patterns. Here we simulate a collection of three time count-valued series. These series all share the same seasonal pattern but have different temporal dynamics. By setting `trend_model = 'GP'` and `prop_trend = 0.75`, we are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing.
+We begin by simulating some data to show how forecasts are computed and evaluated in `mvgam`. The `sim_mvgam()` function can be used to simulate series that come from a variety of response distributions as well as seasonal patterns and/or dynamic temporal patterns. Here we simulate a collection of three count-valued time series. These series all share the same seasonal pattern but have different temporal dynamics. 
By setting `trend_model = GP()` and `prop_trend = 0.75`, we are generating time series that have smooth underlying temporal trends (evolving as Gaussian Processes with squared exponential kernel) and moderate seasonal patterns. The observations are Poisson-distributed and we allow 10% of observations to be missing. ```{r} set.seed(2345) simdat <- sim_mvgam(T = 100, n_series = 3, - trend_model = 'GP', + trend_model = GP(), prop_trend = 0.75, family = poisson(), prop_missing = 0.10) @@ -80,7 +80,8 @@ mod1 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + s(time, by = series, bs = 'cr', k = 20), knots = list(season = c(0.5, 12.5)), trend_model = 'None', - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` The model fits without issue: @@ -88,9 +89,9 @@ The model fits without issue: summary(mod1, include_betas = FALSE) ``` -And we can plot the partial effects of the splines to see that they are estimated to be highly nonlinear +And we can plot the conditional effects of the splines (on the link scale) to see that they are estimated to be highly nonlinear ```{r, fig.alt = "Plotting GAM smooth functions using mvgam"} -plot(mod1, type = 'smooths') +conditional_effects(mod1, type = 'link') ``` ### Modelling dynamics with GPs @@ -102,7 +103,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) ``` ```{r eval=FALSE} @@ -111,7 +113,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + scale = FALSE), knots = list(season = c(0.5, 12.5)), trend_model = 'None', - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` The summary for this model now contains information on the GP parameters for each time series: @@ -129,13 +132,9 @@ And now the length scale ($\rho$) parameters: mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas') ``` -We can also plot the nonlinear effects using `marginaleffects` utilities: -```{r, fig.alt = "Summarising latent Gaussian Process parameters in mvgam and marginaleffects"} -require('ggplot2') -plot_predictions(mod2, - condition = c('time', 'series', 'series'), - type = 'link') + - theme(legend.position = 'none') +We can again plot the nonlinear effects: +```{r, fig.alt = "Plotting latent Gaussian Process effects in mvgam and marginaleffects"} +conditional_effects(mod2, type = 'link') ``` The estimates for the temporal trends are fairly similar for the two models, but below we will see if they produce similar forecasts @@ -173,7 +172,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + trend_model = 'None', data = simdat$data_train, newdata = simdat$data_test, - adapt_delta = 0.98) + adapt_delta = 0.98, + silent = 2) ``` ```{r eval=FALSE} @@ -183,7 +183,8 @@ mod2 <- mvgam(y ~ s(season, bs = 'cc', k = 8) + knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - newdata = simdat$data_test) + newdata = simdat$data_test, + silent = 2) ``` Because the model already contains a forecast distribution, we do not need to feed `newdata` to the `forecast()` function: diff --git a/inst/doc/forecast_evaluation.html b/inst/doc/forecast_evaluation.html index 9058b21d..ae761ae5 100644 --- a/inst/doc/forecast_evaluation.html +++ b/inst/doc/forecast_evaluation.html @@ -12,7 +12,7 @@ - + Forecasting and forecast evaluation in mvgam @@ -341,7 +341,7 @@

Forecasting and forecast evaluation in mvgam

Nicholas J Clark

-

2024-05-09

+

2024-07-01

@@ -378,7 +378,7 @@

Simulating discrete time series

temporal patterns. Here we simulate a collection of three
count-valued time series. These series all share the same seasonal pattern
but have different temporal dynamics. By setting
-trend_model = 'GP' and prop_trend = 0.75, we
+trend_model = GP() and prop_trend = 0.75, we
are generating time series that have smooth underlying temporal trends
(evolving as Gaussian Processes with squared exponential kernel) and
moderate seasonal patterns. The observations are Poisson-distributed and
@@ -386,7 +386,7 @@

Simulating discrete time series

set.seed(2345)
 simdat <- sim_mvgam(T = 100, 
                     n_series = 3, 
-                    trend_model = 'GP',
+                    trend_model = GP(),
                     prop_trend = 0.75,
                     family = poisson(),
                     prop_missing = 0.10)
@@ -425,7 +425,7 @@

Simulating discrete time series

plot_mvgam_series(data = simdat$data_train, 
                   newdata = simdat$data_test,
                   series = 1)
-

Plotting time series features for GAM models in mvgam

+

Plotting time series features for GAM models in mvgam

Modelling dynamics with splines

The first model we will fit uses a shared cyclic spline to capture @@ -437,13 +437,14 @@

Modelling dynamics with splines

s(time, by = series, bs = 'cr', k = 20), knots = list(season = c(0.5, 12.5)), trend_model = 'None', - data = simdat$data_train)
+ data = simdat$data_train, + silent = 2)

The model fits without issue:

summary(mod1, include_betas = FALSE)
 #> GAM formula:
 #> y ~ s(season, bs = "cc", k = 8) + s(time, by = series, bs = "cr", 
 #>     k = 20)
-#> <environment: 0x000001ba309013f0>
+#> <environment: 0x000001b67206d110>
 #> 
 #> Family:
 #> poisson
@@ -468,14 +469,14 @@ 

Modelling dynamics with splines

#> #> GAM coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -0.41 -0.21 -0.039 1 813 +#> (Intercept) -0.41 -0.21 -0.052 1 855 #> #> Approximate significance of GAM smooths: -#> edf Ref.df Chi.sq p-value -#> s(season) 3.77 6 21.9 0.0039 ** -#> s(time):seriesseries_1 6.50 19 14.6 0.8790 -#> s(time):seriesseries_2 9.49 19 228.9 <2e-16 *** -#> s(time):seriesseries_3 5.93 19 18.9 0.8515 +#> edf Ref.df Chi.sq p-value +#> s(season) 3.82 6 19.6 0.0037 ** +#> s(time):seriesseries_1 7.25 19 13.2 0.7969 +#> s(time):seriesseries_2 9.81 19 173.3 0.0019 ** +#> s(time):seriesseries_3 6.05 19 19.4 0.7931 #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> @@ -486,14 +487,14 @@

Modelling dynamics with splines

#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Thu May 09 6:54:44 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:26:51 AM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1)
-

And we can plot the partial effects of the splines to see that they -are estimated to be highly nonlinear

-
plot(mod1, type = 'smooths')
-

Plotting GAM smooth functions using mvgam

+

And we can plot the conditional effects of the splines (on the link +scale) to see that they are estimated to be highly nonlinear

+
conditional_effects(mod1, type = 'link')
+

Plotting GAM smooth functions using mvgamPlotting GAM smooth functions using mvgam

Modelling dynamics with GPs

@@ -508,14 +509,15 @@

Modelling dynamics with GPs

scale = FALSE), knots = list(season = c(0.5, 12.5)), trend_model = 'None', - data = simdat$data_train)
+ data = simdat$data_train, + silent = 2)

The summary for this model now contains information on the GP parameters for each time series:

summary(mod2, include_betas = FALSE)
 #> GAM formula:
 #> y ~ s(season, bs = "cc", k = 8) + gp(time, by = series, c = 5/4, 
 #>     k = 20, scale = FALSE)
-#> <environment: 0x000001ba309013f0>
+#> <environment: 0x000001b67206d110>
 #> 
 #> Family:
 #> poisson
@@ -540,32 +542,32 @@ 

Modelling dynamics with GPs

#> #> GAM coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -1.1 -0.52 0.31 1 768 +#> (Intercept) -1.1 -0.51 0.34 1 694 #> #> GAM gp term marginal deviation (alpha) and length scale (rho) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> alpha_gp(time):seriesseries_1 0.21 0.8 2.1 1.01 763 -#> alpha_gp(time):seriesseries_2 0.74 1.4 2.9 1.00 1028 -#> alpha_gp(time):seriesseries_3 0.50 1.1 2.8 1.00 1026 -#> rho_gp(time):seriesseries_1 1.20 5.1 23.0 1.00 681 -#> rho_gp(time):seriesseries_2 2.20 10.0 17.0 1.00 644 -#> rho_gp(time):seriesseries_3 1.50 8.8 23.0 1.00 819 +#> 2.5% 50% 97.5% Rhat n_eff +#> alpha_gp(time):seriesseries_1 0.21 0.78 2.2 1 819 +#> alpha_gp(time):seriesseries_2 0.73 1.30 2.9 1 761 +#> alpha_gp(time):seriesseries_3 0.46 1.10 2.8 1 1262 +#> rho_gp(time):seriesseries_1 1.30 5.40 22.0 1 682 +#> rho_gp(time):seriesseries_2 2.10 10.00 17.0 1 450 +#> rho_gp(time):seriesseries_3 1.50 8.80 24.0 1 769 #> #> Approximate significance of GAM smooths: -#> edf Ref.df Chi.sq p-value -#> s(season) 4.12 6 25.9 0.00052 *** +#> edf Ref.df Chi.sq p-value +#> s(season) 3.36 6 21.1 0.0093 ** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters #> Rhat looks reasonable for all parameters -#> 4 of 2000 iterations ended with a divergence (0.2%) +#> 1 of 2000 iterations ended with a divergence (0.05%) #> *Try running with larger adapt_delta to remove the divergences #> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Thu May 09 6:55:28 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:27:28 AM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1)
@@ -574,19 +576,14 @@

Modelling dynamics with GPs

the marginal deviation (\(\alpha\)) parameters:

mcmc_plot(mod2, variable = c('alpha_gp'), regex = TRUE, type = 'areas')
-

Summarising latent Gaussian Process parameters in mvgam

+

Summarising latent Gaussian Process parameters in mvgam

And now the length scale (\(\rho\)) parameters:

mcmc_plot(mod2, variable = c('rho_gp'), regex = TRUE, type = 'areas')
-

Summarising latent Gaussian Process parameters in mvgam

-

We can also plot the nonlinear effects using -marginaleffects utilities:

-
require('ggplot2')
-plot_predictions(mod2, 
-                 condition = c('time', 'series', 'series'),
-                 type = 'link') +
-  theme(legend.position = 'none')
-

Summarising latent Gaussian Process parameters in mvgam and marginaleffects

+

Summarising latent Gaussian Process parameters in mvgam

+

We can again plot the nonlinear effects:

+
conditional_effects(mod2, type = 'link')
+

Plotting latent Gaussian Process effects in mvgam and marginaleffectsPlotting latent Gaussian Process effects in mvgam and marginaleffects

The estimates for the temporal trends are fairly similar for the two models, but below we will see if they produce similar forecasts

@@ -617,7 +614,7 @@

Forecasting with the forecast() function

str(fc_mod1)
 #> List of 16
 #>  $ call              :Class 'formula'  language y ~ s(season, bs = "cc", k = 8) + s(time, by = series, bs = "cr", k = 20)
-#>   .. ..- attr(*, ".Environment")=<environment: 0x000001ba309013f0> 
+#>   .. ..- attr(*, ".Environment")=<environment: 0x000001b67206d110> 
 #>  $ trend_call        : NULL
 #>  $ family            : chr "poisson"
 #>  $ family_pars       : NULL
@@ -638,42 +635,42 @@ 

Forecasting with the forecast() function

#> ..$ series_3: int [1:25] 1 0 0 1 0 0 1 0 1 0 ... #> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ... #> $ hindcasts :List of 3 -#> ..$ series_1: num [1:2000, 1:75] 1 1 0 0 0 1 1 1 0 0 ... +#> ..$ series_1: num [1:2000, 1:75] 1 1 0 0 0 0 0 0 0 0 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : NULL #> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ... -#> ..$ series_2: num [1:2000, 1:75] 0 0 0 0 0 0 0 1 0 0 ... +#> ..$ series_2: num [1:2000, 1:75] 0 0 0 0 0 0 0 0 0 0 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : NULL #> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ... -#> ..$ series_3: num [1:2000, 1:75] 3 0 2 1 0 1 2 1 5 1 ... +#> ..$ series_3: num [1:2000, 1:75] 1 4 0 4 4 1 1 6 3 1 ... #> .. ..- attr(*, "dimnames")=List of 2 #> .. .. ..$ : NULL #> .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ... #> $ forecasts :List of 3 -#> ..$ series_1: num [1:2000, 1:25] 1 3 2 1 0 0 1 1 0 0 ... -#> ..$ series_2: num [1:2000, 1:25] 6 0 0 0 0 2 0 0 0 0 ... -#> ..$ series_3: num [1:2000, 1:25] 0 1 1 3 3 1 3 2 4 2 ... +#> ..$ series_1: num [1:2000, 1:25] 0 1 0 0 0 1 1 0 0 3 ... +#> ..$ series_2: num [1:2000, 1:25] 0 2 0 1 0 2 1 0 1 0 ... +#> ..$ series_3: num [1:2000, 1:25] 1 8 4 2 2 1 2 0 2 0 ... #> - attr(*, "class")= chr "mvgam_forecast"
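
Since the forecasts slot is just a matrix of posterior draws per series, empirical summaries are easy to pull out by hand; a minimal sketch, assuming the object layout shown by str(fc_mod1) above:

```r
# Empirical 90% forecast intervals for series_1, computed directly
# from the posterior draws stored in the mvgam_forecast object
draws <- fc_mod1$forecasts$series_1   # 2000 draws x 25 forecast horizons
apply(draws, 2, quantile, probs = c(0.05, 0.5, 0.95))
```
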

We can plot the forecasts for some series from each model using the S3 plot method for objects of this class:

plot(fc_mod1, series = 1)
 #> Out of sample CRPS:
-#> 14.6296405
-

+#> 14.89051875 +

plot(fc_mod2, series = 1)
 #> Out of sample DRPS:
-#> 10.92516425
-

+#> 10.84228725 +


 plot(fc_mod1, series = 2)
 #> Out of sample CRPS:
-#> 84201962707.6125
-

+#> 495050222726067 +

plot(fc_mod2, series = 2)
-#> Out of sample DRPS:
-#> 14.311523
-

+#> Out of sample CRPS: +#> 14.7121945 +

Clearly the two models do not produce equivalent forecasts. We will come back to scoring these forecasts in a moment.

@@ -693,7 +690,8 @@

Forecasting with newdata in mvgam()

knots = list(season = c(0.5, 12.5)), trend_model = 'None', data = simdat$data_train, - newdata = simdat$data_test) + newdata = simdat$data_test, + silent = 2)

Because the model already contains a forecast distribution, we do not need to feed newdata to the forecast() function:

@@ -702,8 +700,8 @@

Forecasting with newdata in mvgam()

previously:

plot(fc_mod2, series = 1)
 #> Out of sample DRPS:
-#> 10.8576175
-

Plotting posterior forecast distributions using mvgam and R

+#> 10.78167525 +

Plotting posterior forecast distributions using mvgam and R

Scoring forecast distributions

@@ -718,54 +716,54 @@

Scoring forecast distributions

str(crps_mod1) #> List of 4 #> $ series_1 :'data.frame': 25 obs. of 5 variables: -#> ..$ score : num [1:25] 0.1938 0.1366 1.355 NA 0.0348 ... +#> ..$ score : num [1:25] 0.186 0.129 1.372 NA 0.037 ... #> ..$ in_interval : num [1:25] 1 1 1 NA 1 1 1 1 1 1 ... #> ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ... #> ..$ eval_horizon : int [1:25] 1 2 3 4 5 6 7 8 9 10 ... #> ..$ score_type : chr [1:25] "crps" "crps" "crps" "crps" ... #> $ series_2 :'data.frame': 25 obs. of 5 variables: -#> ..$ score : num [1:25] 0.379 0.306 0.941 0.5 0.573 ... +#> ..$ score : num [1:25] 0.354 0.334 0.947 0.492 0.542 ... #> ..$ in_interval : num [1:25] 1 1 1 1 1 1 1 1 1 NA ... #> ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ... #> ..$ eval_horizon : int [1:25] 1 2 3 4 5 6 7 8 9 10 ... #> ..$ score_type : chr [1:25] "crps" "crps" "crps" "crps" ... #> $ series_3 :'data.frame': 25 obs. of 5 variables: -#> ..$ score : num [1:25] 0.32 0.556 0.379 0.362 0.219 ... +#> ..$ score : num [1:25] 0.31 0.616 0.4 0.349 0.215 ... #> ..$ in_interval : num [1:25] 1 1 1 1 1 1 1 1 1 1 ... #> ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ... #> ..$ eval_horizon : int [1:25] 1 2 3 4 5 6 7 8 9 10 ... #> ..$ score_type : chr [1:25] "crps" "crps" "crps" "crps" ... #> $ all_series:'data.frame': 25 obs. of 3 variables: -#> ..$ score : num [1:25] 0.892 0.999 2.675 NA 0.827 ... +#> ..$ score : num [1:25] 0.85 1.079 2.719 NA 0.794 ... #> ..$ eval_horizon: int [1:25] 1 2 3 4 5 6 7 8 9 10 ... #> ..$ score_type : chr [1:25] "sum_crps" "sum_crps" "sum_crps" "sum_crps" ... crps_mod1$series_1 #> score in_interval interval_width eval_horizon score_type -#> 1 0.19375525 1 0.9 1 crps -#> 2 0.13663925 1 0.9 2 crps -#> 3 1.35502175 1 0.9 3 crps +#> 1 0.18582425 1 0.9 1 crps +#> 2 0.12933350 1 0.9 2 crps +#> 3 1.37181050 1 0.9 3 crps #> 4 NA NA 0.9 4 crps -#> 5 0.03482775 1 0.9 5 crps -#> 6 1.55416700 1 0.9 6 crps -#> 7 1.51028900 1 0.9 7 crps -#> 8 0.62121225 1 0.9 8 crps -#> 9 0.62630125 1 0.9 9 crps -#> 10 0.59853100 1 0.9 10 crps -#> 11 1.30998625 1 0.9 11 crps -#> 12 2.04829775 1 0.9 12 crps -#> 13 0.61251800 1 0.9 13 crps -#> 14 0.14052300 1 0.9 14 crps -#> 15 0.65110800 1 0.9 15 crps -#> 16 0.07973125 1 0.9 16 crps -#> 17 0.07675600 1 0.9 17 crps -#> 18 0.09382375 1 0.9 18 crps -#> 19 0.12356725 1 0.9 19 crps +#> 5 0.03698600 1 0.9 5 crps +#> 6 1.53997900 1 0.9 6 crps +#> 7 1.50467675 1 0.9 7 crps +#> 8 0.63460725 1 0.9 8 crps +#> 9 0.61682725 1 0.9 9 crps +#> 10 0.62428875 1 0.9 10 crps +#> 11 1.33824700 1 0.9 11 crps +#> 12 2.06378300 1 0.9 12 crps +#> 13 0.59247200 1 0.9 13 crps +#> 14 0.13560025 1 0.9 14 crps +#> 15 0.66512975 1 0.9 15 crps +#> 16 0.08238525 1 0.9 16 crps +#> 17 0.08152900 1 0.9 17 crps +#> 18 0.09446425 1 0.9 18 crps +#> 19 0.12084700 1 0.9 19 crps #> 20 NA NA 0.9 20 crps -#> 21 0.20173600 1 0.9 21 crps -#> 22 0.84066825 1 0.9 22 crps +#> 21 0.21286925 1 0.9 21 crps +#> 22 0.85799700 1 0.9 22 crps #> 23 NA NA 0.9 23 crps -#> 24 1.06489225 1 0.9 24 crps -#> 25 0.75528825 1 0.9 25 crps
+#> 24 1.14954750 1 0.9 24 crps +#> 25 0.85131425 1 0.9 25 crps

The returned list contains a data.frame for each series in the data that shows the CRPS score for each evaluation in the testing data, along with some other useful information about the fit of the @@ -779,31 +777,31 @@

Scoring forecast distributions

crps_mod1 <- score(fc_mod1, score = 'crps', interval_width = 0.6)
 crps_mod1$series_1
 #>         score in_interval interval_width eval_horizon score_type
-#> 1  0.19375525           1            0.6            1       crps
-#> 2  0.13663925           1            0.6            2       crps
-#> 3  1.35502175           0            0.6            3       crps
+#> 1  0.18582425           1            0.6            1       crps
+#> 2  0.12933350           1            0.6            2       crps
+#> 3  1.37181050           0            0.6            3       crps
 #> 4          NA          NA            0.6            4       crps
-#> 5  0.03482775           1            0.6            5       crps
-#> 6  1.55416700           0            0.6            6       crps
-#> 7  1.51028900           0            0.6            7       crps
-#> 8  0.62121225           1            0.6            8       crps
-#> 9  0.62630125           1            0.6            9       crps
-#> 10 0.59853100           1            0.6           10       crps
-#> 11 1.30998625           0            0.6           11       crps
-#> 12 2.04829775           0            0.6           12       crps
-#> 13 0.61251800           1            0.6           13       crps
-#> 14 0.14052300           1            0.6           14       crps
-#> 15 0.65110800           1            0.6           15       crps
-#> 16 0.07973125           1            0.6           16       crps
-#> 17 0.07675600           1            0.6           17       crps
-#> 18 0.09382375           1            0.6           18       crps
-#> 19 0.12356725           1            0.6           19       crps
+#> 5  0.03698600           1            0.6            5       crps
+#> 6  1.53997900           0            0.6            6       crps
+#> 7  1.50467675           0            0.6            7       crps
+#> 8  0.63460725           1            0.6            8       crps
+#> 9  0.61682725           1            0.6            9       crps
+#> 10 0.62428875           1            0.6           10       crps
+#> 11 1.33824700           0            0.6           11       crps
+#> 12 2.06378300           0            0.6           12       crps
+#> 13 0.59247200           1            0.6           13       crps
+#> 14 0.13560025           1            0.6           14       crps
+#> 15 0.66512975           1            0.6           15       crps
+#> 16 0.08238525           1            0.6           16       crps
+#> 17 0.08152900           1            0.6           17       crps
+#> 18 0.09446425           1            0.6           18       crps
+#> 19 0.12084700           1            0.6           19       crps
 #> 20         NA          NA            0.6           20       crps
-#> 21 0.20173600           1            0.6           21       crps
-#> 22 0.84066825           1            0.6           22       crps
+#> 21 0.21286925           1            0.6           21       crps
+#> 22 0.85799700           1            0.6           22       crps
 #> 23         NA          NA            0.6           23       crps
-#> 24 1.06489225           1            0.6           24       crps
-#> 25 0.75528825           1            0.6           25       crps
+#> 24 1.14954750 1 0.6 24 crps +#> 25 0.85131425 1 0.6 25 crps
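
For intuition about what score() is computing here: with posterior draws x and an observation y, the sample-based CRPS is mean(|x - y|) - 0.5 * mean(|x - x'|). A minimal sketch (the helper function is hypothetical and shown only for illustration; mvgam computes this internally):

```r
# Sample-based CRPS: E|X - y| - 0.5 * E|X - X'| (illustrative only)
crps_sample <- function(draws, y) {
  mean(abs(draws - y)) - 0.5 * mean(abs(outer(draws, draws, "-")))
}

# Should be close to the first entry of crps_mod1$series_1$score,
# assuming the mvgam_forecast layout shown by str(fc_mod1) above
crps_sample(draws = fc_mod1$forecasts$series_1[, 1],
            y = fc_mod1$test_observations$series_1[1])
```
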

We can also compare forecasts against out of sample observations using the Expected Log Predictive Density (ELPD; also known as the log score). The ELPD is a strictly proper scoring rule that can be @@ -814,31 +812,31 @@

Scoring forecast distributions

link_mod1 <- forecast(mod1, newdata = simdat$data_test, type = 'link')
 score(link_mod1, score = 'elpd')$series_1
 #>         score eval_horizon score_type
-#> 1  -0.5304414            1       elpd
-#> 2  -0.4298955            2       elpd
-#> 3  -2.9617583            3       elpd
+#> 1  -0.5343784            1       elpd
+#> 2  -0.4326190            2       elpd
+#> 3  -2.9699450            3       elpd
 #> 4          NA            4       elpd
-#> 5  -0.2007644            5       elpd
-#> 6  -3.3781408            6       elpd
-#> 7  -3.2729088            7       elpd
-#> 8  -2.0363750            8       elpd
-#> 9  -2.0670612            9       elpd
-#> 10 -2.0844818           10       elpd
-#> 11 -3.0576463           11       elpd
-#> 12 -3.6291058           12       elpd
-#> 13 -2.1692669           13       elpd
-#> 14 -0.2960899           14       elpd
-#> 15 -2.3738851           15       elpd
-#> 16 -0.2160804           16       elpd
-#> 17 -0.2036782           17       elpd
-#> 18 -0.2115539           18       elpd
-#> 19 -0.2235072           19       elpd
+#> 5  -0.1998425            5       elpd
+#> 6  -3.3976729            6       elpd
+#> 7  -3.2989297            7       elpd
+#> 8  -2.0490633            8       elpd
+#> 9  -2.0690163            9       elpd
+#> 10 -2.0822051           10       elpd
+#> 11 -3.1101639           11       elpd
+#> 12 -3.7240924           12       elpd
+#> 13 -2.1578701           13       elpd
+#> 14 -0.2899481           14       elpd
+#> 15 -2.3811862           15       elpd
+#> 16 -0.2085375           16       elpd
+#> 17 -0.1960501           17       elpd
+#> 18 -0.2036978           18       elpd
+#> 19 -0.2154374           19       elpd
 #> 20         NA           20       elpd
-#> 21 -0.2413680           21       elpd
-#> 22 -2.6791984           22       elpd
+#> 21 -0.2341597           21       elpd
+#> 22 -2.6552948           22       elpd
 #> 23         NA           23       elpd
-#> 24 -2.6851981           24       elpd
-#> 25 -0.2836901           25       elpd
+#> 24 -2.6652717 24 elpd +#> 25 -0.2759126 25 elpd
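
The log score has an equally direct Monte Carlo form: for a Poisson observation model, the pointwise ELPD is the log of the likelihood averaged over posterior draws. A minimal sketch (illustrative only, assuming link-scale draws like those returned by forecast(..., type = 'link')):

```r
# Pointwise ELPD (log score) for a Poisson model: log of the
# posterior-averaged likelihood (illustrative sketch only)
elpd_sample <- function(link_draws, y) {
  log(mean(dpois(y, lambda = exp(link_draws))))
}

# Should be close to the first entry returned by score(..., score = 'elpd')
elpd_sample(link_draws = link_mod1$forecasts$series_1[, 1],
            y = link_mod1$test_observations$series_1[1])
```
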

Finally, when we have multiple time series it may also make sense to use a multivariate proper scoring rule. mvgam offers two such options: the Energy score and the Variogram score. The first @@ -862,7 +860,7 @@

Scoring forecast distributions

#> ..$ interval_width: num [1:25] 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 ... #> ..$ eval_horizon : int [1:25] 1 2 3 4 5 6 7 8 9 10 ... #> $ all_series:'data.frame': 25 obs. of 3 variables: -#> ..$ score : num [1:25] 0.773 1.147 1.226 NA 0.458 ... +#> ..$ score : num [1:25] 0.771 1.133 1.26 NA 0.443 ... #> ..$ eval_horizon: int [1:25] 1 2 3 4 5 6 7 8 9 10 ... #> ..$ score_type : chr [1:25] "energy" "energy" "energy" "energy" ...

The returned object still provides information on interval coverage @@ -870,31 +868,31 @@

Scoring forecast distributions

now (which is provided in the all_series slot):

energy_mod2$all_series
 #>        score eval_horizon score_type
-#> 1  0.7728517            1     energy
-#> 2  1.1469836            2     energy
-#> 3  1.2258781            3     energy
+#> 1  0.7705198            1     energy
+#> 2  1.1330328            2     energy
+#> 3  1.2600785            3     energy
 #> 4         NA            4     energy
-#> 5  0.4577536            5     energy
-#> 6  1.8094487            6     energy
-#> 7  1.4887317            7     energy
-#> 8  0.7651593            8     energy
-#> 9  1.1180634            9     energy
+#> 5  0.4427578            5     energy
+#> 6  1.8848308            6     energy
+#> 7  1.4186997            7     energy
+#> 8  0.7280518            8     energy
+#> 9  1.0467755            9     energy
 #> 10        NA           10     energy
-#> 11 1.5008324           11     energy
-#> 12 3.2142460           12     energy
-#> 13 1.6129732           13     energy
-#> 14 1.2704438           14     energy
-#> 15 1.1335958           15     energy
-#> 16 1.8717420           16     energy
+#> 11 1.4172423           11     energy
+#> 12 3.2326925           12     energy
+#> 13 1.5987732           13     energy
+#> 14 1.1798872           14     energy
+#> 15 1.0311968           15     energy
+#> 16 1.8261356           16     energy
 #> 17        NA           17     energy
-#> 18 0.7953392           18     energy
-#> 19 0.9919119           19     energy
+#> 18 0.7170961           18     energy
+#> 19 0.8927311           19     energy
 #> 20        NA           20     energy
-#> 21 1.2461964           21     energy
-#> 22 1.5170615           22     energy
+#> 21 1.0544501           21     energy
+#> 22 1.3280321           22     energy
 #> 23        NA           23     energy
-#> 24 2.3824552           24     energy
-#> 25 1.5314557           25     energy
+#> 24 2.1843621 24 energy +#> 25 1.2352041 25 energy

You can use your score(s) of choice to compare different models. For example, we can compute and plot the difference in CRPS scores for each series in data. Here, a negative value means the Gaussian Process model @@ -916,7 +914,7 @@

Scoring forecast distributions

title(main = paste0('GP better in ', gp_better, ' of 25 evaluations', '\nMean difference = ', round(mean(diff_scores, na.rm = TRUE), 2))) -

+


 
 diff_scores <- crps_mod2$series_2$score -
@@ -932,7 +930,7 @@ 

Scoring forecast distributions

title(main = paste0('GP better in ', gp_better, ' of 25 evaluations', '\nMean difference = ', round(mean(diff_scores, na.rm = TRUE), 2)))
-

+


 diff_scores <- crps_mod2$series_3$score -
   crps_mod1$series_3$score
@@ -947,7 +945,7 @@ 

Scoring forecast distributions

title(main = paste0('GP better in ', gp_better, ' of 25 evaluations', '\nMean difference = ', round(mean(diff_scores, na.rm = TRUE), 2)))
-

+

The GP model consistently gives better forecasts, and the difference between scores grows quickly as the forecast horizon increases. This is not unexpected given the way that splines linearly extrapolate outside diff --git a/inst/doc/mvgam_overview.R b/inst/doc/mvgam_overview.R index b298f867..c8b40d2d 100644 --- a/inst/doc/mvgam_overview.R +++ b/inst/doc/mvgam_overview.R @@ -1,4 +1,4 @@ -## ----echo = FALSE-------------------------------------------------------------- +## ----echo = FALSE------------------------------------------------------------- NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") knitr::opts_chunk$set( collapse = TRUE, @@ -7,11 +7,10 @@ knitr::opts_chunk$set( eval = NOT_CRAN ) - -## ----setup, include=FALSE------------------------------------------------------ +## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, - dpi = 100, + dpi = 150, fig.asp = 0.8, fig.width = 6, out.width = "60%", @@ -20,25 +19,20 @@ library(mvgam) library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) - -## ----Access time series data--------------------------------------------------- +## ----Access time series data-------------------------------------------------- data("portal_data") - -## ----Inspect data format and structure----------------------------------------- +## ----Inspect data format and structure---------------------------------------- head(portal_data) - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- dplyr::glimpse(portal_data) - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- data <- sim_mvgam(n_series = 4, T = 24) head(data$data_train, 12) - -## ----Wrangle data for modelling------------------------------------------------ +## ----Wrangle data for modelling----------------------------------------------- portal_data %>% # mvgam requires a 'time' variable be present in the data to index @@ -60,131 +54,122 @@ portal_data %>% # Select the variables of interest to keep in the model_data dplyr::select(series, year, time, count, mintemp, ndvi) -> model_data - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- head(model_data) - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- dplyr::glimpse(model_data) - -## ----Summarise variables------------------------------------------------------- +## ----Summarise variables------------------------------------------------------ summary(model_data) +## ----------------------------------------------------------------------------- +image(is.na(t(model_data %>% + dplyr::arrange(dplyr::desc(time)))), axes = F, + col = c('grey80', 'darkred')) +axis(3, at = seq(0,1, len = NCOL(model_data)), labels = colnames(model_data)) -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- plot_mvgam_series(data = model_data, series = 1, y = 'count') - -## ------------------------------------------------------------------------------ +## 
----------------------------------------------------------------------------- model_data %>% # Create a 'year_fac' factor version of 'year' dplyr::mutate(year_fac = factor(year)) -> model_data - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- dplyr::glimpse(model_data) levels(model_data$year_fac) - -## ----model1, include=FALSE, results='hide'------------------------------------- +## ----model1, include=FALSE, results='hide'------------------------------------ model1 <- mvgam(count ~ s(year_fac, bs = 're') - 1, family = poisson(), data = model_data, parallel = FALSE) +## ----eval=FALSE--------------------------------------------------------------- +# model1 <- mvgam(count ~ s(year_fac, bs = 're') - 1, +# family = poisson(), +# data = model_data) -## ----eval=FALSE---------------------------------------------------------------- -## model1 <- mvgam(count ~ s(year_fac, bs = 're') - 1, -## family = poisson(), -## data = model_data) - - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- get_mvgam_priors(count ~ s(year_fac, bs = 're') - 1, family = poisson(), data = model_data) - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- summary(model1) - -## ----Extract coefficient posteriors-------------------------------------------- +## ----Extract coefficient posteriors------------------------------------------- beta_post <- as.data.frame(model1, variable = 'betas') dplyr::glimpse(beta_post) - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- code(model1) - -## ----Plot random effect estimates---------------------------------------------- +## ----Plot random effect estimates--------------------------------------------- plot(model1, type = 're') - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- mcmc_plot(object = model1, variable = 'betas', type = 'areas') - -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- pp_check(object = model1) +pp_check(model1, type = "rootogram") - -## ----Plot posterior hindcasts-------------------------------------------------- +## ----Plot posterior hindcasts------------------------------------------------- plot(model1, type = 'forecast') - -## ----Extract posterior hindcast------------------------------------------------ +## ----Extract posterior hindcast----------------------------------------------- hc <- hindcast(model1) str(hc) - -## ----Extract hindcasts on the linear predictor scale--------------------------- +## ----Extract hindcasts on the linear predictor scale-------------------------- hc <- hindcast(model1, type = 'link') range(hc$hindcasts$PP) +## ----Plot hindcasts on the linear predictor scale----------------------------- +plot(hc) -## ----Plot posterior residuals-------------------------------------------------- +## ----Plot posterior residuals------------------------------------------------- plot(model1, type = 'residuals') - -## 
------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- model_data %>% dplyr::filter(time <= 160) -> data_train model_data %>% dplyr::filter(time > 160) -> data_test - -## ----include=FALSE, message=FALSE, warning=FALSE------------------------------- +## ----include=FALSE, message=FALSE, warning=FALSE------------------------------ model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1, family = poisson(), data = data_train, newdata = data_test, parallel = FALSE) +## ----eval=FALSE--------------------------------------------------------------- +# model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1, +# family = poisson(), +# data = data_train, +# newdata = data_test) -## ----eval=FALSE---------------------------------------------------------------- -## model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1, -## family = poisson(), -## data = data_train, -## newdata = data_test) +## ----------------------------------------------------------------------------- +plot(model1b, type = 're') +## ----------------------------------------------------------------------------- +plot(model1b, type = 'forecast') -## ----Plotting predictions against test data------------------------------------ +## ----Plotting predictions against test data----------------------------------- plot(model1b, type = 'forecast', newdata = data_test) - -## ----Extract posterior forecasts----------------------------------------------- +## ----Extract posterior forecasts---------------------------------------------- fc <- forecast(model1b) str(fc) - -## ----model2, include=FALSE, message=FALSE, warning=FALSE----------------------- +## ----model2, include=FALSE, message=FALSE, warning=FALSE---------------------- model2 <- mvgam(count ~ s(year_fac, bs = 're') + ndvi - 1, family = poisson(), @@ -192,29 +177,27 @@ model2 <- mvgam(count ~ s(year_fac, bs = 're') + newdata = data_test, parallel = FALSE) +## ----eval=FALSE--------------------------------------------------------------- +# model2 <- mvgam(count ~ s(year_fac, bs = 're') + +# ndvi - 1, +# family = poisson(), +# data = data_train, +# newdata = data_test) -## ----eval=FALSE---------------------------------------------------------------- -## model2 <- mvgam(count ~ s(year_fac, bs = 're') + -## ndvi - 1, -## family = poisson(), -## data = data_train, -## newdata = data_test) - - -## ----class.output="scroll-300"------------------------------------------------- +## ----class.output="scroll-300"------------------------------------------------ summary(model2) - -## ----Posterior quantiles of model coefficients--------------------------------- +## ----Posterior quantiles of model coefficients-------------------------------- coef(model2) +## ----Plot NDVI effect--------------------------------------------------------- +plot(model2, type = 'pterms') -## ------------------------------------------------------------------------------ +## ----------------------------------------------------------------------------- beta_post <- as.data.frame(model2, variable = 'betas') dplyr::glimpse(beta_post) - -## ----Histogram of NDVI effects------------------------------------------------- +## ----Histogram of NDVI effects------------------------------------------------ hist(beta_post$ndvi, xlim = c(-1 * max(abs(beta_post$ndvi)), max(abs(beta_post$ndvi))), @@ -227,12 +210,18 @@ hist(beta_post$ndvi, lwd = 2) abline(v = 0, lwd = 2.5) +## 
+## ----warning=FALSE------------------------------------------------------------
+plot_predictions(model2, 
+                 condition = "ndvi",
+                 # include the observed count values
+                 # as points, and show rugs for the observed
+                 # ndvi and count values on the axes
+                 points = 0.5, rug = TRUE)
-## ----warning=FALSE-------------------------------------------------------------
-conditional_effects(model2)
-
+## ----warning=FALSE------------------------------------------------------------
+plot(conditional_effects(model2), ask = FALSE)
-## ----model3, include=FALSE, message=FALSE, warning=FALSE-----------------------
+## ----model3, include=FALSE, message=FALSE, warning=FALSE----------------------
 model3 <- mvgam(count ~ s(time, bs = 'bs', k = 15) + 
                   ndvi,
                 family = poisson(),
@@ -240,32 +229,39 @@ model3 <- mvgam(count ~ s(time, bs = 'bs', k = 15) + 
                 newdata = data_test,
                 parallel = FALSE)
+## ----eval=FALSE---------------------------------------------------------------
+# model3 <- mvgam(count ~ s(time, bs = 'bs', k = 15) + 
+#                   ndvi,
+#                 family = poisson(),
+#                 data = data_train,
+#                 newdata = data_test)
-## ----eval=FALSE----------------------------------------------------------------
-## model3 <- mvgam(count ~ s(time, bs = 'bs', k = 15) + 
-##                   ndvi,
-##                 family = poisson(),
-##                 data = data_train,
-##                 newdata = data_test)
+## -----------------------------------------------------------------------------
+summary(model3)
+## -----------------------------------------------------------------------------
+plot(model3, type = 'smooths')
-## ------------------------------------------------------------------------------
-summary(model3)
+## -----------------------------------------------------------------------------
+plot(model3, type = 'smooths', realisations = TRUE,
+     n_realisations = 30)
+## ----Plot smooth term derivatives, warning = FALSE, fig.asp = 1---------------
+plot(model3, type = 'smooths', derivatives = TRUE)
-## ----warning=FALSE-------------------------------------------------------------
-conditional_effects(model3, type = 'link')
+## ----warning=FALSE------------------------------------------------------------
+plot(conditional_effects(model3), ask = FALSE)
+## ----warning=FALSE------------------------------------------------------------
+plot(conditional_effects(model3, type = 'link'), ask = FALSE)
-## ----class.output="scroll-300"-------------------------------------------------
+## ----class.output="scroll-300"------------------------------------------------
 code(model3)
-
-## ------------------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(model3, type = 'forecast', newdata = data_test)
-
-## ----Plot extrapolated temporal functions using newdata------------------------
+## ----Plot extrapolated temporal functions using newdata-----------------------
 plot_mvgam_smooth(model3, smooth = 's(time)',
                   # feed newdata to the plot function to generate
                   # predictions of the temporal smooth to the end of the
@@ -274,8 +270,7 @@ plot_mvgam_smooth(model3, smooth = 's(time)',
                                        ndvi = 0))
 abline(v = max(data_train$time), lty = 'dashed', lwd = 2)
-
-## ----model4, include=FALSE-----------------------------------------------------
+## ----model4, include=FALSE----------------------------------------------------
 model4 <- mvgam(count ~ s(ndvi, k = 6),
                 family = poisson(),
                 data = data_train,
@@ -283,32 +278,31 @@ model4 <- mvgam(count ~ s(ndvi, k = 6),
                 trend_model = 'AR1',
                 parallel = FALSE)
-
-## ----eval=FALSE----------------------------------------------------------------
-## model4 <- mvgam(count ~ s(ndvi, k = 6),
-##                 family = poisson(),
-##                 data = data_train,
-##                 newdata = data_test,
-##                 trend_model = 'AR1')
-
+## ----eval=FALSE---------------------------------------------------------------
+# model4 <- mvgam(count ~ s(ndvi, k = 6),
+#                 family = poisson(),
+#                 data = data_train,
+#                 newdata = data_test,
+#                 trend_model = 'AR1')
 ## ----Summarise the mvgam autocorrelated error model, class.output="scroll-300"----
 summary(model4)
+## ----warning=FALSE, message=FALSE---------------------------------------------
+plot_predictions(model4, 
+                 condition = "ndvi",
+                 points = 0.5, rug = TRUE)
-## ------------------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(model4, type = 'forecast', newdata = data_test)
-
-## ------------------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(model4, type = 'trend', newdata = data_test)
-
-## ------------------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 loo_compare(model3, model4)
-
-## ------------------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 fc_mod3 <- forecast(model3)
 fc_mod4 <- forecast(model4)
 score_mod3 <- score(fc_mod3, score = 'drps')
diff --git a/inst/doc/mvgam_overview.Rmd b/inst/doc/mvgam_overview.Rmd
index c3e2602f..508c63d3 100644
--- a/inst/doc/mvgam_overview.Rmd
+++ b/inst/doc/mvgam_overview.Rmd
@@ -23,7 +23,7 @@ knitr::opts_chunk$set(
 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(
   echo = TRUE,
-  dpi = 100,
+  dpi = 150,
   fig.asp = 0.8,
   fig.width = 6,
   out.width = "60%",
@@ -146,7 +146,7 @@ z_{i,t} & \sim \text{Normal}(ar1_i^{distance} * z_{i,t-1}, \sigma_i)
 \end{align*}\]
 Where $distance$ is a vector of non-negative measurements of the time differences between successive observations. See the **Examples** section in `?CAR` for an illustration of how to set these models up.
 
 ## Regression formulae
 `mvgam` supports an observation model regression formula, built off the `mgcv` package, as well as an optional process model regression formula. The formulae supplied to \code{\link{mvgam}} are exactly like those supplied to `glm()` except that smooth terms, `s()`,
 `te()`, `ti()` and `t2()`, time-varying effects using `dynamic()`, monotonically increasing (using `s(x, bs = 'moi')`) or decreasing splines (using `s(x, bs = 'mod')`; see `?smooth.construct.moi.smooth.spec` for details), as well as Gaussian Process functions using `gp()`, can be added to the right hand side (and `.` is not supported in `mvgam` formulae). See `?mvgam_formulae` for more guidance. For setting up State-Space models, the optional process model formula can be used (see [the State-Space model vignette](https://nicholasjclark.github.io/mvgam/articles/trend_formulas.html) and [the shared latent states vignette](https://nicholasjclark.github.io/mvgam/articles/trend_formulas.html) for guidance on using trend formulae).
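+
+As a quick illustration, several of these term types can be combined in a single observation formula. The sketch below is a template rather than a model fitted in this vignette, using hypothetical covariates `x1` and `x2` alongside a cyclic `season` term:
+```{r eval=FALSE}
+mvgam(y ~ x1 +                         # parametric (linear) effect
+        s(season, bs = 'cc', k = 8) +  # cyclic smooth of season
+        gp(x2),                        # Gaussian Process function of x2
+      family = poisson(),
+      data = dat)                      # 'dat' is a hypothetical data.frame
+```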
@@ -223,7 +223,15 @@ You can also summarize multiple variables, which is helpful to search for data r
 summary(model_data)
 ```
 
-We have some `NA`s in our response variable `count`. These observations will generally be thrown out by most modelling packages in \R. But as you will see when we work through the tutorials, `mvgam` keeps these in the data so that predictions can be automatically returned for the full dataset. The time series and some of its descriptive features can be plotted using `plot_mvgam_series()`:
+We have some `NA`s in our response variable `count`. Let's visualize the data as a heatmap to get a sense of where these are distributed (`NA`s are shown as red bars in the plot below):
+```{r}
+image(is.na(t(model_data %>%
+                dplyr::arrange(dplyr::desc(time)))), axes = F,
+      col = c('grey80', 'darkred'))
+axis(3, at = seq(0,1, len = NCOL(model_data)), labels = colnames(model_data))
+```
+
+These observations will generally be thrown out by most modelling packages in \R. But as you will see when we work through the tutorials, `mvgam` keeps these in the data so that predictions can be automatically returned for the full dataset. The time series and some of its descriptive features can be plotted using `plot_mvgam_series()`:
 ```{r}
 plot_mvgam_series(data = model_data, series = 1, y = 'count')
 ```
@@ -306,6 +314,7 @@ mcmc_plot(object = model1,
 We can also use the wide range of posterior checking functions available in `bayesplot` (see `?mvgam::ppc_check.mvgam` for details):
 ```{r}
 pp_check(object = model1)
+pp_check(model1, type = "rootogram")
 ```
 
 There is clearly some variation in these yearly intercept estimates. But how do these translate into time-varying predictions? To understand this, we can plot posterior hindcasts from this model for the training period using `plot.mvgam` with `type = 'forecast'`
@@ -325,6 +334,13 @@ hc <- hindcast(model1, type = 'link')
 range(hc$hindcasts$PP)
 ```
 
+Objects of class `mvgam_forecast` have an associated plot function as well:
+```{r Plot hindcasts on the linear predictor scale}
+plot(hc)
+```
+
+This plot can look a bit confusing as it seems like there is linear interpolation from the end of one year to the start of the next. But this is just due to the way the lines are automatically connected in base \R plots.
+
 In any regression analysis, a key question is whether the residuals show any patterns that can be indicative of un-modelled sources of variation. For GLMs, we can use a modified residual called the [Dunn-Smyth, or randomized quantile, residual](https://www.jstor.org/stable/1390802){target="_blank"}.
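+
+Briefly, for a discrete response $y_t$ with fitted cumulative distribution function $F$, this residual is computed as $r_t = \Phi^{-1}(u_t)$, where $u_t$ is drawn uniformly from the interval $(F(y_t - 1), F(y_t))$; residuals from a well-specified model should therefore look approximately standard normal.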
 Inspect Dunn-Smyth residuals from the model using `plot.mvgam` with `type = 'residuals'`
 ```{r Plot posterior residuals}
 plot(model1, type = 'residuals')
 ```
@@ -349,12 +365,22 @@ model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1,
 ```{r eval=FALSE}
 model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1,
-                 family = poisson(),
-                 data = data_train,
-                 newdata = data_test)
+                family = poisson(),
+                data = data_train,
+                newdata = data_test)
 ```
 
-We can view the test data in the forecast plot to see that the predictions do not capture the temporal variation in the test set
+Repeating the plots above gives insight into how the model's hierarchical prior formulation provides all the structure needed to sample values for un-modelled years
+```{r}
+plot(model1b, type = 're')
+```
+
+
+```{r}
+plot(model1b, type = 'forecast')
+```
+
+We can also view the test data in the forecast plot to see that the predictions do not capture the temporal variation in the test set
 ```{r Plotting predictions against test data}
 plot(model1b, type = 'forecast', newdata = data_test)
 ```
@@ -402,7 +428,12 @@ Rather than printing the summary each time, we can also quickly look at the post
 coef(model2)
 ```
 
-Look at the estimated effect of `ndvi` using using a histogram. This can be done by first extracting the posterior coefficients:
+Look at the estimated effect of `ndvi` using `plot.mvgam` with `type = 'pterms'`
+```{r Plot NDVI effect}
+plot(model2, type = 'pterms')
+```
+
+This plot indicates a positive linear effect of `ndvi` on `log(counts)`. But it may be easier to visualise using a histogram, especially for parametric (linear) effects. This can be done by first extracting the posterior coefficients as we did in the first example:
 ```{r}
 beta_post <- as.data.frame(model2, variable = 'betas')
 dplyr::glimpse(beta_post)
@@ -424,9 +455,19 @@ abline(v = 0, lwd = 2.5)
 ```
 
 ### `marginaleffects` support
-Given our model used a nonlinear link function (log link in this example), it can still be difficult to fully understand what relationship our model is estimating between a predictor and the response. Fortunately, the `marginaleffects` package makes this relatively straightforward. Objects of class `mvgam` can be used with `marginaleffects` to inspect contrasts, scenario-based predictions, conditional and marginal effects, all on the outcome scale. Like `brms`, `mvgam` has the simple `conditional_effects` function to make quick and informative plots for main effects, which rely on `marginaleffects` support. This will likely be your go-to function for quickly understanding patterns from fitted `mvgam` models
+Given our model used a nonlinear link function (log link in this example), it can still be difficult to fully understand what relationship our model is estimating between a predictor and the response. Fortunately, the `marginaleffects` package makes this relatively straightforward. Objects of class `mvgam` can be used with `marginaleffects` to inspect contrasts, scenario-based predictions, conditional and marginal effects, all on the outcome scale.
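+
+For instance, average marginal slopes can be summarised directly. The following is a quick sketch using the `avg_slopes()` function from `marginaleffects` (the exact output format will depend on your installed version):
+```{r eval=FALSE}
+# posterior summary of the average marginal effect of ndvi
+# on the expected count
+marginaleffects::avg_slopes(model2, variables = 'ndvi')
+```
+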
+Here we will use the `plot_predictions` function from `marginaleffects` to inspect the conditional effect of `ndvi` (use `?plot_predictions` for guidance on how to modify these plots):
 ```{r warning=FALSE}
-conditional_effects(model2)
+plot_predictions(model2, 
+                 condition = "ndvi",
+                 # include the observed count values
+                 # as points, and show rugs for the observed
+                 # ndvi and count values on the axes
+                 points = 0.5, rug = TRUE)
+```
+
+Now it is easier to get a sense of the nonlinear but positive relationship estimated between `ndvi` and `count`. Like `brms`, `mvgam` has the simple `conditional_effects` function to make quick and informative plots for main effects. This will likely be your go-to function for quickly understanding patterns from fitted `mvgam` models
+```{r warning=FALSE}
+plot(conditional_effects(model2), ask = FALSE)
 ```
 
 ## Adding predictors as smooths
@@ -463,9 +504,33 @@ Where the smooth function $f_{time}$ is built by summing across a set of weighte
 summary(model3)
 ```
 
-The summary above now contains posterior estimates for the smoothing parameters as well as the basis coefficients for the nonlinear effect of `time`. We can visualize `conditional_effects` as before:
+The summary above now contains posterior estimates for the smoothing parameters as well as the basis coefficients for the nonlinear effect of `time`. We can visualize the conditional `time` effect using the `plot` function with `type = 'smooths'`:
+```{r}
+plot(model3, type = 'smooths')
+```
+
+By default this plot shows posterior empirical quantiles, but it can also be helpful to view some realizations of the underlying function (here, each line is a different potential curve drawn from the posterior of all possible curves):
+```{r}
+plot(model3, type = 'smooths', realisations = TRUE,
+     n_realisations = 30)
+```
+
+### Derivatives of smooths
+A useful question when modelling with GAMs is where the function is changing most rapidly. To address this, we can plot estimated 1st derivatives of the spline:
+```{r Plot smooth term derivatives, warning = FALSE, fig.asp = 1}
+plot(model3, type = 'smooths', derivatives = TRUE)
+```
+
+Here, values `>0` indicate the function was increasing at that time point, while values `<0` indicate the function was declining. The most rapid declines appear to have been happening around timepoint 50 and again toward the end of the training period, for example.
+
+Use `conditional_effects` again for useful plots on the outcome scale:
 ```{r warning=FALSE}
-conditional_effects(model3, type = 'link')
+plot(conditional_effects(model3), ask = FALSE)
+```
+
+Or on the link scale:
+```{r warning=FALSE}
+plot(conditional_effects(model3, type = 'link'), ask = FALSE)
 ```
 
 Inspect the underlying `Stan` code to gain some idea of how the spline is being penalized:
@@ -526,6 +591,13 @@ Here the term $z_t$ captures autocorrelated latent residuals, which are modelled
 summary(model4)
 ```
 
+View conditional smooths for the `ndvi` effect:
+```{r warning=FALSE, message=FALSE}
+plot_predictions(model4, 
+                 condition = "ndvi",
+                 points = 0.5, rug = TRUE)
+```
+
 View posterior hindcasts / forecasts and compare against the out of sample test data
 ```{r}
 plot(model4, type = 'forecast', newdata = data_test)
diff --git a/inst/doc/mvgam_overview.html b/inst/doc/mvgam_overview.html
index b390eccb..6ebc839b 100644
--- a/inst/doc/mvgam_overview.html
+++ b/inst/doc/mvgam_overview.html
@@ -12,7 +12,7 @@
-
+
Overview of the mvgam package
@@ -340,7 +340,7 @@

Overview of the mvgam package

Nicholas J Clark

-

2024-05-08

+

2024-04-16

@@ -382,8 +382,10 @@

2024-05-08

  • marginaleffects support
  • -
  • Adding predictors as -smooths
  • +
  • Adding predictors as smooths +
  • Latent dynamics in mvgam
  • Interested in contributing?
  • @@ -692,7 +694,7 @@

    Continuous time AR(1) processes

    Regression formulae

    mvgam supports an observation model regression formula, built off the mgcv package, as well as an optional process model regression formula. The formulae supplied to  are exactly like those supplied to glm() except that smooth terms, s(), te(), ti() and @@ -765,19 +767,19 @@

    Manipulating data for modelling

    ?sim_mvgam for more details

    data <- sim_mvgam(n_series = 4, T = 24)
     head(data$data_train, 12)
    -#>     y season year   series time
    -#> 1   9      1    1 series_1    1
    -#> 2  11      1    1 series_2    1
    -#> 3   7      1    1 series_3    1
    -#> 4   3      1    1 series_4    1
    -#> 5   3      2    1 series_1    2
    -#> 6   2      2    1 series_2    2
    -#> 7   0      2    1 series_3    2
    -#> 8   1      2    1 series_4    2
    -#> 9   2      3    1 series_1    3
    -#> 10  0      3    1 series_2    3
    -#> 11  0      3    1 series_3    3
    -#> 12  1      3    1 series_4    3
    +#> y season year series time +#> 1 1 1 1 series_1 1 +#> 2 0 1 1 series_2 1 +#> 3 1 1 1 series_3 1 +#> 4 1 1 1 series_4 1 +#> 5 1 2 1 series_1 2 +#> 6 1 2 1 series_2 2 +#> 7 0 2 1 series_3 2 +#> 8 1 2 1 series_4 2 +#> 9 2 3 1 series_1 3 +#> 10 0 3 1 series_2 3 +#> 11 0 3 1 series_3 3 +#> 12 1 3 1 series_4 3

    Notice how we have four different time series in these simulated data, but we do not spread the outcome values into different columns. Rather, there is only a single column for the outcome variable, labelled @@ -861,14 +863,22 @@

    Manipulating data for modelling

    #> Max. :3.9126 #>

    We have some NAs in our response variable -count. These observations will generally be thrown out by -most modelling packages in R. But as you will see when we work through -the tutorials, mvgam keeps these in the data so that -predictions can be automatically returned for the full dataset. The time -series and some of its descriptive features can be plotted using +count. Let's visualize the data as a heatmap to get a sense +of where these are distributed (NAs are shown as red bars +in the plot below):

    +
    image(is.na(t(model_data %>%
    +                dplyr::arrange(dplyr::desc(time)))), axes = F,
    +      col = c('grey80', 'darkred'))
    +axis(3, at = seq(0,1, len = NCOL(model_data)), labels = colnames(model_data))
    +

    +

    These observations will generally be thrown out by most modelling +packages in R. But as you will see when we work through the tutorials, +mvgam keeps these in the data so that predictions can be +automatically returned for the full dataset. The time series and some of +its descriptive features can be plotted using plot_mvgam_series():

    -
    plot_mvgam_series(data = model_data, series = 1, y = 'count')
    -

    +
    plot_mvgam_series(data = model_data, series = 1, y = 'count')
    +

    GLMs with temporal random effects

    @@ -893,25 +903,25 @@

    GLMs with temporal random effects

    ?smooth.construct.re.smooth.spec for details about the re basis construction that is used by both mvgam and mgcv

    -
    model_data %>%
    -  
    -  # Create a 'year_fac' factor version of 'year'
    -  dplyr::mutate(year_fac = factor(year)) -> model_data
    +
    model_data %>%
    +  
    +  # Create a 'year_fac' factor version of 'year'
    +  dplyr::mutate(year_fac = factor(year)) -> model_data

    Preview the dataset to ensure year is now a factor with a unique factor level for each year in the data

    -
    dplyr::glimpse(model_data)
    -#> Rows: 199
    -#> Columns: 7
    -#> $ series   <fct> PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, P…
    -#> $ year     <int> 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2…
    -#> $ time     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
    -#> $ count    <int> 0, 1, 2, NA, 10, NA, NA, 16, 18, 12, NA, 3, 2, NA, NA, 13, NA…
    -#> $ mintemp  <dbl> -9.710, -5.924, -0.220, 1.931, 6.568, 11.590, 14.370, 16.520,…
    -#> $ ndvi     <dbl> 1.4658889, 1.5585069, 1.3378172, 1.6589129, 1.8536561, 1.7613…
    -#> $ year_fac <fct> 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2…
    -levels(model_data$year_fac)
    -#>  [1] "2004" "2005" "2006" "2007" "2008" "2009" "2010" "2011" "2012" "2013"
    -#> [11] "2014" "2015" "2016" "2017" "2018" "2019" "2020"
    +
    dplyr::glimpse(model_data)
    +#> Rows: 199
    +#> Columns: 7
    +#> $ series   <fct> PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, PP, P…
    +#> $ year     <int> 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2…
    +#> $ time     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…
    +#> $ count    <int> 0, 1, 2, NA, 10, NA, NA, 16, 18, 12, NA, 3, 2, NA, NA, 13, NA…
    +#> $ mintemp  <dbl> -9.710, -5.924, -0.220, 1.931, 6.568, 11.590, 14.370, 16.520,…
    +#> $ ndvi     <dbl> 1.4658889, 1.5585069, 1.3378172, 1.6589129, 1.8536561, 1.7613…
    +#> $ year_fac <fct> 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2004, 2…
    +levels(model_data$year_fac)
    +#>  [1] "2004" "2005" "2006" "2007" "2008" "2009" "2010" "2011" "2012" "2013"
    +#> [11] "2014" "2015" "2016" "2017" "2018" "2019" "2020"

    We are now ready for our first mvgam model. The syntax will be familiar to users who have previously built models with mgcv. But for a refresher, see ?formula.gam @@ -933,9 +943,9 @@

    GLMs with temporal random effects

    consult the Stan user’s guide for more information about the software and the enormous variety of models that can be tackled with HMC.

    -
    model1 <- mvgam(count ~ s(year_fac, bs = 're') - 1,
    -                family = poisson(),
    -                data = model_data)
    +
    model1 <- mvgam(count ~ s(year_fac, bs = 're') - 1,
    +                family = poisson(),
    +                data = model_data)

    The model can be described mathematically for each timepoint \(t\) as follows: \[\begin{align*} \boldsymbol{count}_t & \sim \text{Poisson}(\lambda_t) \\ log(\lambda_t) & = \beta_{year[year_t]} \\ @@ -949,91 +959,90 @@

    GLMs with temporal random effects

    similar functionality to the options available in brms. For example, the default priors on \((\mu_{year})\) and \((\sigma_{year})\) can be viewed using the following code:

    -
    get_mvgam_priors(count ~ s(year_fac, bs = 're') - 1,
    -                 family = poisson(),
    -                 data = model_data)
    -#>                      param_name param_length           param_info
    -#> 1             vector[1] mu_raw;            1 s(year_fac) pop mean
    -#> 2 vector<lower=0>[1] sigma_raw;            1   s(year_fac) pop sd
    -#>                               prior                 example_change
    -#> 1            mu_raw ~ std_normal();   mu_raw ~ normal(0.13, 0.16);
    -#> 2 sigma_raw ~ student_t(3, 0, 2.5); sigma_raw ~ exponential(0.72);
    -#>   new_lowerbound new_upperbound
    -#> 1             NA             NA
    -#> 2             NA             NA
    +
    get_mvgam_priors(count ~ s(year_fac, bs = 're') - 1,
    +                 family = poisson(),
    +                 data = model_data)
    +#>                      param_name param_length           param_info
    +#> 1             vector[1] mu_raw;            1 s(year_fac) pop mean
    +#> 2 vector<lower=0>[1] sigma_raw;            1   s(year_fac) pop sd
    +#>                               prior                example_change
    +#> 1            mu_raw ~ std_normal();  mu_raw ~ normal(0.17, 0.76);
    +#> 2 sigma_raw ~ student_t(3, 0, 2.5); sigma_raw ~ exponential(0.7);
    +#>   new_lowerbound new_upperbound
    +#> 1             NA             NA
    +#> 2             NA             NA

    See examples in ?get_mvgam_priors to find out different ways that priors can be altered. Once the model has finished, the first step is to inspect the summary to ensure no major diagnostic warnings have been produced and to quickly summarise posterior distributions for key parameters

    -
    summary(model1)
    -#> GAM formula:
    -#> count ~ s(year_fac, bs = "re") - 1
    -#> <environment: 0x0000026502c9ba10>
    -#> 
    -#> Family:
    -#> poisson
    -#> 
    -#> Link function:
    -#> log
    -#> 
    -#> Trend model:
    -#> None
    -#> 
    -#> N series:
    -#> 1 
    -#> 
    -#> N timepoints:
    -#> 199 
    -#> 
    -#> Status:
    -#> Fitted using Stan 
    -#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    -#> Total post-warmup draws = 2000
    -#> 
    -#> 
    -#> GAM coefficient (beta) estimates:
    -#>                 2.5% 50% 97.5% Rhat n_eff
    -#> s(year_fac).1   1.80 2.1   2.3    1  2272
    -#> s(year_fac).2   2.50 2.7   2.9    1  3300
    -#> s(year_fac).3   3.00 3.1   3.2    1  3601
    -#> s(year_fac).4   3.10 3.3   3.4    1  2910
    -#> s(year_fac).5   1.90 2.1   2.3    1  2957
    -#> s(year_fac).6   1.50 1.8   2.0    1  2772
    -#> s(year_fac).7   1.80 2.0   2.3    1  2956
    -#> s(year_fac).8   2.80 3.0   3.1    1  2895
    -#> s(year_fac).9   3.10 3.2   3.4    1  3207
    -#> s(year_fac).10  2.60 2.8   2.9    1  2399
    -#> s(year_fac).11  3.00 3.1   3.2    1  3477
    -#> s(year_fac).12  3.10 3.2   3.3    1  3154
    -#> s(year_fac).13  2.00 2.2   2.4    1  1854
    -#> s(year_fac).14  2.50 2.6   2.8    1  2744
    -#> s(year_fac).15  1.90 2.2   2.4    1  2927
    -#> s(year_fac).16  1.90 2.1   2.3    1  3169
    -#> s(year_fac).17 -0.35 1.1   1.9    1   387
    -#> 
    -#> GAM group-level estimates:
    -#>                   2.5%  50% 97.5% Rhat n_eff
    -#> mean(s(year_fac)) 2.00 2.40   2.8 1.01   175
    -#> sd(s(year_fac))   0.45 0.67   1.1 1.01   171
    -#> 
    -#> Approximate significance of GAM smooths:
    -#>              edf Ref.df Chi.sq p-value    
    -#> s(year_fac) 12.6     17  24442  <2e-16 ***
    -#> ---
    -#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    -#> 
    -#> Stan MCMC diagnostics:
    -#> n_eff / iter looks reasonable for all parameters
    -#> Rhat looks reasonable for all parameters
    -#> 0 of 2000 iterations ended with a divergence (0%)
    -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    -#> E-FMI indicated no pathological behavior
    -#> 
    -#> Samples were drawn using NUTS(diag_e) at Wed May 08 8:46:12 PM 2024.
    -#> For each parameter, n_eff is a crude measure of effective sample size,
    -#> and Rhat is the potential scale reduction factor on split MCMC chains
    -#> (at convergence, Rhat = 1)
    +
    summary(model1)
    +#> GAM formula:
    +#> count ~ s(year_fac, bs = "re") - 1
    +#> 
    +#> Family:
    +#> poisson
    +#> 
    +#> Link function:
    +#> log
    +#> 
    +#> Trend model:
    +#> None
    +#> 
    +#> N series:
    +#> 1 
    +#> 
    +#> N timepoints:
    +#> 199 
    +#> 
    +#> Status:
    +#> Fitted using Stan 
    +#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    +#> Total post-warmup draws = 2000
    +#> 
    +#> 
    +#> GAM coefficient (beta) estimates:
    +#>                 2.5% 50% 97.5% Rhat n_eff
    +#> s(year_fac).1   1.80 2.1   2.3 1.00  2663
    +#> s(year_fac).2   2.50 2.7   2.8 1.00  2468
    +#> s(year_fac).3   3.00 3.1   3.2 1.00  3105
    +#> s(year_fac).4   3.10 3.3   3.4 1.00  2822
    +#> s(year_fac).5   1.90 2.1   2.3 1.00  3348
    +#> s(year_fac).6   1.50 1.8   2.0 1.00  2859
    +#> s(year_fac).7   1.80 2.0   2.3 1.00  2995
    +#> s(year_fac).8   2.80 3.0   3.1 1.00  3126
    +#> s(year_fac).9   3.10 3.3   3.4 1.00  2816
    +#> s(year_fac).10  2.60 2.8   2.9 1.00  2289
    +#> s(year_fac).11  3.00 3.1   3.2 1.00  2725
    +#> s(year_fac).12  3.10 3.2   3.3 1.00  2581
    +#> s(year_fac).13  2.00 2.2   2.5 1.00  2885
    +#> s(year_fac).14  2.50 2.6   2.8 1.00  2749
    +#> s(year_fac).15  1.90 2.2   2.4 1.00  2943
    +#> s(year_fac).16  1.90 2.1   2.3 1.00  2991
    +#> s(year_fac).17 -0.33 1.1   1.9 1.01   356
    +#> 
    +#> GAM group-level estimates:
    +#>                   2.5%  50% 97.5% Rhat n_eff
    +#> mean(s(year_fac)) 2.00 2.40   2.7 1.01   193
    +#> sd(s(year_fac))   0.44 0.67   1.1 1.02   172
    +#> 
    +#> Approximate significance of GAM smooths:
    +#>              edf Ref.df Chi.sq p-value    
    +#> s(year_fac) 13.8     17  23477  <2e-16 ***
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> Stan MCMC diagnostics:
    +#> n_eff / iter looks reasonable for all parameters
    +#> Rhat looks reasonable for all parameters
    +#> 0 of 2000 iterations ended with a divergence (0%)
    +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    +#> E-FMI indicated no pathological behavior
    +#> 
    +#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 12:59:57 PM 2024.
    +#> For each parameter, n_eff is a crude measure of effective sample size,
    +#> and Rhat is the potential scale reduction factor on split MCMC chains
    +#> (at convergence, Rhat = 1)

    The diagnostic messages at the bottom of the summary show that the HMC sampler did not encounter any problems or difficult posterior spaces. This is a good sign. Posterior distributions for model @@ -1042,85 +1051,85 @@

    GLMs with temporal random effects

    details). For example, we can extract the coefficients related to the GAM linear predictor (i.e. the \(\beta\)’s) into a data.frame using:

    -
    beta_post <- as.data.frame(model1, variable = 'betas')
    -dplyr::glimpse(beta_post)
    -#> Rows: 2,000
    -#> Columns: 17
    -#> $ `s(year_fac).1`  <dbl> 2.19572, 1.80459, 2.05882, 1.77283, 2.17899, 1.78306,…
    -#> $ `s(year_fac).2`  <dbl> 2.73268, 2.63982, 2.67192, 2.60732, 2.70498, 2.59426,…
    -#> $ `s(year_fac).3`  <dbl> 3.20954, 3.21016, 3.04541, 3.12612, 3.02844, 3.07486,…
    -#> $ `s(year_fac).4`  <dbl> 3.21654, 3.22332, 3.27734, 3.26635, 3.22450, 3.18903,…
    -#> $ `s(year_fac).5`  <dbl> 2.11947, 2.02341, 2.15199, 2.11768, 2.04859, 2.13145,…
    -#> $ `s(year_fac).6`  <dbl> 1.77935, 1.75774, 1.87608, 1.74988, 1.68181, 1.57843,…
    -#> $ `s(year_fac).7`  <dbl> 2.00416, 2.09224, 2.06421, 1.79570, 2.09012, 1.85510,…
    -#> $ `s(year_fac).8`  <dbl> 2.90598, 2.98992, 2.95453, 2.90335, 3.00163, 2.83986,…
    -#> $ `s(year_fac).9`  <dbl> 3.28482, 3.30058, 3.15660, 3.32875, 3.27280, 3.26740,…
    -#> $ `s(year_fac).10` <dbl> 2.94900, 2.58117, 2.62932, 2.87103, 2.61496, 2.84087,…
    -#> $ `s(year_fac).11` <dbl> 3.07873, 3.07060, 3.21571, 2.99998, 3.04865, 3.12531,…
    -#> $ `s(year_fac).12` <dbl> 3.18193, 3.15362, 3.19562, 3.23034, 3.18776, 3.10583,…
    -#> $ `s(year_fac).13` <dbl> 2.25802, 2.21252, 2.17579, 2.24304, 2.19309, 2.15013,…
    -#> $ `s(year_fac).14` <dbl> 2.48221, 2.50201, 2.64717, 2.72028, 2.53187, 2.68245,…
    -#> $ `s(year_fac).15` <dbl> 2.11611, 2.36704, 2.18749, 2.32255, 1.88132, 2.34579,…
    -#> $ `s(year_fac).16` <dbl> 2.10163, 2.10267, 2.12983, 1.97498, 2.01470, 1.93883,…
    -#> $ `s(year_fac).17` <dbl> 1.252770, 0.736639, 0.520716, -0.163687, 1.832560, 0.…
    +
    beta_post <- as.data.frame(model1, variable = 'betas')
    +dplyr::glimpse(beta_post)
    +#> Rows: 2,000
    +#> Columns: 17
    +#> $ `s(year_fac).1`  <dbl> 2.17023, 2.08413, 1.99815, 2.17572, 2.11308, 2.03050,…
    +#> $ `s(year_fac).2`  <dbl> 2.70488, 2.69887, 2.65551, 2.79651, 2.76044, 2.75108,…
    +#> $ `s(year_fac).3`  <dbl> 3.08617, 3.13429, 3.04575, 3.14824, 3.10917, 3.09809,…
    +#> $ `s(year_fac).4`  <dbl> 3.29529, 3.21044, 3.22018, 3.26644, 3.29880, 3.25638,…
    +#> $ `s(year_fac).5`  <dbl> 2.11053, 2.14516, 2.13959, 2.05244, 2.26847, 2.20820,…
    +#> $ `s(year_fac).6`  <dbl> 1.80418, 1.83343, 1.75987, 1.76972, 1.64782, 1.70765,…
    +#> $ `s(year_fac).7`  <dbl> 1.99033, 1.95772, 1.98093, 2.01777, 2.04849, 1.97815,…
    +#> $ `s(year_fac).8`  <dbl> 3.01204, 2.91291, 3.14762, 2.83082, 2.90250, 3.04050,…
    +#> $ `s(year_fac).9`  <dbl> 3.22248, 3.20205, 3.30373, 3.23181, 3.24927, 3.25232,…
    +#> $ `s(year_fac).10` <dbl> 2.71922, 2.62225, 2.82574, 2.65027, 2.69077, 2.75249,…
    +#> $ `s(year_fac).11` <dbl> 3.10525, 3.03951, 3.12914, 3.03849, 3.01198, 3.14391,…
    +#> $ `s(year_fac).12` <dbl> 3.20887, 3.23337, 3.24350, 3.16821, 3.23516, 3.18216,…
    +#> $ `s(year_fac).13` <dbl> 2.18530, 2.15358, 2.39908, 2.21862, 2.14648, 2.17067,…
    +#> $ `s(year_fac).14` <dbl> 2.66153, 2.67202, 2.64594, 2.57457, 2.38109, 2.44175,…
    +#> $ `s(year_fac).15` <dbl> 2.24898, 2.24912, 2.03587, 2.33842, 2.27868, 2.24643,…
    +#> $ `s(year_fac).16` <dbl> 2.20947, 2.21717, 2.03610, 2.17374, 2.16442, 2.14900,…
    +#> $ `s(year_fac).17` <dbl> 0.1428430, 0.8005170, -0.0136294, 0.6880930, 0.192034…

    With any model fitted in mvgam, the underlying Stan code can be viewed using the code function:

    -
    code(model1)
    -#> // Stan model code generated by package mvgam
    -#> data {
    -#>   int<lower=0> total_obs; // total number of observations
    -#>   int<lower=0> n; // number of timepoints per series
    -#>   int<lower=0> n_series; // number of series
    -#>   int<lower=0> num_basis; // total number of basis coefficients
    -#>   matrix[total_obs, num_basis] X; // mgcv GAM design matrix
    -#>   array[n, n_series] int<lower=0> ytimes; // time-ordered matrix (which col in X belongs to each [time, series] observation?)
    -#>   int<lower=0> n_nonmissing; // number of nonmissing observations
    -#>   array[n_nonmissing] int<lower=0> flat_ys; // flattened nonmissing observations
    -#>   matrix[n_nonmissing, num_basis] flat_xs; // X values for nonmissing observations
    -#>   array[n_nonmissing] int<lower=0> obs_ind; // indices of nonmissing observations
    -#> }
    -#> parameters {
    -#>   // raw basis coefficients
    -#>   vector[num_basis] b_raw;
    -#>   
    -#>   // random effect variances
    -#>   vector<lower=0>[1] sigma_raw;
    -#>   
    -#>   // random effect means
    -#>   vector[1] mu_raw;
    -#> }
    -#> transformed parameters {
    -#>   // basis coefficients
    -#>   vector[num_basis] b;
    -#>   b[1 : 17] = mu_raw[1] + b_raw[1 : 17] * sigma_raw[1];
    -#> }
    -#> model {
    -#>   // prior for random effect population variances
    -#>   sigma_raw ~ student_t(3, 0, 2.5);
    -#>   
    -#>   // prior for random effect population means
    -#>   mu_raw ~ std_normal();
    -#>   
    -#>   // prior (non-centred) for s(year_fac)...
    -#>   b_raw[1 : 17] ~ std_normal();
    -#>   {
    -#>     // likelihood functions
    -#>     flat_ys ~ poisson_log_glm(flat_xs, 0.0, b);
    -#>   }
    -#> }
    -#> generated quantities {
    -#>   vector[total_obs] eta;
    -#>   matrix[n, n_series] mus;
    -#>   array[n, n_series] int ypred;
    -#>   
    -#>   // posterior predictions
    -#>   eta = X * b;
    -#>   for (s in 1 : n_series) {
    -#>     mus[1 : n, s] = eta[ytimes[1 : n, s]];
    -#>     ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]);
    -#>   }
    -#> }
    +
    code(model1)
    +#> // Stan model code generated by package mvgam
    +#> data {
    +#>   int<lower=0> total_obs; // total number of observations
    +#>   int<lower=0> n; // number of timepoints per series
    +#>   int<lower=0> n_series; // number of series
    +#>   int<lower=0> num_basis; // total number of basis coefficients
    +#>   matrix[total_obs, num_basis] X; // mgcv GAM design matrix
    +#>   array[n, n_series] int<lower=0> ytimes; // time-ordered matrix (which col in X belongs to each [time, series] observation?)
    +#>   int<lower=0> n_nonmissing; // number of nonmissing observations
    +#>   array[n_nonmissing] int<lower=0> flat_ys; // flattened nonmissing observations
    +#>   matrix[n_nonmissing, num_basis] flat_xs; // X values for nonmissing observations
    +#>   array[n_nonmissing] int<lower=0> obs_ind; // indices of nonmissing observations
    +#> }
    +#> parameters {
    +#>   // raw basis coefficients
    +#>   vector[num_basis] b_raw;
    +#>   
    +#>   // random effect variances
    +#>   vector<lower=0>[1] sigma_raw;
    +#>   
    +#>   // random effect means
    +#>   vector[1] mu_raw;
    +#> }
    +#> transformed parameters {
    +#>   // basis coefficients
    +#>   vector[num_basis] b;
    +#>   b[1 : 17] = mu_raw[1] + b_raw[1 : 17] * sigma_raw[1];
    +#> }
    +#> model {
    +#>   // prior for random effect population variances
    +#>   sigma_raw ~ student_t(3, 0, 2.5);
    +#>   
    +#>   // prior for random effect population means
    +#>   mu_raw ~ std_normal();
    +#>   
    +#>   // prior (non-centred) for s(year_fac)...
    +#>   b_raw[1 : 17] ~ std_normal();
    +#>   {
    +#>     // likelihood functions
    +#>     flat_ys ~ poisson_log_glm(flat_xs, 0.0, b);
    +#>   }
    +#> }
    +#> generated quantities {
    +#>   vector[total_obs] eta;
    +#>   matrix[n, n_series] mus;
    +#>   array[n, n_series] int ypred;
    +#>   
    +#>   // posterior predictions
    +#>   eta = X * b;
    +#>   for (s in 1 : n_series) {
    +#>     mus[1 : n, s] = eta[ytimes[1 : n, s]];
    +#>     ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]);
    +#>   }
    +#> }

    Plotting effects and residuals

    Now for interrogating the model. We can get some sense of the @@ -1130,8 +1139,8 @@

    Plotting effects and residuals

    type = 're'. See ?plot.mvgam for more details about the types of plots that can be produced from fitted mvgam objects

    -
    plot(model1, type = 're')
    -

    +
    plot(model1, type = 're')
    +

    bayesplot support

    @@ -1139,71 +1148,84 @@

    bayesplot support

    from the bayesplot package to visualize posterior distributions and diagnostics (see ?mvgam::mcmc_plot.mvgam for details):

    -
    mcmc_plot(object = model1,
    -          variable = 'betas',
    -          type = 'areas')
    -

    +
    mcmc_plot(object = model1,
    +          variable = 'betas',
    +          type = 'areas')
    +

    We can also use the wide range of posterior checking functions available in bayesplot (see ?mvgam::ppc_check.mvgam for details):

    -
    pp_check(object = model1)
    -#> Using 10 posterior draws for ppc type 'dens_overlay' by default.
    -#> Warning in pp_check.mvgam(object = model1): NA responses are not shown in
    -#> 'pp_check'.
    -

    +
    pp_check(object = model1)
    +#> Using 10 posterior draws for ppc type 'dens_overlay' by default.
    +#> Warning in pp_check.mvgam(object = model1): NA responses are not shown in
    +#> 'pp_check'.
    +

    +
    pp_check(model1, type = "rootogram")
    +#> Using all posterior draws for ppc type 'rootogram' by default.
    +#> Warning in pp_check.mvgam(model1, type = "rootogram"): NA responses are not
    +#> shown in 'pp_check'.
    +

    There is clearly some variation in these yearly intercept estimates. But how do these translate into time-varying predictions? To understand this, we can plot posterior hindcasts from this model for the training period using plot.mvgam with type = 'forecast'

    -
    plot(model1, type = 'forecast')
    -

    +
    plot(model1, type = 'forecast')
    +

    If you wish to extract these hindcasts for other downstream analyses, the hindcast function can be used. This will return a list object of class mvgam_forecast. In the hindcasts slot, a matrix of posterior retrodictions will be returned for each series in the data (only one series in our example):

    -
    hc <- hindcast(model1)
    -str(hc)
    -#> List of 15
    -#>  $ call              :Class 'formula'  language count ~ s(year_fac, bs = "re") - 1
    -#>   .. ..- attr(*, ".Environment")=<environment: 0x0000026502c9ba10> 
    -#>  $ trend_call        : NULL
    -#>  $ family            : chr "poisson"
    -#>  $ trend_model       : chr "None"
    -#>  $ drift             : logi FALSE
    -#>  $ use_lv            : logi FALSE
    -#>  $ fit_engine        : chr "stan"
    -#>  $ type              : chr "response"
    -#>  $ series_names      : chr "PP"
    -#>  $ train_observations:List of 1
    -#>   ..$ PP: int [1:199] 0 1 2 NA 10 NA NA 16 18 12 ...
    -#>  $ train_times       : num [1:199] 1 2 3 4 5 6 7 8 9 10 ...
    -#>  $ test_observations : NULL
    -#>  $ test_times        : NULL
    -#>  $ hindcasts         :List of 1
    -#>   ..$ PP: num [1:2000, 1:199] 11 9 9 7 8 4 12 9 13 3 ...
    -#>   .. ..- attr(*, "dimnames")=List of 2
    -#>   .. .. ..$ : NULL
    -#>   .. .. ..$ : chr [1:199] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
    -#>  $ forecasts         : NULL
    -#>  - attr(*, "class")= chr "mvgam_forecast"
    +
    hc <- hindcast(model1)
    +str(hc)
    +#> List of 15
    +#>  $ call              :Class 'formula'  language count ~ s(year_fac, bs = "re") - 1
    +#>   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
    +#>  $ trend_call        : NULL
    +#>  $ family            : chr "poisson"
    +#>  $ trend_model       : chr "None"
    +#>  $ drift             : logi FALSE
    +#>  $ use_lv            : logi FALSE
    +#>  $ fit_engine        : chr "stan"
    +#>  $ type              : chr "response"
    +#>  $ series_names      : chr "PP"
    +#>  $ train_observations:List of 1
    +#>   ..$ PP: int [1:199] 0 1 2 NA 10 NA NA 16 18 12 ...
    +#>  $ train_times       : num [1:199] 1 2 3 4 5 6 7 8 9 10 ...
    +#>  $ test_observations : NULL
    +#>  $ test_times        : NULL
    +#>  $ hindcasts         :List of 1
    +#>   ..$ PP: num [1:2000, 1:199] 9 6 10 6 12 8 10 5 8 6 ...
    +#>   .. ..- attr(*, "dimnames")=List of 2
    +#>   .. .. ..$ : NULL
    +#>   .. .. ..$ : chr [1:199] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
    +#>  $ forecasts         : NULL
    +#>  - attr(*, "class")= chr "mvgam_forecast"

    You can also extract these hindcasts on the linear predictor scale, which in this case is the log scale (our Poisson GLM used a log link function). Sometimes this can be useful for asking more targeted questions about drivers of variation:

    -
    hc <- hindcast(model1, type = 'link')
    -range(hc$hindcasts$PP)
    -#> [1] -1.62312  3.47602
    +
    hc <- hindcast(model1, type = 'link')
    +range(hc$hindcasts$PP)
    +#> [1] -1.51216  3.46162
    +

    Objects of class mvgam_forecast have an associated plot +function as well:

    +
    plot(hc)
    +

    +

    This plot can look a bit confusing as it seems like there is linear +interpolation from the end of one year to the start of the next. But +this is just due to the way the lines are automatically connected in +base R plots.

    In any regression analysis, a key question is whether the residuals show any patterns that can be indicative of un-modelled sources of variation. For GLMs, we can use a modified residual called the Dunn-Smyth, or randomized quantile, residual. Inspect Dunn-Smyth residuals from the model using plot.mvgam with type = 'residuals'

    -
    plot(model1, type = 'residuals')
    -

    +
    plot(model1, type = 'residuals')
    +

    @@ -1216,56 +1238,65 @@

    Automatic forecasting for new data

    testing sets before re-running the model. We can then supply the test set as newdata. For splitting, we will make use of the filter function from dplyr

    -
    model_data %>% 
    -  dplyr::filter(time <= 160) -> data_train 
    -model_data %>% 
    -  dplyr::filter(time > 160) -> data_test
    -
    model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1,
    -                 family = poisson(),
    -                 data = data_train,
    -                 newdata = data_test)
    -

    We can view the test data in the forecast plot to see that the +

    model_data %>% 
    +  dplyr::filter(time <= 160) -> data_train 
    +model_data %>% 
    +  dplyr::filter(time > 160) -> data_test
    +
    model1b <- mvgam(count ~ s(year_fac, bs = 're') - 1,
    +                family = poisson(),
    +                data = data_train,
    +                newdata = data_test)
    +

    Repeating the plots above gives insight into how the model’s +hierarchical prior formulation provides all the structure needed to +sample values for un-modelled years

    +
    plot(model1b, type = 're')
    +

    +
    plot(model1b, type = 'forecast')
    +

    +
    #> Out of sample DRPS:
    +#> [1] 182.6177
    +

    We can also view the test data in the forecast plot to see that the predictions do not capture the temporal variation in the test set

    -
    plot(model1b, type = 'forecast', newdata = data_test)
    -#> Out of sample DRPS:
    -#> 179.97371575
    -

    +
    plot(model1b, type = 'forecast', newdata = data_test)
    +

    +
    #> Out of sample DRPS:
    +#> [1] 182.6177

    As with the hindcast function, we can use the forecast function to automatically extract the posterior distributions for these predictions. This also returns an object of class mvgam_forecast, but now it will contain both the hindcasts and forecasts for each series in the data:

    -
    fc <- forecast(model1b)
    -str(fc)
    -#> List of 16
    -#>  $ call              :Class 'formula'  language count ~ s(year_fac, bs = "re") - 1
    -#>   .. ..- attr(*, ".Environment")=<environment: 0x0000026502c9ba10> 
    -#>  $ trend_call        : NULL
    -#>  $ family            : chr "poisson"
    -#>  $ family_pars       : NULL
    -#>  $ trend_model       : chr "None"
    -#>  $ drift             : logi FALSE
    -#>  $ use_lv            : logi FALSE
    -#>  $ fit_engine        : chr "stan"
    -#>  $ type              : chr "response"
    -#>  $ series_names      : Factor w/ 1 level "PP": 1
    -#>  $ train_observations:List of 1
    -#>   ..$ PP: int [1:160] 0 1 2 NA 10 NA NA 16 18 12 ...
    -#>  $ train_times       : num [1:160] 1 2 3 4 5 6 7 8 9 10 ...
    -#>  $ test_observations :List of 1
    -#>   ..$ PP: int [1:39] NA 0 0 10 3 14 18 NA 28 46 ...
    -#>  $ test_times        : num [1:39] 161 162 163 164 165 166 167 168 169 170 ...
    -#>  $ hindcasts         :List of 1
    -#>   ..$ PP: num [1:2000, 1:160] 14 7 5 9 5 7 10 7 11 5 ...
    -#>   .. ..- attr(*, "dimnames")=List of 2
    -#>   .. .. ..$ : NULL
    -#>   .. .. ..$ : chr [1:160] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
    -#>  $ forecasts         :List of 1
    -#>   ..$ PP: num [1:2000, 1:39] 8 13 16 15 15 11 10 7 7 4 ...
    -#>   .. ..- attr(*, "dimnames")=List of 2
    -#>   .. .. ..$ : NULL
    -#>   .. .. ..$ : chr [1:39] "ypred[161,1]" "ypred[162,1]" "ypred[163,1]" "ypred[164,1]" ...
    -#>  - attr(*, "class")= chr "mvgam_forecast"
    +
    fc <- forecast(model1b)
    +str(fc)
    +#> List of 16
    +#>  $ call              :Class 'formula'  language count ~ s(year_fac, bs = "re") - 1
    +#>   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
    +#>  $ trend_call        : NULL
    +#>  $ family            : chr "poisson"
    +#>  $ family_pars       : NULL
    +#>  $ trend_model       : chr "None"
    +#>  $ drift             : logi FALSE
    +#>  $ use_lv            : logi FALSE
    +#>  $ fit_engine        : chr "stan"
    +#>  $ type              : chr "response"
    +#>  $ series_names      : Factor w/ 1 level "PP": 1
    +#>  $ train_observations:List of 1
    +#>   ..$ PP: int [1:160] 0 1 2 NA 10 NA NA 16 18 12 ...
    +#>  $ train_times       : num [1:160] 1 2 3 4 5 6 7 8 9 10 ...
    +#>  $ test_observations :List of 1
    +#>   ..$ PP: int [1:39] NA 0 0 10 3 14 18 NA 28 46 ...
    +#>  $ test_times        : num [1:39] 161 162 163 164 165 166 167 168 169 170 ...
    +#>  $ hindcasts         :List of 1
    +#>   ..$ PP: num [1:2000, 1:160] 10 7 8 11 6 11 9 11 5 2 ...
    +#>   .. ..- attr(*, "dimnames")=List of 2
    +#>   .. .. ..$ : NULL
    +#>   .. .. ..$ : chr [1:160] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
    +#>  $ forecasts         :List of 1
    +#>   ..$ PP: num [1:2000, 1:39] 5 12 10 7 7 8 11 14 8 12 ...
    +#>   .. ..- attr(*, "dimnames")=List of 2
    +#>   .. .. ..$ : NULL
    +#>   .. .. ..$ : chr [1:39] "ypred[161,1]" "ypred[162,1]" "ypred[163,1]" "ypred[164,1]" ...
    +#>  - attr(*, "class")= chr "mvgam_forecast"

    Adding predictors as “fixed” effects

    @@ -1274,11 +1305,11 @@

    Adding predictors as “fixed” effects

    our observations. Predictors are easily incorporated into GLMs / GAMs. Here, we will update the model from above by including a parametric (fixed) effect of ndvi as a linear predictor:

    -
    model2 <- mvgam(count ~ s(year_fac, bs = 're') + 
    -                  ndvi - 1,
    -                family = poisson(),
    -                data = data_train,
    -                newdata = data_test)
    +
    model2 <- mvgam(count ~ s(year_fac, bs = 're') + 
    +                  ndvi - 1,
    +                family = poisson(),
    +                data = data_train,
    +                newdata = data_test)

    The model can be described mathematically as follows: \[\begin{align*} \boldsymbol{count}_t & \sim \text{Poisson}(\lambda_t) \\ log(\lambda_t) & = \beta_{year[year_t]} + \beta_{ndvi} * @@ -1288,140 +1319,145 @@

    Adding predictors as “fixed” effects

    Where the \(\beta_{year}\) effects are the same as before but we now have another predictor \((\beta_{ndvi})\) that applies to the ndvi value at each timepoint \(t\). Inspect the summary of this model

    -
    summary(model2)
    -#> GAM formula:
    -#> count ~ ndvi + s(year_fac, bs = "re") - 1
    -#> <environment: 0x0000026502c9ba10>
    -#> 
    -#> Family:
    -#> poisson
    -#> 
    -#> Link function:
    -#> log
    -#> 
    -#> Trend model:
    -#> None
    -#> 
    -#> N series:
    -#> 1 
    -#> 
    -#> N timepoints:
    -#> 199 
    -#> 
    -#> Status:
    -#> Fitted using Stan 
    -#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    -#> Total post-warmup draws = 2000
    -#> 
    -#> 
    -#> GAM coefficient (beta) estimates:
    -#>                2.5%  50% 97.5% Rhat n_eff
    -#> ndvi           0.32 0.39  0.46    1  1941
    -#> s(year_fac).1  1.10 1.40  1.70    1  2605
    -#> s(year_fac).2  1.80 2.00  2.20    1  2245
    -#> s(year_fac).3  2.20 2.40  2.60    1  2285
    -#> s(year_fac).4  2.30 2.50  2.70    1  2014
    -#> s(year_fac).5  1.20 1.40  1.60    1  2363
    -#> s(year_fac).6  1.00 1.30  1.50    1  2822
    -#> s(year_fac).7  1.10 1.40  1.70    1  2801
    -#> s(year_fac).8  2.10 2.30  2.50    1  1854
    -#> s(year_fac).9  2.70 2.90  3.00    1  1900
    -#> s(year_fac).10 2.00 2.20  2.40    1  2160
    -#> s(year_fac).11 2.30 2.40  2.60    1  2236
    -#> s(year_fac).12 2.50 2.70  2.80    1  2288
    -#> s(year_fac).13 1.40 1.60  1.90    1  2756
    -#> s(year_fac).14 0.67 2.00  3.30    1  1454
    -#> s(year_fac).15 0.68 2.00  3.30    1  1552
    -#> s(year_fac).16 0.61 2.00  3.20    1  1230
    -#> s(year_fac).17 0.60 2.00  3.20    1  1755
    -#> 
    -#> GAM group-level estimates:
    -#>                   2.5%  50% 97.5% Rhat n_eff
    -#> mean(s(year_fac)) 1.60 2.00  2.30    1   375
    -#> sd(s(year_fac))   0.42 0.59  0.99    1   419
    -#> 
    -#> Approximate significance of GAM smooths:
    -#>             edf Ref.df Chi.sq p-value    
    -#> s(year_fac)  10     17   2971  <2e-16 ***
    -#> ---
    -#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    -#> 
    -#> Stan MCMC diagnostics:
    -#> n_eff / iter looks reasonable for all parameters
    -#> Rhat looks reasonable for all parameters
    -#> 0 of 2000 iterations ended with a divergence (0%)
    -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    -#> E-FMI indicated no pathological behavior
    -#> 
    -#> Samples were drawn using NUTS(diag_e) at Wed May 08 8:47:27 PM 2024.
    -#> For each parameter, n_eff is a crude measure of effective sample size,
    -#> and Rhat is the potential scale reduction factor on split MCMC chains
    -#> (at convergence, Rhat = 1)
    +
    summary(model2)
    +#> GAM formula:
    +#> count ~ ndvi + s(year_fac, bs = "re") - 1
    +#> 
    +#> Family:
    +#> poisson
    +#> 
    +#> Link function:
    +#> log
    +#> 
    +#> Trend model:
    +#> None
    +#> 
    +#> N series:
    +#> 1 
    +#> 
    +#> N timepoints:
    +#> 160 
    +#> 
    +#> Status:
    +#> Fitted using Stan 
    +#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    +#> Total post-warmup draws = 2000
    +#> 
    +#> 
    +#> GAM coefficient (beta) estimates:
    +#>                2.5%  50% 97.5% Rhat n_eff
    +#> ndvi           0.32 0.39  0.46    1  1696
    +#> s(year_fac).1  1.10 1.40  1.70    1  2512
    +#> s(year_fac).2  1.80 2.00  2.20    1  2210
    +#> s(year_fac).3  2.20 2.40  2.60    1  2109
    +#> s(year_fac).4  2.30 2.50  2.70    1  1780
    +#> s(year_fac).5  1.20 1.40  1.60    1  2257
    +#> s(year_fac).6  1.00 1.30  1.50    1  2827
    +#> s(year_fac).7  1.10 1.40  1.70    1  2492
    +#> s(year_fac).8  2.10 2.30  2.50    1  2188
    +#> s(year_fac).9  2.70 2.90  3.00    1  2014
    +#> s(year_fac).10 2.00 2.20  2.40    1  2090
    +#> s(year_fac).11 2.30 2.40  2.60    1  1675
    +#> s(year_fac).12 2.50 2.70  2.80    1  2108
    +#> s(year_fac).13 1.40 1.60  1.80    1  2161
    +#> s(year_fac).14 0.46 2.00  3.20    1  1849
    +#> s(year_fac).15 0.53 2.00  3.30    1  1731
    +#> s(year_fac).16 0.53 2.00  3.30    1  1859
    +#> s(year_fac).17 0.59 1.90  3.20    1  1761
    +#> 
    +#> GAM group-level estimates:
    +#>                   2.5%  50% 97.5% Rhat n_eff
    +#> mean(s(year_fac))  1.6 2.00   2.3 1.01   397
    +#> sd(s(year_fac))    0.4 0.59   1.0 1.01   395
    +#> 
    +#> Approximate significance of GAM smooths:
    +#>              edf Ref.df Chi.sq p-value    
    +#> s(year_fac) 11.2     17   3096  <2e-16 ***
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> Stan MCMC diagnostics:
    +#> n_eff / iter looks reasonable for all parameters
    +#> Rhat looks reasonable for all parameters
    +#> 0 of 2000 iterations ended with a divergence (0%)
    +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    +#> E-FMI indicated no pathological behavior
    +#> 
    +#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:00:50 PM 2024.
    +#> For each parameter, n_eff is a crude measure of effective sample size,
    +#> and Rhat is the potential scale reduction factor on split MCMC chains
    +#> (at convergence, Rhat = 1)

    Rather than printing the summary each time, we can also quickly look at the posterior empirical quantiles for the fixed effect of ndvi (and other linear predictor coefficients) using coef:

    -
    coef(model2)
    -#>                     2.5%      50%     97.5% Rhat n_eff
    -#> ndvi           0.3181071 0.388789 0.4593694    1  1941
    -#> s(year_fac).1  1.1403257 1.401500 1.6865907    1  2605
    -#> s(year_fac).2  1.7973795 2.001035 2.2164728    1  2245
    -#> s(year_fac).3  2.1907445 2.380020 2.5654575    1  2285
    -#> s(year_fac).4  2.3191002 2.508015 2.6922312    1  2014
    -#> s(year_fac).5  1.1906135 1.421775 1.6474892    1  2363
    -#> s(year_fac).6  1.0297390 1.273005 1.5119025    1  2822
    -#> s(year_fac).7  1.1386545 1.416695 1.6901837    1  2801
    -#> s(year_fac).8  2.0805780 2.275875 2.4541855    1  1854
    -#> s(year_fac).9  2.7105088 2.854695 2.9904035    1  1900
    -#> s(year_fac).10 1.9784170 2.185665 2.3829547    1  2160
    -#> s(year_fac).11 2.2702520 2.439920 2.6018802    1  2236
    -#> s(year_fac).12 2.5377335 2.694410 2.8427060    1  2288
    -#> s(year_fac).13 1.3582277 1.614790 1.8549620    1  2756
    -#> s(year_fac).14 0.6747114 1.981380 3.3241242    1  1454
    -#> s(year_fac).15 0.6760184 1.965565 3.2735972    1  1552
    -#> s(year_fac).16 0.6100672 1.984980 3.2413580    1  1230
    -#> s(year_fac).17 0.5969866 1.971725 3.2093065    1  1755
    -

    Look at the estimated effect of ndvi using using a -histogram. This can be done by first extracting the posterior -coefficients:

    -
    beta_post <- as.data.frame(model2, variable = 'betas')
    -dplyr::glimpse(beta_post)
    -#> Rows: 2,000
    -#> Columns: 18
    -#> $ ndvi             <dbl> 0.439648, 0.384196, 0.413367, 0.366973, 0.404344, 0.4…
    -#> $ `s(year_fac).1`  <dbl> 1.42732, 1.53312, 1.22096, 1.23654, 1.26042, 1.28158,…
    -#> $ `s(year_fac).2`  <dbl> 1.96723, 2.09795, 1.93254, 2.02153, 2.08334, 1.88701,…
    -#> $ `s(year_fac).3`  <dbl> 2.27870, 2.37000, 2.29162, 2.36628, 2.29201, 2.38041,…
    -#> $ `s(year_fac).4`  <dbl> 2.37015, 2.60912, 2.43459, 2.50683, 2.52586, 2.54542,…
    -#> $ `s(year_fac).5`  <dbl> 1.28135, 1.38028, 1.39677, 1.43385, 1.36603, 1.40265,…
    -#> $ `s(year_fac).6`  <dbl> 1.08797, 1.08143, 1.41139, 1.32584, 1.22344, 1.39926,…
    -#> $ `s(year_fac).7`  <dbl> 1.44054, 1.56645, 1.31643, 1.48867, 1.37964, 1.47620,…
    -#> $ `s(year_fac).8`  <dbl> 2.15992, 2.33356, 2.20094, 2.28246, 2.21501, 2.25253,…
    -#> $ `s(year_fac).9`  <dbl> 2.75921, 2.88033, 2.78019, 2.93305, 2.78423, 2.83695,…
    -#> $ `s(year_fac).10` <dbl> 1.88597, 2.00190, 2.27773, 2.19950, 2.14239, 2.05914,…
    -#> $ `s(year_fac).11` <dbl> 2.30656, 2.53149, 2.47414, 2.61563, 2.23886, 2.28949,…
    -#> $ `s(year_fac).12` <dbl> 2.60053, 2.67232, 2.56650, 2.61523, 2.77591, 2.78755,…
    -#> $ `s(year_fac).13` <dbl> 1.49833, 1.65298, 1.59788, 1.49292, 1.91593, 1.46407,…
    -#> $ `s(year_fac).14` <dbl> 2.275730, 1.633320, 2.074560, 2.935380, 2.663920, 1.3…
    -#> $ `s(year_fac).15` <dbl> 1.405470, 2.233280, 2.326930, 1.424470, 1.962880, 2.6…
    -#> $ `s(year_fac).16` <dbl> 1.809570, 1.569380, 1.961950, 2.647790, 2.515950, 1.1…
    -#> $ `s(year_fac).17` <dbl> 2.672570, 1.177650, 3.133890, 1.256500, 1.641950, 1.9…
    +
    coef(model2)
    +#>                     2.5%       50%     97.5% Rhat n_eff
    +#> ndvi           0.3198694 0.3899835 0.4571083    1  1696
    +#> s(year_fac).1  1.1176373 1.4085900 1.6603838    1  2512
    +#> s(year_fac).2  1.8008470 2.0005000 2.2003670    1  2210
    +#> s(year_fac).3  2.1842727 2.3822950 2.5699363    1  2109
    +#> s(year_fac).4  2.3267037 2.5022700 2.6847912    1  1780
    +#> s(year_fac).5  1.1945853 1.4215950 1.6492038    1  2257
    +#> s(year_fac).6  1.0332160 1.2743050 1.5091052    1  2827
    +#> s(year_fac).7  1.1467567 1.4119100 1.6751850    1  2492
    +#> s(year_fac).8  2.0710285 2.2713050 2.4596285    1  2188
    +#> s(year_fac).9  2.7198967 2.8557300 2.9874662    1  2014
    +#> s(year_fac).10 1.9798730 2.1799600 2.3932595    1  2090
    +#> s(year_fac).11 2.2734940 2.4374700 2.6130482    1  1675
    +#> s(year_fac).12 2.5421157 2.6935350 2.8431822    1  2108
    +#> s(year_fac).13 1.3786087 1.6177850 1.8495872    1  2161
    +#> s(year_fac).14 0.4621041 1.9744700 3.2480377    1  1849
    +#> s(year_fac).15 0.5293684 2.0014200 3.2766722    1  1731
    +#> s(year_fac).16 0.5285142 1.9786450 3.2859085    1  1859
    +#> s(year_fac).17 0.5909969 1.9462850 3.2306940    1  1761
    +

+Look at the estimated effect of ndvi using plot.mvgam with type = 'pterms':

    +
    plot(model2, type = 'pterms')
    +

    +

+This plot indicates a positive linear effect of ndvi on log(counts). But it may be easier to visualise using a histogram, especially for parametric (linear) effects. This can be done by first extracting the posterior coefficients as we did in the first example:

    +
    beta_post <- as.data.frame(model2, variable = 'betas')
    +dplyr::glimpse(beta_post)
    +#> Rows: 2,000
    +#> Columns: 18
    +#> $ ndvi             <dbl> 0.330568, 0.398734, 0.357498, 0.484288, 0.380087, 0.3…
    +#> $ `s(year_fac).1`  <dbl> 1.55868, 1.27949, 1.24414, 1.02997, 1.64712, 1.07519,…
    +#> $ `s(year_fac).2`  <dbl> 1.98967, 2.00846, 2.07493, 1.84431, 2.01590, 2.16466,…
    +#> $ `s(year_fac).3`  <dbl> 2.41434, 2.16020, 2.67324, 2.33332, 2.32415, 2.45516,…
    +#> $ `s(year_fac).4`  <dbl> 2.62215, 2.53992, 2.50659, 2.23671, 2.56663, 2.40054,…
    +#> $ `s(year_fac).5`  <dbl> 1.37221, 1.44795, 1.53019, 1.27623, 1.50771, 1.49515,…
    +#> $ `s(year_fac).6`  <dbl> 1.323980, 1.220200, 1.165610, 1.271620, 1.193820, 1.3…
    +#> $ `s(year_fac).7`  <dbl> 1.52005, 1.30735, 1.42566, 1.13335, 1.61581, 1.31853,…
    +#> $ `s(year_fac).8`  <dbl> 2.40223, 2.20021, 2.44366, 2.17192, 2.20837, 2.33066,…
    +#> $ `s(year_fac).9`  <dbl> 2.91580, 2.90942, 2.87679, 2.64941, 2.85401, 2.78744,…
    +#> $ `s(year_fac).10` <dbl> 2.46559, 2.01466, 2.08319, 2.01400, 2.22965, 2.26523,…
    +#> $ `s(year_fac).11` <dbl> 2.52118, 2.45406, 2.46667, 2.20664, 2.42495, 2.46256,…
    +#> $ `s(year_fac).12` <dbl> 2.72360, 2.63546, 2.86718, 2.59035, 2.76576, 2.56130,…
    +#> $ `s(year_fac).13` <dbl> 1.67388, 1.50790, 1.52463, 1.39004, 1.72927, 1.61023,…
    +#> $ `s(year_fac).14` <dbl> 2.583650, 2.034240, 1.819820, 1.579280, 2.426880, 1.8…
    +#> $ `s(year_fac).15` <dbl> 2.57365, 2.28723, 1.67404, 1.46796, 2.49512, 2.71230,…
    +#> $ `s(year_fac).16` <dbl> 1.801660, 2.185540, 1.756500, 2.098760, 2.270640, 1.8…
    +#> $ `s(year_fac).17` <dbl> 0.886081, 3.409300, -0.371795, 2.494990, 1.822150, 2.…

    The posterior distribution for the effect of ndvi is stored in the ndvi column. A quick histogram confirms our inference that log(counts) respond positively to increases in ndvi:

    -
    hist(beta_post$ndvi,
    -     xlim = c(-1 * max(abs(beta_post$ndvi)),
    -              max(abs(beta_post$ndvi))),
    -     col = 'darkred',
    -     border = 'white',
    -     xlab = expression(beta[NDVI]),
    -     ylab = '',
    -     yaxt = 'n',
    -     main = '',
    -     lwd = 2)
    -abline(v = 0, lwd = 2.5)
    -

    +
    hist(beta_post$ndvi,
    +     xlim = c(-1 * max(abs(beta_post$ndvi)),
    +              max(abs(beta_post$ndvi))),
    +     col = 'darkred',
    +     border = 'white',
    +     xlab = expression(beta[NDVI]),
    +     ylab = '',
    +     yaxt = 'n',
    +     main = '',
    +     lwd = 2)
    +abline(v = 0, lwd = 2.5)
    +

    marginaleffects support

Given our model used a nonlinear link function (log link in this
@@ -1431,13 +1467,25 @@

    marginaleffects support

this relatively straightforward. Objects of class mvgam can be used with marginaleffects to inspect contrasts, scenario-based predictions, conditional and marginal effects, all on the
-outcome scale. Like brms, mvgam has the simple
+outcome scale. Here we will use the plot_predictions function from marginaleffects to inspect the conditional effect of ndvi (use ?plot_predictions for guidance on how to modify these plots):

    +
    plot_predictions(model2, 
    +                 condition = "ndvi",
    +                 # include the observed count values
    +                 # as points, and show rugs for the observed
    +                 # ndvi and count values on the axes
    +                 points = 0.5, rug = TRUE)
    +

    +

+Now it is easier to get a sense of the nonlinear but positive relationship estimated between ndvi and count.
+Like brms, mvgam has the simple
conditional_effects function to make quick and informative
-plots for main effects, which rely on marginaleffects support. This will likely be your go-to function for quickly understanding patterns from fitted mvgam models

    -
    conditional_effects(model2)
    -

+plots for main effects. This will likely be your go-to function for quickly understanding patterns from fitted mvgam models

    +
    plot(conditional_effects(model2), ask = FALSE)
    +

    @@ -1472,11 +1520,11 @@

    Adding predictors as smooths

    b-spline basis for the temporal smooth. Because we no longer have intercepts for each year, we also retain the primary intercept term in this model (there is no -1 in the formula now):

    -
    model3 <- mvgam(count ~ s(time, bs = 'bs', k = 15) + 
    -                  ndvi,
    -                family = poisson(),
    -                data = data_train,
    -                newdata = data_test)
    +
    model3 <- mvgam(count ~ s(time, bs = 'bs', k = 15) + 
    +                  ndvi,
    +                family = poisson(),
    +                data = data_train,
    +                newdata = data_test)

The model can be described mathematically as follows: \[\begin{align*} \boldsymbol{count}_t & \sim \text{Poisson}(\lambda_t) \\ log(\lambda_t) & = f(\boldsymbol{time})_t + \beta_{ndvi} * \boldsymbol{ndvi}_t \end{align*}\]
@@ -1495,151 +1543,178 @@

    Adding predictors as smooths

    wiggly spline. Note that sometimes there are multiple smoothing penalties that contribute to the covariance matrix, but I am only showing one here for simplicity. View the summary as before

    -
    summary(model3)
    -#> GAM formula:
    -#> count ~ s(time, bs = "bs", k = 15) + ndvi
    -#> <environment: 0x0000026502c9ba10>
    -#> 
    -#> Family:
    -#> poisson
    -#> 
    -#> Link function:
    -#> log
    -#> 
    -#> Trend model:
    -#> None
    -#> 
    -#> N series:
    -#> 1 
    -#> 
    -#> N timepoints:
    -#> 199 
    -#> 
    -#> Status:
    -#> Fitted using Stan 
    -#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    -#> Total post-warmup draws = 2000
    -#> 
    -#> 
    -#> GAM coefficient (beta) estimates:
    -#>              2.5%   50% 97.5% Rhat n_eff
    -#> (Intercept)  2.00  2.10  2.20    1   852
    -#> ndvi         0.26  0.33  0.40    1   860
    -#> s(time).1   -2.10 -1.10 -0.10    1   618
    -#> s(time).2    0.43  1.30  2.20    1   493
    -#> s(time).3   -0.50  0.42  1.40    1   445
    -#> s(time).4    1.60  2.40  3.40    1   445
    -#> s(time).5   -1.20 -0.23  0.74    1   467
    -#> s(time).6   -0.56  0.34  1.40    1   487
    -#> s(time).7   -1.50 -0.54  0.46    1   492
    -#> s(time).8    0.59  1.40  2.50    1   450
    -#> s(time).9    1.10  2.00  3.00    1   427
    -#> s(time).10  -0.35  0.51  1.50    1   459
    -#> s(time).11   0.81  1.70  2.70    1   429
    -#> s(time).12   0.68  1.50  2.30    1   450
    -#> s(time).13  -1.20 -0.35  0.61    1   614
    -#> s(time).14  -7.20 -4.10 -1.20    1   513
    -#> 
    -#> Approximate significance of GAM smooths:
    -#>          edf Ref.df Chi.sq p-value    
    -#> s(time) 8.72     14    771  <2e-16 ***
    -#> ---
    -#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    -#> 
    -#> Stan MCMC diagnostics:
    -#> n_eff / iter looks reasonable for all parameters
    -#> Rhat looks reasonable for all parameters
    -#> 0 of 2000 iterations ended with a divergence (0%)
    -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    -#> E-FMI indicated no pathological behavior
    -#> 
    -#> Samples were drawn using NUTS(diag_e) at Wed May 08 8:48:31 PM 2024.
    -#> For each parameter, n_eff is a crude measure of effective sample size,
    -#> and Rhat is the potential scale reduction factor on split MCMC chains
    -#> (at convergence, Rhat = 1)
    +
    summary(model3)
    +#> GAM formula:
    +#> count ~ s(time, bs = "bs", k = 15) + ndvi
    +#> 
    +#> Family:
    +#> poisson
    +#> 
    +#> Link function:
    +#> log
    +#> 
    +#> Trend model:
    +#> None
    +#> 
    +#> N series:
    +#> 1 
    +#> 
    +#> N timepoints:
    +#> 160 
    +#> 
    +#> Status:
    +#> Fitted using Stan 
    +#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    +#> Total post-warmup draws = 2000
    +#> 
    +#> 
    +#> GAM coefficient (beta) estimates:
    +#>              2.5%   50%  97.5% Rhat n_eff
    +#> (Intercept)  2.00  2.10  2.200 1.00   903
    +#> ndvi         0.26  0.33  0.390 1.00   942
    +#> s(time).1   -2.10 -1.10  0.029 1.01   484
    +#> s(time).2    0.45  1.30  2.400 1.01   411
    +#> s(time).3   -0.43  0.45  1.500 1.02   347
    +#> s(time).4    1.60  2.50  3.600 1.02   342
    +#> s(time).5   -1.10 -0.22  0.880 1.02   375
    +#> s(time).6   -0.53  0.36  1.600 1.01   352
    +#> s(time).7   -1.50 -0.51  0.560 1.01   406
    +#> s(time).8    0.63  1.50  2.600 1.02   340
    +#> s(time).9    1.20  2.10  3.200 1.02   346
    +#> s(time).10  -0.34  0.54  1.600 1.01   364
    +#> s(time).11   0.92  1.80  2.900 1.02   332
    +#> s(time).12   0.67  1.50  2.500 1.01   398
    +#> s(time).13  -1.20 -0.32  0.700 1.01   420
    +#> s(time).14  -7.90 -4.20 -1.200 1.01   414
    +#> 
    +#> Approximate significance of GAM smooths:
    +#>          edf Ref.df Chi.sq p-value    
    +#> s(time) 9.41     14    790  <2e-16 ***
    +#> ---
    +#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    +#> 
    +#> Stan MCMC diagnostics:
    +#> n_eff / iter looks reasonable for all parameters
    +#> Rhat looks reasonable for all parameters
    +#> 0 of 2000 iterations ended with a divergence (0%)
    +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    +#> E-FMI indicated no pathological behavior
    +#> 
    +#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:01:29 PM 2024.
    +#> For each parameter, n_eff is a crude measure of effective sample size,
    +#> and Rhat is the potential scale reduction factor on split MCMC chains
    +#> (at convergence, Rhat = 1)

The summary above now contains posterior estimates for the smoothing parameters as well as the basis coefficients for the nonlinear effect of
-time. We can visualize conditional_effects as before:

    -
    conditional_effects(model3, type = 'link')
    -

+time. We can visualize the conditional time effect using the plot function with type = 'smooths':

    +
    plot(model3, type = 'smooths')
    +

    +

+By default this plot shows posterior empirical quantiles, but it can also be helpful to view some realizations of the underlying function (here, each line is a different potential curve drawn from the posterior of all possible curves):

    +
    plot(model3, type = 'smooths', realisations = TRUE,
    +     n_realisations = 30)
    +

    +
    +

    Derivatives of smooths

    +

+A useful question when modelling with GAMs is to identify where the function is changing most rapidly. To address this, we can plot estimated first derivatives of the spline:

    +
    plot(model3, type = 'smooths', derivatives = TRUE)
    +

    +

+Here, values >0 indicate the function was increasing at that time point, while values <0 indicate the function was declining. The most rapid declines appear to have been happening around timepoint 50 and again toward the end of the training period, for example.
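If you prefer to compute this quantity yourself, first derivatives can be approximated by finite differences of the smooth evaluated over an evenly spaced grid. A minimal sketch, assuming preds is a matrix of posterior draws (rows) of the smooth evaluated at consecutive timepoints (columns), however you choose to extract it:

# successive differences approximate the 1st derivative on an even grid
derivs <- t(apply(preds, 1, diff))

# posterior median and 90% interval of the derivative at each timepoint
deriv_summary <- apply(derivs, 2, quantile, probs = c(0.05, 0.5, 0.95))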

    +

+Use conditional_effects again for useful plots on the outcome scale:

    +
    plot(conditional_effects(model3), ask = FALSE)
    +

    +

    Or on the link scale:

    +
    plot(conditional_effects(model3, type = 'link'), ask = FALSE)
    +

    Inspect the underlying Stan code to gain some idea of how the spline is being penalized:

    -
    code(model3)
    -#> // Stan model code generated by package mvgam
    -#> data {
    -#>   int<lower=0> total_obs; // total number of observations
    -#>   int<lower=0> n; // number of timepoints per series
    -#>   int<lower=0> n_sp; // number of smoothing parameters
    -#>   int<lower=0> n_series; // number of series
    -#>   int<lower=0> num_basis; // total number of basis coefficients
    -#>   vector[num_basis] zero; // prior locations for basis coefficients
    -#>   matrix[total_obs, num_basis] X; // mgcv GAM design matrix
    -#>   array[n, n_series] int<lower=0> ytimes; // time-ordered matrix (which col in X belongs to each [time, series] observation?)
    -#>   matrix[14, 28] S1; // mgcv smooth penalty matrix S1
    -#>   int<lower=0> n_nonmissing; // number of nonmissing observations
    -#>   array[n_nonmissing] int<lower=0> flat_ys; // flattened nonmissing observations
    -#>   matrix[n_nonmissing, num_basis] flat_xs; // X values for nonmissing observations
    -#>   array[n_nonmissing] int<lower=0> obs_ind; // indices of nonmissing observations
    -#> }
    -#> parameters {
    -#>   // raw basis coefficients
    -#>   vector[num_basis] b_raw;
    -#>   
    -#>   // smoothing parameters
    -#>   vector<lower=0>[n_sp] lambda;
    -#> }
    -#> transformed parameters {
    -#>   // basis coefficients
    -#>   vector[num_basis] b;
    -#>   b[1 : num_basis] = b_raw[1 : num_basis];
    -#> }
    -#> model {
    -#>   // prior for (Intercept)...
    -#>   b_raw[1] ~ student_t(3, 2.6, 2.5);
    -#>   
    -#>   // prior for ndvi...
    -#>   b_raw[2] ~ student_t(3, 0, 2);
    -#>   
    -#>   // prior for s(time)...
    -#>   b_raw[3 : 16] ~ multi_normal_prec(zero[3 : 16],
    -#>                                     S1[1 : 14, 1 : 14] * lambda[1]
    -#>                                     + S1[1 : 14, 15 : 28] * lambda[2]);
    -#>   
    -#>   // priors for smoothing parameters
    -#>   lambda ~ normal(5, 30);
    -#>   {
    -#>     // likelihood functions
    -#>     flat_ys ~ poisson_log_glm(flat_xs, 0.0, b);
    -#>   }
    -#> }
    -#> generated quantities {
    -#>   vector[total_obs] eta;
    -#>   matrix[n, n_series] mus;
    -#>   vector[n_sp] rho;
    -#>   array[n, n_series] int ypred;
    -#>   rho = log(lambda);
    -#>   
    -#>   // posterior predictions
    -#>   eta = X * b;
    -#>   for (s in 1 : n_series) {
    -#>     mus[1 : n, s] = eta[ytimes[1 : n, s]];
    -#>     ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]);
    -#>   }
    -#> }
    +
    code(model3)
    +#> // Stan model code generated by package mvgam
    +#> data {
    +#>   int<lower=0> total_obs; // total number of observations
    +#>   int<lower=0> n; // number of timepoints per series
    +#>   int<lower=0> n_sp; // number of smoothing parameters
    +#>   int<lower=0> n_series; // number of series
    +#>   int<lower=0> num_basis; // total number of basis coefficients
    +#>   vector[num_basis] zero; // prior locations for basis coefficients
    +#>   matrix[total_obs, num_basis] X; // mgcv GAM design matrix
    +#>   array[n, n_series] int<lower=0> ytimes; // time-ordered matrix (which col in X belongs to each [time, series] observation?)
    +#>   matrix[14, 28] S1; // mgcv smooth penalty matrix S1
    +#>   int<lower=0> n_nonmissing; // number of nonmissing observations
    +#>   array[n_nonmissing] int<lower=0> flat_ys; // flattened nonmissing observations
    +#>   matrix[n_nonmissing, num_basis] flat_xs; // X values for nonmissing observations
    +#>   array[n_nonmissing] int<lower=0> obs_ind; // indices of nonmissing observations
    +#> }
    +#> parameters {
    +#>   // raw basis coefficients
    +#>   vector[num_basis] b_raw;
    +#>   
    +#>   // smoothing parameters
    +#>   vector<lower=0>[n_sp] lambda;
    +#> }
    +#> transformed parameters {
    +#>   // basis coefficients
    +#>   vector[num_basis] b;
    +#>   b[1 : num_basis] = b_raw[1 : num_basis];
    +#> }
    +#> model {
    +#>   // prior for (Intercept)...
    +#>   b_raw[1] ~ student_t(3, 2.6, 2.5);
    +#>   
    +#>   // prior for ndvi...
    +#>   b_raw[2] ~ student_t(3, 0, 2);
    +#>   
    +#>   // prior for s(time)...
    +#>   b_raw[3 : 16] ~ multi_normal_prec(zero[3 : 16],
    +#>                                     S1[1 : 14, 1 : 14] * lambda[1]
    +#>                                     + S1[1 : 14, 15 : 28] * lambda[2]);
    +#>   
    +#>   // priors for smoothing parameters
    +#>   lambda ~ normal(5, 30);
    +#>   {
    +#>     // likelihood functions
    +#>     flat_ys ~ poisson_log_glm(flat_xs, 0.0, b);
    +#>   }
    +#> }
    +#> generated quantities {
    +#>   vector[total_obs] eta;
    +#>   matrix[n, n_series] mus;
    +#>   vector[n_sp] rho;
    +#>   array[n, n_series] int ypred;
    +#>   rho = log(lambda);
    +#>   
    +#>   // posterior predictions
    +#>   eta = X * b;
    +#>   for (s in 1 : n_series) {
    +#>     mus[1 : n, s] = eta[ytimes[1 : n, s]];
    +#>     ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]);
    +#>   }
    +#> }

    The line below // prior for s(time)... shows how the spline basis coefficients are drawn from a zero-centred multivariate normal distribution. The precision matrix \(S\) is penalized by two different smoothing parameters (the \(\lambda\)’s) to enforce smoothness and reduce overfitting
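To make this concrete, here is a minimal sketch of how the combined precision is formed, using the penalty blocks that mvgam passes to Stan. This assumes the Stan data list is stored in the fitted object's model_data slot (as it is for the models in the shared states vignette), and the lambda values below are illustrative stand-ins rather than posterior draws:

S1 <- model3$model_data$S1   # the 14 x 28 penalty matrix from the data block above
lambda <- c(5, 5)            # illustrative smoothing parameter values

# combined prior precision for the s(time) basis coefficients
K <- S1[1:14, 1:14] * lambda[1] + S1[1:14, 15:28] * lambda[2]

# larger lambdas produce higher prior precision, shrinking the basis
# coefficients toward zero and yielding a smoother estimated function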

    +

    Latent dynamics in mvgam

    Forecasts from the above model are not ideal:

    -
    plot(model3, type = 'forecast', newdata = data_test)
    -#> Out of sample DRPS:
    -#> 287.6505975
    -

    +
    plot(model3, type = 'forecast', newdata = data_test)
    +

    +
    #> Out of sample DRPS:
    +#> [1] 288.3844

Why is this happening? The forecasts are driven almost entirely by variation in the temporal spline, which is extrapolating linearly forever beyond the edge of the training data. Any slight
@@ -1648,14 +1723,14 @@

    Latent dynamics in mvgam

    functions into the out-of-sample test set for the two models. Here are the extrapolated functions for the first model, with 15 basis functions:

    -
    plot_mvgam_smooth(model3, smooth = 's(time)',
    -                  # feed newdata to the plot function to generate
    -                  # predictions of the temporal smooth to the end of the 
    -                  # testing period
    -                  newdata = data.frame(time = 1:max(data_test$time),
    -                                       ndvi = 0))
    -abline(v = max(data_train$time), lty = 'dashed', lwd = 2)
    -

    +
    plot_mvgam_smooth(model3, smooth = 's(time)',
    +                  # feed newdata to the plot function to generate
    +                  # predictions of the temporal smooth to the end of the 
    +                  # testing period
    +                  newdata = data.frame(time = 1:max(data_test$time),
    +                                       ndvi = 0))
    +abline(v = max(data_train$time), lty = 'dashed', lwd = 2)
    +

This model is not doing well. Clearly we need to somehow account for the strong temporal autocorrelation when modelling these data without using a smooth function of time. Now onto another prominent
@@ -1670,11 +1745,11 @@

    Latent dynamics in mvgam

    rather than the parametric term that was used above, to showcase that mvgam can include combinations of smooths and dynamic components:

    -
    model4 <- mvgam(count ~ s(ndvi, k = 6),
    -                family = poisson(),
    -                data = data_train,
    -                newdata = data_test,
    -                trend_model = 'AR1')
    +
    model4 <- mvgam(count ~ s(ndvi, k = 6),
    +                family = poisson(),
    +                data = data_train,
    +                newdata = data_test,
    +                trend_model = 'AR1')

The model can be described mathematically as follows: \[\begin{align*} \boldsymbol{count}_t & \sim \text{Poisson}(\lambda_t) \\ log(\lambda_t) & = f(\boldsymbol{ndvi})_t + z_t \\ z_t & \sim \text{Normal}(ar1 * z_{t-1}, \sigma_{error}) \end{align*}\]
@@ -1692,81 +1767,84 @@
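For intuition about the trend component, the latent AR1 process on its own can be simulated in a few lines. This is a toy sketch with made-up parameter values that roughly match the posterior summaries shown further below, not draws from the fitted model:

set.seed(1)
ar1 <- 0.8
sigma_error <- 0.8
z <- numeric(100)
for (t in 2:100) {
  z[t] <- rnorm(1, mean = ar1 * z[t - 1], sd = sigma_error)
}
plot(z, type = 'l', lwd = 2, bty = 'l',
     xlab = 'Time', ylab = 'Simulated latent AR1 trend')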

    Latent dynamics in mvgam

    more realistic estimates of the residual autocorrelation parameters. Summarise the model to see how it now returns posterior summaries for the latent AR1 process:

    -
    summary(model4)
    -#> GAM formula:
    -#> count ~ s(ndvi, k = 6)
    -#> <environment: 0x0000026502c9ba10>
    -#> 
    -#> Family:
    -#> poisson
    -#> 
    -#> Link function:
    -#> log
    -#> 
    -#> Trend model:
    -#> AR1
    -#> 
    -#> N series:
    -#> 1 
    -#> 
    -#> N timepoints:
    -#> 199 
    -#> 
    -#> Status:
    -#> Fitted using Stan 
    -#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    -#> Total post-warmup draws = 2000
    -#> 
    -#> 
    -#> GAM coefficient (beta) estimates:
    -#>               2.5%     50% 97.5% Rhat n_eff
    -#> (Intercept)  1.100  1.9000 2.500 1.08    73
    -#> s(ndvi).1   -0.078  0.0096 0.180 1.01   369
    -#> s(ndvi).2   -0.160  0.0150 0.270 1.03   200
    -#> s(ndvi).3   -0.051 -0.0015 0.046 1.02   292
    -#> s(ndvi).4   -0.270  0.1100 1.300 1.03   187
    -#> s(ndvi).5   -0.072  0.1400 0.340 1.01   661
    -#> 
    -#> Approximate significance of GAM smooths:
    -#>          edf Ref.df Chi.sq p-value
    -#> s(ndvi) 1.35      5   74.7    0.15
    -#> 
    -#> Latent trend parameter AR estimates:
    -#>          2.5%  50% 97.5% Rhat n_eff
    -#> ar1[1]   0.70 0.81  0.92 1.01   308
    -#> sigma[1] 0.67 0.80  0.96 1.01   501
    -#> 
    -#> Stan MCMC diagnostics:
    -#> n_eff / iter looks reasonable for all parameters
    -#> Rhats above 1.05 found for 49 parameters
    -#>  *Diagnose further to investigate why the chains have not mixed
    -#> 0 of 2000 iterations ended with a divergence (0%)
    -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    -#> E-FMI indicated no pathological behavior
    -#> 
    -#> Samples were drawn using NUTS(diag_e) at Wed May 08 8:49:55 PM 2024.
    -#> For each parameter, n_eff is a crude measure of effective sample size,
    -#> and Rhat is the potential scale reduction factor on split MCMC chains
    -#> (at convergence, Rhat = 1)
    +
    summary(model4)
    +#> GAM formula:
    +#> count ~ s(ndvi, k = 6)
    +#> 
    +#> Family:
    +#> poisson
    +#> 
    +#> Link function:
    +#> log
    +#> 
    +#> Trend model:
    +#> AR1
    +#> 
    +#> N series:
    +#> 1 
    +#> 
    +#> N timepoints:
    +#> 160 
    +#> 
    +#> Status:
    +#> Fitted using Stan 
    +#> 4 chains, each with iter = 1000; warmup = 500; thin = 1 
    +#> Total post-warmup draws = 2000
    +#> 
    +#> 
    +#> GAM coefficient (beta) estimates:
    +#>               2.5%     50% 97.5% Rhat n_eff
    +#> (Intercept)  1.200  1.9000 2.500 1.03    63
    +#> s(ndvi).1   -0.066  0.0100 0.160 1.01   318
    +#> s(ndvi).2   -0.110  0.0190 0.340 1.00   286
    +#> s(ndvi).3   -0.048 -0.0019 0.051 1.00   560
    +#> s(ndvi).4   -0.210  0.1200 1.500 1.01   198
    +#> s(ndvi).5   -0.079  0.1500 0.360 1.01   350
    +#> 
    +#> Approximate significance of GAM smooths:
    +#>          edf Ref.df Chi.sq p-value
    +#> s(ndvi) 2.26      5   87.8     0.1
    +#> 
    +#> Latent trend parameter AR estimates:
    +#>          2.5%  50% 97.5% Rhat n_eff
    +#> ar1[1]   0.70 0.81  0.92 1.01   234
    +#> sigma[1] 0.68 0.80  0.96 1.00   488
    +#> 
    +#> Stan MCMC diagnostics:
    +#> n_eff / iter looks reasonable for all parameters
    +#> Rhat looks reasonable for all parameters
    +#> 0 of 2000 iterations ended with a divergence (0%)
    +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
    +#> E-FMI indicated no pathological behavior
    +#> 
    +#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:02:26 PM 2024.
    +#> For each parameter, n_eff is a crude measure of effective sample size,
    +#> and Rhat is the potential scale reduction factor on split MCMC chains
    +#> (at convergence, Rhat = 1)
    +

    View conditional smooths for the ndvi effect:

    +
    plot_predictions(model4, 
    +                 condition = "ndvi",
    +                 points = 0.5, rug = TRUE)
    +

    View posterior hindcasts / forecasts and compare against the out of sample test data

    -
    plot(model4, type = 'forecast', newdata = data_test)
    -#> Out of sample DRPS:
    -#> 150.62898975
    -

    +
    plot(model4, type = 'forecast', newdata = data_test)
    +

    +
    #> Out of sample DRPS:
    +#> [1] 150.5241

    The trend is evolving as an AR1 process, which we can also view:

    -
    plot(model4, type = 'trend', newdata = data_test)
    -

    +
    plot(model4, type = 'trend', newdata = data_test)
    +

    In-sample model performance can be interrogated using leave-one-out cross-validation utilities from the loo package (a higher value is preferred for this metric):

    -
    loo_compare(model3, model4)
    -#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
    -
    -#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
    -#>        elpd_diff se_diff
    -#> model4    0.0       0.0 
    -#> model3 -561.4      66.3
    +
    loo_compare(model3, model4)
    +#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
    +
    +#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
    +#>        elpd_diff se_diff
    +#> model4    0.0       0.0 
    +#> model3 -558.9      66.4

    The higher estimated log predictive density (ELPD) value for the dynamic model suggests it provides a better fit to the in-sample data.
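If the Pareto k warnings above are a concern, each model can also be inspected on its own with the same loo utilities (a quick check; see help('pareto-k-diagnostic') for interpretation):

# full ELPD estimates and the Pareto k diagnostic table for the dynamic model
loo(model4)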

    @@ -1775,12 +1853,12 @@

    Latent dynamics in mvgam

    the forecast and score functions. Here we will compare models based on their Discrete Ranked Probability Scores (a lower value is preferred for this metric)

    -
    fc_mod3 <- forecast(model3)
    -fc_mod4 <- forecast(model4)
    -score_mod3 <- score(fc_mod3, score = 'drps')
    -score_mod4 <- score(fc_mod4, score = 'drps')
    -sum(score_mod4$PP$score, na.rm = TRUE) - sum(score_mod3$PP$score, na.rm = TRUE)
    -#> [1] -137.0216
    +
    fc_mod3 <- forecast(model3)
    +fc_mod4 <- forecast(model4)
    +score_mod3 <- score(fc_mod3, score = 'drps')
    +score_mod4 <- score(fc_mod4, score = 'drps')
    +sum(score_mod4$PP$score, na.rm = TRUE) - sum(score_mod3$PP$score, na.rm = TRUE)
    +#> [1] -137.8603

    A strongly negative value here suggests the score for the dynamic model (model 4) is much smaller than the score for the model with a smooth function of time (model 3)
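To see where that advantage accrues over the forecast horizon, the per-timepoint scores computed above can also be compared directly; a quick sketch using base graphics:

plot(score_mod3$PP$score, type = 'b', col = 'darkred',
     bty = 'l', xlab = 'Forecast horizon', ylab = 'DRPS')
lines(score_mod4$PP$score, type = 'b', col = 'darkblue')
# the dynamic model (dark blue) should score lower (better)
# at most horizons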

diff --git a/inst/doc/nmixtures.R b/inst/doc/nmixtures.R
index 81faebc2..a4e7a766 100644
--- a/inst/doc/nmixtures.R
+++ b/inst/doc/nmixtures.R
@@ -1,4 +1,4 @@
-## ----echo = FALSE------------------------------------------------------
+## ----echo = FALSE-------------------------------------------------------------
 NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
 knitr::opts_chunk$set(
   collapse = TRUE,
@@ -7,11 +7,10 @@ knitr::opts_chunk$set(
   eval = NOT_CRAN
 )
-
-## ----setup, include=FALSE----------------------------------------------
+## ----setup, include=FALSE-----------------------------------------------------
 knitr::opts_chunk$set(
   echo = TRUE,
-  dpi = 100,
+  dpi = 150,
   fig.asp = 0.8,
   fig.width = 6,
   out.width = "60%",
@@ -34,8 +33,7 @@ options(ggplot2.discrete.colour = c("#A25050",
                                     'darkred',
                                     "#010048"))
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 set.seed(999)
 # Simulate observations for species 1, which shows a declining trend and 0.7 detection probability
 data.frame(site = 1,
@@ -94,20 +92,17 @@ testdat = testdat %>%
                            cap = 50) %>%
              dplyr::select(-replicate))
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 testdat$species <- factor(testdat$species,
                           levels = unique(testdat$species))
 testdat$series <- factor(testdat$series,
                          levels = unique(testdat$series))
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 dplyr::glimpse(testdat)
 head(testdat, 12)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 testdat %>%
   # each unique combination of site*species is a separate process
   dplyr::mutate(trend = as.numeric(factor(paste0(site, species)))) %>%
@@ -115,8 +110,7 @@ testdat %>%
   dplyr::distinct() -> trend_map
 trend_map
-
-## ----include = FALSE, results='hide'-----------------------------------
+## ----include = FALSE, results='hide'------------------------------------------
 mod <- mvgam(
   # the observation formula sets up linear predictors for
   # detection probability on the logit scale
@@ -138,47 +132,41 @@ mod <- mvgam(
              prior(normal(1, 1.5), class = Intercept_trend)),
   samples = 1000)
-
-## ----eval = FALSE------------------------------------------------------
-## mod <- mvgam(
-##   # the observation formula sets up linear predictors for
-##   # detection probability on the logit scale
-##   formula = obs ~ species - 1,
-## 
-##   # the trend_formula sets up the linear predictors for
-##   # the latent abundance processes on the log scale
-##   trend_formula = ~ s(time, by = trend, k = 4) + species,
-## 
-##   # the trend_map takes care of the mapping
-##   trend_map = trend_map,
-## 
-##   # nmix() family and data
-##   family = nmix(),
-##   data = testdat,
-## 
-##   # priors can be set in the usual way
-##   priors = c(prior(std_normal(), class = b),
-##              prior(normal(1, 1.5), class = Intercept_trend)),
-##   samples = 1000)
-
-
-## ----------------------------------------------------------------------
+## ----eval = FALSE-------------------------------------------------------------
+# mod <- mvgam(
+#   # the observation formula sets up linear predictors for
+#   # detection probability on the logit scale
+#   formula = obs ~ species - 1,
+# 
+#   # the trend_formula sets up the linear predictors for
+#   # the latent abundance processes on the log scale
+#   trend_formula = ~ s(time, by = trend, k = 4) + species,
+# 
+#   # the trend_map takes care of the mapping
+#   trend_map = trend_map,
+# 
+#   # nmix() family and data
+#   family = nmix(),
+#   data = testdat,
+# 
+#   # priors can be set in the usual way
+#   priors = c(prior(std_normal(), class = b),
+#              prior(normal(1, 1.5), class = Intercept_trend)),
+#   samples = 1000)
+
+## -----------------------------------------------------------------------------
 code(mod)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 summary(mod)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 loo(mod)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(mod, type = 'smooths', trend_effects = TRUE)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_predictions(mod, condition = 'species',
                  type = 'detection') +
   ylab('Pr(detection)') +
@@ -186,8 +174,7 @@ plot_predictions(mod, condition = 'species',
   theme_classic() +
   theme(legend.position = 'none')
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 hc <- hindcast(mod, type = 'latent_N')
 
 # Function to plot latent abundance estimates vs truth
@@ -240,13 +227,11 @@ plot_latentN = function(hindcasts, data, species = 'sp_1'){
          title = species)
 }
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_latentN(hc, testdat, species = 'sp_1')
 plot_latentN(hc, testdat, species = 'sp_2')
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 # Date link
 load(url('https://github.com/doserjef/spAbundance/raw/main/data/dataNMixSim.rda'))
 data.one.sp <- dataNMixSim
@@ -266,8 +251,7 @@ det.cov[is.na(det.cov)] <- rnorm(length(which(is.na(det.cov))))
 det.cov2 <- dataNMixSim$det.covs$det.cov.2
 det.cov2[is.na(det.cov2)] <- rnorm(length(which(is.na(det.cov2))))
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 mod_data <- do.call(rbind,
                     lapply(1:NROW(data.one.sp$y), function(x){
                       data.frame(y = data.one.sp$y[x,],
@@ -285,14 +269,12 @@ mod_data <- do.call(rbind,
          time = 1,
         cap = max(data.one.sp$y, na.rm = TRUE) + 20)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 NROW(mod_data)
 dplyr::glimpse(mod_data)
 head(mod_data)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 mod_data %>%
   # each unique combination of site*species is a separate process
   dplyr::mutate(trend = as.numeric(factor(paste0(site, species)))) %>%
@@ -303,8 +285,7 @@ trend_map %>%
   dplyr::arrange(trend) %>%
   head(12)
-
-## ----include = FALSE, results='hide'-----------------------------------
+## ----include = FALSE, results='hide'------------------------------------------
 mod <- mvgam(
   # effects of covariates on detection probability;
   # here we use penalized splines for both continuous covariates
@@ -336,82 +317,73 @@ mod <- mvgam(
   residuals = FALSE,
   samples = 1000)
-
-## ----eval=FALSE--------------------------------------------------------
-## mod <- mvgam(
-##   # effects of covariates on detection probability;
-##   # here we use penalized splines for both continuous covariates
-##   formula = y ~ s(det_cov, k = 4) + s(det_cov2, k = 4),
-## 
-##   # effects of the covariates on latent abundance;
-##   # here we use a penalized spline for the continuous covariate and
-##   # hierarchical intercepts for the factor covariate
-##   trend_formula = ~ s(abund_cov, k = 4) +
-##     s(abund_fac, bs = 're'),
-## 
-##   # link multiple observations to each site
-##   trend_map = trend_map,
-## 
-##   # nmix() family and supplied data
-##   family = nmix(),
-##   data = mod_data,
-## 
-##   # standard normal priors on key regression parameters
-##   priors = c(prior(std_normal(), class = 'b'),
-##              prior(std_normal(), class = 'Intercept'),
-##              prior(std_normal(), class = 'Intercept_trend'),
-##              prior(std_normal(), class = 'sigma_raw_trend')),
-## 
-##   # use Stan's variational inference for quicker results
-##   algorithm = 'meanfield',
-## 
-##   # no need to compute "series-level" residuals
-##   residuals = FALSE,
-##   samples = 1000)
-
-
-## ----------------------------------------------------------------------
+## ----eval=FALSE---------------------------------------------------------------
+# mod <- mvgam(
+#   # effects of covariates on detection probability;
+#   # here we use penalized splines for both continuous covariates
+#   formula = y ~ s(det_cov, k = 4) + s(det_cov2, k = 4),
+# 
+#   # effects of the covariates on latent abundance;
+#   # here we use a penalized spline for the continuous covariate and
+#   # hierarchical intercepts for the factor covariate
+#   trend_formula = ~ s(abund_cov, k = 4) +
+#     s(abund_fac, bs = 're'),
+# 
+#   # link multiple observations to each site
+#   trend_map = trend_map,
+# 
+#   # nmix() family and supplied data
+#   family = nmix(),
+#   data = mod_data,
+# 
+#   # standard normal priors on key regression parameters
+#   priors = c(prior(std_normal(), class = 'b'),
+#              prior(std_normal(), class = 'Intercept'),
+#              prior(std_normal(), class = 'Intercept_trend'),
+#              prior(std_normal(), class = 'sigma_raw_trend')),
+# 
+#   # use Stan's variational inference for quicker results
+#   algorithm = 'meanfield',
+# 
+#   # no need to compute "series-level" residuals
+#   residuals = FALSE,
+#   samples = 1000)
+
+## -----------------------------------------------------------------------------
 summary(mod, include_betas = FALSE)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 avg_predictions(mod, type = 'detection')
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 abund_plots <- plot(conditional_effects(mod,
                                         type = 'link',
                                         effects = c('abund_cov',
                                                     'abund_fac')),
                     plot = FALSE)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 abund_plots[[1]] +
   ylab('Expected latent abundance')
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 abund_plots[[2]] +
   ylab('Expected latent abundance')
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 det_plots <- plot(conditional_effects(mod,
                                       type = 'detection',
                                       effects = c('det_cov',
                                                   'det_cov2')),
                   plot = FALSE)
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 det_plots[[1]] +
   ylab('Pr(detection)')
 det_plots[[2]] +
   ylab('Pr(detection)')
-
-## ----------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 fivenum_round = function(x)round(fivenum(x, na.rm = TRUE), 2)
 
 plot_predictions(mod,
diff --git a/inst/doc/nmixtures.Rmd b/inst/doc/nmixtures.Rmd
index f7585ab1..62d4a08e 100644
--- a/inst/doc/nmixtures.Rmd
+++ b/inst/doc/nmixtures.Rmd
@@ -23,7 +23,7 @@ knitr::opts_chunk$set(
 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(
   echo = TRUE,
-  dpi = 100,
+  dpi = 150,
   fig.asp = 0.8,
   fig.width = 6,
   out.width = "60%",
diff --git a/inst/doc/nmixtures.html b/inst/doc/nmixtures.html
index 0acb06b2..f92147dd 100644
--- a/inst/doc/nmixtures.html
+++ b/inst/doc/nmixtures.html
@@ -12,7 +12,7 @@
-
+
 N-mixtures in mvgam
@@ -340,7 +340,7 @@

    N-mixtures in mvgam

    Nicholas J Clark

-2024-05-09
+2024-04-16

    @@ -711,68 +711,66 @@

    Modelling with the nmix() family

    summary(mod)
     #> GAM observation formula:
     #> obs ~ species - 1
    -#> <environment: 0x00000237248751d0>
    -#> 
    -#> GAM process formula:
    -#> ~s(time, by = trend, k = 4) + species
    -#> <environment: 0x00000237248751d0>
    -#> 
    -#> Family:
    -#> nmix
    -#> 
    -#> Link function:
    -#> log
    -#> 
    -#> Trend model:
    -#> None
    -#> 
    -#> N process models:
    -#> 2 
    -#> 
    -#> N series:
    -#> 10 
    -#> 
    -#> N timepoints:
    -#> 6 
    -#> 
    -#> Status:
    -#> Fitted using Stan 
    -#> 4 chains, each with iter = 1500; warmup = 500; thin = 1 
    -#> Total post-warmup draws = 4000
    -#> 
    -#> 
    -#> GAM observation model coefficient (beta) estimates:
    -#>              2.5%   50% 97.5% Rhat n_eff
    -#> speciessp_1 -0.28 0.720  1.40    1  1630
    -#> speciessp_2 -1.20 0.035  0.88    1  1913
    -#> 
    -#> GAM process model coefficient (beta) estimates:
    -#>                               2.5%     50%  97.5% Rhat n_eff
    -#> (Intercept)_trend            2.700  3.0000  3.500 1.00  1453
    -#> speciessp_2_trend           -1.200 -0.6300  0.140 1.00  1587
    -#> s(time):trendtrend1.1_trend -0.078  0.0160  0.210 1.01   931
    -#> s(time):trendtrend1.2_trend -0.250  0.0058  0.270 1.00  2384
    -#> s(time):trendtrend1.3_trend -0.450 -0.2500 -0.040 1.00  1747
    -#> s(time):trendtrend2.1_trend -0.210 -0.0120  0.092 1.00   849
    -#> s(time):trendtrend2.2_trend -0.190  0.0290  0.530 1.00   627
    -#> s(time):trendtrend2.3_trend  0.052  0.3300  0.640 1.00  2547
    -#> 
    -#> Approximate significance of GAM process smooths:
    -#>                       edf Ref.df    F p-value
    -#> s(time):seriestrend1 1.16      3 1.04    0.64
    -#> s(time):seriestrend2 1.12      3 1.58    0.62
    -#> 
    -#> Stan MCMC diagnostics:
    -#> n_eff / iter looks reasonable for all parameters
    -#> Rhat looks reasonable for all parameters
    -#> 0 of 4000 iterations ended with a divergence (0%)
    -#> 0 of 4000 iterations saturated the maximum tree depth of 12 (0%)
    -#> E-FMI indicated no pathological behavior
    -#> 
    -#> Samples were drawn using NUTS(diag_e) at Thu May 09 9:46:13 PM 2024.
    -#> For each parameter, n_eff is a crude measure of effective sample size,
    -#> and Rhat is the potential scale reduction factor on split MCMC chains
    -#> (at convergence, Rhat = 1)
    +#> +#> GAM process formula: +#> ~s(time, by = trend, k = 4) + species +#> +#> Family: +#> nmix +#> +#> Link function: +#> log +#> +#> Trend model: +#> None +#> +#> N process models: +#> 2 +#> +#> N series: +#> 10 +#> +#> N timepoints: +#> 6 +#> +#> Status: +#> Fitted using Stan +#> 4 chains, each with iter = 1500; warmup = 500; thin = 1 +#> Total post-warmup draws = 4000 +#> +#> +#> GAM observation model coefficient (beta) estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> speciessp_1 -0.28 0.7200 1.40 1 1361 +#> speciessp_2 -1.20 -0.0075 0.89 1 1675 +#> +#> GAM process model coefficient (beta) estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept)_trend 2.700 3.0000 3.400 1.00 1148 +#> speciessp_2_trend -1.200 -0.6100 0.190 1.00 1487 +#> s(time):trendtrend1.1_trend -0.081 0.0130 0.200 1.00 800 +#> s(time):trendtrend1.2_trend -0.230 0.0077 0.310 1.00 1409 +#> s(time):trendtrend1.3_trend -0.460 -0.2500 -0.038 1.00 1699 +#> s(time):trendtrend2.1_trend -0.220 -0.0130 0.095 1.00 995 +#> s(time):trendtrend2.2_trend -0.190 0.0320 0.500 1.01 1071 +#> s(time):trendtrend2.3_trend 0.064 0.3300 0.640 1.00 2268 +#> +#> Approximate significance of GAM process smooths: +#> edf Ref.df F p-value +#> s(time):seriestrend1 1.25 3 0.19 0.83 +#> s(time):seriestrend2 1.07 3 0.39 0.92 +#> +#> Stan MCMC diagnostics: +#> n_eff / iter looks reasonable for all parameters +#> Rhat looks reasonable for all parameters +#> 0 of 4000 iterations ended with a divergence (0%) +#> 0 of 4000 iterations saturated the maximum tree depth of 12 (0%) +#> E-FMI indicated no pathological behavior +#> +#> Samples were drawn using NUTS(diag_e) at Tue Apr 16 1:04:54 PM 2024. +#> For each parameter, n_eff is a crude measure of effective sample size, +#> and Rhat is the potential scale reduction factor on split MCMC chains +#> (at convergence, Rhat = 1)

loo() functionality works just as it does for all mvgam models to aid in model comparison / selection (though note that Pareto K values often give warnings for mixture models so
@@ -782,24 +780,24 @@

    Modelling with the nmix() family

#> 
#> Computed from 4000 by 60 log-likelihood matrix
#> 
-#>          Estimate    SE
-#> elpd_loo   -452.5  77.5
-#> p_loo       254.0  64.5
-#> looic       905.0 155.0
+#>          Estimate   SE
+#> elpd_loo   -230.4 13.8
+#> p_loo        83.3 12.7
+#> looic       460.9 27.5
#> ------
#> Monte Carlo SE of elpd_loo is NA.
#> 
#> Pareto k diagnostic values:
#>                          Count Pct.  Min. n_eff
-#> (-Inf, 0.5]   (good)      29   48.3%   823
-#> (0.5, 0.7]    (ok)         1    1.7%  1302
-#> (0.7, 1]      (bad)        9   15.0%    49
-#> (1, Inf)      (very bad)  21   35.0%     1
+#> (-Inf, 0.5]   (good)      25   41.7%  1141
+#> (0.5, 0.7]    (ok)         5    8.3%   390
+#> (0.7, 1]      (bad)        7   11.7%    13
+#> (1, Inf)      (very bad)  23   38.3%     2
#> See help('pareto-k-diagnostic') for details.

    Plot the estimated smooths of time from each species’ latent abundance process (on the log scale)

    plot(mod, type = 'smooths', trend_effects = TRUE)
    -

    +

marginaleffects support allows for more useful prediction-based interrogations on different scales (though note that at the time of writing this Vignette, you must have the development version
@@ -818,7 +816,7 @@

    Modelling with the nmix() family

ylim(c(0, 1)) +
  theme_classic() +
  theme(legend.position = 'none')
-

    +

A common goal in N-mixture modelling is to estimate the true latent abundance. The model has automatically generated predictions for the unknown latent abundance that are conditional on the observations. We
@@ -879,9 +877,9 @@

    Modelling with the nmix() family

    black line shows the true latent abundance, and the ribbons show credible intervals of our estimates:

    plot_latentN(hc, testdat, species = 'sp_1')
    -

    +

    plot_latentN(hc, testdat, species = 'sp_2')
    -

    +

    We can see that estimates for both species have correctly captured the true temporal variation and magnitudes in abundance

    @@ -946,8 +944,8 @@

    Example 2: a larger survey with possible nonlinear effects

#> $ y         <int> 1, NA, NA, NA, 2, 2, NA, 1, NA, NA, 0, 1, 0, 0, 0, 0, NA, NA…
#> $ abund_cov <dbl> -0.3734384, -0.3734384, -0.3734384, 0.7064305, 0.7064305, 0.…
#> $ abund_fac <fct> 3, 3, 3, 4, 4, 4, 9, 9, 9, 2, 2, 2, 3, 3, 3, 2, 2, 2, 1, 1, …
-#> $ det_cov   <dbl> -1.28279990, 0.02694282, 0.06007252, 0.23234268, 0.19548086,…
-#> $ det_cov2  <dbl> 2.0304731, 1.8551705, 1.0026802, -0.8328419, 1.0455536, 1.91…
+#> $ det_cov   <dbl> -1.28279990, -0.08474811, 0.44789392, 1.71731815, 0.19548086…
+#> $ det_cov2  <dbl> 2.03047314, -1.42459158, 1.68497337, 0.75026787, 1.04555361,…
#> $ replicate <int> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, …
#> $ site      <fct> site1, site1, site1, site2, site2, site2, site3, site3, site…
#> $ species   <fct> sp_1, sp_1, sp_1, sp_1, sp_1, sp_1, sp_1, sp_1, sp_1, sp_1, …
@@ -957,9 +955,9 @@

    Example 2: a larger survey with possible nonlinear effects

head(mod_data)
#>    y  abund_cov abund_fac     det_cov   det_cov2 replicate  site species
#> 1  1 -0.3734384         3 -1.28279990  2.0304731         1 site1    sp_1
-#> 2 NA -0.3734384         3  0.02694282  1.8551705         2 site1    sp_1
-#> 3 NA -0.3734384         3  0.06007252  1.0026802         3 site1    sp_1
-#> 4 NA  0.7064305         4  0.23234268 -0.8328419         1 site2    sp_1
+#> 2 NA -0.3734384         3 -0.08474811 -1.4245916         2 site1    sp_1
+#> 3 NA -0.3734384         3  0.44789392  1.6849734         3 site1    sp_1
+#> 4 NA  0.7064305         4  1.71731815  0.7502679         1 site2    sp_1
#> 5  2  0.7064305         4  0.19548086  1.0455536         2 site2    sp_1
#> 6  2  0.7064305         4  0.96730338  1.9197118         3 site2    sp_1
#>   series time cap
@@ -1039,71 +1037,69 @@

    Example 2: a larger survey with possible nonlinear effects

    summary(mod, include_betas = FALSE)
     #> GAM observation formula:
     #> y ~ s(det_cov, k = 3) + s(det_cov2, k = 3)
    -#> <environment: 0x00000237248751d0>
    -#> 
    -#> GAM process formula:
    -#> ~s(abund_cov, k = 3) + s(abund_fac, bs = "re")
    -#> <environment: 0x00000237248751d0>
    -#> 
    -#> Family:
    -#> nmix
    -#> 
    -#> Link function:
    -#> log
    -#> 
    -#> Trend model:
    -#> None
    -#> 
    -#> N process models:
    -#> 225 
    -#> 
    -#> N series:
    -#> 675 
    -#> 
    -#> N timepoints:
    -#> 1 
    -#> 
    -#> Status:
    -#> Fitted using Stan 
    -#> 1 chains, each with iter = 1000; warmup = ; thin = 1 
    -#> Total post-warmup draws = 1000
    -#> 
    -#> 
    -#> GAM observation model coefficient (beta) estimates:
    -#>              2.5%  50% 97.5% Rhat n.eff
    -#> (Intercept) 0.099 0.44  0.82  NaN   NaN
    -#> 
    -#> Approximate significance of GAM observation smooths:
    -#>              edf Ref.df Chi.sq p-value    
    -#> s(det_cov)  1.07      2     89 0.00058 ***
    -#> s(det_cov2) 1.05      2    318 < 2e-16 ***
    -#> ---
    -#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    -#> 
    -#> GAM process model coefficient (beta) estimates:
    -#>                     2.5%  50% 97.5% Rhat n.eff
    -#> (Intercept)_trend -0.028 0.11  0.28  NaN   NaN
    -#> 
    -#> GAM process model group-level estimates:
    -#>                           2.5%   50% 97.5% Rhat n.eff
    -#> mean(s(abund_fac))_trend -0.44 -0.32 -0.18  NaN   NaN
    -#> sd(s(abund_fac))_trend    0.28  0.42  0.60  NaN   NaN
    -#> 
    -#> Approximate significance of GAM process smooths:
    -#>               edf Ref.df    F p-value  
    -#> s(abund_cov) 1.18      2 0.29   0.813  
    -#> s(abund_fac) 8.85     10 3.33   0.038 *
    -#> ---
    -#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    -#> 
    -#> Posterior approximation used: no diagnostics to compute
    +#> +#> GAM process formula: +#> ~s(abund_cov, k = 3) + s(abund_fac, bs = "re") +#> +#> Family: +#> nmix +#> +#> Link function: +#> log +#> +#> Trend model: +#> None +#> +#> N process models: +#> 225 +#> +#> N series: +#> 675 +#> +#> N timepoints: +#> 1 +#> +#> Status: +#> Fitted using Stan +#> 1 chains, each with iter = 1000; warmup = ; thin = 1 +#> Total post-warmup draws = 1000 +#> +#> +#> GAM observation model coefficient (beta) estimates: +#> 2.5% 50% 97.5% Rhat n.eff +#> (Intercept) 0.052 0.4 0.71 NaN NaN +#> +#> Approximate significance of GAM observation smooths: +#> edf Ref.df Chi.sq p-value +#> s(det_cov) 1.22 2 52.3 0.0011 ** +#> s(det_cov2) 1.07 2 307.1 <2e-16 *** +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> +#> GAM process model coefficient (beta) estimates: +#> 2.5% 50% 97.5% Rhat n.eff +#> (Intercept)_trend -0.25 -0.081 0.079 NaN NaN +#> +#> GAM process model group-level estimates: +#> 2.5% 50% 97.5% Rhat n.eff +#> mean(s(abund_fac))_trend -0.18 0.0038 0.19 NaN NaN +#> sd(s(abund_fac))_trend 0.26 0.3900 0.56 NaN NaN +#> +#> Approximate significance of GAM process smooths: +#> edf Ref.df F p-value +#> s(abund_cov) 1.19 2 2.38 0.299 +#> s(abund_fac) 8.82 10 2.79 0.025 * +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> +#> Posterior approximation used: no diagnostics to compute

    Again we can make use of marginaleffects support for interrogating the model through targeted predictions. First, we can inspect the estimated average detection probability

    avg_predictions(mod, type = 'detection')
     #> 
     #>  Estimate 2.5 % 97.5 %
    -#>     0.588  0.52  0.662
    +#>     0.579  0.51  0.644
     #> 
     #> Columns: estimate, conf.low, conf.high 
     #> Type:  detection
    @@ -1120,12 +1116,12 @@

    Example 2: a larger survey with possible nonlinear effects

    abundance

    abund_plots[[1]] +
       ylab('Expected latent abundance')
    -

    +

    The effect of the factor covariate on expected latent abundance, estimated as a hierarchical random effect

    abund_plots[[2]] +
       ylab('Expected latent abundance')
    -

    +

    Now we can investigate estimated effects of covariates on detection probability using type = 'detection'

    det_plots <- plot(conditional_effects(mod,
    @@ -1139,10 +1135,10 @@ 

    Example 2: a larger survey with possible nonlinear effects

    the probability scale is more intuitive and useful

    det_plots[[1]] +
       ylab('Pr(detection)')
    -

    +

    det_plots[[2]] +
       ylab('Pr(detection)')
    -

    +

More targeted predictions are also easy with marginaleffects support. For example, we can ask: How does detection probability change as we change both detection
@@ -1156,7 +1152,7 @@

    Example 2: a larger survey with possible nonlinear effects

    type = 'detection') + theme_classic() + ylab('Pr(detection)')
    -

    +
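The full call is elided by the diff above; below is a sketch of this kind of targeted prediction, assuming the marginaleffects conventions used elsewhere in these vignettes (the source may instead build an explicit prediction grid with fivenum_round, as shown in the nmixtures.R diff earlier):

plot_predictions(mod,
                 condition = c('det_cov', 'det_cov2'),
                 type = 'detection') +
  theme_classic() +
  ylab('Pr(detection)')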

The model has found support for some important covariate effects, but of course we'd want to interrogate how well the model predicts and think about possible spatial effects to capture unmodelled variation in latent abundance.
diff --git a/inst/doc/shared_states.R b/inst/doc/shared_states.R
index 971c0bec..3619bf06 100644
--- a/inst/doc/shared_states.R
+++ b/inst/doc/shared_states.R
@@ -1,4 +1,4 @@
-## ----echo = FALSE-----------------------------------------------------
+## ----echo = FALSE-------------------------------------------------------------
 NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
 knitr::opts_chunk$set(
   collapse = TRUE,
@@ -8,7 +8,7 @@ knitr::opts_chunk$set(
 )
 
-## ----setup, include=FALSE---------------------------------------------
+## ----setup, include=FALSE-----------------------------------------------------
 knitr::opts_chunk$set(
   echo = TRUE,
   dpi = 100,
@@ -21,9 +21,9 @@ library(ggplot2)
 theme_set(theme_bw(base_size = 12, base_family = 'serif'))
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 set.seed(122)
-simdat <- sim_mvgam(trend_model = 'AR1',
+simdat <- sim_mvgam(trend_model = AR(),
                     prop_trend = 0.6,
                     mu = c(0, 1, 2),
                     family = poisson())
@@ -32,11 +32,11 @@ trend_map <- data.frame(series = unique(simdat$data_train$series),
 trend_map
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 all.equal(levels(trend_map$series), levels(simdat$data_train$series))
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 fake_mod <- mvgam(y ~
                     # observation model formula, which has a
                     # different intercept per series
                     series - 1,
 
                   # process model formula, which
@@ -47,8 +47,10 @@ fake_mod <- mvgam(y ~
                   trend_formula = ~ s(season, bs = 'cc', k = 6),
 
                   # AR1 dynamics (each latent process model has DIFFERENT)
-                  # dynamics
-                  trend_model = 'AR1',
+                  # dynamics; processes are estimated using the noncentred
+                  # parameterisation for improved efficiency
+                  trend_model = AR(),
+                  noncentred = TRUE,
 
                   # supplied trend_map
                   trend_map = trend_map,
@@ -59,46 +61,50 @@ fake_mod <- mvgam(y ~
                   run_model = FALSE)
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 code(fake_mod)
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 fake_mod$model_data$Z
 
-## ----full_mod, include = FALSE, results='hide'------------------------
+## ----full_mod, include = FALSE, results='hide'--------------------------------
 full_mod <- mvgam(y ~ series - 1,
                   trend_formula = ~ s(season, bs = 'cc', k = 6),
-                  trend_model = 'AR1',
+                  trend_model = AR(),
+                  noncentred = TRUE,
                   trend_map = trend_map,
                   family = poisson(),
-                  data = simdat$data_train)
+                  data = simdat$data_train,
+                  silent = 2)
 
-## ----eval=FALSE-------------------------------------------------------
+## ----eval=FALSE---------------------------------------------------------------
 ## full_mod <- mvgam(y ~ series - 1,
 ##                   trend_formula = ~ s(season, bs = 'cc', k = 6),
-##                   trend_model = 'AR1',
+##                   trend_model = AR(),
+##                   noncentred = TRUE,
 ##                   trend_map = trend_map,
 ##                   family = poisson(),
-##                   data = simdat$data_train)
+##                   data = simdat$data_train,
+##                   silent = 2)
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 summary(full_mod)
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(full_mod, type = 'trend', series = 1)
 plot(full_mod, type = 'trend', series = 2)
 plot(full_mod, type = 'trend', series = 3)
 
-## ---------------------------------------------------------------------
-set.seed(543210)
+## -----------------------------------------------------------------------------
+set.seed(0)
 # simulate a nonlinear relationship using the mgcv function gamSim
-signal_dat <- gamSim(n = 100, eg = 1, scale = 1)
+signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1)
 
 # productivity is one of the variables in the simulated data
 productivity <- signal_dat$x2
@@ -110,17 +116,16 @@ true_signal <- as.vector(scale(signal_dat$y) +
                          arima.sim(100, model = list(ar = 0.8,
                                                      sd = 0.1)))
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(true_signal, type = 'l',
      bty = 'l', lwd = 2,
     ylab = 'True signal',
     xlab = 'Time')
 
-## ---------------------------------------------------------------------
-set.seed(543210)
+## -----------------------------------------------------------------------------
 sim_series = function(n_series = 3, true_signal){
-  temp_effects <- gamSim(n = 100, eg = 7, scale = 0.1)
+  temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1)
   temperature <- temp_effects$y
   alphas <- rnorm(n_series, sd = 2)
@@ -141,12 +146,12 @@ model_dat <- sim_series(true_signal = true_signal) %>%
   dplyr::mutate(series = factor(series))
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_mvgam_series(data = model_dat, y = 'observed',
                   series = 'all')
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(observed ~ temperature, data = model_dat %>%
        dplyr::filter(series == 'sensor_1'),
      pch = 16, bty = 'l',
@@ -164,7 +169,7 @@ plot_mvgam_series(data = model_dat, y = 'observed',
      xlab = 'Temperature')
 
-## ----sensor_mod, include = FALSE, results='hide'----------------------
+## ----sensor_mod, include = FALSE, results='hide'------------------------------
 mod <- mvgam(formula =
                # formula for observations, allowing for different
               # intercepts and smooth effects of temperature
@@ -180,7 +185,8 @@ mod <- mvgam(formula =
              trend_model =
                # in addition to productivity effects, the signal is
               # assumed to exhibit temporal autocorrelation
-               'AR1',
+               AR(),
+             noncentred = TRUE,
 
              trend_map =
               # trend_map forces all sensors to track the same
@@ -197,10 +203,11 @@ mod <- mvgam(formula =
              family = gaussian(),
              burnin = 600,
              adapt_delta = 0.95,
-             data = model_dat)
+             data = model_dat,
+             silent = 2)
 
-## ----eval=FALSE---------------------------------------------------------
+## ----eval=FALSE-----------------------------------------------------------
 ## mod <- mvgam(formula =
 ##              # formula for observations, allowing for different
 ##              # intercepts and hierarchical smooth effects of temperature
@@ -216,7 +223,8 @@ mod <- mvgam(formula =
 ##              trend_model =
 ##                # in addition to productivity effects, the signal is
 ##                # assumed to exhibit temporal autocorrelation
-##                'AR1',
+##                AR(),
+##                noncentred = TRUE,
 ## 
 ##              trend_map =
 ##                # trend_map forces all sensors to track the same
@@ -231,25 +239,27 @@ mod <- mvgam(formula =
 ## 
 ##              # Gaussian observations
 ##              family = gaussian(),
-##              data = model_dat)
+##              data = model_dat,
+##              silent = 2)
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 summary(mod, include_betas = FALSE)
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 conditional_effects(mod, type = 'link')
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
+require(marginaleffects)
 plot_predictions(mod,
                  condition = c('temperature', 'series', 'series'),
                  points = 0.5) +
   theme(legend.position = 'none')
 
-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(mod, type = 'trend')
 # Overlay the true simulated signal
diff --git a/inst/doc/shared_states.Rmd b/inst/doc/shared_states.Rmd
index fb1b7282..bc79aa71 100644
--- a/inst/doc/shared_states.Rmd
+++ b/inst/doc/shared_states.Rmd
@@ -39,7 +39,7 @@ This vignette gives an example of how `mvgam` can be used to estimate models whe
 The `trend_map` argument in the `mvgam()` function is an optional `data.frame` that can be used to specify which series should depend on which latent process models (called "trends" in `mvgam`). This can be particularly useful if we wish to force multiple observed time series to depend on the same latent trend process, but with different observation processes. If this argument is supplied, a latent factor model is set up by setting `use_lv = TRUE` and using the supplied `trend_map` to set up the shared trends. Users familiar with the `MARSS` family of packages will recognize this as a way of specifying the $Z$ matrix. This `data.frame` needs to have column names `series` and `trend`, with integer values in the `trend` column to state which trend each series should depend on. The `series` column should have a single unique entry for each time series in the data, with names that perfectly match the factor levels of the `series` variable in `data`.
For example, if we were to simulate a collection of three integer-valued time series (using `sim_mvgam`), the following `trend_map` would force the first two series to share the same latent trend process: ```{r} set.seed(122) -simdat <- sim_mvgam(trend_model = 'AR1', +simdat <- sim_mvgam(trend_model = AR(), prop_trend = 0.6, mu = c(0, 1, 2), family = poisson()) @@ -66,8 +66,10 @@ fake_mod <- mvgam(y ~ trend_formula = ~ s(season, bs = 'cc', k = 6), # AR1 dynamics (each latent process model has DIFFERENT) - # dynamics - trend_model = 'AR1', + # dynamics; processes are estimated using the noncentred + # parameterisation for improved efficiency + trend_model = AR(), + noncentred = TRUE, # supplied trend_map trend_map = trend_map, @@ -93,19 +95,23 @@ Though this model doesn't perfectly match the data-generating process (which all ```{r full_mod, include = FALSE, results='hide'} full_mod <- mvgam(y ~ series - 1, trend_formula = ~ s(season, bs = 'cc', k = 6), - trend_model = 'AR1', + trend_model = AR(), + noncentred = TRUE, trend_map = trend_map, family = poisson(), - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` ```{r eval=FALSE} full_mod <- mvgam(y ~ series - 1, trend_formula = ~ s(season, bs = 'cc', k = 6), - trend_model = 'AR1', + trend_model = AR(), + noncentred = TRUE, trend_map = trend_map, family = poisson(), - data = simdat$data_train) + data = simdat$data_train, + silent = 2) ``` The summary of this model is informative as it shows that only two latent process models have been estimated, even though we have three observed time series. The model converges well @@ -125,9 +131,9 @@ However, forecasts for series' 1 and 2 will differ because they have different i ## Example: signal detection Now we will explore a more complicated example. Here we simulate a true hidden signal that we are trying to track. This signal depends nonlinearly on some covariate (called `productivity`, which represents a measure of how productive the landscape is). The signal also demonstrates a fairly large amount of temporal autocorrelation: ```{r} -set.seed(543210) +set.seed(0) # simulate a nonlinear relationship using the mgcv function gamSim -signal_dat <- gamSim(n = 100, eg = 1, scale = 1) +signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1) # productivity is one of the variables in the simulated data productivity <- signal_dat$x2 @@ -149,9 +155,8 @@ plot(true_signal, type = 'l', Next we simulate three sensors that are trying to track the same hidden signal. All of these sensors have different observation errors that can depend nonlinearly on a second external covariate, called `temperature` in this example. 
Again this makes use of `gamSim` ```{r} -set.seed(543210) sim_series = function(n_series = 3, true_signal){ - temp_effects <- gamSim(n = 100, eg = 7, scale = 0.1) + temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1) temperature <- temp_effects$y alphas <- rnorm(n_series, sd = 2) @@ -215,7 +220,8 @@ mod <- mvgam(formula = trend_model = # in addition to productivity effects, the signal is # assumed to exhibit temporal autocorrelation - 'AR1', + AR(), + noncentred = TRUE, trend_map = # trend_map forces all sensors to track the same @@ -232,7 +238,8 @@ mod <- mvgam(formula = family = gaussian(), burnin = 600, adapt_delta = 0.95, - data = model_dat) + data = model_dat, + silent = 2) ``` ```{r eval=FALSE} @@ -251,7 +258,8 @@ mod <- mvgam(formula = trend_model = # in addition to productivity effects, the signal is # assumed to exhibit temporal autocorrelation - 'AR1', + AR(), + noncentred = TRUE, trend_map = # trend_map forces all sensors to track the same @@ -266,7 +274,8 @@ mod <- mvgam(formula = # Gaussian observations family = gaussian(), - data = model_dat) + data = model_dat, + silent = 2) ``` View a reduced version of the model summary because there will be many spline coefficients in this model @@ -282,6 +291,7 @@ conditional_effects(mod, type = 'link') `conditional_effects` is simply a wrapper to the more flexible `plot_predictions` function from the `marginaleffects` package. We can get more useful plots of these effects using this function for further customisation: ```{r} +require(marginaleffects) plot_predictions(mod, condition = c('temperature', 'series', 'series'), points = 0.5) + diff --git a/inst/doc/shared_states.html b/inst/doc/shared_states.html index 665beb75..db2dac3b 100644 --- a/inst/doc/shared_states.html +++ b/inst/doc/shared_states.html @@ -12,7 +12,7 @@ - + Shared latent states in mvgam @@ -340,7 +340,7 @@

    Shared latent states in mvgam

    Nicholas J Clark

-2024-05-08
+2024-07-01

    @@ -393,7 +393,7 @@

    The trend_map argument

    sim_mvgam), the following trend_map would force the first two series to share the same latent trend process:

    set.seed(122)
    -simdat <- sim_mvgam(trend_model = 'AR1',
    +simdat <- sim_mvgam(trend_model = AR(),
                         prop_trend = 0.6,
                         mu = c(0, 1, 2),
                         family = poisson())
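The construction of `trend_map` itself falls in an elided hunk of this diff; as a minimal sketch following the accompanying R script (the `c(1, 1, 2)` pairing is assumed from the surrounding text), it pairs the first two series on one latent process:

```r
# 'series' must use the exact factor levels from the data; the integers
# in 'trend' state which latent process each series should track
trend_map <- data.frame(series = unique(simdat$data_train$series),
                        trend = c(1, 1, 2))
trend_map
```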
    @@ -432,16 +432,18 @@ 

    Checking trend_map with trend_formula = ~ s(season, bs = 'cc', k = 6), # AR1 dynamics (each latent process model has DIFFERENT) - # dynamics - trend_model = 'AR1', - - # supplied trend_map - trend_map = trend_map, - - # data and observation family - family = poisson(), - data = simdat$data_train, - run_model = FALSE)

    + # dynamics; processes are estimated using the noncentred + # parameterisation for improved efficiency + trend_model = AR(), + noncentred = TRUE, + + # supplied trend_map + trend_map = trend_map, + + # data and observation family + family = poisson(), + data = simdat$data_train, + run_model = FALSE)

    Inspecting the Stan code shows how this model is a dynamic factor model in which the loadings are constructed to reflect the supplied trend_map:
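The call that prints this Stan code sits in an elided hunk; from the accompanying R script it is simply:

```r
# print the Stan code generated for the unfitted model object
code(fake_mod)
```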

    @@ -479,96 +481,99 @@

    Checking trend_map with #> vector<lower=0>[n_lv] sigma; #> #> // latent state AR1 terms -#> vector<lower=-1.5, upper=1.5>[n_lv] ar1; +#> vector<lower=-1, upper=1>[n_lv] ar1; #> -#> // latent states -#> matrix[n, n_lv] LV; +#> // raw latent states +#> matrix[n, n_lv] LV_raw; #> #> // smoothing parameters #> vector<lower=0>[n_sp_trend] lambda_trend; #> } #> transformed parameters { -#> // latent states and loading matrix +#> // raw latent states #> vector[n * n_lv] trend_mus; #> matrix[n, n_series] trend; #> #> // basis coefficients #> vector[num_basis] b; -#> vector[num_basis_trend] b_trend; -#> -#> // observation model basis coefficients -#> b[1 : num_basis] = b_raw[1 : num_basis]; +#> +#> // latent states +#> matrix[n, n_lv] LV; +#> vector[num_basis_trend] b_trend; #> -#> // process model basis coefficients -#> b_trend[1 : num_basis_trend] = b_raw_trend[1 : num_basis_trend]; +#> // observation model basis coefficients +#> b[1 : num_basis] = b_raw[1 : num_basis]; #> -#> // latent process linear predictors -#> trend_mus = X_trend * b_trend; +#> // process model basis coefficients +#> b_trend[1 : num_basis_trend] = b_raw_trend[1 : num_basis_trend]; #> -#> // derived latent states -#> for (i in 1 : n) { -#> for (s in 1 : n_series) { -#> trend[i, s] = dot_product(Z[s, : ], LV[i, : ]); -#> } -#> } -#> } -#> model { -#> // prior for seriesseries_1... -#> b_raw[1] ~ student_t(3, 0, 2); +#> // latent process linear predictors +#> trend_mus = X_trend * b_trend; +#> LV = LV_raw .* rep_matrix(sigma', rows(LV_raw)); +#> for (j in 1 : n_lv) { +#> LV[1, j] += trend_mus[ytimes_trend[1, j]]; +#> for (i in 2 : n) { +#> LV[i, j] += trend_mus[ytimes_trend[i, j]] +#> + ar1[j] * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]); +#> } +#> } #> -#> // prior for seriesseries_2... -#> b_raw[2] ~ student_t(3, 0, 2); -#> -#> // prior for seriesseries_3... -#> b_raw[3] ~ student_t(3, 0, 2); -#> -#> // priors for AR parameters -#> ar1 ~ std_normal(); -#> -#> // priors for latent state SD parameters -#> sigma ~ student_t(3, 0, 2.5); -#> -#> // dynamic process models +#> // derived latent states +#> for (i in 1 : n) { +#> for (s in 1 : n_series) { +#> trend[i, s] = dot_product(Z[s, : ], LV[i, : ]); +#> } +#> } +#> } +#> model { +#> // prior for seriesseries_1... +#> b_raw[1] ~ student_t(3, 0, 2); +#> +#> // prior for seriesseries_2... +#> b_raw[2] ~ student_t(3, 0, 2); #> -#> // prior for s(season)_trend... 
-#> b_raw_trend[1 : 4] ~ multi_normal_prec(zero_trend[1 : 4], -#> S_trend1[1 : 4, 1 : 4] -#> * lambda_trend[1]); -#> lambda_trend ~ normal(5, 30); -#> for (j in 1 : n_lv) { -#> LV[1, j] ~ normal(trend_mus[ytimes_trend[1, j]], sigma[j]); -#> for (i in 2 : n) { -#> LV[i, j] ~ normal(trend_mus[ytimes_trend[i, j]] -#> + ar1[j] -#> * (LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]]), -#> sigma[j]); -#> } -#> } -#> { -#> // likelihood functions -#> vector[n_nonmissing] flat_trends; -#> flat_trends = to_vector(trend)[obs_ind]; -#> flat_ys ~ poisson_log_glm(append_col(flat_xs, flat_trends), 0.0, -#> append_row(b, 1.0)); -#> } -#> } -#> generated quantities { -#> vector[total_obs] eta; -#> matrix[n, n_series] mus; -#> vector[n_sp_trend] rho_trend; -#> vector[n_lv] penalty; -#> array[n, n_series] int ypred; -#> penalty = 1.0 / (sigma .* sigma); -#> rho_trend = log(lambda_trend); -#> -#> matrix[n_series, n_lv] lv_coefs = Z; -#> // posterior predictions -#> eta = X * b; -#> for (s in 1 : n_series) { -#> mus[1 : n, s] = eta[ytimes[1 : n, s]] + trend[1 : n, s]; -#> ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]); -#> } -#> } +#> // prior for seriesseries_3... +#> b_raw[3] ~ student_t(3, 0, 2); +#> +#> // priors for AR parameters +#> ar1 ~ std_normal(); +#> +#> // priors for latent state SD parameters +#> sigma ~ student_t(3, 0, 2.5); +#> to_vector(LV_raw) ~ std_normal(); +#> +#> // dynamic process models +#> +#> // prior for s(season)_trend... +#> b_raw_trend[1 : 4] ~ multi_normal_prec(zero_trend[1 : 4], +#> S_trend1[1 : 4, 1 : 4] +#> * lambda_trend[1]); +#> lambda_trend ~ normal(5, 30); +#> { +#> // likelihood functions +#> vector[n_nonmissing] flat_trends; +#> flat_trends = to_vector(trend)[obs_ind]; +#> flat_ys ~ poisson_log_glm(append_col(flat_xs, flat_trends), 0.0, +#> append_row(b, 1.0)); +#> } +#> } +#> generated quantities { +#> vector[total_obs] eta; +#> matrix[n, n_series] mus; +#> vector[n_sp_trend] rho_trend; +#> vector[n_lv] penalty; +#> array[n, n_series] int ypred; +#> penalty = 1.0 / (sigma .* sigma); +#> rho_trend = log(lambda_trend); +#> +#> matrix[n_series, n_lv] lv_coefs = Z; +#> // posterior predictions +#> eta = X * b; +#> for (s in 1 : n_series) { +#> mus[1 : n, s] = eta[ytimes[1 : n, s]] + trend[1 : n, s]; +#> ypred[1 : n, s] = poisson_log_rng(mus[1 : n, s]); +#> } +#> }

Notice the line that states “lv_coefs = Z;”. This uses the supplied \(Z\) matrix to construct the loading coefficients. The supplied matrix now looks exactly like what you’d use
@@ -587,21 +592,23 @@
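Given the `trend_map` above, the supplied matrix should look something like the following (a sketch; the exact object is printed by the `fake_mod$model_data$Z` call in the accompanying R script):

```r
fake_mod$model_data$Z
#>      [,1] [,2]
#> [1,]    1    0
#> [2,]    1    0
#> [3,]    0    1
```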

    Fitting and inspecting the model

    can still fit it to show what the resulting inferences look like:

    full_mod <- mvgam(y ~ series - 1,
                       trend_formula = ~ s(season, bs = 'cc', k = 6),
    -                  trend_model = 'AR1',
    -                  trend_map = trend_map,
    -                  family = poisson(),
    -                  data = simdat$data_train)
+                  trend_model = AR(),
+                  noncentred = TRUE,
+                  trend_map = trend_map,
+                  family = poisson(),
+                  data = simdat$data_train,
+                  silent = 2)

    The summary of this model is informative as it shows that only two latent process models have been estimated, even though we have three observed time series. The model converges well

    summary(full_mod)
     #> GAM observation formula:
     #> y ~ series - 1
    -#> <environment: 0x000001b6d1eaf110>
    +#> <environment: 0x000001f52b9e3130>
     #> 
     #> GAM process formula:
     #> ~s(season, bs = "cc", k = 6)
    -#> <environment: 0x000001b6d1eaf110>
    +#> <environment: 0x000001f52b9e3130>
     #> 
     #> Family:
     #> poisson
    @@ -610,7 +617,7 @@ 

    Fitting and inspecting the model

 #> log
 #> 
 #> Trend model:
-#> AR1
+#> AR()
 #> 
 #> N process models:
 #> 2
@@ -629,30 +636,30 @@

    Fitting and inspecting the model

    #> #> GAM observation model coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> seriesseries_1 -0.15 0.079 0.3 1.00 1468 -#> seriesseries_2 0.90 1.100 1.2 1.00 1001 -#> seriesseries_3 1.90 2.100 2.3 1.01 318 +#> seriesseries_1 -0.14 0.084 0.29 1.00 1720 +#> seriesseries_2 0.91 1.100 1.20 1.00 1374 +#> seriesseries_3 1.90 2.100 2.30 1.01 447 #> #> Process model AR parameter estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] -0.71 -0.42 -0.054 1 952 -#> ar1[2] -0.29 -0.01 0.290 1 1786 +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] -0.72 -0.430 -0.037 1.01 560 +#> ar1[2] -0.30 -0.017 0.270 1.01 286 #> #> Process error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> sigma[1] 0.34 0.50 0.69 1.01 435 -#> sigma[2] 0.60 0.73 0.92 1.00 1061 +#> sigma[1] 0.34 0.49 0.65 1 819 +#> sigma[2] 0.59 0.73 0.90 1 573 #> #> GAM process model coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> s(season).1_trend -0.22 -0.010 0.21 1 1610 -#> s(season).2_trend -0.27 -0.048 0.16 1 1547 -#> s(season).3_trend -0.16 0.075 0.29 1 1834 -#> s(season).4_trend -0.15 0.065 0.26 1 1605 +#> s(season).1_trend -0.20 -0.003 0.21 1.01 857 +#> s(season).2_trend -0.27 -0.046 0.17 1.00 918 +#> s(season).3_trend -0.15 0.068 0.28 1.00 850 +#> s(season).4_trend -0.14 0.064 0.27 1.00 972 #> #> Approximate significance of GAM process smooths: -#> edf Ref.df F p-value -#> s(season) 1.81 4 0.17 0.92 +#> edf Ref.df Chi.sq p-value +#> s(season) 2.33 4 0.38 0.93 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters @@ -661,18 +668,18 @@

    Fitting and inspecting the model

 #> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
 #> E-FMI indicated no pathological behavior
 #> 
-#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:17:16 PM 2024.
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:31:17 AM 2024.
 #> For each parameter, n_eff is a crude measure of effective sample size,
 #> and Rhat is the potential scale reduction factor on split MCMC chains
 #> (at convergence, Rhat = 1)

    Both series 1 and 2 share the exact same latent process estimates, while the estimates for series 3 are different:

    plot(full_mod, type = 'trend', series = 1)
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    plot(full_mod, type = 'trend', series = 2)
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    plot(full_mod, type = 'trend', series = 3)
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
However, forecasts for series 1 and 2 will differ because they have different intercepts in the observation model
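A quick sketch of how those differing forecasts could be generated, assuming the held-out `simdat$data_test` from the simulation is used:

```r
# forecasts share the latent states but differ through the
# series-specific intercepts in the observation model
fc <- forecast(full_mod, newdata = simdat$data_test)
plot(fc, series = 1)
plot(fc, series = 2)
```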

    @@ -684,9 +691,9 @@

    Example: signal detection

    nonlinearly on some covariate (called productivity, which represents a measure of how productive the landscape is). The signal also demonstrates a fairly large amount of temporal autocorrelation:

-set.seed(543210)
+set.seed(0)
     # simulate a nonlinear relationship using the mgcv function gamSim
    -signal_dat <- gamSim(n = 100, eg = 1, scale = 1)
    +signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1)
     #> Gu & Wahba 4 term additive model
     
     # productivity is one of the variables in the simulated data
    @@ -702,38 +709,37 @@ 

    Example: signal detection

    bty = 'l', lwd = 2, ylab = 'True signal', xlab = 'Time')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
Next we simulate three sensors that are trying to track the same hidden signal. All of these sensors have different observation errors that can depend nonlinearly on a second external covariate, called temperature in this example. Again, this makes use of gamSim:

-set.seed(543210)
    -sim_series = function(n_series = 3, true_signal){
    -  temp_effects <- gamSim(n = 100, eg = 7, scale = 0.1)
    -  temperature <- temp_effects$y
    -  alphas <- rnorm(n_series, sd = 2)
    -
    -  do.call(rbind, lapply(seq_len(n_series), function(series){
    -    data.frame(observed = rnorm(length(true_signal),
    -                                mean = alphas[series] +
    -                                       1.5*as.vector(scale(temp_effects[, series + 1])) +
    -                                       true_signal,
    -                                sd = runif(1, 1, 2)),
    -               series = paste0('sensor_', series),
    -               time = 1:length(true_signal),
    -               temperature = temperature,
    -               productivity = productivity,
    -               true_signal = true_signal)
    -   }))
    -  }
    -model_dat <- sim_series(true_signal = true_signal) %>%
    -  dplyr::mutate(series = factor(series))
    -#> Gu & Wahba 4 term additive model, correlated predictors
+sim_series = function(n_series = 3, true_signal){
    +  temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.1)
    +  temperature <- temp_effects$y
    +  alphas <- rnorm(n_series, sd = 2)
    +
    +  do.call(rbind, lapply(seq_len(n_series), function(series){
    +    data.frame(observed = rnorm(length(true_signal),
    +                                mean = alphas[series] +
    +                                       1.5*as.vector(scale(temp_effects[, series + 1])) +
    +                                       true_signal,
    +                                sd = runif(1, 1, 2)),
    +               series = paste0('sensor_', series),
    +               time = 1:length(true_signal),
    +               temperature = temperature,
    +               productivity = productivity,
    +               true_signal = true_signal)
    +   }))
    +  }
    +model_dat <- sim_series(true_signal = true_signal) %>%
    +  dplyr::mutate(series = factor(series))
    +#> Gu & Wahba 4 term additive model, correlated predictors

    Plot the sensor observations

    plot_mvgam_series(data = model_dat, y = 'observed',
                       series = 'all')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    And now plot the observed relationships between the three sensors and the temperature covariate

     plot(observed ~ temperature, data = model_dat %>%
    @@ -741,19 +747,19 @@ 

    Example: signal detection

    pch = 16, bty = 'l', ylab = 'Sensor 1', xlab = 'Temperature')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
     plot(observed ~ temperature, data = model_dat %>%
        dplyr::filter(series == 'sensor_2'),
        pch = 16, bty = 'l',
        ylab = 'Sensor 2',
        xlab = 'Temperature')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
     plot(observed ~ temperature, data = model_dat %>%
        dplyr::filter(series == 'sensor_3'),
        pch = 16, bty = 'l',
        ylab = 'Sensor 3',
        xlab = 'Temperature')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    The shared signal model

Now we can formulate and fit a model that allows each sensor’s
@@ -781,33 +787,35 @@

    The shared signal model

             trend_model =
               # in addition to productivity effects, the signal is
               # assumed to exhibit temporal autocorrelation
-              'AR1',
-
-             trend_map =
-               # trend_map forces all sensors to track the same
-               # latent signal
-               data.frame(series = unique(model_dat$series),
-                          trend = c(1, 1, 1)),
-
-             # informative priors on process error
-             # and observation error will help with convergence
-             priors = c(prior(normal(2, 0.5), class = sigma),
-                        prior(normal(1, 0.5), class = sigma_obs)),
-
-             # Gaussian observations
-             family = gaussian(),
-             data = model_dat)
+              AR(),
+              noncentred = TRUE,
+
+             trend_map =
+               # trend_map forces all sensors to track the same
+               # latent signal
+               data.frame(series = unique(model_dat$series),
+                          trend = c(1, 1, 1)),
+
+             # informative priors on process error
+             # and observation error will help with convergence
+             priors = c(prior(normal(2, 0.5), class = sigma),
+                        prior(normal(1, 0.5), class = sigma_obs)),
+
+             # Gaussian observations
+             family = gaussian(),
+             data = model_dat,
+             silent = 2)
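The opening of this call (the observation formula) sits in an elided hunk; piecing it together from the GAM formulas reported in the summary below, the full call is approximately (a sketch, not verbatim from the diff):

```r
# observation formula reconstructed from the summary output below
mod <- mvgam(formula = observed ~ series +
               s(temperature, k = 10) +
               s(series, temperature, bs = 'sz', k = 8),
             trend_formula = ~ s(productivity, k = 8),
             trend_model = AR(),
             noncentred = TRUE,
             trend_map = data.frame(series = unique(model_dat$series),
                                    trend = c(1, 1, 1)),
             priors = c(prior(normal(2, 0.5), class = sigma),
                        prior(normal(1, 0.5), class = sigma_obs)),
             family = gaussian(),
             data = model_dat,
             silent = 2)
```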

    View a reduced version of the model summary because there will be many spline coefficients in this model

    summary(mod, include_betas = FALSE)
     #> GAM observation formula:
     #> observed ~ series + s(temperature, k = 10) + s(series, temperature, 
     #>     bs = "sz", k = 8)
    -#> <environment: 0x000001b6d1eaf110>
    +#> <environment: 0x000001f52b9e3130>
     #> 
     #> GAM process formula:
     #> ~s(productivity, k = 8)
    -#> <environment: 0x000001b6d1eaf110>
    +#> <environment: 0x000001f52b9e3130>
     #> 
     #> Family:
     #> gaussian
    @@ -816,7 +824,7 @@ 

    The shared signal model

 #> identity
 #> 
 #> Trend model:
-#> AR1
+#> AR()
 #> 
 #> N process models:
 #> 1
@@ -835,49 +843,46 @@

    The shared signal model

    #> #> Observation error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> sigma_obs[1] 1.6 1.9 2.2 1 1757 -#> sigma_obs[2] 1.4 1.7 2.0 1 1090 -#> sigma_obs[3] 1.3 1.5 1.8 1 1339 +#> sigma_obs[1] 1.4 1.7 2.1 1 1298 +#> sigma_obs[2] 1.7 2.0 2.3 1 1946 +#> sigma_obs[3] 2.0 2.3 2.7 1 2569 #> #> GAM observation model coefficient (beta) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) 0.72 1.70 2.50 1.01 360 -#> seriessensor_2 -2.10 -0.96 0.32 1.00 1068 -#> seriessensor_3 -3.40 -2.00 -0.39 1.00 1154 +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept) -3.40 -2.1 -0.69 1 1067 +#> seriessensor_2 -2.80 -1.4 -0.14 1 1169 +#> seriessensor_3 0.63 3.1 4.80 1 1055 #> #> Approximate significance of GAM observation smooths: -#> edf Ref.df F p-value -#> s(temperature) 1.22 9 12.6 <2e-16 *** -#> s(series,temperature) 1.92 16 1.0 0.011 * +#> edf Ref.df Chi.sq p-value +#> s(temperature) 1.39 9 0.11 1 +#> s(series,temperature) 2.78 16 107.40 5.4e-05 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> Process model AR parameter estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] 0.33 0.59 0.83 1 492 +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] 0.37 0.6 0.8 1.01 616 #> #> Process error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> sigma[1] 0.72 1 1.3 1.01 392 +#> sigma[1] 1.5 1.8 2.2 1.01 649 #> #> Approximate significance of GAM process smooths: -#> edf Ref.df F p-value -#> s(productivity) 3.6 7 9.34 0.00036 *** -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -#> -#> Stan MCMC diagnostics: -#> n_eff / iter looks reasonable for all parameters -#> Rhats above 1.05 found for 28 parameters -#> *Diagnose further to investigate why the chains have not mixed -#> 0 of 2000 iterations ended with a divergence (0%) -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) -#> E-FMI indicated no pathological behavior -#> -#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:20:39 PM 2024. -#> For each parameter, n_eff is a crude measure of effective sample size, -#> and Rhat is the potential scale reduction factor on split MCMC chains -#> (at convergence, Rhat = 1)
    +#> edf Ref.df Chi.sq p-value +#> s(productivity) 0.926 7 5.45 1 +#> +#> Stan MCMC diagnostics: +#> n_eff / iter looks reasonable for all parameters +#> Rhat looks reasonable for all parameters +#> 0 of 2000 iterations ended with a divergence (0%) +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) +#> E-FMI indicated no pathological behavior +#> +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:32:12 AM 2024. +#> For each parameter, n_eff is a crude measure of effective sample size, +#> and Rhat is the potential scale reduction factor on split MCMC chains +#> (at convergence, Rhat = 1)

    Inspecting effects on both process and observation models

    @@ -888,16 +893,18 @@

    Inspecting effects on both process and observation models

    prediction-based plots of the smooth functions. All main effects can be quickly plotted with conditional_effects:

    conditional_effects(mod, type = 'link')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    conditional_effects is simply a wrapper to the more flexible plot_predictions function from the marginaleffects package. We can get more useful plots of these effects using this function for further customisation:

-plot_predictions(mod, 
    -                 condition = c('temperature', 'series', 'series'),
    -                 points = 0.5) +
    -  theme(legend.position = 'none')
-<img src="data:image/png;base64,..."/>
+require(marginaleffects)
    +#> Loading required package: marginaleffects
    +plot_predictions(mod, 
    +                 condition = c('temperature', 'series', 'series'),
    +                 points = 0.5) +
    +  theme(legend.position = 'none')
+<img src="data:image/png;base64,..."/>
We have successfully estimated effects, some of them nonlinear, that impact the hidden process AND the observations. All in a single joint model. But there can always be challenges with these models,
@@ -917,7 +924,7 @@

    Recovering the hidden signal

# Overlay the true simulated signal
points(true_signal, pch = 16, cex = 1, col = 'white')
points(true_signal, pch = 16, cex = 0.8)
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
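If the latent signal draws themselves are needed rather than the plot, they can be pulled from the hindcast distribution using mvgam's `hindcast()` helper; a sketch, assuming the default response-scale predictions:

```r
# extract posterior hindcast draws of the fitted signal
hc <- hindcast(mod)
str(hc$hindcasts, max.level = 1)
```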
    diff --git a/inst/doc/time_varying_effects.R b/inst/doc/time_varying_effects.R index 6d791ad6..18caf805 100644 --- a/inst/doc/time_varying_effects.R +++ b/inst/doc/time_varying_effects.R @@ -1,4 +1,4 @@ -## ----echo = FALSE----------------------------------------------------- +## ----echo = FALSE------------------------------------------------------------- NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true") knitr::opts_chunk$set( collapse = TRUE, @@ -8,7 +8,7 @@ knitr::opts_chunk$set( ) -## ----setup, include=FALSE--------------------------------------------- +## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, dpi = 100, @@ -21,7 +21,7 @@ library(ggplot2) theme_set(theme_bw(base_size = 12, base_family = 'serif')) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- set.seed(1111) N <- 200 beta_temp <- mvgam:::sim_gp(rnorm(1), @@ -30,18 +30,18 @@ beta_temp <- mvgam:::sim_gp(rnorm(1), h = N) + 0.5 -## ----fig.alt = "Simulating time-varying effects in mvgam and R"------- +## ----fig.alt = "Simulating time-varying effects in mvgam and R"--------------- plot(beta_temp, type = 'l', lwd = 3, bty = 'l', xlab = 'Time', ylab = 'Coefficient', col = 'darkred') box(bty = 'l', lwd = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- temp <- rnorm(N, sd = 1) -## ----fig.alt = "Simulating time-varying effects in mvgam and R"------- +## ----fig.alt = "Simulating time-varying effects in mvgam and R"--------------- out <- rnorm(N, mean = 4 + beta_temp * temp, sd = 0.25) time <- seq_along(temp) @@ -51,36 +51,39 @@ plot(out, type = 'l', lwd = 3, box(bty = 'l', lwd = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- data <- data.frame(out, temp, time) data_train <- data[1:190,] data_test <- data[191:200,] -## ----include=FALSE---------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) -## ----eval=FALSE------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), ## family = gaussian(), -## data = data_train) +## data = data_train, +## silent = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(mod, include_betas = FALSE) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot_mvgam_smooth(mod, smooth = 1, newdata = data) abline(v = 190, lty = 'dashed', lwd = 2) lines(beta_temp, lwd = 2.5, col = 'white') lines(beta_temp, lwd = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- +require(marginaleffects) range_round = function(x){ round(range(x, na.rm = TRUE), 2) } @@ -91,40 +94,42 @@ plot_predictions(mod, type = 'link') -## 
--------------------------------------------------------------------- +## ----------------------------------------------------------------------------- fc <- forecast(mod, newdata = data_test) plot(fc) -## ----include=FALSE---------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ mod <- mvgam(out ~ dynamic(temp, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) -## ----eval=FALSE------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## mod <- mvgam(out ~ dynamic(temp, k = 40), ## family = gaussian(), -## data = data_train) +## data = data_train, +## silent = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(mod, include_betas = FALSE) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot_mvgam_smooth(mod, smooth = 1, newdata = data) abline(v = 190, lty = 'dashed', lwd = 2) lines(beta_temp, lwd = 2.5, col = 'white') lines(beta_temp, lwd = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- load(url('https://github.com/atsa-es/MARSS/raw/master/data/SalmonSurvCUI.rda')) dplyr::glimpse(SalmonSurvCUI) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- SalmonSurvCUI %>% # create a time variable dplyr::mutate(time = dplyr::row_number()) %>% @@ -139,66 +144,74 @@ SalmonSurvCUI %>% dplyr::mutate(survival = plogis(logit.s)) -> model_data -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- dplyr::glimpse(model_data) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot_mvgam_series(data = model_data, y = 'survival') -## ----include = FALSE-------------------------------------------------- +## ----include = FALSE---------------------------------------------------------- mod0 <- mvgam(formula = survival ~ 1, - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) -## ----eval = FALSE----------------------------------------------------- +## ----eval = FALSE------------------------------------------------------------- ## mod0 <- mvgam(formula = survival ~ 1, -## trend_model = RW(), -## family = betar(), -## data = model_data) +## trend_model = AR(), +## noncentred = TRUE, +## family = betar(), +## data = model_data, +## silent = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(mod0) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot(mod0, type = 'trend') -## ----include=FALSE---------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ mod1 <- mvgam(formula = survival ~ 1, trend_formula = ~ 
dynamic(CUI.apr, k = 25, scale = FALSE), - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), data = model_data, - adapt_delta = 0.99) + adapt_delta = 0.99, + silent = 2) -## ----eval=FALSE------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## mod1 <- mvgam(formula = survival ~ 1, ## trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), -## trend_model = 'RW', +## trend_model = AR(), +## noncentred = TRUE, ## family = betar(), -## data = model_data) +## data = model_data, +## silent = 2) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- summary(mod1, include_betas = FALSE) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot(mod1, type = 'trend') -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- plot(mod1, type = 'forecast') -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- # Extract estimates of the process error 'sigma' for each model mod0_sigma <- as.data.frame(mod0, variable = 'sigma', regex = TRUE) %>% dplyr::mutate(model = 'Mod0') @@ -207,31 +220,31 @@ mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>% sigmas <- rbind(mod0_sigma, mod1_sigma) # Plot using ggplot2 -library(ggplot2) +require(ggplot2) ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) + geom_density(alpha = 0.3, colour = NA) + coord_flip() -## --------------------------------------------------------------------- -plot(mod1, type = 'smooth', trend_effects = TRUE) +## ----------------------------------------------------------------------------- +plot(mod1, type = 'smooths', trend_effects = TRUE) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- loo_compare(mod0, mod1) -## ----include=FALSE---------------------------------------------------- +## ----include=FALSE------------------------------------------------------------ lfo_mod0 <- lfo_cv(mod0, min_t = 30) lfo_mod1 <- lfo_cv(mod1, min_t = 30) -## ----eval=FALSE------------------------------------------------------- +## ----eval=FALSE--------------------------------------------------------------- ## lfo_mod0 <- lfo_cv(mod0, min_t = 30) ## lfo_mod1 <- lfo_cv(mod1, min_t = 30) -## --------------------------------------------------------------------- +## ----------------------------------------------------------------------------- sum(lfo_mod0$elpds) sum(lfo_mod1$elpds) diff --git a/inst/doc/time_varying_effects.Rmd b/inst/doc/time_varying_effects.Rmd index cf9573fd..ec19a933 100644 --- a/inst/doc/time_varying_effects.Rmd +++ b/inst/doc/time_varying_effects.Rmd @@ -85,13 +85,15 @@ Time-varying coefficients can be fairly easily set up using the `s()` or `gp()` ```{r, include=FALSE} mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` ```{r, eval=FALSE} mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` Inspect the model 
summary, which shows how the `dynamic()` wrapper was used to construct a low-rank Gaussian Process smooth function: @@ -109,6 +111,7 @@ lines(beta_temp, lwd = 2) We can also use `plot_predictions` from the `marginaleffects` package to visualise the time-varying coefficient for what the effect would be estimated to be at different values of $temperature$: ```{r} +require(marginaleffects) range_round = function(x){ round(range(x, na.rm = TRUE), 2) } @@ -129,13 +132,15 @@ The syntax is very similar if we wish to estimate the parameters of the underlyi ```{r include=FALSE} mod <- mvgam(out ~ dynamic(temp, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` ```{r eval=FALSE} mod <- mvgam(out ~ dynamic(temp, k = 40), family = gaussian(), - data = data_train) + data = data_train, + silent = 2) ``` This model summary now contains estimates for the marginal deviation and length scale parameters of the underlying Gaussian Process function: @@ -186,19 +191,23 @@ plot_mvgam_series(data = model_data, y = 'survival') ### A State-Space Beta regression -`mvgam` can easily handle data that are bounded at 0 and 1 with a Beta observation model (using the `mgcv` function `betar()`, see `?mgcv::betar` for details). First we will fit a simple State-Space model that uses a Random Walk dynamic process model with no predictors and a Beta observation model: +`mvgam` can easily handle data that are bounded at 0 and 1 with a Beta observation model (using the `mgcv` function `betar()`, see `?mgcv::betar` for details). First we will fit a simple State-Space model that uses an AR1 dynamic process model with no predictors and a Beta observation model: ```{r include = FALSE} mod0 <- mvgam(formula = survival ~ 1, - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) ``` ```{r eval = FALSE} mod0 <- mvgam(formula = survival ~ 1, - trend_model = RW(), - family = betar(), - data = model_data) + trend_model = AR(), + noncentred = TRUE, + family = betar(), + data = model_data, + silent = 2) ``` The summary of this model shows good behaviour of the Hamiltonian Monte Carlo sampler and provides useful summaries on the Beta observation model parameters: @@ -216,18 +225,22 @@ Now we can increase the complexity of our model by constructing and fitting a St ```{r include=FALSE} mod1 <- mvgam(formula = survival ~ 1, trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), data = model_data, - adapt_delta = 0.99) + adapt_delta = 0.99, + silent = 2) ``` ```{r eval=FALSE} mod1 <- mvgam(formula = survival ~ 1, trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE), - trend_model = 'RW', + trend_model = AR(), + noncentred = TRUE, family = betar(), - data = model_data) + data = model_data, + silent = 2) ``` The summary for this model now includes estimates for the time-varying GP parameters: @@ -254,15 +267,15 @@ mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>% sigmas <- rbind(mod0_sigma, mod1_sigma) # Plot using ggplot2 -library(ggplot2) +require(ggplot2) ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) + geom_density(alpha = 0.3, colour = NA) + coord_flip() ``` -Why does the process error not need to be as flexible in the second model? 
Because the estimates of this dynamic process are now informed partly by the time-varying effect of upwelling, which we can visualise on the link scale using `plot()` with `trend_effects = TRUE`: +Why does the process error not need to be as flexible in the second model? Because the estimates of this dynamic process are now informed partly by the time-varying effect of upwelling, which we can visualise on the link scale using `plot()`: ```{r} -plot(mod1, type = 'smooth', trend_effects = TRUE) +plot(mod1, type = 'smooths', trend_effects = TRUE) ``` ### Comparing model predictive performances diff --git a/inst/doc/time_varying_effects.html b/inst/doc/time_varying_effects.html index 6f53eb9e..341e817b 100644 --- a/inst/doc/time_varying_effects.html +++ b/inst/doc/time_varying_effects.html @@ -12,7 +12,7 @@ - + Time-varying effects in mvgam @@ -340,7 +340,7 @@

    Time-varying effects in mvgam

    Nicholas J Clark

-2024-05-08
+2024-07-01

    @@ -453,14 +453,15 @@

    The dynamic() function

    s(time, by = temp, bs = "gp", m = c(-2, 8, 2), k = 40)

    mod <- mvgam(out ~ dynamic(temp, rho = 8, stationary = TRUE, k = 40),
                  family = gaussian(),
    -             data = data_train)
+             data = data_train,
+             silent = 2)

    Inspect the model summary, which shows how the dynamic() wrapper was used to construct a low-rank Gaussian Process smooth function:

    summary(mod, include_betas = FALSE)
     #> GAM formula:
     #> out ~ s(time, by = temp, bs = "gp", m = c(-2, 8, 2), k = 40)
    -#> <environment: 0x000001820adad480>
    +#> <environment: 0x0000014d37f0f110>
     #> 
     #> Family:
     #> gaussian
    @@ -485,15 +486,15 @@ 

    The dynamic() function

    #> #> Observation error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> sigma_obs[1] 0.23 0.25 0.28 1 2222 +#> sigma_obs[1] 0.23 0.25 0.28 1 2026 #> #> GAM coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) 4 4 4.1 1 2893 +#> (Intercept) 4 4 4.1 1 2640 #> #> Approximate significance of GAM smooths: -#> edf Ref.df F p-value -#> s(time):temp 14 40 73.2 <2e-16 *** +#> edf Ref.df Chi.sq p-value +#> s(time):temp 15.4 40 173 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> @@ -504,7 +505,7 @@

    The dynamic() function

 #> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
 #> E-FMI indicated no pathological behavior
 #> 
-#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:26:35 PM 2024.
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:35:21 AM 2024.
 #> For each parameter, n_eff is a crude measure of effective sample size,
 #> and Rhat is the potential scale reduction factor on split MCMC chains
 #> (at convergence, Rhat = 1)
    @@ -521,26 +522,28 @@

    The dynamic() function

abline(v = 190, lty = 'dashed', lwd = 2)
lines(beta_temp, lwd = 2.5, col = 'white')
lines(beta_temp, lwd = 2)
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    We can also use plot_predictions from the marginaleffects package to visualise the time-varying coefficient for what the effect would be estimated to be at different values of \(temperature\):

-range_round = function(x){
    -  round(range(x, na.rm = TRUE), 2)
    -}
    -plot_predictions(mod, 
    -                 newdata = datagrid(time = unique,
    -                                    temp = range_round),
    -                 by = c('time', 'temp', 'temp'),
    -                 type = 'link')
-<img src="data:image/png;base64,..."/>
+require(marginaleffects)
    +#> Loading required package: marginaleffects
    +range_round = function(x){
    +  round(range(x, na.rm = TRUE), 2)
    +}
    +plot_predictions(mod, 
    +                 newdata = datagrid(time = unique,
    +                                    temp = range_round),
    +                 by = c('time', 'temp', 'temp'),
    +                 type = 'link')
+<img src="data:image/png;base64,..."/>
    This results in sensible forecasts of the observations as well

    fc <- forecast(mod, newdata = data_test)
     plot(fc)
     #> Out of sample CRPS:
    -#> 1.28034661302171
-<img src="data:image/png;base64,..."/>
+#> 1.30674285292277
+<img src="data:image/png;base64,..."/>
    The syntax is very similar if we wish to estimate the parameters of the underlying Gaussian Process, this time using a Hilbert space approximation. We simply omit the rho argument in @@ -548,14 +551,15 @@

    The dynamic() function

    similar to gp(time, by = 'temp', c = 5/4, k = 40).

    mod <- mvgam(out ~ dynamic(temp, k = 40),
                  family = gaussian(),
    -             data = data_train)
    + data = data_train, + silent = 2)

    This model summary now contains estimates for the marginal deviation and length scale parameters of the underlying Gaussian Process function:

    summary(mod, include_betas = FALSE)
     #> GAM formula:
     #> out ~ gp(time, by = temp, c = 5/4, k = 40, scale = TRUE)
    -#> <environment: 0x000001820adad480>
    +#> <environment: 0x0000014d37f0f110>
     #> 
     #> Family:
     #> gaussian
    @@ -580,36 +584,35 @@ 

    The dynamic() function

    #> #> Observation error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> sigma_obs[1] 0.24 0.26 0.29 1 2151 +#> sigma_obs[1] 0.24 0.26 0.3 1 2183 #> #> GAM coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) 4 4 4.1 1 2989 +#> (Intercept) 4 4 4.1 1 2733 #> #> GAM gp term marginal deviation (alpha) and length scale (rho) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> alpha_gp(time):temp 0.640 0.890 1.400 1.01 745 -#> rho_gp(time):temp 0.028 0.053 0.069 1.00 888 +#> alpha_gp(time):temp 0.620 0.890 1.400 1.01 539 +#> rho_gp(time):temp 0.026 0.053 0.069 1.00 628 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters #> Rhat looks reasonable for all parameters -#> 1 of 2000 iterations ended with a divergence (0.05%) -#> *Try running with larger adapt_delta to remove the divergences -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) -#> E-FMI indicated no pathological behavior -#> -#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:27:46 PM 2024. -#> For each parameter, n_eff is a crude measure of effective sample size, -#> and Rhat is the potential scale reduction factor on split MCMC chains -#> (at convergence, Rhat = 1)
    +#> 0 of 2000 iterations ended with a divergence (0%) +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) +#> E-FMI indicated no pathological behavior +#> +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:36:09 AM 2024. +#> For each parameter, n_eff is a crude measure of effective sample size, +#> and Rhat is the potential scale reduction factor on split MCMC chains +#> (at convergence, Rhat = 1)

    Effects for gp() terms can also be plotted as smooths:

    plot_mvgam_smooth(mod, smooth = 1, newdata = data)
     abline(v = 190, lty = 'dashed', lwd = 2)
     lines(beta_temp, lwd = 2.5, col = 'white')
     lines(beta_temp, lwd = 2)
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    @@ -667,25 +670,27 @@

    Salmon survival example

    proportional variable with particular restrictions that we want to model:

    plot_mvgam_series(data = model_data, y = 'survival')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    A State-Space Beta regression

mvgam can easily handle data that are bounded at 0 and 1 with a Beta observation model (using the mgcv function betar(), see ?mgcv::betar for details). First
-we will fit a simple State-Space model that uses a Random Walk dynamic
-process model with no predictors and a Beta observation model:
+we will fit a simple State-Space model that uses an AR1 dynamic process
+model with no predictors and a Beta observation model:

    mod0 <- mvgam(formula = survival ~ 1,
    -              trend_model = RW(),
    -              family = betar(),
    -              data = model_data)
+              trend_model = AR(),
+              noncentred = TRUE,
+              family = betar(),
+              data = model_data,
+              silent = 2)

    The summary of this model shows good behaviour of the Hamiltonian Monte Carlo sampler and provides useful summaries on the Beta observation model parameters:

    summary(mod0)
     #> GAM formula:
     #> survival ~ 1
    -#> <environment: 0x000001820adad480>
    +#> <environment: 0x0000014d37f0f110>
     #> 
     #> Family:
     #> beta
    @@ -694,7 +699,7 @@ 

    A State-Space Beta regression

 #> logit
 #> 
 #> Trend model:
-#> RW
+#> AR()
 #> 
 #> N series:
 #> 1
@@ -710,31 +715,32 @@

    A State-Space Beta regression

    #> #> Observation precision parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> phi[1] 160 310 570 1 633 +#> phi[1] 95 280 630 1.02 271 #> #> GAM coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -4.2 -3.3 -2.4 1.01 147 +#> (Intercept) -4.7 -4.4 -4 1 625 #> -#> Latent trend variance estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> sigma[1] 0.19 0.34 0.57 1.01 254 -#> -#> Stan MCMC diagnostics: -#> n_eff / iter looks reasonable for all parameters -#> Rhat looks reasonable for all parameters -#> 0 of 2000 iterations ended with a divergence (0%) -#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) -#> E-FMI indicated no pathological behavior -#> -#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:29:11 PM 2024. -#> For each parameter, n_eff is a crude measure of effective sample size, -#> and Rhat is the potential scale reduction factor on split MCMC chains -#> (at convergence, Rhat = 1)
    +#> Latent trend parameter AR estimates: +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] -0.230 0.67 0.98 1.01 415 +#> sigma[1] 0.073 0.47 0.72 1.02 213 +#> +#> Stan MCMC diagnostics: +#> n_eff / iter looks reasonable for all parameters +#> Rhat looks reasonable for all parameters +#> 0 of 2000 iterations ended with a divergence (0%) +#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) +#> E-FMI indicated no pathological behavior +#> +#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:36:57 AM 2024. +#> For each parameter, n_eff is a crude measure of effective sample size, +#> and Rhat is the potential scale reduction factor on split MCMC chains +#> (at convergence, Rhat = 1)

    A plot of the underlying dynamic component shows how it has easily handled the temporal evolution of the time series:

    plot(mod0, type = 'trend')
-<img src="data:image/png;base64,..."/>
+<img src="data:image/png;base64,..."/>
    Including time-varying upwelling effects

    @@ -748,19 +754,21 @@

    Including time-varying upwelling effects

    to estimate it using a Hilbert space approximate GP:

    mod1 <- mvgam(formula = survival ~ 1,
                   trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE),
    -              trend_model = 'RW',
    -              family = betar(),
    -              data = model_data)
+              trend_model = AR(),
+              noncentred = TRUE,
+              family = betar(),
+              data = model_data,
+              silent = 2)

    The summary for this model now includes estimates for the time-varying GP parameters:

    summary(mod1, include_betas = FALSE)
     #> GAM observation formula:
     #> survival ~ 1
    -#> <environment: 0x000001820adad480>
    +#> <environment: 0x0000014d37f0f110>
     #> 
     #> GAM process formula:
     #> ~dynamic(CUI.apr, k = 25, scale = FALSE)
    -#> <environment: 0x000001820adad480>
    +#> <environment: 0x0000014d37f0f110>
     #> 
     #> Family:
     #> beta
    @@ -769,7 +777,7 @@ 

    Including time-varying upwelling effects

 #> logit
 #> 
 #> Trend model:
-#> RW
+#> AR()
 #> 
 #> N process models:
 #> 1
@@ -788,39 +796,43 @@

    Including time-varying upwelling effects

 #>
 #> Observation precision parameter estimates:
 #>        2.5% 50% 97.5% Rhat n_eff
-#> phi[1]  180 350   620    1   931
+#> phi[1]  160 350   690    1   557
 #>
 #> GAM observation model coefficient (beta) estimates:
-#>             2.5%  50% 97.5% Rhat n_eff
-#> (Intercept) -4.1 -3.3  -2.4 1.03    80
+#>             2.5% 50% 97.5% Rhat n_eff
+#> (Intercept) -4.7  -4  -2.6    1   331
 #>
-#> Process error parameter estimates:
-#>          2.5% 50% 97.5% Rhat n_eff
-#> sigma[1] 0.15 0.3  0.49 1.02   215
+#> Process model AR parameter estimates:
+#>         2.5%  50% 97.5% Rhat n_eff
+#> ar1[1]  0.46 0.89  0.99 1.01   364
 #>
-#> GAM process model gp term marginal deviation (alpha) and length scale (rho) estimates:
-#>                                2.5%  50% 97.5% Rhat n_eff
-#> alpha_gp_time_byCUI_apr_trend 0.027 0.31   1.3    1   680
-#> rho_gp_time_byCUI_apr_trend   1.300 6.50  34.0    1   668
-#>
-#> Stan MCMC diagnostics:
-#> n_eff / iter looks reasonable for all parameters
-#> Rhat looks reasonable for all parameters
-#> 73 of 2000 iterations ended with a divergence (3.65%)
-#> *Try running with larger adapt_delta to remove the divergences
-#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> E-FMI indicated no pathological behavior
-#>
-#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:31:10 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+#> Process error parameter estimates:
+#>          2.5%  50% 97.5% Rhat n_eff
+#> sigma[1] 0.18 0.35  0.58    1   596
+#>
+#> GAM process model gp term marginal deviation (alpha) and length scale (rho) estimates:
+#>                               2.5% 50% 97.5% Rhat n_eff
+#> alpha_gp_time_byCUI_apr_trend 0.02 0.3   1.2    1   760
+#> rho_gp_time_byCUI_apr_trend   1.30 5.5  28.0    1   674
+#>
+#> Stan MCMC diagnostics:
+#> n_eff / iter looks reasonable for all parameters
+#> Rhat looks reasonable for all parameters
+#> 79 of 2000 iterations ended with a divergence (3.95%)
+#> *Try running with larger adapt_delta to remove the divergences
+#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
+#> E-FMI indicated no pathological behavior
+#>
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:37:32 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)
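Both the old and new runs flag a handful of divergent transitions. A hedged follow-up, mirroring the adapt_delta = 0.99 setting that this patch adds to the VAR models further below (the value here is illustrative, not part of the patch):

# Illustrative refit: a larger adapt_delta forces smaller NUTS step sizes,
# which often removes the divergences flagged in the summary above
mod1 <- mvgam(formula = survival ~ 1,
              trend_formula = ~ dynamic(CUI.apr, k = 25, scale = FALSE),
              trend_model = AR(),
              noncentred = TRUE,
              family = betar(),
              data = model_data,
              adapt_delta = 0.99,
              silent = 2)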

    The estimates for the underlying dynamic process, and for the hindcasts, haven’t changed much:

    plot(mod1, type = 'trend')

    plot(mod1, type = 'forecast')
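The hindcast distributions can also be extracted directly rather than only plotted through the fitted object; a minimal sketch, assuming mvgam's hindcast() extractor:

# Sketch: pull the posterior hindcasts as a stand-alone object
# (class mvgam_forecast), which has its own plot() method
hc <- hindcast(mod1)
plot(hc)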

    But the process error parameter \(\sigma\) is slightly smaller for this model than for the first model:

    # Extract estimates of the process error 'sigma' for each model
    @@ -831,18 +843,17 @@ 

    Including time-varying upwelling effects

sigmas <- rbind(mod0_sigma, mod1_sigma)

# Plot using ggplot2
-library(ggplot2)
+require(ggplot2)
ggplot(sigmas, aes(y = `sigma[1]`, fill = model)) +
  geom_density(alpha = 0.3, colour = NA) +
  coord_flip()
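The construction of mod0_sigma and mod1_sigma sits outside this hunk. A plausible reconstruction, assuming the as.data.frame() method for mvgam objects accepts the same variable selection arguments as the as.matrix() calls used later in this document:

# Hypothetical reconstruction of the elided step: posterior draws of the
# process error from each model, labelled for the density plot above
mod0_sigma <- as.data.frame(mod0, variable = 'sigma', regex = TRUE) %>%
  dplyr::mutate(model = 'Mod0')
mod1_sigma <- as.data.frame(mod1, variable = 'sigma', regex = TRUE) %>%
  dplyr::mutate(model = 'Mod1')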

Why does the process error not need to be as flexible in the second model? Because the estimates of this dynamic process are now informed partly by the time-varying effect of upwelling, which we can visualise
-on the link scale using plot() with
-trend_effects = TRUE:
+on the link scale using plot():

-plot(mod1, type = 'smooth', trend_effects = TRUE)
+plot(mod1, type = 'smooths', trend_effects = TRUE)

    Comparing model predictive performances

    @@ -858,7 +869,7 @@

    Comparing model predictive performances

 #> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
 #>      elpd_diff se_diff
 #> mod1       0.0     0.0
-#> mod0      -1.7     1.5
+#> mod0      -6.5     2.7
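The call that produced this table is elided by the diff; it was presumably the loo interface that mvgam exposes (an assumption, shown here for completeness):

# Assumed call: compare approximate leave-one-out predictive densities
loo_compare(mod0, mod1)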

The second model has the larger Expected Log Predictive Density (ELPD), meaning that it is slightly favoured over the simpler model that did not include the time-varying upwelling effect. However, the two

@@ -878,9 +889,9 @@

    Comparing model predictive performances

The model with the time-varying upwelling effect tends to provide better 1-step ahead forecasts, with a higher total forecast ELPD:

    sum(lfo_mod0$elpds)
    -#> [1] 34.83384
    +#> [1] 39.52656
     sum(lfo_mod1$elpds)
    -#> [1] 35.99664
    +#> [1] 40.81327
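The lfo_mod0 and lfo_mod1 objects come from a leave-future-out evaluation defined outside this hunk; a hedged sketch using mvgam's lfo_cv(), with an illustrative starting timepoint:

# Assumed setup: exact leave-future-out CV, scoring 1-step ahead forecasts;
# min_t = 30 is illustrative only
lfo_mod0 <- lfo_cv(mod0, min_t = 30)
lfo_mod1 <- lfo_cv(mod1, min_t = 30)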

    We can also plot the ELPDs for each model as a contrast. Here, values less than zero suggest the time-varying predictor model (Mod1) gives better 1-step ahead forecasts:

    @@ -892,7 +903,7 @@

    Comparing model predictive performances

     col = 'darkred', bty = 'l')
abline(h = 0, lty = 'dashed')

[Figure: Comparing forecast skill for dynamic beta regression models in mvgam and R]
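Only the tail of the plotting call survives in this hunk; a plausible reconstruction from the surviving fragment (the eval_timepoints and elpds fields are assumed from lfo_cv()'s return value):

# Plausible reconstruction: contrast per-timepoint ELPDs; values below
# zero favour the time-varying model (Mod1)
plot(x = lfo_mod0$eval_timepoints,
     y = lfo_mod0$elpds - lfo_mod1$elpds,
     pch = 16, xlab = 'Evaluation timepoint',
     ylab = 'ELPD Mod0 - ELPD Mod1',
     col = 'darkred', bty = 'l')
abline(h = 0, lty = 'dashed')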

A useful exercise to further expand this model would be to think about what kinds of predictors might impact measurement error, which could easily be implemented into the observation formula in

diff --git a/inst/doc/trend_formulas.R b/inst/doc/trend_formulas.R
index 45ece59c..21ec225e 100644
--- a/inst/doc/trend_formulas.R
+++ b/inst/doc/trend_formulas.R
@@ -1,4 +1,4 @@
-## ----echo = FALSE-----------------------------------------------------
+## ----echo = FALSE-------------------------------------------------------------
 NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
 knitr::opts_chunk$set(
   collapse = TRUE,
@@ -8,7 +8,7 @@ knitr::opts_chunk$set(
 )

-## ----setup, include=FALSE---------------------------------------------
+## ----setup, include=FALSE-----------------------------------------------------
 knitr::opts_chunk$set(
   echo = TRUE,
   dpi = 100,
@@ -21,15 +21,15 @@ library(ggplot2)
 theme_set(theme_bw(base_size = 12, base_family = 'serif'))

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 load(url('https://github.com/atsa-es/MARSS/raw/master/data/lakeWAplankton.rda'))

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 outcomes <- c('Greens', 'Bluegreens', 'Diatoms', 'Unicells', 'Other.algae')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 # loop across each plankton group to create the long datframe
 plankton_data <- do.call(rbind, lapply(outcomes, function(x){
@@ -57,19 +57,19 @@ plankton_data <- do.call(rbind, lapply(outcomes, function(x){
   dplyr::ungroup()

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 head(plankton_data)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 dplyr::glimpse(plankton_data)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_mvgam_series(data = plankton_data, series = 'all')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plankton_data %>%
   dplyr::filter(series == 'Other.algae') %>%
   ggplot(aes(x = time, y = temp)) +
@@ -83,7 +83,7 @@ plankton_data %>%
   ggtitle('Temperature (black) vs Other algae (red)')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plankton_data %>%
   dplyr::filter(series == 'Diatoms') %>%
   ggplot(aes(x = time, y = temp)) +
@@ -97,14 +97,14 @@ plankton_data %>%
   ggtitle('Temperature (black) vs Diatoms (red)')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plankton_train <- plankton_data %>%
   dplyr::filter(time <= 112)
 plankton_test <- plankton_data %>%
   dplyr::filter(time > 112)

-## ----notrend_mod, include = FALSE, results='hide'---------------------
+## ----notrend_mod, include = FALSE, results='hide'-----------------------------
 notrend_mod <- mvgam(y ~
                        te(temp, month, k = c(4, 4)) +
                        te(temp, month, k = c(4, 4), by = series),
@@ -114,7 +114,7 @@ notrend_mod <- mvgam(y ~
                      trend_model = 'None')

-## ----eval=FALSE---------------------------------------------------------
+## ----eval=FALSE---------------------------------------------------------------
 ## notrend_mod <- mvgam(y ~
 ##                        # tensor of temp and month to capture
 ##                        # "global" seasonality
@@ -129,43 +129,43 @@ notrend_mod <- mvgam(y ~
 ##

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_mvgam_smooth(notrend_mod, smooth = 1)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_mvgam_smooth(notrend_mod, smooth = 2)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot_mvgam_smooth(notrend_mod, smooth = 3)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(notrend_mod, type = 'forecast', series = 1)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(notrend_mod, type = 'forecast', series = 2)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(notrend_mod, type = 'forecast', series = 3)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(notrend_mod, type = 'residuals', series = 1)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(notrend_mod, type = 'residuals', series = 2)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(notrend_mod, type = 'residuals', series = 3)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 priors <- get_mvgam_priors(
   # observation formula, which has no terms in it
   y ~ -1,
@@ -175,25 +175,25 @@ priors <- get_mvgam_priors(
                te(temp, month, k = c(4, 4), by = trend),

   # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
+  trend_model = VAR(),
   family = gaussian(),
   data = plankton_train)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 priors[, 3]

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 priors[, 4]

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
             prior(normal(0.5, 0.25), class = sigma))

-## ----var_mod, include = FALSE, results='hide'-------------------------
+## ----var_mod, include = FALSE, results='hide'---------------------------------
 var_mod <- mvgam(y ~ -1,
                  trend_formula = ~
                    # tensor of temp and month should capture
@@ -205,12 +205,13 @@ var_mod <- mvgam(y ~ -1,
                  family = gaussian(),
                  data = plankton_train,
                  newdata = plankton_test,
-                 trend_model = 'VAR1',
+                 trend_model = VAR(),
                  priors = priors,
+                 adapt_delta = 0.99,
                  burnin = 1000)

-## ----eval=FALSE---------------------------------------------------------
+## ----eval=FALSE---------------------------------------------------------------
 ## var_mod <- mvgam(
 ##   # observation formula, which is empty
 ##   y ~ -1,
@@ -220,24 +221,25 @@ var_mod <- mvgam(y ~ -1,
 ##                te(temp, month, k = c(4, 4), by = trend),
 ##
 ##   # VAR1 model with uncorrelated process errors
-##   trend_model = 'VAR1',
+##   trend_model = VAR(),
 ##   family = gaussian(),
 ##   data = plankton_train,
 ##   newdata = plankton_test,
 ##
 ##   # include the updated priors
-##   priors = priors)
+##   priors = priors,
+##   silent = 2)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 summary(var_mod, include_betas = FALSE)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 plot(var_mod, 'smooths', trend_effects = TRUE)

-## ----warning=FALSE, message=FALSE-------------------------------------
+## ----warning=FALSE, message=FALSE---------------------------------------------
 A_pars <- matrix(NA, nrow = 5, ncol = 5)
 for(i in 1:5){
   for(j in 1:5){
@@ -249,7 +251,7 @@ mcmc_plot(var_mod,
           type = 'hist')

-## ----warning=FALSE, message=FALSE-------------------------------------
+## ----warning=FALSE, message=FALSE---------------------------------------------
 Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
 for(i in 1:5){
   for(j in 1:5){
@@ -261,16 +263,16 @@ mcmc_plot(var_mod,
           type = 'hist')

-## ----warning=FALSE, message=FALSE-------------------------------------
+## ----warning=FALSE, message=FALSE---------------------------------------------
 mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
             prior(normal(0.5, 0.25), class = sigma))

-## ----varcor_mod, include = FALSE, results='hide'----------------------
+## ----varcor_mod, include = FALSE, results='hide'------------------------------
 varcor_mod <- mvgam(y ~ -1,
                     trend_formula = ~
                       # tensor of temp and month should capture
@@ -282,12 +284,13 @@ varcor_mod <- mvgam(y ~ -1,
                     family = gaussian(),
                     data = plankton_train,
                     newdata = plankton_test,
-                    trend_model = 'VAR1cor',
+                    trend_model = VAR(cor = TRUE),
                     burnin = 1000,
+                    adapt_delta = 0.99,
                     priors = priors)

-## ----eval=FALSE---------------------------------------------------------
+## ----eval=FALSE---------------------------------------------------------------
 ## varcor_mod <- mvgam(
 ##   # observation formula, which remains empty
 ##   y ~ -1,
@@ -297,16 +300,17 @@ varcor_mod <- mvgam(y ~ -1,
 ##                te(temp, month, k = c(4, 4), by = trend),
 ##
 ##   # VAR1 model with correlated process errors
-##   trend_model = 'VAR1cor',
+##   trend_model = VAR(cor = TRUE),
 ##   family = gaussian(),
 ##   data = plankton_train,
 ##   newdata = plankton_test,
 ##
 ##   # include the updated priors
-##   priors = priors)
+##   priors = priors,
+##   silent = 2)

-## ----warning=FALSE, message=FALSE-------------------------------------
+## ----warning=FALSE, message=FALSE---------------------------------------------
 Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
 for(i in 1:5){
   for(j in 1:5){
@@ -318,7 +322,7 @@ mcmc_plot(varcor_mod,
           type = 'hist')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE)
 median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median),
                                       nrow = 5, ncol = 5))
@@ -327,7 +331,7 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto
 round(median_correlations, 2)

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 # create forecast objects for each model
 fcvar <- forecast(var_mod)
 fcvarcor <- forecast(varcor_mod)
@@ -344,7 +348,7 @@ plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred',
 abline(h = 0, lty = 'dashed')

-## ---------------------------------------------------------------------
+## -----------------------------------------------------------------------------
 # plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better
 diff_scores <- score(fcvarcor, score = 'energy')$all_series$score -
   score(fcvar, score = 'energy')$all_series$score

diff --git a/inst/doc/trend_formulas.Rmd b/inst/doc/trend_formulas.Rmd
index 46dbe8a5..387a8708 100644
--- a/inst/doc/trend_formulas.Rmd
+++ b/inst/doc/trend_formulas.Rmd
@@ -231,7 +231,7 @@ priors <- get_mvgam_priors(
                te(temp, month, k = c(4, 4), by = trend),

   # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
+  trend_model = VAR(),
   family = gaussian(),
   data = plankton_train)
 ```
@@ -265,8 +265,9 @@ var_mod <- mvgam(y ~ -1,
                  family = gaussian(),
                  data = plankton_train,
                  newdata = plankton_test,
-                 trend_model = 'VAR1',
+                 trend_model = VAR(),
                  priors = priors,
+                 adapt_delta = 0.99,
                  burnin = 1000)
 ```
@@ -280,13 +281,14 @@ var_mod <- mvgam(
                te(temp, month, k = c(4, 4), by = trend),

   # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
+  trend_model = VAR(),
   family = gaussian(),
   data = plankton_train,
   newdata = plankton_test,

   # include the updated priors
-  priors = priors)
+  priors = priors,
+  silent = 2)
 ```

 ### Inspecting SS models
@@ -313,7 +315,7 @@ mcmc_plot(var_mod,
           type = 'hist')
 ```

-There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in the next timestep. So for example, the effect in cell [1,3], which is quite strongly negative, means that an *increase* in the process for series 3 (Greens) at time $t$ is expected to lead to a subsequent *decrease* in the process for series 1 (Bluegreens) at time $t+1$. The latent process model is now capturing these effects and the smooth seasonal effects.
+There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in the next timestep. So for example, the effect in cell [1,3] shows how an *increase* in the process for series 3 (Greens) at time $t$ is expected to impact the process for series 1 (Bluegreens) at time $t+1$. The latent process model is now capturing these effects and the smooth seasonal effects.

 The process error $(\Sigma)$ captures unmodelled variation in the process models. Again, we fixed the off-diagonals to 0, so the histograms for these will look like flat boxes:
 ```{r warning=FALSE, message=FALSE}
@@ -356,8 +358,9 @@ varcor_mod <- mvgam(y ~ -1,
                     family = gaussian(),
                     data = plankton_train,
                     newdata = plankton_test,
-                    trend_model = 'VAR1cor',
+                    trend_model = VAR(cor = TRUE),
                     burnin = 1000,
+                    adapt_delta = 0.99,
                     priors = priors)
 ```
@@ -371,13 +374,14 @@ varcor_mod <- mvgam(
                te(temp, month, k = c(4, 4), by = trend),

   # VAR1 model with correlated process errors
-  trend_model = 'VAR1cor',
+  trend_model = VAR(cor = TRUE),
   family = gaussian(),
   data = plankton_train,
   newdata = plankton_test,

   # include the updated priors
-  priors = priors)
+  priors = priors,
+  silent = 2)
 ```

 The $(\Sigma)$ matrix now captures any evidence of contemporaneously correlated process error:
@@ -393,7 +397,7 @@ mcmc_plot(varcor_mod,
           type = 'hist')
 ```

-This symmetric matrix tells us there is support for correlated process errors. For example, series 1 and 3 (Bluegreens and Greens) show negatively correlated process errors, while series 1 and 4 (Bluegreens and Other.algae) show positively correlated errors. But it is easier to interpret these estimates if we convert the covariance matrix to a correlation matrix. Here we compute the posterior median process error correlations:
+This symmetric matrix tells us there is support for correlated process errors, as several of the off-diagonal entries are strongly non-zero. But it is easier to interpret these estimates if we convert the covariance matrix to a correlation matrix. Here we compute the posterior median process error correlations:
 ```{r}
 Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE)
 median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median),

diff --git a/inst/doc/trend_formulas.html b/inst/doc/trend_formulas.html
index 0cbb003a..5e2a8efc 100644
--- a/inst/doc/trend_formulas.html
+++ b/inst/doc/trend_formulas.html
@@ -12,7 +12,7 @@
State-Space models in mvgam
@@ -340,7 +340,7 @@

    State-Space models in mvgam

    Nicholas J Clark

-2024-05-08
+2024-07-01

    @@ -536,38 +536,38 @@

    Capturing seasonality

    The “global” tensor product smooth function can be quickly visualized:

    plot_mvgam_smooth(notrend_mod, smooth = 1)

    On this plot, red indicates below-average linear predictors and white indicates above-average. We can then plot the deviation smooths for a few algal groups to see how they vary from the “global” pattern:

    plot_mvgam_smooth(notrend_mod, smooth = 2)

    plot_mvgam_smooth(notrend_mod, smooth = 3)

    These multidimensional smooths have done a good job of capturing the seasonal variation in our observations:

    plot(notrend_mod, type = 'forecast', series = 1)
     #> Out of sample CRPS:
-#> 6.76565115609046
+#> 6.77172874237756

    plot(notrend_mod, type = 'forecast', series = 2)
     #> Out of sample CRPS:
-#> 6.8078256849951
+#> 6.75657325046048

    plot(notrend_mod, type = 'forecast', series = 3)
     #> Out of sample CRPS:
-#> 4.12245753246738
+#> 4.09992574037549

    This basic model gives us confidence that we can capture the seasonal variation in the observations. But the model has not captured the remaining temporal dynamics, which is obvious when we inspect Dunn-Smyth residuals for a few series:

    plot(notrend_mod, type = 'residuals', series = 1)

    plot(notrend_mod, type = 'residuals', series = 2)

    plot(notrend_mod, type = 'residuals', series = 3)

    Multiseries dynamics

    @@ -639,7 +639,7 @@

    Multiseries dynamics

               te(temp, month, k = c(4, 4), by = trend),

  # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
+  trend_model = VAR(),
  family = gaussian(),
  data = plankton_train)

    Get names of all parameters whose priors can be modified:

    @@ -694,13 +694,14 @@

    Multiseries dynamics

               te(temp, month, k = c(4, 4), by = trend),

  # VAR1 model with uncorrelated process errors
-  trend_model = 'VAR1',
+  trend_model = VAR(),
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,

  # include the updated priors
-  priors = priors)
+  priors = priors,
+  silent = 2)

    Inspecting SS models

    @@ -718,12 +719,12 @@

    Inspecting SS models

    summary(var_mod, include_betas = FALSE)
     #> GAM observation formula:
     #> y ~ 1
    -#> <environment: 0x000001ea39b95140>
    +#> <environment: 0x00000241693f91f0>
     #> 
     #> GAM process formula:
     #> ~te(temp, month, k = c(4, 4)) + te(temp, month, k = c(4, 4), 
     #>     by = trend)
    -#> <environment: 0x000001ea39b95140>
    +#> <environment: 0x00000241693f91f0>
     #> 
     #> Family:
     #> gaussian
    @@ -732,7 +733,7 @@ 

    Inspecting SS models

 #> identity
 #>
 #> Trend model:
-#> VAR1
+#> VAR()
 #>
 #> N process models:
 #> 5

@@ -751,11 +752,11 @@

    Inspecting SS models

 #>
 #> Observation error parameter estimates:
 #>              2.5%  50% 97.5% Rhat n_eff
-#> sigma_obs[1] 0.20 0.26  0.34 1.01   453
-#> sigma_obs[2] 0.27 0.40  0.54 1.00   284
-#> sigma_obs[3] 0.42 0.64  0.81 1.02    90
-#> sigma_obs[4] 0.25 0.37  0.50 1.01   340
-#> sigma_obs[5] 0.31 0.43  0.53 1.02   266
+#> sigma_obs[1] 0.20 0.25  0.34 1.01   508
+#> sigma_obs[2] 0.27 0.40  0.54 1.03   179
+#> sigma_obs[3] 0.43 0.64  0.82 1.13    20
+#> sigma_obs[4] 0.25 0.37  0.50 1.00   378
+#> sigma_obs[5] 0.30 0.43  0.54 1.03   229
 #>
 #> GAM observation model coefficient (beta) estimates:
 #>             2.5% 50% 97.5% Rhat n_eff

@@ -763,88 +764,87 @@

    Inspecting SS models

 #>
 #> Process model VAR parameter estimates:
 #>          2.5%    50% 97.5% Rhat n_eff
-#> A[1,1] -0.015  0.500 0.830 1.01   171
-#> A[1,2] -0.330 -0.030 0.210 1.01   485
-#> A[1,3] -0.490 -0.024 0.330 1.00   332
-#> A[1,4] -0.260  0.027 0.380 1.00   706
-#> A[1,5] -0.100  0.120 0.510 1.02   218
-#> A[2,1] -0.180  0.012 0.180 1.02   238
-#> A[2,2]  0.630  0.790 0.920 1.01   435
-#> A[2,3] -0.400 -0.120 0.048 1.01   370
-#> A[2,4] -0.044  0.100 0.340 1.02   307
-#> A[2,5] -0.050  0.056 0.210 1.01   480
-#> A[3,1] -0.600  0.013 0.310 1.09    47
-#> A[3,2] -0.500 -0.180 0.031 1.03   156
-#> A[3,3]  0.049  0.450 0.730 1.01   259
-#> A[3,4] -0.039  0.210 0.630 1.02   189
-#> A[3,5] -0.064  0.120 0.400 1.04   183
-#> A[4,1] -0.250  0.049 0.300 1.05    91
-#> A[4,2] -0.110  0.055 0.260 1.00   540
-#> A[4,3] -0.460 -0.110 0.110 1.01   274
-#> A[4,4]  0.470  0.740 0.950 1.01   348
-#> A[4,5] -0.190 -0.032 0.140 1.01   555
-#> A[5,1] -0.520  0.072 0.450 1.11    32
-#> A[5,2] -0.430 -0.110 0.090 1.02   209
-#> A[5,3] -0.650 -0.170 0.130 1.02   185
-#> A[5,4] -0.057  0.180 0.570 1.02   226
-#> A[5,5]  0.540  0.730 0.980 1.06    69
+#> A[1,1]  0.038  0.520 0.870 1.08    32
+#> A[1,2] -0.350 -0.030 0.200 1.00   497
+#> A[1,3] -0.530 -0.044 0.330 1.02   261
+#> A[1,4] -0.280  0.038 0.420 1.00   392
+#> A[1,5] -0.100  0.120 0.510 1.04   141
+#> A[2,1] -0.160  0.011 0.200 1.00  1043
+#> A[2,2]  0.620  0.790 0.910 1.01   418
+#> A[2,3] -0.400 -0.130 0.045 1.03   291
+#> A[2,4] -0.034  0.110 0.360 1.02   274
+#> A[2,5] -0.048  0.061 0.200 1.01   585
+#> A[3,1] -0.260  0.025 0.560 1.10    28
+#> A[3,2] -0.530 -0.200 0.027 1.02   167
+#> A[3,3]  0.069  0.430 0.740 1.01   256
+#> A[3,4] -0.022  0.230 0.660 1.02   162
+#> A[3,5] -0.094  0.120 0.390 1.02   208
+#> A[4,1] -0.150  0.058 0.360 1.03   137
+#> A[4,2] -0.110  0.063 0.270 1.01   360
+#> A[4,3] -0.430 -0.110 0.140 1.01   312
+#> A[4,4]  0.470  0.730 0.950 1.02   278
+#> A[4,5] -0.200 -0.036 0.130 1.01   548
+#> A[5,1] -0.190  0.083 0.650 1.08    41
+#> A[5,2] -0.460 -0.120 0.076 1.04   135
+#> A[5,3] -0.620 -0.180 0.130 1.04   153
+#> A[5,4] -0.062  0.190 0.660 1.04   140
+#> A[5,5]  0.510  0.740 0.930 1.00   437
 #>
 #> Process error parameter estimates:
 #>             2.5%  50% 97.5% Rhat n_eff
-#> Sigma[1,1] 0.034 0.28  0.65 1.02    77
+#> Sigma[1,1] 0.033 0.27  0.64 1.20     9
 #> Sigma[1,2] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[1,3] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[1,4] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[1,5] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[2,1] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[2,2] 0.065 0.11  0.18 1.00   508
+#> Sigma[2,2] 0.066 0.12  0.18 1.01   541
 #> Sigma[2,3] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[2,4] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[2,5] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[3,1] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[3,2] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[3,3] 0.061 0.16  0.30 1.02   179
+#> Sigma[3,3] 0.051 0.16  0.29 1.04   163
 #> Sigma[3,4] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[3,5] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[4,1] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[4,2] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[4,3] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[4,4] 0.058 0.13  0.27 1.01   199
+#> Sigma[4,4] 0.054 0.14  0.28 1.03   182
 #> Sigma[4,5] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[5,1] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[5,2] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[5,3] 0.000 0.00  0.00  NaN   NaN
 #> Sigma[5,4] 0.000 0.00  0.00  NaN   NaN
-#> Sigma[5,5] 0.100 0.21  0.35 1.02   256
+#> Sigma[5,5] 0.100 0.21  0.35 1.01   343
 #>
 #> Approximate significance of GAM process smooths:
-#>                               edf Ref.df    F p-value
-#> te(temp,month)              3.409     15 2.05    0.43
-#> te(temp,month):seriestrend1 2.137     15 0.07    1.00
-#> te(temp,month):seriestrend2 0.843     15 0.93    1.00
-#> te(temp,month):seriestrend3 4.421     15 3.01    0.41
-#> te(temp,month):seriestrend4 2.639     15 0.56    0.97
-#> te(temp,month):seriestrend5 1.563     15 0.35    1.00
+#>                               edf Ref.df Chi.sq p-value
+#> te(temp,month)              2.902     15  43.54    0.44
+#> te(temp,month):seriestrend1 2.001     15   1.66    1.00
+#> te(temp,month):seriestrend2 0.943     15   7.03    1.00
+#> te(temp,month):seriestrend3 5.867     15  45.04    0.21
+#> te(temp,month):seriestrend4 2.984     15   9.12    0.98
+#> te(temp,month):seriestrend5 1.986     15   4.66    1.00
 #>
 #> Stan MCMC diagnostics:
 #> n_eff / iter looks reasonable for all parameters
-#> Rhats above 1.05 found for 5 parameters
+#> Rhats above 1.05 found for 33 parameters
 #> *Diagnose further to investigate why the chains have not mixed
 #> 0 of 2000 iterations ended with a divergence (0%)
 #> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
-#> Chain 3: E-FMI = 0.1497
-#> *E-FMI below 0.2 indicates you may need to reparameterize your model
-#>
-#> Samples were drawn using NUTS(diag_e) at Wed May 08 9:01:11 PM 2024.
-#> For each parameter, n_eff is a crude measure of effective sample size,
-#> and Rhat is the potential scale reduction factor on split MCMC chains
-#> (at convergence, Rhat = 1)
+#> E-FMI indicated no pathological behavior
+#>
+#> Samples were drawn using NUTS(diag_e) at Mon Jul 01 7:43:45 AM 2024.
+#> For each parameter, n_eff is a crude measure of effective sample size,
+#> and Rhat is the potential scale reduction factor on split MCMC chains
+#> (at convergence, Rhat = 1)

    The convergence of this model isn’t fabulous (more on this in a moment). But we can again plot the smooth functions, which this time operate on the process model. We can see the same plot using trend_effects = TRUE in the plotting functions:

    plot(var_mod, 'smooths', trend_effects = TRUE)

The VAR matrix is of particular interest here, as it captures lagged dependencies and cross-dependencies in the latent process model. Unfortunately bayesplot doesn’t know this is a matrix of

@@ -860,13 +860,11 @@

    Inspecting SS models

mcmc_plot(var_mod, variable = as.vector(t(A_pars)), type = 'hist')

There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in
-the next timestep. So for example, the effect in cell [1,3], which is
-quite strongly negative, means that an increase in the process
-for series 3 (Greens) at time \(t\) is
-expected to lead to a subsequent decrease in the process for
+the next timestep. So for example, the effect in cell [1,3] shows how an
+increase in the process for series 3 (Greens) at time \(t\) is expected to impact the process for
series 1 (Bluegreens) at time \(t+1\). The latent process model is now capturing these effects and the smooth seasonal effects.
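If the 25 histograms are hard to scan, the same summary trick this vignette applies to \(\Sigma\) below can be applied to \(A\); a sketch (this assumes the posterior columns are returned in the column-major order that matrix() expects):

# Sketch: posterior median VAR coefficient matrix; rows index the
# responding process, columns the lagged process
A_post <- as.matrix(var_mod, variable = 'A', regex = TRUE)
A_median <- matrix(apply(A_post, 2, median), nrow = 5, ncol = 5)
round(A_median, 2)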

    @@ -883,12 +881,12 @@

    Inspecting SS models

mcmc_plot(var_mod, variable = as.vector(t(Sigma_pars)), type = 'hist')

    The observation error estimates \((\sigma_{obs})\) represent how much the model thinks we might miss the true count when we take our imperfect measurements:

    mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')

These are still a bit hard to identify overall, especially when trying to estimate both process and observation error. Often we need to make some strong assumptions about which of these is more important for

@@ -911,13 +909,14 @@

    Correlated process errors

               te(temp, month, k = c(4, 4), by = trend),

  # VAR1 model with correlated process errors
-  trend_model = 'VAR1cor',
+  trend_model = VAR(cor = TRUE),
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,

  # include the updated priors
-  priors = priors)
+  priors = priors,
+  silent = 2)

    The \((\Sigma)\) matrix now captures any evidence of contemporaneously correlated process error:

    Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
    @@ -929,14 +928,12 @@ 

    Correlated process errors

    mcmc_plot(varcor_mod, variable = as.vector(t(Sigma_pars)), type = 'hist')

This symmetric matrix tells us there is support for correlated
-process errors. For example, series 1 and 3 (Bluegreens and Greens) show
-negatively correlated process errors, while series 1 and 4 (Bluegreens
-and Other.algae) show positively correlated errors. But it is easier to
-interpret these estimates if we convert the covariance matrix to a
-correlation matrix. Here we compute the posterior median process error
-correlations:
+process errors, as several of the off-diagonal entries are strongly
+non-zero. But it is easier to interpret these estimates if we convert
+the covariance matrix to a correlation matrix. Here we compute the
+posterior median process error correlations:

    Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE)
     median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median),
                                           nrow = 5, ncol = 5))
    @@ -944,11 +941,11 @@ 

    Correlated process errors

round(median_correlations, 2)
 #>             Bluegreens Diatoms Greens Other.algae Unicells
-#> Bluegreens        1.00   -0.04   0.16       -0.08     0.30
-#> Diatoms          -0.04    1.00  -0.21        0.47     0.17
-#> Greens            0.16   -0.21   1.00        0.18     0.46
-#> Other.algae      -0.08    0.47   0.18        1.00     0.26
-#> Unicells          0.30    0.17   0.46        0.26     1.00
+#> Bluegreens        1.00   -0.04   0.16       -0.05     0.29
+#> Diatoms          -0.04    1.00  -0.21        0.48     0.17
+#> Greens            0.16   -0.21   1.00        0.17     0.46
+#> Other.algae      -0.05    0.48   0.17        1.00     0.28
+#> Unicells          0.29    0.17   0.46        0.28     1.00

    But which model is better? We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set:

    @@ -966,7 +963,7 @@

    Correlated process errors

     xlab = 'Forecast horizon',
     ylab = expression(variogram[VAR1cor]~-~variogram[VAR1]))
abline(h = 0, lty = 'dashed')
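The head of this call falls outside the hunk; reconstructing it from the .R file diffed above:

# create forecast objects for each model, then contrast their variogram
# scores by forecast horizon (reconstructed from the .R diff above)
fcvar <- forecast(var_mod)
fcvarcor <- forecast(varcor_mod)
diff_scores <- score(fcvarcor, score = 'variogram')$all_series$score -
  score(fcvar, score = 'variogram')$all_series$score
plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred',
     xlab = 'Forecast horizon',
     ylab = expression(variogram[VAR1cor]~-~variogram[VAR1]))
abline(h = 0, lty = 'dashed')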

    And we can also compute the energy score for out of sample forecasts to get a sense of which model provides forecasts that are better calibrated:

    @@ -980,7 +977,7 @@

    Correlated process errors

     xlab = 'Forecast horizon',
     ylab = expression(energy[VAR1cor]~-~energy[VAR1]))
abline(h = 0, lty = 'dashed')
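And the matching reconstruction for the energy score comparison, again taken from the .R file diffed above:

# a negative value means the VAR1cor model is better, while a positive
# value means the VAR1 model is better
diff_scores <- score(fcvarcor, score = 'energy')$all_series$score -
  score(fcvar, score = 'energy')$all_series$score
plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred',
     xlab = 'Forecast horizon',
     ylab = expression(energy[VAR1cor]~-~energy[VAR1]))
abline(h = 0, lty = 'dashed')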

    The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we