diff --git a/cran-comments.md b/cran-comments.md
index d77db677..22adf636 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,4 +1,4 @@
-## Version 1.1.2
+## Version 1.1.3
## Summary of changes
This version brings a few efficiency updates and new functionality to enhance the user interface. There are no major structural changes or modifications that would break pre-existing workflows.
diff --git a/doc/trend_formulas.R b/doc/trend_formulas.R
index 21ec225e..98066ef4 100644
--- a/doc/trend_formulas.R
+++ b/doc/trend_formulas.R
@@ -1,4 +1,4 @@
-## ----echo = FALSE-------------------------------------------------------------
+## ---- echo = FALSE------------------------------------------------------------
NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
knitr::opts_chunk$set(
collapse = TRUE,
@@ -107,7 +107,7 @@ plankton_test <- plankton_data %>%
## ----notrend_mod, include = FALSE, results='hide'-----------------------------
notrend_mod <- mvgam(y ~
te(temp, month, k = c(4, 4)) +
- te(temp, month, k = c(4, 4), by = series),
+ te(temp, month, k = c(4, 4), by = series) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -121,7 +121,7 @@ notrend_mod <- mvgam(y ~
## te(temp, month, k = c(4, 4)) +
##
## # series-specific deviation tensor products
-## te(temp, month, k = c(4, 4), by = series),
+## te(temp, month, k = c(4, 4), by = series) - 1,
## family = gaussian(),
## data = plankton_train,
## newdata = plankton_test,
@@ -157,10 +157,6 @@ plot(notrend_mod, type = 'forecast', series = 3)
plot(notrend_mod, type = 'residuals', series = 1)
-## -----------------------------------------------------------------------------
-plot(notrend_mod, type = 'residuals', series = 2)
-
-
## -----------------------------------------------------------------------------
plot(notrend_mod, type = 'residuals', series = 3)
@@ -172,7 +168,7 @@ priors <- get_mvgam_priors(
# process model formula, which includes the smooth functions
trend_formula = ~ te(temp, month, k = c(4, 4)) +
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
# VAR1 model with uncorrelated process errors
trend_model = VAR(),
@@ -201,7 +197,7 @@ var_mod <- mvgam(y ~ -1,
te(temp, month, k = c(4, 4)) +
# need to use 'trend' rather than series
# here
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -218,7 +214,7 @@ var_mod <- mvgam(y ~ -1,
##
## # process model formula, which includes the smooth functions
## trend_formula = ~ te(temp, month, k = c(4, 4)) +
-## te(temp, month, k = c(4, 4), by = trend),
+## te(temp, month, k = c(4, 4), by = trend) - 1,
##
## # VAR1 model with uncorrelated process errors
## trend_model = VAR(),
@@ -280,7 +276,7 @@ varcor_mod <- mvgam(y ~ -1,
te(temp, month, k = c(4, 4)) +
# need to use 'trend' rather than series
# here
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -297,7 +293,7 @@ varcor_mod <- mvgam(y ~ -1,
##
## # process model formula, which includes the smooth functions
## trend_formula = ~ te(temp, month, k = c(4, 4)) +
-## te(temp, month, k = c(4, 4), by = trend),
+## te(temp, month, k = c(4, 4), by = trend) - 1,
##
## # VAR1 model with correlated process errors
## trend_model = VAR(cor = TRUE),
@@ -331,6 +327,14 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto
round(median_correlations, 2)
+## -----------------------------------------------------------------------------
+irfs <- irf(varcor_mod, h = 12)
+
+
+## -----------------------------------------------------------------------------
+plot(irfs, series = 3)
+
+
## -----------------------------------------------------------------------------
# create forecast objects for each model
fcvar <- forecast(var_mod)
diff --git a/doc/trend_formulas.Rmd b/doc/trend_formulas.Rmd
index 387a8708..8d636d2e 100644
--- a/doc/trend_formulas.Rmd
+++ b/doc/trend_formulas.Rmd
@@ -143,7 +143,7 @@ First we will fit a model that does not include a dynamic component, just to see
```{r notrend_mod, include = FALSE, results='hide'}
notrend_mod <- mvgam(y ~
te(temp, month, k = c(4, 4)) +
- te(temp, month, k = c(4, 4), by = series),
+ te(temp, month, k = c(4, 4), by = series) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -157,7 +157,7 @@ notrend_mod <- mvgam(y ~
te(temp, month, k = c(4, 4)) +
# series-specific deviation tensor products
- te(temp, month, k = c(4, 4), by = series),
+ te(temp, month, k = c(4, 4), by = series) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -197,10 +197,6 @@ This basic model gives us confidence that we can capture the seasonal variation
plot(notrend_mod, type = 'residuals', series = 1)
```
-```{r}
-plot(notrend_mod, type = 'residuals', series = 2)
-```
-
```{r}
plot(notrend_mod, type = 'residuals', series = 3)
```
@@ -213,11 +209,11 @@ Now it is time to get into multivariate State-Space models. We will fit two mode
\boldsymbol{count}_t & \sim \text{Normal}(\mu_{obs[t]}, \sigma_{obs}) \\
\mu_{obs[t]} & = process_t \\
process_t & \sim \text{MVNormal}(\mu_{process[t]}, \Sigma_{process}) \\
-\mu_{process[t]} & = VAR * process_{t-1} + f_{global}(\boldsymbol{month},\boldsymbol{temp})_t + f_{series}(\boldsymbol{month},\boldsymbol{temp})_t \\
+\mu_{process[t]} & = A * process_{t-1} + f_{global}(\boldsymbol{month},\boldsymbol{temp})_t + f_{series}(\boldsymbol{month},\boldsymbol{temp})_t \\
f_{global}(\boldsymbol{month},\boldsymbol{temp}) & = \sum_{k=1}^{K}b_{global} * \beta_{global} \\
f_{series}(\boldsymbol{month},\boldsymbol{temp}) & = \sum_{k=1}^{K}b_{series} * \beta_{series} \end{align*}
-Here you can see that there are no terms in the observation model apart from the underlying process model. But we could easily add covariates into the observation model if we felt that they could explain some of the systematic observation errors. We also assume independent observation processes (there is no covariance structure in the observation errors $\sigma_{obs}$). At present, `mvgam` does not support multivariate observation models. But this feature will be added in future versions. However the underlying process model is multivariate, and there is a lot going on here. This component has a Vector Autoregressive part, where the process mean at time $t$ $(\mu_{process[t]})$ is a vector that evolves as a function of where the vector-valued process model was at time $t-1$. The $VAR$ matrix captures these dynamics with self-dependencies on the diagonal and possibly asymmetric cross-dependencies on the off-diagonals, while also incorporating the nonlinear smooth functions that capture seasonality for each series. The contemporaneous process errors are modeled by $\Sigma_{process}$, which can be constrained so that process errors are independent (i.e. setting the off-diagonals to 0) or can be fully parameterized using a Cholesky decomposition (using `Stan`'s $LKJcorr$ distribution to place a prior on the strength of inter-species correlations). For those that are interested in the inner-workings, `mvgam` makes use of a recent breakthrough by [Sarah Heaps to enforce stationarity of Bayesian VAR processes](https://www.tandfonline.com/doi/full/10.1080/10618600.2022.2079648). This is advantageous as we often don't expect forecast variance to increase without bound forever into the future, but many estimated VARs tend to behave this way.
+Here you can see that there are no terms in the observation model apart from the underlying process model. But we could easily add covariates into the observation model if we felt that they could explain some of the systematic observation errors. We also assume independent observation processes (there is no covariance structure in the observation errors $\sigma_{obs}$). At present, `mvgam` does not support multivariate observation models, but this feature will be added in future versions. The underlying process model, however, is multivariate, and there is a lot going on here. This component has a Vector Autoregressive part, where the process mean at time $t$ $(\mu_{process[t]})$ is a vector that evolves as a function of where the vector-valued process model was at time $t-1$. The $A$ matrix captures these dynamics with self-dependencies on the diagonal and possibly asymmetric cross-dependencies on the off-diagonals, while also incorporating the nonlinear smooth functions that capture seasonality for each series. The contemporaneous process errors are modeled by $\Sigma_{process}$, which can be constrained so that process errors are independent (i.e. setting the off-diagonals to 0) or can be fully parameterized using a Cholesky decomposition (using `Stan`'s $LKJcorr$ distribution to place a prior on the strength of inter-species correlations). For those interested in the inner workings, `mvgam` makes use of a recent breakthrough by [Sarah Heaps to enforce stationarity of Bayesian VAR processes](https://www.tandfonline.com/doi/full/10.1080/10618600.2022.2079648). This is advantageous as we often don't expect forecast variance to increase without bound forever into the future, but many estimated VARs tend to behave this way.
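To make this notation concrete, here is a minimal standalone sketch (not part of the vignette's models; it assumes the `MASS` package is installed, and all parameter values are invented for illustration) that simulates a two-series stationary VAR(1) so the roles of $A$ and $\Sigma_{process}$ are visible:

```r
# Illustrative only: simulate a small stationary VAR(1) to show how the A
# matrix (lagged self- and cross-dependencies) and Sigma (contemporaneous
# process error covariance) jointly drive the dynamics
set.seed(123)
A <- matrix(c(0.7, 0.1,
              -0.2, 0.5),
            nrow = 2, byrow = TRUE)  # eigenvalues inside the unit circle -> stationary
Sigma <- matrix(c(0.20, 0.05,
                  0.05, 0.10),
                nrow = 2)            # correlated process errors
n_times <- 100
process <- matrix(0, nrow = n_times, ncol = 2)
errors <- MASS::mvrnorm(n_times, mu = c(0, 0), Sigma = Sigma)
for(t in 2:n_times){
  process[t, ] <- as.vector(A %*% process[t - 1, ]) + errors[t, ]
}
plot(process[, 1], type = 'l', bty = 'l',
     xlab = 'Time', ylab = 'Process')
lines(process[, 2], col = 'darkred')
```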
Ok that was a lot to take in. Let's fit some models to try and inspect what is going on and what they assume. But first, we need to update `mvgam`'s default priors for the observation and process errors. By default, `mvgam` uses a fairly wide Student-T prior on these parameters to avoid being overly informative. But our observations are z-scored and so we do not expect very large process or observation errors. However, we also do not expect very small observation errors either as we know these measurements are not perfect. So let's update the priors for these parameters. In doing so, you will get to see how the formula for the latent process (i.e. trend) model is used in `mvgam`:
@@ -228,7 +224,7 @@ priors <- get_mvgam_priors(
# process model formula, which includes the smooth functions
trend_formula = ~ te(temp, month, k = c(4, 4)) +
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
# VAR1 model with uncorrelated process errors
trend_model = VAR(),
@@ -261,7 +257,7 @@ var_mod <- mvgam(y ~ -1,
te(temp, month, k = c(4, 4)) +
# need to use 'trend' rather than series
# here
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -278,7 +274,7 @@ var_mod <- mvgam(
# process model formula, which includes the smooth functions
trend_formula = ~ te(temp, month, k = c(4, 4)) +
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
# VAR1 model with uncorrelated process errors
trend_model = VAR(),
@@ -354,7 +350,7 @@ varcor_mod <- mvgam(y ~ -1,
te(temp, month, k = c(4, 4)) +
# need to use 'trend' rather than series
# here
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
family = gaussian(),
data = plankton_train,
newdata = plankton_test,
@@ -371,7 +367,7 @@ varcor_mod <- mvgam(
# process model formula, which includes the smooth functions
trend_formula = ~ te(temp, month, k = c(4, 4)) +
- te(temp, month, k = c(4, 4), by = trend),
+ te(temp, month, k = c(4, 4), by = trend) - 1,
# VAR1 model with correlated process errors
trend_model = VAR(cor = TRUE),
@@ -407,6 +403,21 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto
round(median_correlations, 2)
```
+### Impulse response functions
+Because Vector Autoregressions can capture complex lagged dependencies, it is often difficult to understand how the member time series are thought to interact with one another. A method that is commonly used to directly test for possible interactions is to compute an [Impulse Response Function](https://www.mathworks.com/help/econ/varm.irf.html) (IRF). If $h$ represents the simulated forecast horizon, an IRF asks how each of the remaining series might respond over times $(t+1):h$ if a focal series is given an innovation "shock" at time $t = 0$. `mvgam` can compute Generalized and Orthogonalized IRFs from models that include latent VAR dynamics. We simply feed the fitted model to the `irf()` function and then use the S3 `plot()` function to view the estimated responses. By default, `irf()` will compute IRFs by separately imposing positive shocks of one standard deviation to each series in the VAR process. Here we compute Generalized IRFs over a horizon of 12 timesteps:
+```{r}
+irfs <- irf(varcor_mod, h = 12)
+```
+
+Plot the expected responses of the remaining series to a positive shock for series 3 (Greens):
+```{r}
+plot(irfs, series = 3)
+```
+
+This series of plots makes it clear that some of the other series would be expected to show both instantaneous responses to a shock for the Greens (due to their correlated process errors) and delayed, nonlinear responses over time (due to the complex lagged dependence structure captured by the $A$ matrix). This hopefully illustrates why IRFs are an important tool in the analysis of multivariate autoregressive models.
+
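To build intuition for what `irf()` estimates, the core of a VAR(1) impulse response is just repeated multiplication by the $A$ matrix. Below is a hypothetical helper (a sketch, not the package's actual implementation) that propagates a shock vector forward; the real `irf()` additionally uses $\Sigma$ for Generalized and Orthogonalized shocks and integrates over the full posterior:

```r
# Hypothetical sketch of the core IRF recursion for a VAR(1): the response
# at horizon t to an initial shock vector is A^t %*% shock. This ignores
# the smooth terms and posterior uncertainty that irf() handles properly
propagate_shock <- function(A, shock, h = 12){
  responses <- matrix(NA, nrow = h, ncol = length(shock))
  state <- shock
  for(t in seq_len(h)){
    state <- as.vector(A %*% state)
    responses[t, ] <- state
  }
  responses
}

# e.g. a unit shock to series 3, using a posterior median A matrix; check
# the ordering of the extracted parameters before relying on this sketch
# A_med <- matrix(apply(as.matrix(varcor_mod, variable = 'A', regex = TRUE),
#                       2, median), nrow = 5, ncol = 5)
# propagate_shock(A_med, shock = c(0, 0, 1, 0, 0))
```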
+### Comparing forecast scores
+
But which model is better? We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set:
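For intuition about what this score measures: the variogram score (Scheuerer & Hamill 2015) compares pairwise differences between series in the observations against the same pairwise differences in the forecast draws, so it rewards forecasts that capture the dependence structure. A bare-bones, equally weighted sketch for a single time point might look like this (`score()` handles the real computation across all horizons):

```r
# Illustrative only: variogram score of order p for one time point.
# fc_draws is a draws x n_series matrix of forecast samples; obs is the
# length-n_series vector of observed values
variogram_score <- function(fc_draws, obs, p = 0.5){
  n <- length(obs)
  out <- 0
  for(i in 1:(n - 1)){
    for(j in (i + 1):n){
      obs_diff <- abs(obs[i] - obs[j]) ^ p
      fc_diff <- mean(abs(fc_draws[, i] - fc_draws[, j]) ^ p)
      out <- out + (obs_diff - fc_diff) ^ 2
    }
  }
  out
}
```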
```{r}
# create forecast objects for each model
@@ -439,11 +450,13 @@ plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred',
abline(h = 0, lty = 'dashed')
```
-The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we needed to only choose one for production. `mvgam` offers some utilities for doing this (i.e. see `?lfo_cv` for guidance).
+The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need a more extensive rolling forecast evaluation exercise if we had to choose just one model for production; `mvgam` offers some utilities for doing this (see `?lfo_cv` for guidance). Alternatively, we could use forecasts from *both* models by creating an evenly weighted ensemble forecast distribution. This capability is available using the `ensemble()` function in `mvgam` (see `?ensemble` for guidance).
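As a sketch of that ensemble workflow (the exact arguments here are an assumption; consult `?ensemble` for the supported interface), the forecast objects created above could be blended and scored just like the individual models:

```r
# A sketch of an evenly weighted ensemble of the two forecast distributions;
# argument names and defaults are assumptions, so check ?ensemble before use
fc_ensemble <- ensemble(fcvar, fcvarcor)
score(fc_ensemble, score = 'variogram')$all_series$score
```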
-### Further reading
+## Further reading
The following papers and resources offer a lot of useful material about multivariate State-Space models and how they can be applied in practice:
+Auger‐Méthé, Marie, et al. "[A guide to state–space modeling of ecological time series.](https://esajournals.onlinelibrary.wiley.com/doi/full/10.1002/ecm.1470)" *Ecological Monographs* 91.4 (2021): e01470.
+
Heaps, Sarah E. "[Enforcing stationarity through the prior in vector autoregressions.](https://www.tandfonline.com/doi/full/10.1080/10618600.2022.2079648)" *Journal of Computational and Graphical Statistics* 32.1 (2023): 74-83.
Hannaford, Naomi E., et al. "[A sparse Bayesian hierarchical vector autoregressive model for microbial dynamics in a wastewater treatment plant.](https://www.sciencedirect.com/science/article/pii/S0167947322002390)" *Computational Statistics & Data Analysis* 179 (2023): 107659.
@@ -451,8 +464,6 @@ Hannaford, Naomi E., et al. "[A sparse Bayesian hierarchical vector autoregressi
Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. "[MARSS: multivariate autoregressive state-space models for analyzing time-series data.](https://journal.r-project.org/archive/2012/RJ-2012-002/index.html)" *R Journal*. 4.1 (2012): 11.
Ward, Eric J., et al. "[Inferring spatial structure from time‐series data: using multivariate state‐space models to detect metapopulation structure of California sea lions in the Gulf of California, Mexico.](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/j.1365-2664.2009.01745.x)" *Journal of Applied Ecology* 47.1 (2010): 47-56.
-
-Auger‐Méthé, Marie, et al. ["A guide to state–space modeling of ecological time series.](https://esajournals.onlinelibrary.wiley.com/doi/full/10.1002/ecm.1470)" *Ecological Monographs* 91.4 (2021): e01470.
## Interested in contributing?
I'm actively seeking PhD students and other researchers to work in the areas of ecological forecasting, multivariate model evaluation and development of `mvgam`. Please reach out if you are interested (n.clark'at'uq.edu.au)
diff --git a/doc/trend_formulas.html b/doc/trend_formulas.html
index 5e2a8efc..15dcaae4 100644
--- a/doc/trend_formulas.html
+++ b/doc/trend_formulas.html
[Rendered vignette HTML (shown here with tags stripped): regenerated to mirror the .Rmd changes above. The edits append `- 1` to each series/trend-level tensor product smooth, drop the series 2 residual plot, rename the VAR coefficient matrix from $VAR$ to $A$, add new "Impulse response functions" (with `irf()` and `plot()` examples) and "Comparing forecast scores" sections, append the note on `ensemble()`, move the Auger‐Méthé reference to the head of Further reading, and refresh all model summaries, prior listings and figures from the re-run models.]
-vignettes/trend_formulas.Rmd
trend_formulas.Rmd
We will have to try and capture this seasonality in our process model, which should be easy to do given the flexibility of GAMs. Next we @@ -293,17 +293,17 @@
plot(notrend_mod, type = 'forecast', series = 1)
#> Out of sample CRPS:
-#> 7.01328371945431
plot(notrend_mod, type = 'forecast', series = 2)
#> Out of sample CRPS:
-#> 6.54071425133111
plot(notrend_mod, type = 'forecast', series = 3)
#> Out of sample CRPS:
-#> 4.06261023205356
This basic model gives us confidence that we can capture the seasonal variation in the observations. But the model has not captured the @@ -313,11 +313,8 @@
-plot(notrend_mod, type = 'residuals', series = 2)
plot(notrend_mod, type = 'residuals', series = 3)
+priors <- get_mvgam_priors( # observation formula, which has no terms in it y ~ -1, @@ -395,7 +392,7 @@
Multiseries dynamics= gaussian(), data = plankton_train)
Get names of all parameters whose priors can be modified:
-+priors[, 3] #> [1] "(Intercept)" #> [2] "process error sd" @@ -410,7 +407,7 @@
Multiseries dynamics#> [11] "observation error sd" #> [12] "te(temp,month) smooth parameters, te(temp,month):trendtrend1 smooth parameters, te(temp,month):trendtrend2 smooth parameters, te(temp,month):trendtrend3 smooth parameters, te(temp,month):trendtrend4 smooth parameters, te(temp,month):trendtrend5 smooth parameters"
And their default prior distributions:
-++priors[, 4] #> [1] "(Intercept) ~ student_t(3, -0.1, 2.5);" #> [2] "sigma ~ student_t(3, 0, 2.5);" @@ -428,9 +425,9 @@
Multiseries dynamics
-priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), - prior(normal(0.5, 0.25), class = sigma))
+priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), + prior(normal(0.5, 0.25), class = sigma))
You may have noticed something else unique about this model: there is no intercept term in the observation formula. This is because a shared intercept parameter can sometimes be unidentifiable with respect to the @@ -440,7 +437,7 @@
Multiseries dynamics
+-var_mod <- mvgam( # observation formula, which is empty y ~ -1, @@ -473,7 +470,7 @@
Inspecting SS models
+@@ -599,26 +595,26 @@summary(var_mod, include_betas = FALSE) #> GAM observation formula: #> y ~ 1 @@ -508,90 +505,89 @@
Inspecting SS models#> #> Observation error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> sigma_obs[1] 0.20 0.26 0.34 1.01 381 -#> sigma_obs[2] 0.26 0.40 0.54 1.05 143 -#> sigma_obs[3] 0.42 0.65 0.82 1.13 41 -#> sigma_obs[4] 0.25 0.38 0.49 1.01 248 -#> sigma_obs[5] 0.31 0.43 0.55 1.02 265 +#> sigma_obs[1] 0.20 0.26 0.34 1.01 364 +#> sigma_obs[2] 0.26 0.40 0.53 1.02 264 +#> sigma_obs[3] 0.42 0.63 0.79 1.02 147 +#> sigma_obs[4] 0.25 0.38 0.50 1.00 241 +#> sigma_obs[5] 0.30 0.42 0.54 1.02 280 #> #> GAM observation model coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff #> (Intercept) 0 0 0 NaN NaN #> #> Process model VAR parameter estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> A[1,1] -0.0022 0.520 0.850 1.10 48 -#> A[1,2] -0.3700 -0.037 0.200 1.00 470 -#> A[1,3] -0.4900 -0.052 0.310 1.01 590 -#> A[1,4] -0.2600 0.037 0.430 1.00 436 -#> A[1,5] -0.0980 0.130 0.510 1.03 183 -#> A[2,1] -0.1600 0.015 0.220 1.00 718 -#> A[2,2] 0.6100 0.780 0.910 1.01 304 -#> A[2,3] -0.4200 -0.150 0.030 1.01 253 -#> A[2,4] -0.0340 0.120 0.390 1.02 201 -#> A[2,5] -0.0560 0.066 0.210 1.00 749 -#> A[3,1] -0.3000 0.021 0.490 1.06 81 -#> A[3,2] -0.5400 -0.220 0.016 1.01 190 -#> A[3,3] 0.0660 0.410 0.700 1.02 305 -#> A[3,4] -0.0120 0.250 0.670 1.02 177 -#> A[3,5] -0.0680 0.130 0.400 1.03 209 -#> A[4,1] -0.1600 0.059 0.340 1.04 152 -#> A[4,2] -0.1100 0.057 0.250 1.01 384 -#> A[4,3] -0.4400 -0.120 0.120 1.01 334 -#> A[4,4] 0.4800 0.740 0.970 1.02 212 -#> A[4,5] -0.2000 -0.037 0.120 1.00 861 -#> A[5,1] -0.2000 0.085 0.600 1.05 88 -#> A[5,2] -0.4800 -0.150 0.061 1.02 137 -#> A[5,3] -0.6900 -0.210 0.098 1.01 168 -#> A[5,4] -0.0460 0.210 0.730 1.02 142 -#> A[5,5] 0.5300 0.750 0.960 1.00 449 +#> 2.5% 50% 97.5% Rhat n_eff +#> A[1,1] -0.026 0.470 0.820 1.02 181 +#> A[1,2] -0.320 -0.031 0.200 1.00 720 +#> A[1,3] -0.520 -0.035 0.350 1.00 515 +#> A[1,4] -0.260 0.033 0.380 1.00 888 +#> A[1,5] -0.094 0.140 0.530 1.01 260 +#> A[2,1] -0.130 0.017 0.170 1.00 1043 +#> A[2,2] 0.630 0.800 0.910 1.01 531 +#> A[2,3] -0.360 -0.120 0.051 1.00 546 +#> A[2,4] -0.045 0.100 0.320 1.00 450 +#> A[2,5] -0.059 0.059 0.190 1.00 1171 +#> A[3,1] -0.270 0.019 0.310 1.01 249 +#> A[3,2] -0.500 -0.190 0.028 1.01 227 +#> A[3,3] 0.074 0.430 0.740 1.00 310 +#> A[3,4] -0.032 0.210 0.580 1.00 243 +#> A[3,5] -0.056 0.120 0.360 1.01 278 +#> A[4,1] -0.150 0.051 0.270 1.01 563 +#> A[4,2] -0.091 0.062 0.250 1.01 436 +#> A[4,3] -0.400 -0.110 0.120 1.01 295 +#> A[4,4] 0.480 0.730 0.930 1.02 366 +#> A[4,5] -0.200 -0.036 0.120 1.00 875 +#> A[5,1] -0.220 0.071 0.370 1.01 299 +#> A[5,2] -0.400 -0.110 0.079 1.01 249 +#> A[5,3] -0.610 -0.170 0.120 1.01 264 +#> A[5,4] -0.049 0.180 0.560 1.01 253 +#> A[5,5] 0.530 0.740 0.930 1.00 593 #> #> Process error parameter estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> Sigma[1,1] 0.038 0.25 0.65 1.18 31 +#> Sigma[1,1] 0.061 0.31 0.66 1.03 120 #> Sigma[1,2] 0.000 0.00 0.00 NaN NaN #> Sigma[1,3] 0.000 0.00 0.00 NaN NaN #> Sigma[1,4] 0.000 0.00 0.00 NaN NaN #> Sigma[1,5] 0.000 0.00 0.00 NaN NaN #> Sigma[2,1] 0.000 0.00 0.00 NaN NaN -#> Sigma[2,2] 0.065 0.11 0.18 1.02 419 +#> Sigma[2,2] 0.067 0.12 0.18 1.01 396 #> Sigma[2,3] 0.000 0.00 0.00 NaN NaN #> Sigma[2,4] 0.000 0.00 0.00 NaN NaN #> Sigma[2,5] 0.000 0.00 0.00 NaN NaN #> Sigma[3,1] 0.000 0.00 0.00 NaN NaN #> Sigma[3,2] 0.000 0.00 0.00 NaN NaN -#> Sigma[3,3] 0.054 0.15 0.30 1.07 74 +#> Sigma[3,3] 0.064 0.16 0.29 1.02 213 #> Sigma[3,4] 0.000 0.00 0.00 NaN NaN #> Sigma[3,5] 0.000 0.00 0.00 NaN NaN #> Sigma[4,1] 0.000 0.00 0.00 NaN NaN #> Sigma[4,2] 0.000 0.00 0.00 NaN NaN #> 
Sigma[4,3] 0.000 0.00 0.00 NaN NaN -#> Sigma[4,4] 0.043 0.13 0.25 1.04 115 +#> Sigma[4,4] 0.065 0.14 0.27 1.02 207 #> Sigma[4,5] 0.000 0.00 0.00 NaN NaN #> Sigma[5,1] 0.000 0.00 0.00 NaN NaN #> Sigma[5,2] 0.000 0.00 0.00 NaN NaN #> Sigma[5,3] 0.000 0.00 0.00 NaN NaN #> Sigma[5,4] 0.000 0.00 0.00 NaN NaN -#> Sigma[5,5] 0.098 0.20 0.34 1.03 191 +#> Sigma[5,5] 0.100 0.21 0.35 1.01 284 #> #> Approximate significance of GAM process smooths: #> edf Ref.df Chi.sq p-value -#> te(temp,month) 4.321 15 35.43 0.11 -#> te(temp,month):seriestrend1 0.929 15 2.18 1.00 -#> te(temp,month):seriestrend2 1.113 15 6.96 1.00 -#> te(temp,month):seriestrend3 4.106 15 48.72 0.41 -#> te(temp,month):seriestrend4 2.831 15 7.54 0.94 -#> te(temp,month):seriestrend5 1.542 15 6.76 1.00 +#> te(temp,month) 3.836 15 41.22 0.26 +#> te(temp,month):seriestrend1 3.325 15 0.59 1.00 +#> te(temp,month):seriestrend2 1.697 15 6.02 0.99 +#> te(temp,month):seriestrend3 4.218 15 51.16 0.59 +#> te(temp,month):seriestrend4 1.677 15 12.18 0.98 +#> te(temp,month):seriestrend5 0.787 15 10.87 1.00 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters -#> Rhats above 1.05 found for 38 parameters -#> *Diagnose further to investigate why the chains have not mixed +#> Rhat looks reasonable for all parameters #> 0 of 2000 iterations ended with a divergence (0%) #> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Tue Sep 03 3:05:05 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Wed Sep 04 9:03:29 AM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1)
Inspecting SS models
+- +plot(var_mod, 'smooths', trend_effects = TRUE)
The VAR matrix is of particular interest here, as it captures lagged dependencies and cross-dependencies in the latent process model. Unfortunately
-bayesplot
doesn’t know this is a matrix of parameters so what we see is actually the transpose of the VAR matrix. A little bit of wrangling gives us these histograms in the correct order:+- +A_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ A_pars[i, j] <- paste0('A[', i, ',', j, ']') } } -mcmc_plot(var_mod, +mcmc_plot(var_mod, variable = as.vector(t(A_pars)), type = 'hist')
There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in the next timestep. So for example, the effect in cell [1,3] shows how an @@ -630,23 +626,23 @@
Inspecting SS models
+- +Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']') } } -mcmc_plot(var_mod, +mcmc_plot(var_mod, variable = as.vector(t(Sigma_pars)), type = 'hist')
The observation error estimates \((\sigma_{obs})\) represent how much the model thinks we might miss the true count when we take our imperfect measurements:
-- +-mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')
++mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')
These are still a bit hard to identify overall, especially when trying to estimate both process and observation error. Often we need to make some strong assumptions about which of these is more important for @@ -658,11 +654,11 @@
Correlated process errorsLet’s see if these estimates improve when we allow the process errors to be correlated. Once again, we need to first update the priors for the observation errors: -
+-priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), - prior(normal(0.5, 0.25), class = sigma))
+priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2), + prior(normal(0.5, 0.25), class = sigma))
And now we can fit the correlated process error model
-++varcor_mod <- mvgam( # observation formula, which remains empty y ~ -1, @@ -682,23 +678,23 @@
Correlated process errors silent = 2)
The \((\Sigma)\) matrix now captures any evidence of contemporaneously correlated process error:
-++- +Sigma_pars <- matrix(NA, nrow = 5, ncol = 5) for(i in 1:5){ for(j in 1:5){ Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']') } } -mcmc_plot(varcor_mod, +mcmc_plot(varcor_mod, variable = as.vector(t(Sigma_pars)), type = 'hist')
This symmetric matrix tells us there is support for correlated process errors, as several of the off-diagonal entries are strongly non-zero. But it is easier to interpret these estimates if we convert the covariance matrix to a correlation matrix. Here we compute the posterior median process error correlations:
-+++#> Bluegreens 1.00 -0.04 0.17 -0.05 0.33 +#> Diatoms -0.04 1.00 -0.19 0.48 0.17 +#> Greens 0.17 -0.19 1.00 0.19 0.46 +#> Other.algae -0.05 0.48 0.19 1.00 0.29 +#> Unicells 0.33 0.17 0.46 0.29 1.00Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE) median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median), nrow = 5, ncol = 5)) @@ -706,15 +702,54 @@
Correlated process errors round(median_correlations, 2) #> Bluegreens Diatoms Greens Other.algae Unicells -#> Bluegreens 1.00 -0.04 0.16 -0.06 0.29 -#> Diatoms -0.04 1.00 -0.20 0.50 0.17 -#> Greens 0.16 -0.20 1.00 0.16 0.47 -#> Other.algae -0.06 0.50 0.16 1.00 0.28 -#> Unicells 0.29 0.17 0.47 0.28 1.00
++Impulse response functions +
+Because Vector Autoregressions can capture complex lagged +dependencies, it is often difficult to understand how the member time +series are thought to interact with one another. A method that is +commonly used to directly test for possible interactions is to compute +an Impulse +Response Function (IRF). If \(h\) +represents the simulated forecast horizon, an IRF asks how each of the +remaining series might respond over times \((t+1):h\) if a focal series is given an +innovation “shock” at time \(t = 0\). +
+mvgam
can compute Generalized and Orthogonalized IRFs from +models that included latent VAR dynamics. We simply feed the fitted +model to theirf()
function and then use the S3 +plot()
function to view the estimated responses. By +default,irf()
will compute IRFs by separately imposing +positive shocks of one standard deviation to each series in the VAR +process. Here we compute Generalized IRFs over a horizon of 12 +timesteps:++irfs <- irf(varcor_mod, h = 12)
Plot the expected responses of the remaining series to a positive +shock for series 3 (Greens)
++ ++plot(irfs, series = 3)
This series of plots makes it clear that some of the other series +would be expected to show both instantaneous responses to a shock for +the Greens (due to their correlated process errors) as well as delayed +and nonlinear responses over time (due to the complex lagged dependence +structure captured by the \(A\) +matrix). This hopefully makes it clear why IRFs are an important tool in +the analysis of multivariate autoregressive models.
+++ - +#> 4.04870000030721Comparing forecast scores +
But which model is better? We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set:
-+- +# create forecast objects for each model fcvar <- forecast(var_mod) fcvarcor <- forecast(varcor_mod) @@ -729,11 +764,11 @@
Correlated process errors xlab = 'Forecast horizon', ylab = expression(variogram[VAR1cor]~-~variogram[VAR1])) abline(h = 0, lty = 'dashed')
And we can also compute the energy score for out of sample forecasts to get a sense of which model provides forecasts that are better calibrated:
-- ++- +# plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better diff_scores <- score(fcvarcor, score = 'energy')$all_series$score - score(fcvar, score = 'energy')$all_series$score @@ -744,20 +779,27 @@
Correlated process errors xlab = 'Forecast horizon', ylab = expression(energy[VAR1cor]~-~energy[VAR1])) abline(h = 0, lty = 'dashed')
The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we needed to only choose one for production.
+utilities for doing this (i.e. seemvgam
offers some -utilities for doing this (i.e. see?lfo_cv
for -guidance).?lfo_cv
for guidance). +Alternatively, we could use forecasts from both models by +creating an evenly-weighted ensemble forecast distribution. This +capability is available using theensemble()
function in +mvgam
(see?ensemble
for guidance).+Further reading +
The following papers and resources offer a lot of useful material about multivariate State-Space models and how they can be applied in practice:
+Auger‐Méthé, Marie, et al. “A +guide to state–space modeling of ecological time series.” +Ecological Monographs 91.4 (2021): e01470.
Heaps, Sarah E. “Enforcing stationarity through the prior in vector autoregressions.” Journal of Computational and Graphical Statistics 32.1 (2023): @@ -774,10 +816,6 @@
Further reading” Journal of Applied Ecology 47.1 (2010): 47-56. -
-Auger‐Méthé, Marie, et al. “A
-guide to state–space modeling of ecological time series.”
-Ecological Monographs 91.4 (2021): e01470.
-Interested in contributing?

[Binary figure diffs: the PNGs under docs/articles/trend_formulas_files/figure-html/ for unnamed-chunk-12-1 through unnamed-chunk-19-1, unnamed-chunk-26-1, unnamed-chunk-27-1 through unnamed-chunk-29-1, and unnamed-chunk-35-1 through unnamed-chunk-37-1 were regenerated; unnamed-chunk-26-2.png and unnamed-chunk-32-1.png are new files.]

diff --git a/docs/news/index.html b/docs/news/index.html
index 883d5191..16253ebf 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -62,8 +62,9 @@
 New functionalities
 - Allow intercepts to be included in process models when `trend_formula` is supplied. This
   breaks the assumption that the process has to be zero-centred, adding more modelling
   flexibility but also potentially inducing nonidentifiabilities with respect to any
   observation model intercepts. Thoughtful priors are a must for these models.
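A hedged sketch of what such "thoughtful priors" could look like; the prior classes shown here are assumptions for illustration, not names taken from the NEWS entry:

    # Hypothetical: explicit, tight priors on both intercepts to guard against
    # the observation/process intercept nonidentifiability noted above
    priors <- c(prior(normal(0, 0.5), class = Intercept),        # observation intercept
                prior(normal(0, 0.5), class = Intercept_trend))  # process intercept (name assumed)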
 - Added `standata.mvgam_prefit`, `stancode.mvgam` and `stancode.mvgam_prefit` methods for
   better alignment with 'brms' workflows
 - Added 'gratia' to Enhancements to allow popular methods such as `draw()` to be used for
   'mvgam' models if 'gratia' is already installed
+- Added an `ensemble.mvgam_forecast` method to generate evenly weighted combinations of
+  probabilistic forecast distributions
+- Added an `irf.mvgam` method to compute Generalized and Orthogonalized Impulse Response
+  Functions (IRFs) from models fit with Vector Autoregressive dynamics
 Deprecations
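As a quick illustration of the 'brms'-style alignment mentioned above, a hedged sketch of how the new accessors might be called; the object name `mod` is assumed, and the calls mirror the generics that 'brms' exports:

    # Assumed usage of the new methods ('mod' is a fitted mvgam object)
    code <- stancode(mod)   # the Stan program underlying the model
    dat  <- standata(mod)   # the data list that was passed to Stan
    cat(code)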
diff --git a/inst/doc/trend_formulas.R b/inst/doc/trend_formulas.R
index 21ec225e..98066ef4 100644
--- a/inst/doc/trend_formulas.R
+++ b/inst/doc/trend_formulas.R
[earlier hunks are byte-for-byte identical to the doc/trend_formulas.R changes above]
@@ -331,6 +327,14 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto
 round(median_correlations, 2)

+## -----------------------------------------------------------------------------
+irfs <- irf(varcor_mod, h = 12)
+
+
+## -----------------------------------------------------------------------------
+plot(irfs, series = 3)
+
+
 ## -----------------------------------------------------------------------------
 # create forecast objects for each model
 fcvar <- forecast(var_mod)
diff --git a/inst/doc/trend_formulas.Rmd b/inst/doc/trend_formulas.Rmd
index 387a8708..8d636d2e 100644
--- a/inst/doc/trend_formulas.Rmd
+++ b/inst/doc/trend_formulas.Rmd
@@ -143,7 +143,7 @@ First we will fit a model that does not include a dynamic component, just to see
 ```{r notrend_mod, include = FALSE, results='hide'}
 notrend_mod <- mvgam(y ~
                        te(temp, month, k = c(4, 4)) +
-                       te(temp, month, k = c(4, 4), by = series),
+                       te(temp, month, k = c(4, 4), by = series) - 1,
                      family = gaussian(),
                      data = plankton_train,
                      newdata = plankton_test,
@@ -157,7 +157,7 @@ notrend_mod <- mvgam(y ~
                        te(temp, month, k = c(4, 4)) +

                        # series-specific deviation tensor products
-                       te(temp, month, k = c(4, 4), by = series),
+                       te(temp, month, k = c(4, 4), by = series) - 1,
                      family = gaussian(),
                      data = plankton_train,
                      newdata = plankton_test,
@@ -197,10 +197,6 @@ This basic model gives us confidence that we can capture the seasonal variation
 plot(notrend_mod, type = 'residuals', series = 1)
 ```

-```{r}
-plot(notrend_mod, type = 'residuals', series = 2)
-```
-
 ```{r}
 plot(notrend_mod, type = 'residuals', series = 3)
 ```
@@ -213,11 +209,11 @@ Now it is time to get into multivariate State-Space models. We will fit two mode
 \boldsymbol{count}_t & \sim \text{Normal}(\mu_{obs[t]}, \sigma_{obs}) \\
 \mu_{obs[t]} & = process_t \\
 process_t & \sim \text{MVNormal}(\mu_{process[t]}, \Sigma_{process}) \\
-\mu_{process[t]} & = VAR * process_{t-1} + f_{global}(\boldsymbol{month},\boldsymbol{temp})_t + f_{series}(\boldsymbol{month},\boldsymbol{temp})_t \\
+\mu_{process[t]} & = A * process_{t-1} + f_{global}(\boldsymbol{month},\boldsymbol{temp})_t + f_{series}(\boldsymbol{month},\boldsymbol{temp})_t \\
 f_{global}(\boldsymbol{month},\boldsymbol{temp}) & = \sum_{k=1}^{K}b_{global} * \beta_{global} \\
 f_{series}(\boldsymbol{month},\boldsymbol{temp}) & = \sum_{k=1}^{K}b_{series} * \beta_{series}
 \end{align*}
+Here you can see that there are no terms in the observation model apart from the underlying process model. But we could easily add covariates into the observation model if we felt that they could explain some of the systematic observation errors. We also assume independent observation processes (there is no covariance structure in the observation errors $\sigma_{obs}$). At present, `mvgam` does not support multivariate observation models. But this feature will be added in future versions. However the underlying process model is multivariate, and there is a lot going on here. This component has a Vector Autoregressive part, where the process mean at time $t$ $(\mu_{process[t]})$ is a vector that evolves as a function of where the vector-valued process model was at time $t-1$. The $A$ matrix captures these dynamics with self-dependencies on the diagonal and possibly asymmetric cross-dependencies on the off-diagonals, while also incorporating the nonlinear smooth functions that capture seasonality for each series. The contemporaneous process errors are modeled by $\Sigma_{process}$, which can be constrained so that process errors are independent (i.e. setting the off-diagonals to 0) or can be fully parameterized using a Cholesky decomposition (using `Stan`'s $LKJcorr$ distribution to place a prior on the strength of inter-species correlations). For those that are interested in the inner-workings, `mvgam` makes use of a recent breakthrough by [Sarah Heaps to enforce stationarity of Bayesian VAR processes](https://www.tandfonline.com/doi/full/10.1080/10618600.2022.2079648). This is advantageous as we often don't expect forecast variance to increase without bound forever into the future, but many estimated VARs tend to behave this way.
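To make the roles of $A$ and $\Sigma_{process}$ concrete, here is a minimal, hedged sketch (not part of the vignette) that simulates a small stationary VAR(1) process; the coefficient values are invented purely for illustration:

    # Simulate a 2-series VAR(1): process_t = A %*% process_{t-1} + error_t
    set.seed(1)
    A <- matrix(c(0.7, 0.1, -0.2, 0.5), nrow = 2)  # eigenvalues have modulus < 1, so stationary
    Sigma_process <- matrix(c(0.2, 0.05, 0.05, 0.2), nrow = 2)  # correlated process errors
    Tmax <- 100
    process <- matrix(0, nrow = Tmax, ncol = 2)
    for (t in 2:Tmax) {
      process[t, ] <- drop(A %*% process[t - 1, ]) +
        MASS::mvrnorm(1, mu = c(0, 0), Sigma = Sigma_process)
    }
    matplot(process, type = 'l', xlab = 'time', ylab = 'latent process')

Off-diagonals of A induce delayed cross-series effects, while off-diagonals of Sigma_process induce instantaneous correlations; both distinctions matter again when interpreting the impulse response functions introduced below.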
 Ok that was a lot to take in. Let's fit some models to try and inspect what is going on and what they assume. But first, we need to update `mvgam`'s default priors for the observation and process errors. By default, `mvgam` uses a fairly wide Student-T prior on these parameters to avoid being overly informative. But our observations are z-scored and so we do not expect very large process or observation errors. However, we also do not expect very small observation errors either as we know these measurements are not perfect. So let's update the priors for these parameters. In doing so, you will get to see how the formula for the latent process (i.e. trend) model is used in `mvgam`:
@@ -228,7 +224,7 @@ priors <- get_mvgam_priors(
   # process model formula, which includes the smooth functions
   trend_formula = ~ te(temp, month, k = c(4, 4)) +
-    te(temp, month, k = c(4, 4), by = trend),
+    te(temp, month, k = c(4, 4), by = trend) - 1,

   # VAR1 model with uncorrelated process errors
   trend_model = VAR(),
@@ -261,7 +257,7 @@ var_mod <- mvgam(y ~ -1,
                    te(temp, month, k = c(4, 4)) +
                    # need to use 'trend' rather than series
                    # here
-                   te(temp, month, k = c(4, 4), by = trend),
+                   te(temp, month, k = c(4, 4), by = trend) - 1,
                  family = gaussian(),
                  data = plankton_train,
                  newdata = plankton_test,
@@ -278,7 +274,7 @@ var_mod <- mvgam(

   # process model formula, which includes the smooth functions
   trend_formula = ~ te(temp, month, k = c(4, 4)) +
-    te(temp, month, k = c(4, 4), by = trend),
+    te(temp, month, k = c(4, 4), by = trend) - 1,

   # VAR1 model with uncorrelated process errors
   trend_model = VAR(),
@@ -354,7 +350,7 @@ varcor_mod <- mvgam(y ~ -1,
                    te(temp, month, k = c(4, 4)) +
                    # need to use 'trend' rather than series
                    # here
-                   te(temp, month, k = c(4, 4), by = trend),
+                   te(temp, month, k = c(4, 4), by = trend) - 1,
                  family = gaussian(),
                  data = plankton_train,
                  newdata = plankton_test,
@@ -371,7 +367,7 @@ varcor_mod <- mvgam(

   # process model formula, which includes the smooth functions
   trend_formula = ~ te(temp, month, k = c(4, 4)) +
-    te(temp, month, k = c(4, 4), by = trend),
+    te(temp, month, k = c(4, 4), by = trend) - 1,

   # VAR1 model with correlated process errors
   trend_model = VAR(cor = TRUE),
@@ -407,6 +403,21 @@ rownames(median_correlations) <- colnames(median_correlations) <- levels(plankto
 round(median_correlations, 2)
 ```
+### Impulse response functions
+Because Vector Autoregressions can capture complex lagged dependencies, it is often difficult to understand how the member time series are thought to interact with one another. A method that is commonly used to directly test for possible interactions is to compute an [Impulse Response Function](https://www.mathworks.com/help/econ/varm.irf.html) (IRF). If $h$ represents the simulated forecast horizon, an IRF asks how each of the remaining series might respond over times $(t+1):h$ if a focal series is given an innovation "shock" at time $t = 0$. `mvgam` can compute Generalized and Orthogonalized IRFs from models that included latent VAR dynamics. We simply feed the fitted model to the `irf()` function and then use the S3 `plot()` function to view the estimated responses. By default, `irf()` will compute IRFs by separately imposing positive shocks of one standard deviation to each series in the VAR process. Here we compute Generalized IRFs over a horizon of 12 timesteps:
+```{r}
+irfs <- irf(varcor_mod, h = 12)
+```
+
+Plot the expected responses of the remaining series to a positive shock for series 3 (Greens):
+```{r}
+plot(irfs, series = 3)
+```
+
+This series of plots makes it clear that some of the other series would be expected to show both instantaneous responses to a shock for the Greens (due to their correlated process errors) as well as delayed and nonlinear responses over time (due to the complex lagged dependence structure captured by the $A$ matrix). This hopefully makes it clear why IRFs are an important tool in the analysis of multivariate autoregressive models.
+
+### Comparing forecast scores
+
 But which model is better? We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set:
@@ -439,11 +450,13 @@ plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred',
 abline(h = 0, lty = 'dashed')
 ```
-The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we needed to only choose one for production. `mvgam` offers some utilities for doing this (i.e. see `?lfo_cv` for guidance).
+The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we needed to only choose one for production. `mvgam` offers some utilities for doing this (i.e. see `?lfo_cv` for guidance). Alternatively, we could use forecasts from *both* models by creating an evenly-weighted ensemble forecast distribution. This capability is available using the `ensemble()` function in `mvgam` (see `?ensemble` for guidance).

-### Further reading
+## Further reading
 The following papers and resources offer a lot of useful material about multivariate State-Space models and how they can be applied in practice:

+Auger‐Méthé, Marie, et al. "[A guide to state–space modeling of ecological time series.](https://esajournals.onlinelibrary.wiley.com/doi/full/10.1002/ecm.1470)" *Ecological Monographs* 91.4 (2021): e01470.
+
 Heaps, Sarah E. "[Enforcing stationarity through the prior in vector autoregressions.](https://www.tandfonline.com/doi/full/10.1080/10618600.2022.2079648)" *Journal of Computational and Graphical Statistics* 32.1 (2023): 74-83.

 Hannaford, Naomi E., et al. "[A sparse Bayesian hierarchical vector autoregressive model for microbial dynamics in a wastewater treatment plant.](https://www.sciencedirect.com/science/article/pii/S0167947322002390)" *Computational Statistics & Data Analysis* 179 (2023): 107659.
@@ -451,8 +464,6 @@ Hannaford, Naomi E., et al. "[A sparse Bayesian hierarchical vector autoregressi
 Holmes, Elizabeth E., Eric J. Ward, and Wills Kellie. "[MARSS: multivariate autoregressive state-space models for analyzing time-series data.](https://journal.r-project.org/archive/2012/RJ-2012-002/index.html)" *R Journal*. 4.1 (2012): 11.

 Ward, Eric J., et al. "[Inferring spatial structure from time‐series data: using multivariate state‐space models to detect metapopulation structure of California sea lions in the Gulf of California, Mexico.](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/j.1365-2664.2009.01745.x)" *Journal of Applied Ecology* 47.1 (2010): 47-56.
-
-Auger‐Méthé, Marie, et al. "[A guide to state–space modeling of ecological time series.](https://esajournals.onlinelibrary.wiley.com/doi/full/10.1002/ecm.1470)" *Ecological Monographs* 91.4 (2021): e01470.
 ## Interested in contributing?
 I'm actively seeking PhD students and other researchers to work in the areas of ecological forecasting, multivariate model evaluation and development of `mvgam`. Please reach out if you are interested (n.clark'at'uq.edu.au)
diff --git a/inst/doc/trend_formulas.html b/inst/doc/trend_formulas.html
index 5e2a8efc..15dcaae4 100644
--- a/inst/doc/trend_formulas.html
+++ b/inst/doc/trend_formulas.html
@@ -12,7 +12,7 @@
-
+
 State-Space models in mvgam
@@ -340,7 +340,7 @@ State-Space models in mvgam
 Nicholas J Clark
-2024-07-01
+2024-09-04
@@ -353,9 +353,11 @@
 Multiseries dynamics
 Inspecting SS models
 Correlated process errors
+Impulse response functions
+Comparing forecast scores
+Further reading
-Interested in contributing?
@@ -528,7 +530,7 @@ Capturing seasonality
                      te(temp, month, k = c(4, 4)) +
                      # series-specific deviation tensor products
-                     te(temp, month, k = c(4, 4), by = series),
+                     te(temp, month, k = c(4, 4), by = series) - 1,
                      family = gaussian(),
                      data = plankton_train,
                      newdata = plankton_test,
@@ -536,38 +538,36 @@ Capturing seasonality
The “global” tensor product smooth function can be quickly visualized:
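The rendered vignette shows this as a figure; a hedged guess at the underlying plotting call (the 'smooths' plot type is assumed here, not confirmed by this diff):

    # Assumed call used to draw the global tensor product smooth
    plot(notrend_mod, type = 'smooths')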
[figure: the global tensor product smooth]

On this plot, red indicates below-average linear predictors and white indicates above-average. We can then plot the deviation smooths for a few algal groups to see how they vary from the “global” pattern:

[figures: deviation smooths for two algal groups]

These multidimensional smooths have done a good job of capturing the seasonal variation in our observations:

[figure: forecast for one series]
#> 6.94414781962135

This basic model gives us confidence that we can capture the seasonal variation in the observations. But the model has not captured the remaining temporal dynamics, which is obvious when we inspect Dunn-Smyth residuals for a few series:

[figures: Dunn-Smyth residual plots]

Multiseries dynamics
@@ -585,7 +585,7 @@ Multiseries dynamics
 \mu_{obs[t]} & = process_t \\
 process_t & \sim \text{MVNormal}(\mu_{process[t]}, \Sigma_{process}) \\
-\mu_{process[t]} & = VAR * process_{t-1} +
+\mu_{process[t]} & = A * process_{t-1} +
 f_{global}(\boldsymbol{month},\boldsymbol{temp})_t +
 f_{series}(\boldsymbol{month},\boldsymbol{temp})_t \\
 f_{global}(\boldsymbol{month},\boldsymbol{temp}) & =
@@ -604,7 +604,7 @@ Multiseries dynamics
 here. This component has a Vector Autoregressive part, where the process
 mean at time \(t\) \((\mu_{process[t]})\) is a vector that evolves as a
 function of where the vector-valued process model was at
-time \(t-1\). The \(VAR\) matrix captures these dynamics with
+time \(t-1\). The \(A\) matrix captures these dynamics with
 self-dependencies on the diagonal and possibly asymmetric
 cross-dependencies on the off-diagonals, while also incorporating the
 nonlinear smooth functions that capture seasonality for each series. The
@@ -630,52 +630,52 @@ Multiseries dynamics
So let’s update the priors for these parameters. In doing so, you will get to see how the formula for the latent process (i.e. trend) model is used in mvgam:

priors <- get_mvgam_priors(
  # observation formula, which has no terms in it
  y ~ -1,

  # process model formula, which includes the smooth functions
  trend_formula = ~ te(temp, month, k = c(4, 4)) +
    te(temp, month, k = c(4, 4), by = trend) - 1,

  # VAR1 model with uncorrelated process errors
  trend_model = VAR(),
  family = gaussian(),
  data = plankton_train)

Get names of all parameters whose priors can be modified:

priors[, 3]
#> [1] "(Intercept)"
#> [2] "process error sd"
#> [3] "diagonal autocorrelation population mean"
#> [4] "off-diagonal autocorrelation population mean"
#> [5] "diagonal autocorrelation population variance"
#> [6] "off-diagonal autocorrelation population variance"
#> [7] "shape1 for diagonal autocorrelation precision"
#> [8] "shape1 for off-diagonal autocorrelation precision"
#> [9] "shape2 for diagonal autocorrelation precision"
#> [10] "shape2 for off-diagonal autocorrelation precision"
#> [11] "observation error sd"
#> [12] "te(temp,month) smooth parameters, te(temp,month):trendtrend1 smooth parameters, te(temp,month):trendtrend2 smooth parameters, te(temp,month):trendtrend3 smooth parameters, te(temp,month):trendtrend4 smooth parameters, te(temp,month):trendtrend5 smooth parameters"

And their default prior distributions:

priors[, 4]
#> [1] "(Intercept) ~ student_t(3, -0.1, 2.5);"
#> [2] "sigma ~ student_t(3, 0, 2.5);"
#> [3] "es[1] = 0;"
#> [4] "es[2] = 0;"
#> [5] "fs[1] = sqrt(0.455);"
#> [6] "fs[2] = sqrt(0.455);"
#> [7] "gs[1] = 1.365;"
#> [8] "gs[2] = 1.365;"
#> [9] "hs[1] = 0.071175;"
#> [10] "hs[2] = 0.071175;"
#> [11] "sigma_obs ~ student_t(3, 0, 2.5);"
#> [12] "lambda_trend ~ normal(5, 30);"
Setting priors is easy in mvgam as you can use brms routines. Here we use more informative Normal priors for both error components, but we impose a lower bound of 0.2 for the observation errors:

priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
            prior(normal(0.5, 0.25), class = sigma))

You may have noticed something else unique about this model: there is no intercept term in the observation formula. This is because a shared intercept parameter can sometimes be unidentifiable with respect to the
@@ -685,23 +685,23 @@ Multiseries dynamics
this parameter. mvgam accomplishes this by fixing the coefficient for the intercept to zero. Now we can fit the first model, which assumes that process errors are contemporaneously uncorrelated:

var_mod <- mvgam(
  # observation formula, which is empty
  y ~ -1,

  # process model formula, which includes the smooth functions
  trend_formula = ~ te(temp, month, k = c(4, 4)) +
    te(temp, month, k = c(4, 4), by = trend) - 1,

  # VAR1 model with uncorrelated process errors
  trend_model = VAR(),
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,

  # include the updated priors
  priors = priors,
  silent = 2)
Inspecting SS models
@@ -716,151 +716,153 @@ Inspecting SS models
include_betas = FALSE to stop the summary from printing output for all of the spline coefficients, which can be dense and hard to interpret:

summary(var_mod, include_betas = FALSE)
#> GAM observation formula:
#> y ~ 1
#> <environment: 0x0000020ef28a3068>
#>
#> GAM process formula:
#> ~te(temp, month, k = c(4, 4)) + te(temp, month, k = c(4, 4),
#>     by = trend) - 1
#> <environment: 0x0000020ef28a3068>
#>
#> Family:
#> gaussian
#>
#> Link function:
#> identity
#>
#> Trend model:
#> VAR()
#>
#> N process models:
#> 5
#>
#> N series:
#> 5
#>
#> N timepoints:
#> 120
#>
#> Status:
#> Fitted using Stan
#> 4 chains, each with iter = 1500; warmup = 1000; thin = 1
#> Total post-warmup draws = 2000
#>
#>
#> Observation error parameter estimates:
#>              2.5%  50% 97.5% Rhat n_eff
#> sigma_obs[1] 0.20 0.25  0.34 1.00   481
#> sigma_obs[2] 0.25 0.40  0.54 1.05   113
#> sigma_obs[3] 0.41 0.64  0.81 1.08    84
#> sigma_obs[4] 0.24 0.37  0.50 1.02   270
#> sigma_obs[5] 0.31 0.43  0.54 1.01   268
#>
#> GAM observation model coefficient (beta) estimates:
#>             2.5% 50% 97.5% Rhat n_eff
#> (Intercept)    0   0     0  NaN   NaN
#>
#> Process model VAR parameter estimates:
#>          2.5%      50% 97.5% Rhat n_eff
#> A[1,1] -0.072  0.47000 0.830 1.05   151
#> A[1,2] -0.370 -0.04000 0.200 1.01   448
#> A[1,3] -0.540 -0.04500 0.360 1.02   232
#> A[1,4] -0.290  0.03900 0.460 1.02   439
#> A[1,5] -0.072  0.15000 0.560 1.03   183
#> A[2,1] -0.150  0.01800 0.210 1.00   677
#> A[2,2]  0.620  0.79000 0.920 1.01   412
#> A[2,3] -0.400 -0.14000 0.042 1.01   295
#> A[2,4] -0.045  0.12000 0.350 1.01   344
#> A[2,5] -0.057  0.05800 0.200 1.01   641
#> A[3,1] -0.480  0.00015 0.390 1.03    71
#> A[3,2] -0.500 -0.22000 0.023 1.02   193
#> A[3,3]  0.066  0.41000 0.710 1.01   259
#> A[3,4] -0.020  0.24000 0.630 1.02   227
#> A[3,5] -0.051  0.14000 0.410 1.02   195
#> A[4,1] -0.220  0.05100 0.290 1.03   141
#> A[4,2] -0.100  0.05300 0.260 1.01   402
#> A[4,3] -0.420 -0.12000 0.120 1.02   363
#> A[4,4]  0.480  0.74000 0.940 1.01   322
#> A[4,5] -0.200 -0.03100 0.120 1.01   693
#> A[5,1] -0.360  0.06400 0.430 1.03    99
#> A[5,2] -0.430 -0.13000 0.074 1.01   230
#> A[5,3] -0.610 -0.20000 0.110 1.02   232
#> A[5,4] -0.061  0.19000 0.620 1.01   250
#> A[5,5]  0.530  0.75000 0.970 1.02   273
#>
#> Process error parameter estimates:
#>             2.5%  50% 97.5% Rhat n_eff
#> Sigma[1,1] 0.037 0.28  0.65 1.11    64
#> Sigma[1,2] 0.000 0.00  0.00  NaN   NaN
#> Sigma[1,3] 0.000 0.00  0.00  NaN   NaN
#> Sigma[1,4] 0.000 0.00  0.00  NaN   NaN
#> Sigma[1,5] 0.000 0.00  0.00  NaN   NaN
#> Sigma[2,1] 0.000 0.00  0.00  NaN   NaN
#> Sigma[2,2] 0.067 0.11  0.19 1.00   505
#> Sigma[2,3] 0.000 0.00  0.00  NaN   NaN
#> Sigma[2,4] 0.000 0.00  0.00  NaN   NaN
#> Sigma[2,5] 0.000 0.00  0.00  NaN   NaN
#> Sigma[3,1] 0.000 0.00  0.00  NaN   NaN
#> Sigma[3,2] 0.000 0.00  0.00  NaN   NaN
#> Sigma[3,3] 0.062 0.15  0.29 1.05    93
#> Sigma[3,4] 0.000 0.00  0.00  NaN   NaN
#> Sigma[3,5] 0.000 0.00  0.00  NaN   NaN
#> Sigma[4,1] 0.000 0.00  0.00  NaN   NaN
#> Sigma[4,2] 0.000 0.00  0.00  NaN   NaN
#> Sigma[4,3] 0.000 0.00  0.00  NaN   NaN
#> Sigma[4,4] 0.052 0.13  0.26 1.01   201
#> Sigma[4,5] 0.000 0.00  0.00  NaN   NaN
#> Sigma[5,1] 0.000 0.00  0.00  NaN   NaN
#> Sigma[5,2] 0.000 0.00  0.00  NaN   NaN
#> Sigma[5,3] 0.000 0.00  0.00  NaN   NaN
#> Sigma[5,4] 0.000 0.00  0.00  NaN   NaN
#> Sigma[5,5] 0.110 0.21  0.35 1.03   210
#>
#> Approximate significance of GAM process smooths:
#>                              edf Ref.df Chi.sq p-value
#> te(temp,month)              2.67     15  38.52   0.405
#> te(temp,month):seriestrend1 1.77     15   1.73   1.000
#> te(temp,month):seriestrend2 2.18     15   4.07   0.995
#> te(temp,month):seriestrend3 4.07     15  51.07   0.059 .
#> te(temp,month):seriestrend4 3.72     15   6.98   0.825
#> te(temp,month):seriestrend5 1.85     15   5.15   0.998
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Stan MCMC diagnostics:
#> n_eff / iter looks reasonable for all parameters
#> Rhats above 1.05 found for 11 parameters
#> *Diagnose further to investigate why the chains have not mixed
#> 0 of 2000 iterations ended with a divergence (0%)
#> 0 of 2000 iterations saturated the maximum tree depth of 12 (0%)
#> E-FMI indicated no pathological behavior
#>
#> Samples were drawn using NUTS(diag_e) at Wed Sep 04 9:30:41 AM 2024.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split MCMC chains
#> (at convergence, Rhat = 1)
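Given the Rhat warnings printed above, a hedged follow-up sketch; the 'rhat_hist' type is assumed to be passed through to bayesplot, as in 'brms':

    # Assumed: histogram of split-Rhat values across all parameters
    mcmc_plot(var_mod, type = 'rhat_hist')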
The convergence of this model isn’t fabulous (more on this in a moment). But we can again plot the smooth functions, which this time operate on the process model. We can see the same plot using trend_effects = TRUE in the plotting functions:

[figures: process model smooth functions]

The VAR matrix is of particular interest here, as it captures lagged dependencies and cross-dependencies in the latent process model. Unfortunately bayesplot doesn’t know this is a matrix of parameters, so what we see is actually the transpose of the VAR matrix. A little bit of wrangling gives us these histograms in the correct order:

A_pars <- matrix(NA, nrow = 5, ncol = 5)
for(i in 1:5){
  for(j in 1:5){
    A_pars[i, j] <- paste0('A[', i, ',', j, ']')
  }
}
mcmc_plot(var_mod,
          variable = as.vector(t(A_pars)),
          type = 'hist')
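Before interpreting the histograms, it can help to tabulate the posterior median of A. This hedged sketch extracts the draws the same way the vignette later extracts Sigma draws; the column ordering of the draws is assumed to be column-major, as Stan emits it:

    # Posterior median A matrix (assumed extraction pattern)
    A_post <- as.matrix(var_mod, variable = 'A', regex = TRUE)
    A_med <- matrix(apply(A_post, 2, median), nrow = 5, ncol = 5)
    rownames(A_med) <- colnames(A_med) <- levels(plankton_train$series)
    round(A_med, 2)  # e.g. cell [1,3]: lagged effect of series 3 on series 1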
There is a lot happening in this matrix. Each cell captures the lagged effect of the process in the column on the process in the row in the next timestep. So for example, the effect in cell [1,3] shows how an
@@ -872,21 +874,21 @@ Inspecting SS models
captures unmodelled variation in the process models. Again, we fixed the off-diagonals to 0, so the histograms for these will look like flat boxes:

Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
for(i in 1:5){
  for(j in 1:5){
    Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']')
  }
}
mcmc_plot(var_mod,
          variable = as.vector(t(Sigma_pars)),
          type = 'hist')
The observation error estimates \((\sigma_{obs})\) represent how much the model thinks we might miss the true count when we take our imperfect measurements:
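A hedged sketch for viewing these posteriors directly; the variable name pattern is taken from the model summary above, and the regex-based call mirrors the mcmc_plot() usage already shown:

    # Assumed: histograms of the five observation error SDs
    mcmc_plot(var_mod, variable = 'sigma_obs', regex = TRUE, type = 'hist')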
[figures: observation error posterior histograms]

These are still a bit hard to identify overall, especially when trying to estimate both process and observation error. Often we need to make some strong assumptions about which of these is more important for
@@ -897,99 +899,141 @@ Correlated process errors

Correlated process errors

Let’s see if these estimates improve when we allow the process errors to be correlated. Once again, we need to first update the priors for the observation errors:

priors <- c(prior(normal(0.5, 0.1), class = sigma_obs, lb = 0.2),
            prior(normal(0.5, 0.25), class = sigma))

And now we can fit the correlated process error model:

varcor_mod <- mvgam(
  # observation formula, which remains empty
  y ~ -1,

  # process model formula, which includes the smooth functions
  trend_formula = ~ te(temp, month, k = c(4, 4)) +
    te(temp, month, k = c(4, 4), by = trend) - 1,

  # VAR1 model with correlated process errors
  trend_model = VAR(cor = TRUE),
  family = gaussian(),
  data = plankton_train,
  newdata = plankton_test,

  # include the updated priors
  priors = priors,
  silent = 2)
The \((\Sigma)\) matrix now captures any evidence of contemporaneously correlated process error:
Sigma_pars <- matrix(NA, nrow = 5, ncol = 5)
for(i in 1:5){
  for(j in 1:5){
    Sigma_pars[i, j] <- paste0('Sigma[', i, ',', j, ']')
  }
}
mcmc_plot(varcor_mod,
          variable = as.vector(t(Sigma_pars)),
          type = 'hist')
This symmetric matrix tells us there is support for correlated process errors, as several of the off-diagonal entries are strongly non-zero. But it is easier to interpret these estimates if we convert the covariance matrix to a correlation matrix. Here we compute the posterior median process error correlations:
Sigma_post <- as.matrix(varcor_mod, variable = 'Sigma', regex = TRUE)
median_correlations <- cov2cor(matrix(apply(Sigma_post, 2, median),
                                      nrow = 5, ncol = 5))
rownames(median_correlations) <- colnames(median_correlations) <- levels(plankton_train$series)

round(median_correlations, 2)
#>             Bluegreens Diatoms Greens Other.algae Unicells
#> Bluegreens        1.00   -0.03   0.17       -0.05     0.33
#> Diatoms          -0.03    1.00  -0.20        0.49     0.17
#> Greens            0.17   -0.20   1.00        0.18     0.47
#> Other.algae      -0.05    0.49   0.18        1.00     0.29
#> Unicells          0.33    0.17   0.47        0.29     1.00
Impulse response functions

Because Vector Autoregressions can capture complex lagged dependencies, it is often difficult to understand how the member time series are thought to interact with one another. A method that is commonly used to directly test for possible interactions is to compute an Impulse Response Function (IRF). If \(h\) represents the simulated forecast horizon, an IRF asks how each of the remaining series might respond over times \((t+1):h\) if a focal series is given an innovation “shock” at time \(t = 0\). mvgam can compute Generalized and Orthogonalized IRFs from models that included latent VAR dynamics. We simply feed the fitted model to the irf() function and then use the S3 plot() function to view the estimated responses. By default, irf() will compute IRFs by separately imposing positive shocks of one standard deviation to each series in the VAR process. Here we compute Generalized IRFs over a horizon of 12 timesteps:

irfs <- irf(varcor_mod, h = 12)

Plot the expected responses of the remaining series to a positive shock for series 3 (Greens):

plot(irfs, series = 3)

[figures: impulse responses of the remaining series to a shock for the Greens]

This series of plots makes it clear that some of the other series would be expected to show both instantaneous responses to a shock for the Greens (due to their correlated process errors) as well as delayed and nonlinear responses over time (due to the complex lagged dependence structure captured by the \(A\) matrix). This hopefully makes it clear why IRFs are an important tool in the analysis of multivariate autoregressive models.
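Since the text notes that Orthogonalized IRFs are also available, a hedged sketch follows; the 'orthogonal' argument name is an assumption rather than confirmed API:

    # Assumed: orthogonalized IRFs over the same horizon
    irfs_orth <- irf(varcor_mod, h = 12, orthogonal = TRUE)
    plot(irfs_orth, series = 3)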
But which model is better? We can compute the variogram score for out of sample forecasts to get a sense of which model does a better job of capturing the dependence structure in the true evaluation set:
-- +# create forecast objects for each model -fcvar <- forecast(var_mod) -fcvarcor <- forecast(varcor_mod) - -# plot the difference in variogram scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better -diff_scores <- score(fcvarcor, score = 'variogram')$all_series$score - - score(fcvar, score = 'variogram')$all_series$score -plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', - ylim = c(-1*max(abs(diff_scores), na.rm = TRUE), - max(abs(diff_scores), na.rm = TRUE)), - bty = 'l', - xlab = 'Forecast horizon', - ylab = expression(variogram[VAR1cor]~-~variogram[VAR1])) -abline(h = 0, lty = 'dashed')
+# create forecast objects for each model +fcvar <- forecast(var_mod) +fcvarcor <- forecast(varcor_mod) + +# plot the difference in variogram scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better +diff_scores <- score(fcvarcor, score = 'variogram')$all_series$score - + score(fcvar, score = 'variogram')$all_series$score +plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', + ylim = c(-1*max(abs(diff_scores), na.rm = TRUE), + max(abs(diff_scores), na.rm = TRUE)), + bty = 'l', + xlab = 'Forecast horizon', + ylab = expression(variogram[VAR1cor]~-~variogram[VAR1])) +abline(h = 0, lty = 'dashed')
And we can also compute the energy score for out of sample forecasts to get a sense of which model provides forecasts that are better calibrated:
-- +# plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better -diff_scores <- score(fcvarcor, score = 'energy')$all_series$score - - score(fcvar, score = 'energy')$all_series$score -plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', - ylim = c(-1*max(abs(diff_scores), na.rm = TRUE), - max(abs(diff_scores), na.rm = TRUE)), - bty = 'l', - xlab = 'Forecast horizon', - ylab = expression(energy[VAR1cor]~-~energy[VAR1])) -abline(h = 0, lty = 'dashed')
+# plot the difference in energy scores; a negative value means the VAR1cor model is better, while a positive value means the VAR1 model is better +diff_scores <- score(fcvarcor, score = 'energy')$all_series$score - + score(fcvar, score = 'energy')$all_series$score +plot(diff_scores, pch = 16, cex = 1.25, col = 'darkred', + ylim = c(-1*max(abs(diff_scores), na.rm = TRUE), + max(abs(diff_scores), na.rm = TRUE)), + bty = 'l', + xlab = 'Forecast horizon', + ylab = expression(energy[VAR1cor]~-~energy[VAR1])) +abline(h = 0, lty = 'dashed')
The models tend to provide similar forecasts, though the correlated error model does slightly better overall. We would probably need to use a more extensive rolling forecast evaluation exercise if we felt like we needed to only choose one for production. mvgam offers some utilities for doing this (i.e. see ?lfo_cv for guidance). Alternatively, we could use forecasts from both models by creating an evenly-weighted ensemble forecast distribution. This capability is available using the ensemble() function in mvgam (see ?ensemble for guidance).

Further reading

The following papers and resources offer a lot of useful material about multivariate State-Space models and how they can be applied in practice:

Auger‐Méthé, Marie, et al. “A guide to state–space modeling of ecological time series.” Ecological Monographs 91.4 (2021): e01470.

Heaps, Sarah E. “Enforcing stationarity through the prior in vector autoregressions.” Journal of Computational and Graphical Statistics 32.1 (2023): 74-83.
@@ -1006,10 +1050,6 @@ Further reading
models to detect metapopulation structure of California sea lions in the Gulf of California, Mexico.” Journal of Applied Ecology 47.1 (2010): 47-56.
-Auger‐Méthé, Marie, et al. “A
-guide to state–space modeling of ecological time series.”
-Ecological Monographs 91.4 (2021): e01470.

Interested in contributing?

diff --git a/man/mvgam_marginaleffects.Rd b/man/mvgam_marginaleffects.Rd
index 5564cfdb..16e6648e 100644
--- a/man/mvgam_marginaleffects.Rd
+++ b/man/mvgam_marginaleffects.Rd
@@ -86,6 +86,8 @@ arguments.}
 \item \code{newdata = datagrid(cyl = c(4, 6))}: \code{cyl} variable equal to 4 and 6 and other regressors fixed at their means or modes.
 \item See the Examples section and the \code{\link[marginaleffects:datagrid]{datagrid()}} documentation.
 }
+\item \code{\link[=subset]{subset()}} call with a single argument to select a subset of the dataset used to fit the model, ex: \code{newdata = subset(treatment == 1)}
+\item \code{\link[dplyr:filter]{dplyr::filter()}} call with a single argument to select a subset of the dataset used to fit the model, ex: \code{newdata = filter(treatment == 1)}
 \item string:
 \itemize{
 \item "mean": Marginal Effects at the Mean. Slopes when each predictor is held at its mean or mode.
diff --git a/vignettes/trend_formulas.Rmd b/vignettes/trend_formulas.Rmd
index 9271d821..8d636d2e 100644
--- a/vignettes/trend_formulas.Rmd
+++ b/vignettes/trend_formulas.Rmd
[hunks duplicate the corresponding inst/doc/trend_formulas.Rmd hunks above]
 ## Interested in contributing?
 I'm actively seeking PhD students and other researchers to work in the areas of ecological forecasting, multivariate model evaluation and development of `mvgam`. Please reach out if you are interested (n.clark'at'uq.edu.au)
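To close, a hedged sketch of the ensemble and rolling-CV utilities referenced in the vignette text; the functions are named in the document, but any argument beyond the forecast objects themselves is an assumption:

    # Evenly weighted combination of the two forecast distributions
    fc_ens <- ensemble(fcvar, fcvarcor)
    plot(fc_ens, series = 3)
    # For a more thorough comparison, approximate leave-future-out CV
    # (argument names assumed; see ?lfo_cv):
    # lfo_cv(varcor_mod, min_t = 60)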
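And a hypothetical illustration of the new 'newdata' shortcuts documented in the mvgam_marginaleffects.Rd change above; the filtering variable is taken from this vignette's data, but the combination with an mvgam model is an assumption:

    # Assumed: average predictions restricted to one series, using the new
    # dplyr::filter() shortcut for 'newdata'
    library(marginaleffects)
    avg_predictions(var_mod, newdata = filter(series == 'Greens'))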