Specify dynamic Gaussian processes
+![](../logo.png)
Specify dynamic Gaussian processes
Source:R/mvgam_trend_types.R
GP.Rd
Specify autoregressive dynamic processes in mvgam
+![](../logo.png)
Specify autoregressive dynamic processes in mvgam
Source:R/mvgam_trend_types.R
RW.Rd
Specify correlated residual processes in mvgam
+![](../logo.png)
Specify correlated residual processes in mvgam
Source:R/mvgam_trend_types.R
ZMVN.Rd
Extract or compute hindcasts and forecasts for a fitted mvgam
object
+ ![](../logo.png)
Extract or compute hindcasts and forecasts for a fitted mvgam
object
Source: R/forecast.mvgam.R
forecast.mvgam.Rd
Examples chains = 2)
#> Compiling Stan program using cmdstanr
#>
-<<<<<<< HEAD
#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5,
#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4,
#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359,
@@ -152,65 +151,45 @@ Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16,
#> from stan/lib/stan_math/stan/math.hpp:19,
#> from stan/src/stan/model/model_header.hpp:4,
-#> from C:/Users/uqnclar2/AppData/Local/Temp/Rtmp2bnpq5/model-3cd012a31411.hpp:2:
+#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c59e1e97.hpp:2:
#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)':
#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers
#> 194 | if (cdf_n < 0.0)
#> |
#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory
-=======
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
#> Start sampling
#> Running MCMC with 2 parallel chains...
#>
#> Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup)
#> Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup)
-#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup)
-<<<<<<< HEAD
-=======
#> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup)
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
-#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup)
#> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup)
#> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup)
-#> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup)
-#> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup)
#> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup)
+#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup)
#> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup)
#> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling)
-#> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup)
-#> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling)
-#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling)
+#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup)
+#> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup)
#> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling)
+#> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup)
#> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling)
-<<<<<<< HEAD
+#> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup)
+#> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling)
#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling)
-#> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling)
+#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling)
#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling)
-#> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling)
-#> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling)
-#> Chain 2 finished in 1.1 seconds.
-#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling)
-#> Chain 1 finished in 1.2 seconds.
-#>
-#> Both chains finished successfully.
-#> Mean chain execution time: 1.1 seconds.
-#> Total execution time: 1.3 seconds.
-=======
#> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling)
+#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling)
#> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling)
-#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling)
-#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling)
+#> Chain 1 finished in 1.0 seconds.
#> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling)
-#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling)
#> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling)
-#> Chain 1 finished in 2.7 seconds.
-#> Chain 2 finished in 2.6 seconds.
+#> Chain 2 finished in 1.1 seconds.
#>
#> Both chains finished successfully.
-#> Mean chain execution time: 2.7 seconds.
-#> Total execution time: 2.8 seconds.
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> Mean chain execution time: 1.0 seconds.
+#> Total execution time: 1.3 seconds.
#>
# Hindcasts on response scale
@@ -218,11 +197,7 @@ Examplesstr(hc)
#> List of 15
#> $ call :Class 'formula' language y ~ s(season, bs = "cc", k = 6)
-<<<<<<< HEAD
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001b48c2257f8>
-=======
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001f1650b5778>
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> .. ..- attr(*, ".Environment")=<environment: 0x0000022097b26560>
#> $ trend_call : NULL
#> $ family : chr "poisson"
#> $ trend_model :List of 7
@@ -240,65 +215,43 @@ Examples#> $ type : chr "response"
#> $ series_names : chr [1:3] "series_1" "series_2" "series_3"
#> $ train_observations:List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:75] 1 0 0 1 0 0 1 1 1 4 ...
-#> ..$ series_2: int [1:75] 1 1 2 0 1 0 2 4 0 6 ...
-#> ..$ series_3: int [1:75] 1 2 2 0 0 0 3 0 0 3 ...
-=======
-#> ..$ series_1: int [1:75] 0 1 2 4 0 1 13 6 1 3 ...
-#> ..$ series_2: int [1:75] 0 0 0 0 0 0 0 1 2 0 ...
-#> ..$ series_3: int [1:75] 0 0 0 0 0 1 2 2 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_1: int [1:75] 0 0 0 1 0 1 1 4 0 0 ...
+#> ..$ series_2: int [1:75] 0 0 0 0 1 0 4 0 0 0 ...
+#> ..$ series_3: int [1:75] 1 1 1 2 0 1 4 6 6 0 ...
#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
#> $ test_observations : NULL
#> $ test_times : NULL
#> $ hindcasts :List of 3
-<<<<<<< HEAD
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 2 3 1 3 2 ...
+#> ..$ series_1: num [1:1000, 1:75] 0 0 0 1 0 1 0 0 4 0 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 0 3 1 0 5 1 1 0 0 2 ...
+#> ..$ series_2: num [1:1000, 1:75] 0 0 0 0 1 1 1 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 1 0 1 2 1 5 0 1 3 1 ...
-=======
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 1 2 0 1 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 0 2 1 0 1 2 1 1 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_3: num [1:1000, 1:75] 3 0 1 0 1 1 2 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
#> $ forecasts : NULL
#> - attr(*, "class")= chr "mvgam_forecast"
plot(hc, series = 1)
-#> No non-missing values in test_observations; cannot calculate forecast score
-plot(hc, series = 2)
#> No non-missing values in test_observations; cannot calculate forecast score
+plot(hc, series = 2)
-plot(hc, series = 3)
#> No non-missing values in test_observations; cannot calculate forecast score
+plot(hc, series = 3)
+#> No non-missing values in test_observations; cannot calculate forecast score
# Forecasts on response scale
fc <- forecast(mod, newdata = simdat$data_test)
str(fc)
#> List of 16
#> $ call :Class 'formula' language y ~ s(season, bs = "cc", k = 6)
-<<<<<<< HEAD
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001b48c2257f8>
-=======
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001f1650b5778>
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> .. ..- attr(*, ".Environment")=<environment: 0x0000022097b26560>
#> $ trend_call : NULL
#> $ family : chr "poisson"
#> $ family_pars : NULL
@@ -317,84 +270,44 @@ Examples#> $ type : chr "response"
#> $ series_names : Factor w/ 3 levels "series_1","series_2",..: 1 2 3
#> $ train_observations:List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:75] 1 0 0 1 0 0 1 1 1 4 ...
-#> ..$ series_2: int [1:75] 1 1 2 0 1 0 2 4 0 6 ...
-#> ..$ series_3: int [1:75] 1 2 2 0 0 0 3 0 0 3 ...
+#> ..$ series_1: int [1:75] 0 0 0 1 0 1 1 4 0 0 ...
+#> ..$ series_2: int [1:75] 0 0 0 0 1 0 4 0 0 0 ...
+#> ..$ series_3: int [1:75] 1 1 1 2 0 1 4 6 6 0 ...
#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
#> $ test_observations :List of 3
-#> ..$ series_1: int [1:25] 0 0 0 0 0 1 1 4 2 3 ...
-#> ..$ series_2: int [1:25] 1 0 0 2 0 3 2 9 8 1 ...
-#> ..$ series_3: int [1:25] 0 0 1 1 0 0 0 4 1 1 ...
+#> ..$ series_1: int [1:25] 2 0 1 0 0 1 1 1 0 0 ...
+#> ..$ series_2: int [1:25] 4 0 0 1 2 1 2 1 2 0 ...
+#> ..$ series_3: int [1:25] 5 0 2 6 6 4 1 8 2 0 ...
#> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
#> $ hindcasts :List of 3
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 2 3 1 3 2 ...
+#> ..$ series_1: num [1:1000, 1:75] 0 0 0 1 0 1 0 0 4 0 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 0 3 1 0 5 1 1 0 0 2 ...
+#> ..$ series_2: num [1:1000, 1:75] 0 0 0 0 1 1 1 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 1 0 1 2 1 5 0 1 3 1 ...
-=======
-#> ..$ series_1: int [1:75] 0 1 2 4 0 1 13 6 1 3 ...
-#> ..$ series_2: int [1:75] 0 0 0 0 0 0 0 1 2 0 ...
-#> ..$ series_3: int [1:75] 0 0 0 0 0 1 2 2 1 0 ...
-#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
-#> $ test_observations :List of 3
-#> ..$ series_1: int [1:25] 2 1 1 12 2 6 0 5 0 0 ...
-#> ..$ series_2: int [1:25] 0 0 0 10 0 0 1 1 0 0 ...
-#> ..$ series_3: int [1:25] 1 1 1 4 1 3 0 0 1 0 ...
-#> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
-#> $ hindcasts :List of 3
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 1 2 0 1 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 0 2 1 0 1 2 1 1 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_3: num [1:1000, 1:75] 3 0 1 0 1 1 2 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
#> $ forecasts :List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:1000, 1:25] 0 2 0 0 1 0 0 1 0 2 ...
-#> ..$ series_2: int [1:1000, 1:25] 0 1 0 1 0 0 0 0 1 0 ...
-#> ..$ series_3: int [1:1000, 1:25] 0 1 0 1 0 0 1 1 0 0 ...
-#> - attr(*, "class")= chr "mvgam_forecast"
-plot(fc, series = 1)
-#> Out of sample DRPS:
-#> 11.465246
-
-plot(fc, series = 2)
-#> Out of sample DRPS:
-#> 25.378196
-
-plot(fc, series = 3)
-#> Out of sample DRPS:
-#> 17.418698
-=======
-#> ..$ series_1: int [1:1000, 1:25] 2 1 1 0 6 0 0 0 0 0 ...
-#> ..$ series_2: int [1:1000, 1:25] 1 2 1 0 0 2 0 0 0 1 ...
-#> ..$ series_3: int [1:1000, 1:25] 1 0 0 1 1 0 0 2 1 0 ...
+#> ..$ series_1: int [1:1000, 1:25] 0 0 0 0 0 0 1 1 1 0 ...
+#> ..$ series_2: int [1:1000, 1:25] 0 1 0 0 2 2 0 0 1 1 ...
+#> ..$ series_3: int [1:1000, 1:25] 3 0 0 0 3 1 0 0 0 0 ...
#> - attr(*, "class")= chr "mvgam_forecast"
plot(fc, series = 1)
#> Out of sample DRPS:
-#> 40.733034
+#> 10.868257
plot(fc, series = 2)
#> Out of sample DRPS:
-#> 20.222428
+#> 12.139955
plot(fc, series = 3)
#> Out of sample DRPS:
-#> 13.041625
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 32.02099
# Forecasts as expectations
diff --git a/docs/reference/get_mvgam_priors.html b/docs/reference/get_mvgam_priors.html
index d9b2555b..4d60cda9 100644
--- a/docs/reference/get_mvgam_priors.html
+++ b/docs/reference/get_mvgam_priors.html
@@ -1,7 +1,7 @@
Extract information on default prior distributions for an mvgam model — get_mvgam_priors • mvgam
@@ -54,7 +54,7 @@
- ![]()
Extract information on default prior distributions for an mvgam model
+ ![](../logo.png)
Extract information on default prior distributions for an mvgam model
Source: R/get_mvgam_priors.R
get_mvgam_priors.Rd
@@ -69,6 +69,7 @@ Usage
get_mvgam_priors(
formula,
trend_formula,
+ factor_formula,
data,
data_train,
family = "poisson",
@@ -88,11 +89,7 @@ Argumentss()
, te()
, ti()
, t2()
, as well as time-varying
-<<<<<<< HEAD
dynamic()
terms and nonparametric gp()
terms, can be added to the right hand side
-=======
-dynamic()
terms, can be added to the right hand side
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to specify that the linear predictor depends on smooth functions of predictors
(or linear functionals of these). In nmix()
family models, the formula
is used to
set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam
@@ -114,14 +111,14 @@ Argumentsjsdgam
+
+
data
A dataframe
or list
containing the model response variable and covariates
-<<<<<<< HEAD
required by the GAM formula
and optional trend_formula
. Most models should include columns:
series
(a factor
index of the series IDs; the number of levels should be identical
-=======
-required by the GAM formula
and optional trend_formula
. Most models should include columns:
-#'
series
(a factor
index of the series IDs; the number of levels should be identical
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to the number of unique series labels (i.e. n_series = length(levels(data$series))
))
time
(numeric
or integer
index of the time point for each observation).
For most dynamic trend types available in mvgam
(see argument trend_model
), time should be
@@ -450,23 +447,13 @@
Examples#> 6 trend sd sigma ~ student_t(3, 0, 2.5);
#> 7 inverse of NB dispsersion phi_inv ~ student_t(3, 0, 0.1);
#> example_change new_lowerbound new_upperbound
-<<<<<<< HEAD
-#> 1 lambda ~ exponential(0.53); NA NA
-#> 2 mu_raw ~ normal(0.3, 0.11); NA NA
-#> 3 sigma_raw ~ exponential(0.34); NA NA
-#> 4 ar1 ~ normal(-0.87, 0.84); NA NA
-#> 5 ar2 ~ normal(0.56, 0.76); NA NA
-#> 6 sigma ~ exponential(0.88); NA NA
-#> 7 phi_inv ~ normal(0.81, 0.92); NA NA
-=======
-#> 1 lambda ~ exponential(0.27); NA NA
-#> 2 mu_raw ~ normal(0.58, 0.14); NA NA
-#> 3 sigma_raw ~ exponential(0.42); NA NA
-#> 4 ar1 ~ normal(-0.22, 0.6); NA NA
-#> 5 ar2 ~ normal(-0.92, 0.63); NA NA
-#> 6 sigma ~ exponential(0.32); NA NA
-#> 7 phi_inv ~ normal(0.36, 0.79); NA NA
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 1 lambda ~ exponential(0.65); NA NA
+#> 2 mu_raw ~ normal(0.92, 1); NA NA
+#> 3 sigma_raw ~ exponential(0.57); NA NA
+#> 4 ar1 ~ normal(-0.96, 0.9); NA NA
+#> 5 ar2 ~ normal(0.5, 0.48); NA NA
+#> 6 sigma ~ exponential(0.45); NA NA
+#> 7 phi_inv ~ normal(0.71, 0.14); NA NA
# Make a few changes; first, change the population mean for the series-level
# random intercepts
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 76c8fa37..dc02b805 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -1,5 +1,5 @@
-Function reference • mvgam Function reference • mvgam
@@ -52,7 +52,7 @@
- ![]()
Function reference
+ ![](../logo.png)
Function reference
@@ -385,7 +385,7 @@ All functionsseries_to_mvgam()
- This function converts univariate or multivariate time series (
xts
or ts
objects)
-to the format necessary for mvgam
+to the format necessary for mvgam
-
sim_mvgam()
diff --git a/docs/reference/jsdgam-1.png b/docs/reference/jsdgam-1.png
new file mode 100644
index 00000000..6a3f8289
Binary files /dev/null and b/docs/reference/jsdgam-1.png differ
diff --git a/docs/reference/jsdgam-10.png b/docs/reference/jsdgam-10.png
new file mode 100644
index 00000000..ca1876f1
Binary files /dev/null and b/docs/reference/jsdgam-10.png differ
diff --git a/docs/reference/jsdgam-11.png b/docs/reference/jsdgam-11.png
new file mode 100644
index 00000000..16c1f439
Binary files /dev/null and b/docs/reference/jsdgam-11.png differ
diff --git a/docs/reference/jsdgam-2.png b/docs/reference/jsdgam-2.png
new file mode 100644
index 00000000..b75ffd16
Binary files /dev/null and b/docs/reference/jsdgam-2.png differ
diff --git a/docs/reference/jsdgam-3.png b/docs/reference/jsdgam-3.png
new file mode 100644
index 00000000..3e2fdbc7
Binary files /dev/null and b/docs/reference/jsdgam-3.png differ
diff --git a/docs/reference/jsdgam-4.png b/docs/reference/jsdgam-4.png
new file mode 100644
index 00000000..adf629f3
Binary files /dev/null and b/docs/reference/jsdgam-4.png differ
diff --git a/docs/reference/jsdgam-5.png b/docs/reference/jsdgam-5.png
new file mode 100644
index 00000000..a5df20a4
Binary files /dev/null and b/docs/reference/jsdgam-5.png differ
diff --git a/docs/reference/jsdgam-6.png b/docs/reference/jsdgam-6.png
new file mode 100644
index 00000000..f501a71f
Binary files /dev/null and b/docs/reference/jsdgam-6.png differ
diff --git a/docs/reference/jsdgam-7.png b/docs/reference/jsdgam-7.png
new file mode 100644
index 00000000..b3f126c3
Binary files /dev/null and b/docs/reference/jsdgam-7.png differ
diff --git a/docs/reference/jsdgam-8.png b/docs/reference/jsdgam-8.png
new file mode 100644
index 00000000..7ec77b86
Binary files /dev/null and b/docs/reference/jsdgam-8.png differ
diff --git a/docs/reference/jsdgam-9.png b/docs/reference/jsdgam-9.png
new file mode 100644
index 00000000..8cd7bce8
Binary files /dev/null and b/docs/reference/jsdgam-9.png differ
diff --git a/docs/reference/jsdgam.html b/docs/reference/jsdgam.html
index 83602f06..e2b1d787 100644
--- a/docs/reference/jsdgam.html
+++ b/docs/reference/jsdgam.html
@@ -9,7 +9,7 @@
specification is extremely flexible, allowing users to include spatial, temporal or any other type
of predictor effects to more efficiently capture unmodelled residual associations, while the
observation model can also be highly flexible (including all smooth, GP and other effects that
-mvgam can handle)">
@@ -62,7 +62,7 @@
- ![]()
Fit Joint Species Distribution Models in mvgam
+ ![](../logo.png)
Fit Joint Species Distribution Models in mvgam
Source: R/jsdgam.R
jsdgam.Rd
@@ -174,6 +174,31 @@ Argumentspoisson(). See mvgam_families
for more details
+- unit
+The unquoted name of the variable that represents the unit of analysis in data
over
+which latent residuals should be correlated. This variable should be either a
+numeric
or integer
variable in the supplied data
.
+Defaults to time
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case. See examples below
+for further details and explanations
+
+
+- subgr
+A subgrouping factor
variable specifying which element in data
represents the
+different observational units. Defaults to series
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case.
+But note that models that use the hierarchical correlations (by supplying a value for gr
)
+should not include a series
element in data
. Rather, this element will be created internally based
+on the supplied variables for gr
and subgr
. For example, if you are modelling
+counts for a group of species (labelled as species
in the data) across sampling sites
+(labelled as site
in the data) in three
+different geographical regions (labelled as region
), and you would like the residuals to be correlated
+within regions, then you should specify
+unit = site
, gr = region
, and subgr = species
. Internally, mvgam()
will appropriately order
+the data by unit
(in this case, by site
) and create
+the series
element for the data using something like: series = as.factor(paste0(group, '_', subgroup))
+
+
- share_obs_params
logical
. If TRUE
and the family
has additional family-specific observation parameters (e.g. variance components in
@@ -293,19 +318,19 @@
Argumentsmvgam()
+Other arguments to pass to mvgam
Value
-A list
object of classes jsdgam
and mvgam
containing model output,
+
A list
object of class mvgam
containing model output,
the text representation of the model file,
the mgcv model output (for easily generating simulations at
unsampled covariate values), Dunn-Smyth residuals for each series and key information needed
for other functions in the package. See mvgam-class
for details.
-Use methods(class = "mvgam")
and methods(class = "jsdgam")
for an overview on available methods
+Use methods(class = "mvgam")
for an overview on available methods
Details
@@ -313,11 +338,22 @@ Details
learned hierarchically, whereby responses to environmental variables in formula
can be partially
pooled and any latent, unmodelled residual associations can also be learned. In mvgam, both of
these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched
-flexibility to model full communities of species
+flexibility to model full communities of species. When calling jsdgam, an initial State-Space model using
+trend = 'None'
is set up and then modified to include the latent factors and their linear predictors.
+Consequently, you can inspect priors for these models using get_mvgam_priors by supplying the relevant
+formula
, factor_formula
, data
and family
arguments and using trend = 'None'
+
+
+ References
+ Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series.
+Methods in Ecology and Evolution. 14:3, 771-784.
+David I Warton, F Guillaume Blanchet, Robert B O’Hara, Otso Ovaskainen, Sara Taskinen, Steven C
+Walker & Francis KC Hui (2015). So many variables: joint modeling in community ecology.
+Trends in Ecology & Evolution 30:12, 766-779.
Author
@@ -326,7 +362,7 @@ Author<
Examples
- if (FALSE) {
+ # \donttest{
# Simulate latent count data for 500 spatial locations and 10 species
set.seed(0)
N_points <- 500
@@ -352,6 +388,7 @@ Examples# The design matrix for this smooth is in the 'X' slot
des_mat <- sm$X
dim(des_mat)
+#> [1] 500 25
# Function to generate a random covariance matrix where all variables
# have unit variance (i.e. diagonals are all 1)
@@ -416,17 +453,59 @@ Examplesggplot(dat, aes(x = count)) +
geom_histogram() +
facet_wrap(~ species, scales = 'free')
+#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) +
geom_point(size = 2.25) +
facet_wrap(~ species, scales = 'free') +
viridis::scale_color_viridis() +
theme_classic()
+
+
+# Inspect default priors for the underlying model
+priors <- get_mvgam_priors(formula = count ~
+ # Environmental model includes species-level intercepts
+ # and random slopes for a linear effect of temperature
+ species +
+ s(species, bs = 're', by = temperature),
+
+ # Each factor estimates a different nonlinear spatial process, using
+ # 'by = trend' as in other mvgam State-Space models
+ factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
+
+ # The data
+ data = dat,
+
+ # Poisson observations
+ family = poisson())
+head(priors)
+#> param_name param_length param_info
+#> 1 (Intercept) 1 (Intercept)
+#> 2 speciesspecies_2 1 speciesspecies_2 fixed effect
+#> 3 speciesspecies_3 1 speciesspecies_3 fixed effect
+#> 4 speciesspecies_4 1 speciesspecies_4 fixed effect
+#> 5 speciesspecies_5 1 speciesspecies_5 fixed effect
+#> 6 speciesspecies_6 1 speciesspecies_6 fixed effect
+#> prior example_change
+#> 1 (Intercept) ~ student_t(3, 2.1, 2.5); (Intercept) ~ normal(0, 1);
+#> 2 speciesspecies_2 ~ student_t(3, 0, 2); speciesspecies_2 ~ normal(0, 1);
+#> 3 speciesspecies_3 ~ student_t(3, 0, 2); speciesspecies_3 ~ normal(0, 1);
+#> 4 speciesspecies_4 ~ student_t(3, 0, 2); speciesspecies_4 ~ normal(0, 1);
+#> 5 speciesspecies_5 ~ student_t(3, 0, 2); speciesspecies_5 ~ normal(0, 1);
+#> 6 speciesspecies_6 ~ student_t(3, 0, 2); speciesspecies_6 ~ normal(0, 1);
+#> new_lowerbound new_upperbound
+#> 1 <NA> <NA>
+#> 2 <NA> <NA>
+#> 3 <NA> <NA>
+#> 4 <NA> <NA>
+#> 5 <NA> <NA>
+#> 6 <NA> <NA>
# Fit a JSDM that estimates a hierarchical temperature responses
-# and that uses three latent spatial factors
+# and that uses four latent spatial factors
mod <- jsdgam(formula = count ~
- # Environmental model includes species-level interecepts
+ # Environmental model includes species-level intercepts
# and random slopes for a linear effect of temperature
species +
s(species, bs = 're', by = temperature),
@@ -434,7 +513,10 @@ Examples # Each factor estimates a different nonlinear spatial process, using
# 'by = trend' as in other mvgam State-Space models
factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
- n_lv = 3,
+ n_lv = 4,
+
+ # Change default priors for fixed effect betas to standard normal
+ priors = prior(std_normal(), class = b),
# The data and the grouping variables
data = dat,
@@ -442,28 +524,47 @@ Examples subgr = species,
# Poisson observations
- family = poisson())
+ family = poisson(),
+ chains = 2,
+ silent = 2)
+#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5,
+#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4,
+#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359,
+#> from stan/lib/stan_math/stan/math/prim.hpp:16,
+#> from stan/lib/stan_math/stan/math/rev.hpp:16,
+#> from stan/lib/stan_math/stan/math.hpp:19,
+#> from stan/src/stan/model/model_header.hpp:4,
+#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpIzSX2o/model-acb842615ba.hpp:2:
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)':
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers
+#> 194 | if (cdf_n < 0.0)
+#> |
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory
# Plot species-level intercept estimates
plot_predictions(mod, condition = 'species',
type = 'link')
+
# Plot species' hierarchical responses to temperature
plot_predictions(mod, condition = c('temperature', 'species', 'species'),
type = 'link')
+
+
+# Plot posterior median estimates of the latent spatial factors
+plot(mod, type = 'smooths', trend_effects = TRUE)
+
-# Plotting species' average geographical predictions against the observed
-# data gives some idea of whether more flexibility in the spatial process
-# may be needed
-plot_predictions(mod, condition = c('lat', 'species', 'species'),
- points = 0.5)
-plot_predictions(mod, condition = c('lon', 'species', 'species'),
- points = 0.5)
+# Or using gratia, if you have it installed
+if(requireNamespace('gratia', quietly = TRUE)){
+ gratia::draw(mod, trend_effects = TRUE)
+}
+
# Calculate (median) residual spatial correlations
med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'),
2, median),
- nrow = N_species, ncol = 3)
+ nrow = N_species, ncol = 4)
cormat <- cov2cor(tcrossprod(med_loadings))
rownames(cormat) <- colnames(cormat) <- levels(dat$species)
@@ -472,12 +573,54 @@ Examples corrplot::corrplot(cormat, type = 'lower', tl.col = 'black',
tl.srt = 45)
}
+
# Posterior predictive checks and ELPD-LOO can ascertain model fit
pp_check(mod, type = "ecdf_overlay_grouped",
- group = "species", ndraws = 50)
+ group = "species", ndraws = 100)
+
loo(mod)
-}
+#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
+#>
+#> Computed from 1000 by 600 log-likelihood matrix
+#>
+#> Estimate SE
+#> elpd_loo -1942.0 41.6
+#> p_loo 351.5 23.9
+#> looic 3884.0 83.1
+#> ------
+#> Monte Carlo SE of elpd_loo is NA.
+#>
+#> Pareto k diagnostic values:
+#> Count Pct. Min. n_eff
+#> (-Inf, 0.5] (good) 292 48.7% 81
+#> (0.5, 0.7] (ok) 171 28.5% 32
+#> (0.7, 1] (bad) 109 18.2% 14
+#> (1, Inf) (very bad) 28 4.7% 2
+#> See help('pareto-k-diagnostic') for details.
+
+# Forecast log(counts) for entire region (site value doesn't matter as long
+# as each spatial location has a different and unique site identifier);
+# note this calculation takes a few minutes because of the need to calculate
+# draws from the stochastic latent factors
+newdata <- st_process %>%
+ dplyr::mutate(species = factor(species,
+ levels = paste0('species_',
+ 1:N_species))) %>%
+ dplyr::group_by(lat, lon) %>%
+ dplyr::mutate(site = dplyr::cur_group_id()) %>%
+ dplyr::ungroup()
+preds <- predict(mod, newdata = newdata)
+
+# Plot the median log(count) predictions on a grid
+newdata$log_count <- preds[,1]
+ggplot(newdata, aes(x = lat, y = lon, col = log_count)) +
+ geom_point(size = 1.5) +
+ facet_wrap(~ species, scales = 'free') +
+ viridis::scale_color_viridis() +
+ theme_classic()
+
+# }
Examplesstr(hc)
#> List of 15
#> $ call :Class 'formula' language y ~ s(season, bs = "cc", k = 6)
-<<<<<<< HEAD
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001b48c2257f8>
-=======
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001f1650b5778>
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> .. ..- attr(*, ".Environment")=<environment: 0x0000022097b26560>
#> $ trend_call : NULL
#> $ family : chr "poisson"
#> $ trend_model :List of 7
@@ -240,65 +215,43 @@ Examples#> $ type : chr "response"
#> $ series_names : chr [1:3] "series_1" "series_2" "series_3"
#> $ train_observations:List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:75] 1 0 0 1 0 0 1 1 1 4 ...
-#> ..$ series_2: int [1:75] 1 1 2 0 1 0 2 4 0 6 ...
-#> ..$ series_3: int [1:75] 1 2 2 0 0 0 3 0 0 3 ...
-=======
-#> ..$ series_1: int [1:75] 0 1 2 4 0 1 13 6 1 3 ...
-#> ..$ series_2: int [1:75] 0 0 0 0 0 0 0 1 2 0 ...
-#> ..$ series_3: int [1:75] 0 0 0 0 0 1 2 2 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_1: int [1:75] 0 0 0 1 0 1 1 4 0 0 ...
+#> ..$ series_2: int [1:75] 0 0 0 0 1 0 4 0 0 0 ...
+#> ..$ series_3: int [1:75] 1 1 1 2 0 1 4 6 6 0 ...
#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
#> $ test_observations : NULL
#> $ test_times : NULL
#> $ hindcasts :List of 3
-<<<<<<< HEAD
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 2 3 1 3 2 ...
+#> ..$ series_1: num [1:1000, 1:75] 0 0 0 1 0 1 0 0 4 0 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 0 3 1 0 5 1 1 0 0 2 ...
+#> ..$ series_2: num [1:1000, 1:75] 0 0 0 0 1 1 1 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 1 0 1 2 1 5 0 1 3 1 ...
-=======
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 1 2 0 1 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 0 2 1 0 1 2 1 1 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_3: num [1:1000, 1:75] 3 0 1 0 1 1 2 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
#> $ forecasts : NULL
#> - attr(*, "class")= chr "mvgam_forecast"
plot(hc, series = 1)
-#> No non-missing values in test_observations; cannot calculate forecast score
-plot(hc, series = 2)
#> No non-missing values in test_observations; cannot calculate forecast score
+plot(hc, series = 2)
-plot(hc, series = 3)
#> No non-missing values in test_observations; cannot calculate forecast score
+plot(hc, series = 3)
+#> No non-missing values in test_observations; cannot calculate forecast score
# Forecasts on response scale
fc <- forecast(mod, newdata = simdat$data_test)
str(fc)
#> List of 16
#> $ call :Class 'formula' language y ~ s(season, bs = "cc", k = 6)
-<<<<<<< HEAD
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001b48c2257f8>
-=======
-#> .. ..- attr(*, ".Environment")=<environment: 0x000001f1650b5778>
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> .. ..- attr(*, ".Environment")=<environment: 0x0000022097b26560>
#> $ trend_call : NULL
#> $ family : chr "poisson"
#> $ family_pars : NULL
@@ -317,84 +270,44 @@ Examples#> $ type : chr "response"
#> $ series_names : Factor w/ 3 levels "series_1","series_2",..: 1 2 3
#> $ train_observations:List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:75] 1 0 0 1 0 0 1 1 1 4 ...
-#> ..$ series_2: int [1:75] 1 1 2 0 1 0 2 4 0 6 ...
-#> ..$ series_3: int [1:75] 1 2 2 0 0 0 3 0 0 3 ...
+#> ..$ series_1: int [1:75] 0 0 0 1 0 1 1 4 0 0 ...
+#> ..$ series_2: int [1:75] 0 0 0 0 1 0 4 0 0 0 ...
+#> ..$ series_3: int [1:75] 1 1 1 2 0 1 4 6 6 0 ...
#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
#> $ test_observations :List of 3
-#> ..$ series_1: int [1:25] 0 0 0 0 0 1 1 4 2 3 ...
-#> ..$ series_2: int [1:25] 1 0 0 2 0 3 2 9 8 1 ...
-#> ..$ series_3: int [1:25] 0 0 1 1 0 0 0 4 1 1 ...
+#> ..$ series_1: int [1:25] 2 0 1 0 0 1 1 1 0 0 ...
+#> ..$ series_2: int [1:25] 4 0 0 1 2 1 2 1 2 0 ...
+#> ..$ series_3: int [1:25] 5 0 2 6 6 4 1 8 2 0 ...
#> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
#> $ hindcasts :List of 3
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 2 3 1 3 2 ...
+#> ..$ series_1: num [1:1000, 1:75] 0 0 0 1 0 1 0 0 4 0 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 0 3 1 0 5 1 1 0 0 2 ...
+#> ..$ series_2: num [1:1000, 1:75] 0 0 0 0 1 1 1 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 1 0 1 2 1 5 0 1 3 1 ...
-=======
-#> ..$ series_1: int [1:75] 0 1 2 4 0 1 13 6 1 3 ...
-#> ..$ series_2: int [1:75] 0 0 0 0 0 0 0 1 2 0 ...
-#> ..$ series_3: int [1:75] 0 0 0 0 0 1 2 2 1 0 ...
-#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
-#> $ test_observations :List of 3
-#> ..$ series_1: int [1:25] 2 1 1 12 2 6 0 5 0 0 ...
-#> ..$ series_2: int [1:25] 0 0 0 10 0 0 1 1 0 0 ...
-#> ..$ series_3: int [1:25] 1 1 1 4 1 3 0 0 1 0 ...
-#> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
-#> $ hindcasts :List of 3
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 1 2 0 1 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 0 2 1 0 1 2 1 1 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_3: num [1:1000, 1:75] 3 0 1 0 1 1 2 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
#> $ forecasts :List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:1000, 1:25] 0 2 0 0 1 0 0 1 0 2 ...
-#> ..$ series_2: int [1:1000, 1:25] 0 1 0 1 0 0 0 0 1 0 ...
-#> ..$ series_3: int [1:1000, 1:25] 0 1 0 1 0 0 1 1 0 0 ...
-#> - attr(*, "class")= chr "mvgam_forecast"
-plot(fc, series = 1)
-#> Out of sample DRPS:
-#> 11.465246
-
-plot(fc, series = 2)
-#> Out of sample DRPS:
-#> 25.378196
-
-plot(fc, series = 3)
-#> Out of sample DRPS:
-#> 17.418698
-=======
-#> ..$ series_1: int [1:1000, 1:25] 2 1 1 0 6 0 0 0 0 0 ...
-#> ..$ series_2: int [1:1000, 1:25] 1 2 1 0 0 2 0 0 0 1 ...
-#> ..$ series_3: int [1:1000, 1:25] 1 0 0 1 1 0 0 2 1 0 ...
+#> ..$ series_1: int [1:1000, 1:25] 0 0 0 0 0 0 1 1 1 0 ...
+#> ..$ series_2: int [1:1000, 1:25] 0 1 0 0 2 2 0 0 1 1 ...
+#> ..$ series_3: int [1:1000, 1:25] 3 0 0 0 3 1 0 0 0 0 ...
#> - attr(*, "class")= chr "mvgam_forecast"
plot(fc, series = 1)
#> Out of sample DRPS:
-#> 40.733034
+#> 10.868257
plot(fc, series = 2)
#> Out of sample DRPS:
-#> 20.222428
+#> 12.139955
plot(fc, series = 3)
#> Out of sample DRPS:
-#> 13.041625
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 32.02099
# Forecasts as expectations
diff --git a/docs/reference/get_mvgam_priors.html b/docs/reference/get_mvgam_priors.html
index d9b2555b..4d60cda9 100644
--- a/docs/reference/get_mvgam_priors.html
+++ b/docs/reference/get_mvgam_priors.html
@@ -1,7 +1,7 @@
Extract information on default prior distributions for an mvgam model — get_mvgam_priors • mvgam
@@ -54,7 +54,7 @@
- ![]()
Extract information on default prior distributions for an mvgam model
+ ![](../logo.png)
Extract information on default prior distributions for an mvgam model
Source: R/get_mvgam_priors.R
get_mvgam_priors.Rd
@@ -69,6 +69,7 @@ Usage
get_mvgam_priors(
formula,
trend_formula,
+ factor_formula,
data,
data_train,
family = "poisson",
@@ -88,11 +89,7 @@ Argumentss()
, te()
, ti()
, t2()
, as well as time-varying
-<<<<<<< HEAD
dynamic()
terms and nonparametric gp()
terms, can be added to the right hand side
-=======
-dynamic()
terms, can be added to the right hand side
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to specify that the linear predictor depends on smooth functions of predictors
(or linear functionals of these). In nmix()
family models, the formula
is used to
set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam
@@ -114,14 +111,14 @@ Argumentsjsdgam
+
+
data
A dataframe
or list
containing the model response variable and covariates
-<<<<<<< HEAD
required by the GAM formula
and optional trend_formula
. Most models should include columns:
series
(a factor
index of the series IDs; the number of levels should be identical
-=======
-required by the GAM formula
and optional trend_formula
. Most models should include columns:
-#'
series
(a factor
index of the series IDs; the number of levels should be identical
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to the number of unique series labels (i.e. n_series = length(levels(data$series))
))
time
(numeric
or integer
index of the time point for each observation).
For most dynamic trend types available in mvgam
(see argument trend_model
), time should be
@@ -450,23 +447,13 @@
Examples#> 6 trend sd sigma ~ student_t(3, 0, 2.5);
#> 7 inverse of NB dispsersion phi_inv ~ student_t(3, 0, 0.1);
#> example_change new_lowerbound new_upperbound
-<<<<<<< HEAD
-#> 1 lambda ~ exponential(0.53); NA NA
-#> 2 mu_raw ~ normal(0.3, 0.11); NA NA
-#> 3 sigma_raw ~ exponential(0.34); NA NA
-#> 4 ar1 ~ normal(-0.87, 0.84); NA NA
-#> 5 ar2 ~ normal(0.56, 0.76); NA NA
-#> 6 sigma ~ exponential(0.88); NA NA
-#> 7 phi_inv ~ normal(0.81, 0.92); NA NA
-=======
-#> 1 lambda ~ exponential(0.27); NA NA
-#> 2 mu_raw ~ normal(0.58, 0.14); NA NA
-#> 3 sigma_raw ~ exponential(0.42); NA NA
-#> 4 ar1 ~ normal(-0.22, 0.6); NA NA
-#> 5 ar2 ~ normal(-0.92, 0.63); NA NA
-#> 6 sigma ~ exponential(0.32); NA NA
-#> 7 phi_inv ~ normal(0.36, 0.79); NA NA
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 1 lambda ~ exponential(0.65); NA NA
+#> 2 mu_raw ~ normal(0.92, 1); NA NA
+#> 3 sigma_raw ~ exponential(0.57); NA NA
+#> 4 ar1 ~ normal(-0.96, 0.9); NA NA
+#> 5 ar2 ~ normal(0.5, 0.48); NA NA
+#> 6 sigma ~ exponential(0.45); NA NA
+#> 7 phi_inv ~ normal(0.71, 0.14); NA NA
# Make a few changes; first, change the population mean for the series-level
# random intercepts
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 76c8fa37..dc02b805 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -1,5 +1,5 @@
-Function reference • mvgam Function reference • mvgam
@@ -52,7 +52,7 @@
- ![]()
Function reference
+ ![](../logo.png)
Function reference
@@ -385,7 +385,7 @@ All functionsseries_to_mvgam()
- This function converts univariate or multivariate time series (
xts
or ts
objects)
-to the format necessary for mvgam
+to the format necessary for mvgam
-
sim_mvgam()
diff --git a/docs/reference/jsdgam-1.png b/docs/reference/jsdgam-1.png
new file mode 100644
index 00000000..6a3f8289
Binary files /dev/null and b/docs/reference/jsdgam-1.png differ
diff --git a/docs/reference/jsdgam-10.png b/docs/reference/jsdgam-10.png
new file mode 100644
index 00000000..ca1876f1
Binary files /dev/null and b/docs/reference/jsdgam-10.png differ
diff --git a/docs/reference/jsdgam-11.png b/docs/reference/jsdgam-11.png
new file mode 100644
index 00000000..16c1f439
Binary files /dev/null and b/docs/reference/jsdgam-11.png differ
diff --git a/docs/reference/jsdgam-2.png b/docs/reference/jsdgam-2.png
new file mode 100644
index 00000000..b75ffd16
Binary files /dev/null and b/docs/reference/jsdgam-2.png differ
diff --git a/docs/reference/jsdgam-3.png b/docs/reference/jsdgam-3.png
new file mode 100644
index 00000000..3e2fdbc7
Binary files /dev/null and b/docs/reference/jsdgam-3.png differ
diff --git a/docs/reference/jsdgam-4.png b/docs/reference/jsdgam-4.png
new file mode 100644
index 00000000..adf629f3
Binary files /dev/null and b/docs/reference/jsdgam-4.png differ
diff --git a/docs/reference/jsdgam-5.png b/docs/reference/jsdgam-5.png
new file mode 100644
index 00000000..a5df20a4
Binary files /dev/null and b/docs/reference/jsdgam-5.png differ
diff --git a/docs/reference/jsdgam-6.png b/docs/reference/jsdgam-6.png
new file mode 100644
index 00000000..f501a71f
Binary files /dev/null and b/docs/reference/jsdgam-6.png differ
diff --git a/docs/reference/jsdgam-7.png b/docs/reference/jsdgam-7.png
new file mode 100644
index 00000000..b3f126c3
Binary files /dev/null and b/docs/reference/jsdgam-7.png differ
diff --git a/docs/reference/jsdgam-8.png b/docs/reference/jsdgam-8.png
new file mode 100644
index 00000000..7ec77b86
Binary files /dev/null and b/docs/reference/jsdgam-8.png differ
diff --git a/docs/reference/jsdgam-9.png b/docs/reference/jsdgam-9.png
new file mode 100644
index 00000000..8cd7bce8
Binary files /dev/null and b/docs/reference/jsdgam-9.png differ
diff --git a/docs/reference/jsdgam.html b/docs/reference/jsdgam.html
index 83602f06..e2b1d787 100644
--- a/docs/reference/jsdgam.html
+++ b/docs/reference/jsdgam.html
@@ -9,7 +9,7 @@
specification is extremely flexible, allowing users to include spatial, temporal or any other type
of predictor effects to more efficiently capture unmodelled residual associations, while the
observation model can also be highly flexible (including all smooth, GP and other effects that
-mvgam can handle)">
@@ -62,7 +62,7 @@
- ![]()
Fit Joint Species Distribution Models in mvgam
+ ![](../logo.png)
Fit Joint Species Distribution Models in mvgam
Source: R/jsdgam.R
jsdgam.Rd
@@ -174,6 +174,31 @@ Argumentspoisson(). See mvgam_families
for more details
+- unit
+The unquoted name of the variable that represents the unit of analysis in data
over
+which latent residuals should be correlated. This variable should be either a
+numeric
or integer
variable in the supplied data
.
+Defaults to time
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case. See examples below
+for further details and explanations
+
+
+- subgr
+A subgrouping factor
variable specifying which element in data
represents the
+different observational units. Defaults to series
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case.
+But note that models that use the hierarchical correlations (by supplying a value for gr
)
+should not include a series
element in data
. Rather, this element will be created internally based
+on the supplied variables for gr
and subgr
. For example, if you are modelling
+counts for a group of species (labelled as species
in the data) across sampling sites
+(labelled as site
in the data) in three
+different geographical regions (labelled as region
), and you would like the residuals to be correlated
+within regions, then you should specify
+unit = site
, gr = region
, and subgr = species
. Internally, mvgam()
will appropriately order
+the data by unit
(in this case, by site
) and create
+the series
element for the data using something like: series = as.factor(paste0(group, '_', subgroup))
+
+
- share_obs_params
logical
. If TRUE
and the family
has additional family-specific observation parameters (e.g. variance components in
@@ -293,19 +318,19 @@
Argumentsmvgam()
+Other arguments to pass to mvgam
Value
-A list
object of classes jsdgam
and mvgam
containing model output,
+
A list
object of class mvgam
containing model output,
the text representation of the model file,
the mgcv model output (for easily generating simulations at
unsampled covariate values), Dunn-Smyth residuals for each series and key information needed
for other functions in the package. See mvgam-class
for details.
-Use methods(class = "mvgam")
and methods(class = "jsdgam")
for an overview on available methods
+Use methods(class = "mvgam")
for an overview on available methods
Details
@@ -313,11 +338,22 @@ Details
learned hierarchically, whereby responses to environmental variables in formula
can be partially
pooled and any latent, unmodelled residual associations can also be learned. In mvgam, both of
these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched
-flexibility to model full communities of species
+flexibility to model full communities of species. When calling jsdgam, an initial State-Space model using
+trend = 'None'
is set up and then modified to include the latent factors and their linear predictors.
+Consequently, you can inspect priors for these models using get_mvgam_priors by supplying the relevant
+formula
, factor_formula
, data
and family
arguments and using trend = 'None'
+
+
+ References
+ Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series.
+Methods in Ecology and Evolution. 14:3, 771-784.
+David I Warton, F Guillaume Blanchet, Robert B O’Hara, Otso Ovaskainen, Sara Taskinen, Steven C
+Walker & Francis KC Hui (2015). So many variables: joint modeling in community ecology.
+Trends in Ecology & Evolution 30:12, 766-779.
Author
@@ -326,7 +362,7 @@ Author<
Examples
- if (FALSE) {
+ # \donttest{
# Simulate latent count data for 500 spatial locations and 10 species
set.seed(0)
N_points <- 500
@@ -352,6 +388,7 @@ Examples# The design matrix for this smooth is in the 'X' slot
des_mat <- sm$X
dim(des_mat)
+#> [1] 500 25
# Function to generate a random covariance matrix where all variables
# have unit variance (i.e. diagonals are all 1)
@@ -416,17 +453,59 @@ Examplesggplot(dat, aes(x = count)) +
geom_histogram() +
facet_wrap(~ species, scales = 'free')
+#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) +
geom_point(size = 2.25) +
facet_wrap(~ species, scales = 'free') +
viridis::scale_color_viridis() +
theme_classic()
+
+
+# Inspect default priors for the underlying model
+priors <- get_mvgam_priors(formula = count ~
+ # Environmental model includes species-level intercepts
+ # and random slopes for a linear effect of temperature
+ species +
+ s(species, bs = 're', by = temperature),
+
+ # Each factor estimates a different nonlinear spatial process, using
+ # 'by = trend' as in other mvgam State-Space models
+ factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
+
+ # The data
+ data = dat,
+
+ # Poisson observations
+ family = poisson())
+head(priors)
+#> param_name param_length param_info
+#> 1 (Intercept) 1 (Intercept)
+#> 2 speciesspecies_2 1 speciesspecies_2 fixed effect
+#> 3 speciesspecies_3 1 speciesspecies_3 fixed effect
+#> 4 speciesspecies_4 1 speciesspecies_4 fixed effect
+#> 5 speciesspecies_5 1 speciesspecies_5 fixed effect
+#> 6 speciesspecies_6 1 speciesspecies_6 fixed effect
+#> prior example_change
+#> 1 (Intercept) ~ student_t(3, 2.1, 2.5); (Intercept) ~ normal(0, 1);
+#> 2 speciesspecies_2 ~ student_t(3, 0, 2); speciesspecies_2 ~ normal(0, 1);
+#> 3 speciesspecies_3 ~ student_t(3, 0, 2); speciesspecies_3 ~ normal(0, 1);
+#> 4 speciesspecies_4 ~ student_t(3, 0, 2); speciesspecies_4 ~ normal(0, 1);
+#> 5 speciesspecies_5 ~ student_t(3, 0, 2); speciesspecies_5 ~ normal(0, 1);
+#> 6 speciesspecies_6 ~ student_t(3, 0, 2); speciesspecies_6 ~ normal(0, 1);
+#> new_lowerbound new_upperbound
+#> 1 <NA> <NA>
+#> 2 <NA> <NA>
+#> 3 <NA> <NA>
+#> 4 <NA> <NA>
+#> 5 <NA> <NA>
+#> 6 <NA> <NA>
# Fit a JSDM that estimates a hierarchical temperature responses
-# and that uses three latent spatial factors
+# and that uses four latent spatial factors
mod <- jsdgam(formula = count ~
- # Environmental model includes species-level interecepts
+ # Environmental model includes species-level intercepts
# and random slopes for a linear effect of temperature
species +
s(species, bs = 're', by = temperature),
@@ -434,7 +513,10 @@ Examples # Each factor estimates a different nonlinear spatial process, using
# 'by = trend' as in other mvgam State-Space models
factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
- n_lv = 3,
+ n_lv = 4,
+
+ # Change default priors for fixed effect betas to standard normal
+ priors = prior(std_normal(), class = b),
# The data and the grouping variables
data = dat,
@@ -442,28 +524,47 @@ Examples subgr = species,
# Poisson observations
- family = poisson())
+ family = poisson(),
+ chains = 2,
+ silent = 2)
+#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5,
+#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4,
+#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359,
+#> from stan/lib/stan_math/stan/math/prim.hpp:16,
+#> from stan/lib/stan_math/stan/math/rev.hpp:16,
+#> from stan/lib/stan_math/stan/math.hpp:19,
+#> from stan/src/stan/model/model_header.hpp:4,
+#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpIzSX2o/model-acb842615ba.hpp:2:
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)':
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers
+#> 194 | if (cdf_n < 0.0)
+#> |
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory
# Plot species-level intercept estimates
plot_predictions(mod, condition = 'species',
type = 'link')
+
# Plot species' hierarchical responses to temperature
plot_predictions(mod, condition = c('temperature', 'species', 'species'),
type = 'link')
+
+
+# Plot posterior median estimates of the latent spatial factors
+plot(mod, type = 'smooths', trend_effects = TRUE)
+
-# Plotting species' average geographical predictions against the observed
-# data gives some idea of whether more flexibility in the spatial process
-# may be needed
-plot_predictions(mod, condition = c('lat', 'species', 'species'),
- points = 0.5)
-plot_predictions(mod, condition = c('lon', 'species', 'species'),
- points = 0.5)
+# Or using gratia, if you have it installed
+if(requireNamespace('gratia', quietly = TRUE)){
+ gratia::draw(mod, trend_effects = TRUE)
+}
+
# Calculate (median) residual spatial correlations
med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'),
2, median),
- nrow = N_species, ncol = 3)
+ nrow = N_species, ncol = 4)
cormat <- cov2cor(tcrossprod(med_loadings))
rownames(cormat) <- colnames(cormat) <- levels(dat$species)
@@ -472,12 +573,54 @@ Examples corrplot::corrplot(cormat, type = 'lower', tl.col = 'black',
tl.srt = 45)
}
+
# Posterior predictive checks and ELPD-LOO can ascertain model fit
pp_check(mod, type = "ecdf_overlay_grouped",
- group = "species", ndraws = 50)
+ group = "species", ndraws = 100)
+
loo(mod)
-}
+#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
+#>
+#> Computed from 1000 by 600 log-likelihood matrix
+#>
+#> Estimate SE
+#> elpd_loo -1942.0 41.6
+#> p_loo 351.5 23.9
+#> looic 3884.0 83.1
+#> ------
+#> Monte Carlo SE of elpd_loo is NA.
+#>
+#> Pareto k diagnostic values:
+#> Count Pct. Min. n_eff
+#> (-Inf, 0.5] (good) 292 48.7% 81
+#> (0.5, 0.7] (ok) 171 28.5% 32
+#> (0.7, 1] (bad) 109 18.2% 14
+#> (1, Inf) (very bad) 28 4.7% 2
+#> See help('pareto-k-diagnostic') for details.
+
+# Forecast log(counts) for entire region (site value doesn't matter as long
+# as each spatial location has a different and unique site identifier);
+# note this calculation takes a few minutes because of the need to calculate
+# draws from the stochastic latent factors
+newdata <- st_process %>%
+ dplyr::mutate(species = factor(species,
+ levels = paste0('species_',
+ 1:N_species))) %>%
+ dplyr::group_by(lat, lon) %>%
+ dplyr::mutate(site = dplyr::cur_group_id()) %>%
+ dplyr::ungroup()
+preds <- predict(mod, newdata = newdata)
+
+# Plot the median log(count) predictions on a grid
+newdata$log_count <- preds[,1]
+ggplot(newdata, aes(x = lat, y = lon, col = log_count)) +
+ geom_point(size = 1.5) +
+ facet_wrap(~ species, scales = 'free') +
+ viridis::scale_color_viridis() +
+ theme_classic()
+
+# }
![](forecast.mvgam-1.png)
![](forecast.mvgam-2.png)
![](forecast.mvgam-3.png)
Examples#> $ type : chr "response"
#> $ series_names : Factor w/ 3 levels "series_1","series_2",..: 1 2 3
#> $ train_observations:List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:75] 1 0 0 1 0 0 1 1 1 4 ...
-#> ..$ series_2: int [1:75] 1 1 2 0 1 0 2 4 0 6 ...
-#> ..$ series_3: int [1:75] 1 2 2 0 0 0 3 0 0 3 ...
+#> ..$ series_1: int [1:75] 0 0 0 1 0 1 1 4 0 0 ...
+#> ..$ series_2: int [1:75] 0 0 0 0 1 0 4 0 0 0 ...
+#> ..$ series_3: int [1:75] 1 1 1 2 0 1 4 6 6 0 ...
#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
#> $ test_observations :List of 3
-#> ..$ series_1: int [1:25] 0 0 0 0 0 1 1 4 2 3 ...
-#> ..$ series_2: int [1:25] 1 0 0 2 0 3 2 9 8 1 ...
-#> ..$ series_3: int [1:25] 0 0 1 1 0 0 0 4 1 1 ...
+#> ..$ series_1: int [1:25] 2 0 1 0 0 1 1 1 0 0 ...
+#> ..$ series_2: int [1:25] 4 0 0 1 2 1 2 1 2 0 ...
+#> ..$ series_3: int [1:25] 5 0 2 6 6 4 1 8 2 0 ...
#> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
#> $ hindcasts :List of 3
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 2 3 1 3 2 ...
+#> ..$ series_1: num [1:1000, 1:75] 0 0 0 1 0 1 0 0 4 0 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 0 3 1 0 5 1 1 0 0 2 ...
+#> ..$ series_2: num [1:1000, 1:75] 0 0 0 0 1 1 1 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 1 0 1 2 1 5 0 1 3 1 ...
-=======
-#> ..$ series_1: int [1:75] 0 1 2 4 0 1 13 6 1 3 ...
-#> ..$ series_2: int [1:75] 0 0 0 0 0 0 0 1 2 0 ...
-#> ..$ series_3: int [1:75] 0 0 0 0 0 1 2 2 1 0 ...
-#> $ train_times : int [1:75] 1 2 3 4 5 6 7 8 9 10 ...
-#> $ test_observations :List of 3
-#> ..$ series_1: int [1:25] 2 1 1 12 2 6 0 5 0 0 ...
-#> ..$ series_2: int [1:25] 0 0 0 10 0 0 1 1 0 0 ...
-#> ..$ series_3: int [1:25] 1 1 1 4 1 3 0 0 1 0 ...
-#> $ test_times : int [1:25] 76 77 78 79 80 81 82 83 84 85 ...
-#> $ hindcasts :List of 3
-#> ..$ series_1: num [1:1000, 1:75] 1 0 0 0 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,1]" "ypred[2,1]" "ypred[3,1]" "ypred[4,1]" ...
-#> ..$ series_2: num [1:1000, 1:75] 1 2 0 1 0 0 0 0 1 0 ...
-#> .. ..- attr(*, "dimnames")=List of 2
-#> .. .. ..$ : NULL
-#> .. .. ..$ : chr [1:75] "ypred[1,2]" "ypred[2,2]" "ypred[3,2]" "ypred[4,2]" ...
-#> ..$ series_3: num [1:1000, 1:75] 0 2 1 0 1 2 1 1 1 0 ...
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> ..$ series_3: num [1:1000, 1:75] 3 0 1 0 1 1 2 1 1 1 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : NULL
#> .. .. ..$ : chr [1:75] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ...
#> $ forecasts :List of 3
-<<<<<<< HEAD
-#> ..$ series_1: int [1:1000, 1:25] 0 2 0 0 1 0 0 1 0 2 ...
-#> ..$ series_2: int [1:1000, 1:25] 0 1 0 1 0 0 0 0 1 0 ...
-#> ..$ series_3: int [1:1000, 1:25] 0 1 0 1 0 0 1 1 0 0 ...
-#> - attr(*, "class")= chr "mvgam_forecast"
-plot(fc, series = 1)
-#> Out of sample DRPS:
-#> 11.465246
-
-plot(fc, series = 2)
-#> Out of sample DRPS:
-#> 25.378196
-
-plot(fc, series = 3)
-#> Out of sample DRPS:
-#> 17.418698
-=======
-#> ..$ series_1: int [1:1000, 1:25] 2 1 1 0 6 0 0 0 0 0 ...
-#> ..$ series_2: int [1:1000, 1:25] 1 2 1 0 0 2 0 0 0 1 ...
-#> ..$ series_3: int [1:1000, 1:25] 1 0 0 1 1 0 0 2 1 0 ...
+#> ..$ series_1: int [1:1000, 1:25] 0 0 0 0 0 0 1 1 1 0 ...
+#> ..$ series_2: int [1:1000, 1:25] 0 1 0 0 2 2 0 0 1 1 ...
+#> ..$ series_3: int [1:1000, 1:25] 3 0 0 0 3 1 0 0 0 0 ...
#> - attr(*, "class")= chr "mvgam_forecast"
plot(fc, series = 1)
#> Out of sample DRPS:
-#> 40.733034
+#> 10.868257
plot(fc, series = 2)
#> Out of sample DRPS:
-#> 20.222428
+#> 12.139955
plot(fc, series = 3)
#> Out of sample DRPS:
-#> 13.041625
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 32.02099
# Forecasts as expectations
diff --git a/docs/reference/get_mvgam_priors.html b/docs/reference/get_mvgam_priors.html
index d9b2555b..4d60cda9 100644
--- a/docs/reference/get_mvgam_priors.html
+++ b/docs/reference/get_mvgam_priors.html
@@ -1,7 +1,7 @@
Extract information on default prior distributions for an mvgam model — get_mvgam_priors • mvgam
@@ -54,7 +54,7 @@
- ![]()
Extract information on default prior distributions for an mvgam model
+ ![](../logo.png)
Extract information on default prior distributions for an mvgam model
Source: R/get_mvgam_priors.R
get_mvgam_priors.Rd
@@ -69,6 +69,7 @@ Usage
get_mvgam_priors(
formula,
trend_formula,
+ factor_formula,
data,
data_train,
family = "poisson",
@@ -88,11 +89,7 @@ Argumentss()
, te()
, ti()
, t2()
, as well as time-varying
-<<<<<<< HEAD
dynamic()
terms and nonparametric gp()
terms, can be added to the right hand side
-=======
-dynamic()
terms, can be added to the right hand side
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to specify that the linear predictor depends on smooth functions of predictors
(or linear functionals of these). In nmix()
family models, the formula
is used to
set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam
@@ -114,14 +111,14 @@ Argumentsjsdgam
+
+
data
A dataframe
or list
containing the model response variable and covariates
-<<<<<<< HEAD
required by the GAM formula
and optional trend_formula
. Most models should include columns:
series
(a factor
index of the series IDs; the number of levels should be identical
-=======
-required by the GAM formula
and optional trend_formula
. Most models should include columns:
-#'
series
(a factor
index of the series IDs; the number of levels should be identical
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to the number of unique series labels (i.e. n_series = length(levels(data$series))
))
time
(numeric
or integer
index of the time point for each observation).
For most dynamic trend types available in mvgam
(see argument trend_model
), time should be
@@ -450,23 +447,13 @@
Examples#> 6 trend sd sigma ~ student_t(3, 0, 2.5);
#> 7 inverse of NB dispsersion phi_inv ~ student_t(3, 0, 0.1);
#> example_change new_lowerbound new_upperbound
-<<<<<<< HEAD
-#> 1 lambda ~ exponential(0.53); NA NA
-#> 2 mu_raw ~ normal(0.3, 0.11); NA NA
-#> 3 sigma_raw ~ exponential(0.34); NA NA
-#> 4 ar1 ~ normal(-0.87, 0.84); NA NA
-#> 5 ar2 ~ normal(0.56, 0.76); NA NA
-#> 6 sigma ~ exponential(0.88); NA NA
-#> 7 phi_inv ~ normal(0.81, 0.92); NA NA
-=======
-#> 1 lambda ~ exponential(0.27); NA NA
-#> 2 mu_raw ~ normal(0.58, 0.14); NA NA
-#> 3 sigma_raw ~ exponential(0.42); NA NA
-#> 4 ar1 ~ normal(-0.22, 0.6); NA NA
-#> 5 ar2 ~ normal(-0.92, 0.63); NA NA
-#> 6 sigma ~ exponential(0.32); NA NA
-#> 7 phi_inv ~ normal(0.36, 0.79); NA NA
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 1 lambda ~ exponential(0.65); NA NA
+#> 2 mu_raw ~ normal(0.92, 1); NA NA
+#> 3 sigma_raw ~ exponential(0.57); NA NA
+#> 4 ar1 ~ normal(-0.96, 0.9); NA NA
+#> 5 ar2 ~ normal(0.5, 0.48); NA NA
+#> 6 sigma ~ exponential(0.45); NA NA
+#> 7 phi_inv ~ normal(0.71, 0.14); NA NA
# Make a few changes; first, change the population mean for the series-level
# random intercepts
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 76c8fa37..dc02b805 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -1,5 +1,5 @@
-Function reference • mvgam Function reference • mvgam
@@ -52,7 +52,7 @@
- ![]()
Function reference
+ ![](../logo.png)
Function reference
@@ -385,7 +385,7 @@ All functionsseries_to_mvgam()
- This function converts univariate or multivariate time series (
xts
or ts
objects)
-to the format necessary for mvgam
+to the format necessary for mvgam
-
sim_mvgam()
diff --git a/docs/reference/jsdgam-1.png b/docs/reference/jsdgam-1.png
new file mode 100644
index 00000000..6a3f8289
Binary files /dev/null and b/docs/reference/jsdgam-1.png differ
diff --git a/docs/reference/jsdgam-10.png b/docs/reference/jsdgam-10.png
new file mode 100644
index 00000000..ca1876f1
Binary files /dev/null and b/docs/reference/jsdgam-10.png differ
diff --git a/docs/reference/jsdgam-11.png b/docs/reference/jsdgam-11.png
new file mode 100644
index 00000000..16c1f439
Binary files /dev/null and b/docs/reference/jsdgam-11.png differ
diff --git a/docs/reference/jsdgam-2.png b/docs/reference/jsdgam-2.png
new file mode 100644
index 00000000..b75ffd16
Binary files /dev/null and b/docs/reference/jsdgam-2.png differ
diff --git a/docs/reference/jsdgam-3.png b/docs/reference/jsdgam-3.png
new file mode 100644
index 00000000..3e2fdbc7
Binary files /dev/null and b/docs/reference/jsdgam-3.png differ
diff --git a/docs/reference/jsdgam-4.png b/docs/reference/jsdgam-4.png
new file mode 100644
index 00000000..adf629f3
Binary files /dev/null and b/docs/reference/jsdgam-4.png differ
diff --git a/docs/reference/jsdgam-5.png b/docs/reference/jsdgam-5.png
new file mode 100644
index 00000000..a5df20a4
Binary files /dev/null and b/docs/reference/jsdgam-5.png differ
diff --git a/docs/reference/jsdgam-6.png b/docs/reference/jsdgam-6.png
new file mode 100644
index 00000000..f501a71f
Binary files /dev/null and b/docs/reference/jsdgam-6.png differ
diff --git a/docs/reference/jsdgam-7.png b/docs/reference/jsdgam-7.png
new file mode 100644
index 00000000..b3f126c3
Binary files /dev/null and b/docs/reference/jsdgam-7.png differ
diff --git a/docs/reference/jsdgam-8.png b/docs/reference/jsdgam-8.png
new file mode 100644
index 00000000..7ec77b86
Binary files /dev/null and b/docs/reference/jsdgam-8.png differ
diff --git a/docs/reference/jsdgam-9.png b/docs/reference/jsdgam-9.png
new file mode 100644
index 00000000..8cd7bce8
Binary files /dev/null and b/docs/reference/jsdgam-9.png differ
diff --git a/docs/reference/jsdgam.html b/docs/reference/jsdgam.html
index 83602f06..e2b1d787 100644
--- a/docs/reference/jsdgam.html
+++ b/docs/reference/jsdgam.html
@@ -9,7 +9,7 @@
specification is extremely flexible, allowing users to include spatial, temporal or any other type
of predictor effects to more efficiently capture unmodelled residual associations, while the
observation model can also be highly flexible (including all smooth, GP and other effects that
-mvgam can handle)">
@@ -62,7 +62,7 @@
- ![]()
Fit Joint Species Distribution Models in mvgam
+ ![](../logo.png)
Fit Joint Species Distribution Models in mvgam
Source: R/jsdgam.R
jsdgam.Rd
@@ -174,6 +174,31 @@ Argumentspoisson(). See mvgam_families
for more details
+- unit
+The unquoted name of the variable that represents the unit of analysis in data
over
+which latent residuals should be correlated. This variable should be either a
+numeric
or integer
variable in the supplied data
.
+Defaults to time
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case. See examples below
+for further details and explanations
+
+
+- subgr
+A subgrouping factor
variable specifying which element in data
represents the
+different observational units. Defaults to series
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case.
+But note that models that use the hierarchical correlations (by supplying a value for gr
)
+should not include a series
element in data
. Rather, this element will be created internally based
+on the supplied variables for gr
and subgr
. For example, if you are modelling
+counts for a group of species (labelled as species
in the data) across sampling sites
+(labelled as site
in the data) in three
+different geographical regions (labelled as region
), and you would like the residuals to be correlated
+within regions, then you should specify
+unit = site
, gr = region
, and subgr = species
. Internally, mvgam()
will appropriately order
+the data by unit
(in this case, by site
) and create
+the series
element for the data using something like: series = as.factor(paste0(group, '_', subgroup))
+
+
- share_obs_params
logical
. If TRUE
and the family
has additional family-specific observation parameters (e.g. variance components in
@@ -293,19 +318,19 @@
Argumentsmvgam()
+Other arguments to pass to mvgam
Value
-A list
object of classes jsdgam
and mvgam
containing model output,
+
A list
object of class mvgam
containing model output,
the text representation of the model file,
the mgcv model output (for easily generating simulations at
unsampled covariate values), Dunn-Smyth residuals for each series and key information needed
for other functions in the package. See mvgam-class
for details.
-Use methods(class = "mvgam")
and methods(class = "jsdgam")
for an overview on available methods
+Use methods(class = "mvgam")
for an overview on available methods
Details
@@ -313,11 +338,22 @@ Details
learned hierarchically, whereby responses to environmental variables in formula
can be partially
pooled and any latent, unmodelled residual associations can also be learned. In mvgam, both of
these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched
-flexibility to model full communities of species
+flexibility to model full communities of species. When calling jsdgam, an initial State-Space model using
+trend = 'None'
is set up and then modified to include the latent factors and their linear predictors.
+Consequently, you can inspect priors for these models using get_mvgam_priors by supplying the relevant
+formula
, factor_formula
, data
and family
arguments and using trend = 'None'
+
+
+ References
+ Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series.
+Methods in Ecology and Evolution. 14:3, 771-784.
+David I Warton, F Guillaume Blanchet, Robert B O’Hara, Otso Ovaskainen, Sara Taskinen, Steven C
+Walker & Francis KC Hui (2015). So many variables: joint modeling in community ecology.
+Trends in Ecology & Evolution 30:12, 766-779.
Author
@@ -326,7 +362,7 @@ Author<
Examples
- if (FALSE) {
+ # \donttest{
# Simulate latent count data for 500 spatial locations and 10 species
set.seed(0)
N_points <- 500
@@ -352,6 +388,7 @@ Examples# The design matrix for this smooth is in the 'X' slot
des_mat <- sm$X
dim(des_mat)
+#> [1] 500 25
# Function to generate a random covariance matrix where all variables
# have unit variance (i.e. diagonals are all 1)
@@ -416,17 +453,59 @@ Examplesggplot(dat, aes(x = count)) +
geom_histogram() +
facet_wrap(~ species, scales = 'free')
+#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) +
geom_point(size = 2.25) +
facet_wrap(~ species, scales = 'free') +
viridis::scale_color_viridis() +
theme_classic()
+
+
+# Inspect default priors for the underlying model
+priors <- get_mvgam_priors(formula = count ~
+ # Environmental model includes species-level intercepts
+ # and random slopes for a linear effect of temperature
+ species +
+ s(species, bs = 're', by = temperature),
+
+ # Each factor estimates a different nonlinear spatial process, using
+ # 'by = trend' as in other mvgam State-Space models
+ factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
+
+ # The data
+ data = dat,
+
+ # Poisson observations
+ family = poisson())
+head(priors)
+#> param_name param_length param_info
+#> 1 (Intercept) 1 (Intercept)
+#> 2 speciesspecies_2 1 speciesspecies_2 fixed effect
+#> 3 speciesspecies_3 1 speciesspecies_3 fixed effect
+#> 4 speciesspecies_4 1 speciesspecies_4 fixed effect
+#> 5 speciesspecies_5 1 speciesspecies_5 fixed effect
+#> 6 speciesspecies_6 1 speciesspecies_6 fixed effect
+#> prior example_change
+#> 1 (Intercept) ~ student_t(3, 2.1, 2.5); (Intercept) ~ normal(0, 1);
+#> 2 speciesspecies_2 ~ student_t(3, 0, 2); speciesspecies_2 ~ normal(0, 1);
+#> 3 speciesspecies_3 ~ student_t(3, 0, 2); speciesspecies_3 ~ normal(0, 1);
+#> 4 speciesspecies_4 ~ student_t(3, 0, 2); speciesspecies_4 ~ normal(0, 1);
+#> 5 speciesspecies_5 ~ student_t(3, 0, 2); speciesspecies_5 ~ normal(0, 1);
+#> 6 speciesspecies_6 ~ student_t(3, 0, 2); speciesspecies_6 ~ normal(0, 1);
+#> new_lowerbound new_upperbound
+#> 1 <NA> <NA>
+#> 2 <NA> <NA>
+#> 3 <NA> <NA>
+#> 4 <NA> <NA>
+#> 5 <NA> <NA>
+#> 6 <NA> <NA>
# Fit a JSDM that estimates a hierarchical temperature responses
-# and that uses three latent spatial factors
+# and that uses four latent spatial factors
mod <- jsdgam(formula = count ~
- # Environmental model includes species-level interecepts
+ # Environmental model includes species-level intercepts
# and random slopes for a linear effect of temperature
species +
s(species, bs = 're', by = temperature),
@@ -434,7 +513,10 @@ Examples # Each factor estimates a different nonlinear spatial process, using
# 'by = trend' as in other mvgam State-Space models
factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
- n_lv = 3,
+ n_lv = 4,
+
+ # Change default priors for fixed effect betas to standard normal
+ priors = prior(std_normal(), class = b),
# The data and the grouping variables
data = dat,
@@ -442,28 +524,47 @@ Examples subgr = species,
# Poisson observations
- family = poisson())
+ family = poisson(),
+ chains = 2,
+ silent = 2)
+#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5,
+#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4,
+#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359,
+#> from stan/lib/stan_math/stan/math/prim.hpp:16,
+#> from stan/lib/stan_math/stan/math/rev.hpp:16,
+#> from stan/lib/stan_math/stan/math.hpp:19,
+#> from stan/src/stan/model/model_header.hpp:4,
+#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpIzSX2o/model-acb842615ba.hpp:2:
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)':
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers
+#> 194 | if (cdf_n < 0.0)
+#> |
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory
# Plot species-level intercept estimates
plot_predictions(mod, condition = 'species',
type = 'link')
+
# Plot species' hierarchical responses to temperature
plot_predictions(mod, condition = c('temperature', 'species', 'species'),
type = 'link')
+
+
+# Plot posterior median estimates of the latent spatial factors
+plot(mod, type = 'smooths', trend_effects = TRUE)
+
-# Plotting species' average geographical predictions against the observed
-# data gives some idea of whether more flexibility in the spatial process
-# may be needed
-plot_predictions(mod, condition = c('lat', 'species', 'species'),
- points = 0.5)
-plot_predictions(mod, condition = c('lon', 'species', 'species'),
- points = 0.5)
+# Or using gratia, if you have it installed
+if(requireNamespace('gratia', quietly = TRUE)){
+ gratia::draw(mod, trend_effects = TRUE)
+}
+
# Calculate (median) residual spatial correlations
med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'),
2, median),
- nrow = N_species, ncol = 3)
+ nrow = N_species, ncol = 4)
cormat <- cov2cor(tcrossprod(med_loadings))
rownames(cormat) <- colnames(cormat) <- levels(dat$species)
@@ -472,12 +573,54 @@ Examples corrplot::corrplot(cormat, type = 'lower', tl.col = 'black',
tl.srt = 45)
}
+
# Posterior predictive checks and ELPD-LOO can ascertain model fit
pp_check(mod, type = "ecdf_overlay_grouped",
- group = "species", ndraws = 50)
+ group = "species", ndraws = 100)
+
loo(mod)
-}
+#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
+#>
+#> Computed from 1000 by 600 log-likelihood matrix
+#>
+#> Estimate SE
+#> elpd_loo -1942.0 41.6
+#> p_loo 351.5 23.9
+#> looic 3884.0 83.1
+#> ------
+#> Monte Carlo SE of elpd_loo is NA.
+#>
+#> Pareto k diagnostic values:
+#> Count Pct. Min. n_eff
+#> (-Inf, 0.5] (good) 292 48.7% 81
+#> (0.5, 0.7] (ok) 171 28.5% 32
+#> (0.7, 1] (bad) 109 18.2% 14
+#> (1, Inf) (very bad) 28 4.7% 2
+#> See help('pareto-k-diagnostic') for details.
+
+# Forecast log(counts) for entire region (site value doesn't matter as long
+# as each spatial location has a different and unique site identifier);
+# note this calculation takes a few minutes because of the need to calculate
+# draws from the stochastic latent factors
+newdata <- st_process %>%
+ dplyr::mutate(species = factor(species,
+ levels = paste0('species_',
+ 1:N_species))) %>%
+ dplyr::group_by(lat, lon) %>%
+ dplyr::mutate(site = dplyr::cur_group_id()) %>%
+ dplyr::ungroup()
+preds <- predict(mod, newdata = newdata)
+
+# Plot the median log(count) predictions on a grid
+newdata$log_count <- preds[,1]
+ggplot(newdata, aes(x = lat, y = lon, col = log_count)) +
+ geom_point(size = 1.5) +
+ facet_wrap(~ species, scales = 'free') +
+ viridis::scale_color_viridis() +
+ theme_classic()
+
+# }
Extract information on default prior distributions for an mvgam model
+![](../logo.png)
Extract information on default prior distributions for an mvgam model
Source:R/get_mvgam_priors.R
get_mvgam_priors.Rd
Usage
get_mvgam_priors(
formula,
trend_formula,
+ factor_formula,
data,
data_train,
family = "poisson",
@@ -88,11 +89,7 @@ Argumentss()
, te()
, ti()
, t2()
, as well as time-varying
-<<<<<<< HEAD
dynamic()
terms and nonparametric gp()
terms, can be added to the right hand side
-=======
-dynamic()
terms, can be added to the right hand side
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to specify that the linear predictor depends on smooth functions of predictors
(or linear functionals of these). In nmix()
family models, the formula
is used to
set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam
@@ -114,14 +111,14 @@ Argumentsjsdgam
+
+
data
A dataframe
or list
containing the model response variable and covariates
-<<<<<<< HEAD
required by the GAM formula
and optional trend_formula
. Most models should include columns:
series
(a factor
index of the series IDs; the number of levels should be identical
-=======
-required by the GAM formula
and optional trend_formula
. Most models should include columns:
-#'
series
(a factor
index of the series IDs; the number of levels should be identical
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
to the number of unique series labels (i.e. n_series = length(levels(data$series))
))
time
(numeric
or integer
index of the time point for each observation).
For most dynamic trend types available in mvgam
(see argument trend_model
), time should be
@@ -450,23 +447,13 @@
Examples#> 6 trend sd sigma ~ student_t(3, 0, 2.5);
#> 7 inverse of NB dispsersion phi_inv ~ student_t(3, 0, 0.1);
#> example_change new_lowerbound new_upperbound
-<<<<<<< HEAD
-#> 1 lambda ~ exponential(0.53); NA NA
-#> 2 mu_raw ~ normal(0.3, 0.11); NA NA
-#> 3 sigma_raw ~ exponential(0.34); NA NA
-#> 4 ar1 ~ normal(-0.87, 0.84); NA NA
-#> 5 ar2 ~ normal(0.56, 0.76); NA NA
-#> 6 sigma ~ exponential(0.88); NA NA
-#> 7 phi_inv ~ normal(0.81, 0.92); NA NA
-=======
-#> 1 lambda ~ exponential(0.27); NA NA
-#> 2 mu_raw ~ normal(0.58, 0.14); NA NA
-#> 3 sigma_raw ~ exponential(0.42); NA NA
-#> 4 ar1 ~ normal(-0.22, 0.6); NA NA
-#> 5 ar2 ~ normal(-0.92, 0.63); NA NA
-#> 6 sigma ~ exponential(0.32); NA NA
-#> 7 phi_inv ~ normal(0.36, 0.79); NA NA
->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f
+#> 1 lambda ~ exponential(0.65); NA NA
+#> 2 mu_raw ~ normal(0.92, 1); NA NA
+#> 3 sigma_raw ~ exponential(0.57); NA NA
+#> 4 ar1 ~ normal(-0.96, 0.9); NA NA
+#> 5 ar2 ~ normal(0.5, 0.48); NA NA
+#> 6 sigma ~ exponential(0.45); NA NA
+#> 7 phi_inv ~ normal(0.71, 0.14); NA NA
# Make a few changes; first, change the population mean for the series-level
# random intercepts
diff --git a/docs/reference/index.html b/docs/reference/index.html
index 76c8fa37..dc02b805 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -1,5 +1,5 @@
-Function reference • mvgam Function reference • mvgam
@@ -52,7 +52,7 @@
- ![]()
Function reference
+ ![](../logo.png)
Function reference
@@ -385,7 +385,7 @@ All functionsseries_to_mvgam()
- This function converts univariate or multivariate time series (
xts
or ts
objects)
-to the format necessary for mvgam
+to the format necessary for mvgam
-
sim_mvgam()
diff --git a/docs/reference/jsdgam-1.png b/docs/reference/jsdgam-1.png
new file mode 100644
index 00000000..6a3f8289
Binary files /dev/null and b/docs/reference/jsdgam-1.png differ
diff --git a/docs/reference/jsdgam-10.png b/docs/reference/jsdgam-10.png
new file mode 100644
index 00000000..ca1876f1
Binary files /dev/null and b/docs/reference/jsdgam-10.png differ
diff --git a/docs/reference/jsdgam-11.png b/docs/reference/jsdgam-11.png
new file mode 100644
index 00000000..16c1f439
Binary files /dev/null and b/docs/reference/jsdgam-11.png differ
diff --git a/docs/reference/jsdgam-2.png b/docs/reference/jsdgam-2.png
new file mode 100644
index 00000000..b75ffd16
Binary files /dev/null and b/docs/reference/jsdgam-2.png differ
diff --git a/docs/reference/jsdgam-3.png b/docs/reference/jsdgam-3.png
new file mode 100644
index 00000000..3e2fdbc7
Binary files /dev/null and b/docs/reference/jsdgam-3.png differ
diff --git a/docs/reference/jsdgam-4.png b/docs/reference/jsdgam-4.png
new file mode 100644
index 00000000..adf629f3
Binary files /dev/null and b/docs/reference/jsdgam-4.png differ
diff --git a/docs/reference/jsdgam-5.png b/docs/reference/jsdgam-5.png
new file mode 100644
index 00000000..a5df20a4
Binary files /dev/null and b/docs/reference/jsdgam-5.png differ
diff --git a/docs/reference/jsdgam-6.png b/docs/reference/jsdgam-6.png
new file mode 100644
index 00000000..f501a71f
Binary files /dev/null and b/docs/reference/jsdgam-6.png differ
diff --git a/docs/reference/jsdgam-7.png b/docs/reference/jsdgam-7.png
new file mode 100644
index 00000000..b3f126c3
Binary files /dev/null and b/docs/reference/jsdgam-7.png differ
diff --git a/docs/reference/jsdgam-8.png b/docs/reference/jsdgam-8.png
new file mode 100644
index 00000000..7ec77b86
Binary files /dev/null and b/docs/reference/jsdgam-8.png differ
diff --git a/docs/reference/jsdgam-9.png b/docs/reference/jsdgam-9.png
new file mode 100644
index 00000000..8cd7bce8
Binary files /dev/null and b/docs/reference/jsdgam-9.png differ
diff --git a/docs/reference/jsdgam.html b/docs/reference/jsdgam.html
index 83602f06..e2b1d787 100644
--- a/docs/reference/jsdgam.html
+++ b/docs/reference/jsdgam.html
@@ -9,7 +9,7 @@
specification is extremely flexible, allowing users to include spatial, temporal or any other type
of predictor effects to more efficiently capture unmodelled residual associations, while the
observation model can also be highly flexible (including all smooth, GP and other effects that
-mvgam can handle)">
@@ -62,7 +62,7 @@
- ![]()
Fit Joint Species Distribution Models in mvgam
+ ![](../logo.png)
Fit Joint Species Distribution Models in mvgam
Source: R/jsdgam.R
jsdgam.Rd
@@ -174,6 +174,31 @@ Argumentspoisson(). See mvgam_families
for more details
+- unit
+The unquoted name of the variable that represents the unit of analysis in data
over
+which latent residuals should be correlated. This variable should be either a
+numeric
or integer
variable in the supplied data
.
+Defaults to time
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case. See examples below
+for further details and explanations
+
+
+- subgr
+A subgrouping factor
variable specifying which element in data
represents the
+different observational units. Defaults to series
to be consistent with other functionalities
+in mvgam, though note that the data need not be time series in this case.
+But note that models that use the hierarchical correlations (by supplying a value for gr
)
+should not include a series
element in data
. Rather, this element will be created internally based
+on the supplied variables for gr
and subgr
. For example, if you are modelling
+counts for a group of species (labelled as species
in the data) across sampling sites
+(labelled as site
in the data) in three
+different geographical regions (labelled as region
), and you would like the residuals to be correlated
+within regions, then you should specify
+unit = site
, gr = region
, and subgr = species
. Internally, mvgam()
will appropriately order
+the data by unit
(in this case, by site
) and create
+the series
element for the data using something like: series = as.factor(paste0(group, '_', subgroup))
+
+
- share_obs_params
logical
. If TRUE
and the family
has additional family-specific observation parameters (e.g. variance components in
@@ -293,19 +318,19 @@
Argumentsmvgam()
+Other arguments to pass to mvgam
Value
-A list
object of classes jsdgam
and mvgam
containing model output,
+
A list
object of class mvgam
containing model output,
the text representation of the model file,
the mgcv model output (for easily generating simulations at
unsampled covariate values), Dunn-Smyth residuals for each series and key information needed
for other functions in the package. See mvgam-class
for details.
-Use methods(class = "mvgam")
and methods(class = "jsdgam")
for an overview on available methods
+Use methods(class = "mvgam")
for an overview on available methods
Details
@@ -313,11 +338,22 @@ Details
learned hierarchically, whereby responses to environmental variables in formula
can be partially
pooled and any latent, unmodelled residual associations can also be learned. In mvgam, both of
these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched
-flexibility to model full communities of species
+flexibility to model full communities of species. When calling jsdgam, an initial State-Space model using
+trend = 'None'
is set up and then modified to include the latent factors and their linear predictors.
+Consequently, you can inspect priors for these models using get_mvgam_priors by supplying the relevant
+formula
, factor_formula
, data
and family
arguments and using trend = 'None'
+
+
+ References
+ Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series.
+Methods in Ecology and Evolution. 14:3, 771-784.
+David I Warton, F Guillaume Blanchet, Robert B O’Hara, Otso Ovaskainen, Sara Taskinen, Steven C
+Walker & Francis KC Hui (2015). So many variables: joint modeling in community ecology.
+Trends in Ecology & Evolution 30:12, 766-779.
Author
@@ -326,7 +362,7 @@ Author<
Examples
- if (FALSE) {
+ # \donttest{
# Simulate latent count data for 500 spatial locations and 10 species
set.seed(0)
N_points <- 500
@@ -352,6 +388,7 @@ Examples# The design matrix for this smooth is in the 'X' slot
des_mat <- sm$X
dim(des_mat)
+#> [1] 500 25
# Function to generate a random covariance matrix where all variables
# have unit variance (i.e. diagonals are all 1)
@@ -416,17 +453,59 @@ Examplesggplot(dat, aes(x = count)) +
geom_histogram() +
facet_wrap(~ species, scales = 'free')
+#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+
ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) +
geom_point(size = 2.25) +
facet_wrap(~ species, scales = 'free') +
viridis::scale_color_viridis() +
theme_classic()
+
+
+# Inspect default priors for the underlying model
+priors <- get_mvgam_priors(formula = count ~
+ # Environmental model includes species-level intercepts
+ # and random slopes for a linear effect of temperature
+ species +
+ s(species, bs = 're', by = temperature),
+
+ # Each factor estimates a different nonlinear spatial process, using
+ # 'by = trend' as in other mvgam State-Space models
+ factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
+
+ # The data
+ data = dat,
+
+ # Poisson observations
+ family = poisson())
+head(priors)
+#> param_name param_length param_info
+#> 1 (Intercept) 1 (Intercept)
+#> 2 speciesspecies_2 1 speciesspecies_2 fixed effect
+#> 3 speciesspecies_3 1 speciesspecies_3 fixed effect
+#> 4 speciesspecies_4 1 speciesspecies_4 fixed effect
+#> 5 speciesspecies_5 1 speciesspecies_5 fixed effect
+#> 6 speciesspecies_6 1 speciesspecies_6 fixed effect
+#> prior example_change
+#> 1 (Intercept) ~ student_t(3, 2.1, 2.5); (Intercept) ~ normal(0, 1);
+#> 2 speciesspecies_2 ~ student_t(3, 0, 2); speciesspecies_2 ~ normal(0, 1);
+#> 3 speciesspecies_3 ~ student_t(3, 0, 2); speciesspecies_3 ~ normal(0, 1);
+#> 4 speciesspecies_4 ~ student_t(3, 0, 2); speciesspecies_4 ~ normal(0, 1);
+#> 5 speciesspecies_5 ~ student_t(3, 0, 2); speciesspecies_5 ~ normal(0, 1);
+#> 6 speciesspecies_6 ~ student_t(3, 0, 2); speciesspecies_6 ~ normal(0, 1);
+#> new_lowerbound new_upperbound
+#> 1 <NA> <NA>
+#> 2 <NA> <NA>
+#> 3 <NA> <NA>
+#> 4 <NA> <NA>
+#> 5 <NA> <NA>
+#> 6 <NA> <NA>
# Fit a JSDM that estimates a hierarchical temperature responses
-# and that uses three latent spatial factors
+# and that uses four latent spatial factors
mod <- jsdgam(formula = count ~
- # Environmental model includes species-level interecepts
+ # Environmental model includes species-level intercepts
# and random slopes for a linear effect of temperature
species +
s(species, bs = 're', by = temperature),
@@ -434,7 +513,10 @@ Examples # Each factor estimates a different nonlinear spatial process, using
# 'by = trend' as in other mvgam State-Space models
factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
- n_lv = 3,
+ n_lv = 4,
+
+ # Change default priors for fixed effect betas to standard normal
+ priors = prior(std_normal(), class = b),
# The data and the grouping variables
data = dat,
@@ -442,28 +524,47 @@ Examples subgr = species,
# Poisson observations
- family = poisson())
+ family = poisson(),
+ chains = 2,
+ silent = 2)
+#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5,
+#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4,
+#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359,
+#> from stan/lib/stan_math/stan/math/prim.hpp:16,
+#> from stan/lib/stan_math/stan/math/rev.hpp:16,
+#> from stan/lib/stan_math/stan/math.hpp:19,
+#> from stan/src/stan/model/model_header.hpp:4,
+#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpIzSX2o/model-acb842615ba.hpp:2:
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)':
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers
+#> 194 | if (cdf_n < 0.0)
+#> |
+#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory
# Plot species-level intercept estimates
plot_predictions(mod, condition = 'species',
type = 'link')
+
# Plot species' hierarchical responses to temperature
plot_predictions(mod, condition = c('temperature', 'species', 'species'),
type = 'link')
+
+
+# Plot posterior median estimates of the latent spatial factors
+plot(mod, type = 'smooths', trend_effects = TRUE)
+
-# Plotting species' average geographical predictions against the observed
-# data gives some idea of whether more flexibility in the spatial process
-# may be needed
-plot_predictions(mod, condition = c('lat', 'species', 'species'),
- points = 0.5)
-plot_predictions(mod, condition = c('lon', 'species', 'species'),
- points = 0.5)
+# Or using gratia, if you have it installed
+if(requireNamespace('gratia', quietly = TRUE)){
+ gratia::draw(mod, trend_effects = TRUE)
+}
+
# Calculate (median) residual spatial correlations
med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'),
2, median),
- nrow = N_species, ncol = 3)
+ nrow = N_species, ncol = 4)
cormat <- cov2cor(tcrossprod(med_loadings))
rownames(cormat) <- colnames(cormat) <- levels(dat$species)
@@ -472,12 +573,54 @@ Examples corrplot::corrplot(cormat, type = 'lower', tl.col = 'black',
tl.srt = 45)
}
+
# Posterior predictive checks and ELPD-LOO can ascertain model fit
pp_check(mod, type = "ecdf_overlay_grouped",
- group = "species", ndraws = 50)
+ group = "species", ndraws = 100)
+
loo(mod)
-}
+#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
+#>
+#> Computed from 1000 by 600 log-likelihood matrix
+#>
+#> Estimate SE
+#> elpd_loo -1942.0 41.6
+#> p_loo 351.5 23.9
+#> looic 3884.0 83.1
+#> ------
+#> Monte Carlo SE of elpd_loo is NA.
+#>
+#> Pareto k diagnostic values:
+#> Count Pct. Min. n_eff
+#> (-Inf, 0.5] (good) 292 48.7% 81
+#> (0.5, 0.7] (ok) 171 28.5% 32
+#> (0.7, 1] (bad) 109 18.2% 14
+#> (1, Inf) (very bad) 28 4.7% 2
+#> See help('pareto-k-diagnostic') for details.
+
+# Forecast log(counts) for entire region (site value doesn't matter as long
+# as each spatial location has a different and unique site identifier);
+# note this calculation takes a few minutes because of the need to calculate
+# draws from the stochastic latent factors
+newdata <- st_process %>%
+ dplyr::mutate(species = factor(species,
+ levels = paste0('species_',
+ 1:N_species))) %>%
+ dplyr::group_by(lat, lon) %>%
+ dplyr::mutate(site = dplyr::cur_group_id()) %>%
+ dplyr::ungroup()
+preds <- predict(mod, newdata = newdata)
+
+# Plot the median log(count) predictions on a grid
+newdata$log_count <- preds[,1]
+ggplot(newdata, aes(x = lat, y = lon, col = log_count)) +
+ geom_point(size = 1.5) +
+ facet_wrap(~ species, scales = 'free') +
+ viridis::scale_color_viridis() +
+ theme_classic()
+
+# }
get_mvgam_priors( formula, trend_formula, + factor_formula, data, data_train, family = "poisson", @@ -88,11 +89,7 @@
,Argumentss()
te()
,ti()
,t2()
, as well as time-varying -<<<<<<< HEADdynamic()
terms and nonparametricgp()
terms, can be added to the right hand side -======= -dynamic()
terms, can be added to the right hand side ->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these). Innmix()
family models, theformula
is used to set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam @@ -114,14 +111,14 @@Argumentsjsdgam + +
data A
dataframe
orlist
containing the model response variable and covariates -<<<<<<< HEAD required by the GAMformula
and optionaltrend_formula
. Most models should include columns:
series
(afactor
index of the series IDs; the number of levels should be identical -======= -required by the GAMformula
and optionaltrend_formula
. Most models should include columns: -#'
series
(afactor
index of the series IDs; the number of levels should be identical ->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f to the number of unique series labels (i.e.n_series = length(levels(data$series))
))
time
(numeric
orinteger
index of the time point for each observation). For most dynamic trend types available inmvgam
(see argumenttrend_model
), time should be @@ -450,23 +447,13 @@Examples#> 6 trend sd sigma ~ student_t(3, 0, 2.5); #> 7 inverse of NB dispsersion phi_inv ~ student_t(3, 0, 0.1); #> example_change new_lowerbound new_upperbound -<<<<<<< HEAD -#> 1 lambda ~ exponential(0.53); NA NA -#> 2 mu_raw ~ normal(0.3, 0.11); NA NA -#> 3 sigma_raw ~ exponential(0.34); NA NA -#> 4 ar1 ~ normal(-0.87, 0.84); NA NA -#> 5 ar2 ~ normal(0.56, 0.76); NA NA -#> 6 sigma ~ exponential(0.88); NA NA -#> 7 phi_inv ~ normal(0.81, 0.92); NA NA -======= -#> 1 lambda ~ exponential(0.27); NA NA -#> 2 mu_raw ~ normal(0.58, 0.14); NA NA -#> 3 sigma_raw ~ exponential(0.42); NA NA -#> 4 ar1 ~ normal(-0.22, 0.6); NA NA -#> 5 ar2 ~ normal(-0.92, 0.63); NA NA -#> 6 sigma ~ exponential(0.32); NA NA -#> 7 phi_inv ~ normal(0.36, 0.79); NA NA ->>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f +#> 1 lambda ~ exponential(0.65); NA NA +#> 2 mu_raw ~ normal(0.92, 1); NA NA +#> 3 sigma_raw ~ exponential(0.57); NA NA +#> 4 ar1 ~ normal(-0.96, 0.9); NA NA +#> 5 ar2 ~ normal(0.5, 0.48); NA NA +#> 6 sigma ~ exponential(0.45); NA NA +#> 7 phi_inv ~ normal(0.71, 0.14); NA NA # Make a few changes; first, change the population mean for the series-level # random intercepts diff --git a/docs/reference/index.html b/docs/reference/index.html index 76c8fa37..dc02b805 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,5 +1,5 @@ -
Function reference • mvgam Function reference • mvgam @@ -52,7 +52,7 @@ -Function reference
+Function reference
@@ -385,7 +385,7 @@All functionsseries_to_mvgam()
- This function converts univariate or multivariate time series (
+to the format necessary forxts
orts
objects) -to the format necessary formvgam
mvgam
sim_mvgam()
diff --git a/docs/reference/jsdgam-1.png b/docs/reference/jsdgam-1.png new file mode 100644 index 00000000..6a3f8289 Binary files /dev/null and b/docs/reference/jsdgam-1.png differ diff --git a/docs/reference/jsdgam-10.png b/docs/reference/jsdgam-10.png new file mode 100644 index 00000000..ca1876f1 Binary files /dev/null and b/docs/reference/jsdgam-10.png differ diff --git a/docs/reference/jsdgam-11.png b/docs/reference/jsdgam-11.png new file mode 100644 index 00000000..16c1f439 Binary files /dev/null and b/docs/reference/jsdgam-11.png differ diff --git a/docs/reference/jsdgam-2.png b/docs/reference/jsdgam-2.png new file mode 100644 index 00000000..b75ffd16 Binary files /dev/null and b/docs/reference/jsdgam-2.png differ diff --git a/docs/reference/jsdgam-3.png b/docs/reference/jsdgam-3.png new file mode 100644 index 00000000..3e2fdbc7 Binary files /dev/null and b/docs/reference/jsdgam-3.png differ diff --git a/docs/reference/jsdgam-4.png b/docs/reference/jsdgam-4.png new file mode 100644 index 00000000..adf629f3 Binary files /dev/null and b/docs/reference/jsdgam-4.png differ diff --git a/docs/reference/jsdgam-5.png b/docs/reference/jsdgam-5.png new file mode 100644 index 00000000..a5df20a4 Binary files /dev/null and b/docs/reference/jsdgam-5.png differ diff --git a/docs/reference/jsdgam-6.png b/docs/reference/jsdgam-6.png new file mode 100644 index 00000000..f501a71f Binary files /dev/null and b/docs/reference/jsdgam-6.png differ diff --git a/docs/reference/jsdgam-7.png b/docs/reference/jsdgam-7.png new file mode 100644 index 00000000..b3f126c3 Binary files /dev/null and b/docs/reference/jsdgam-7.png differ diff --git a/docs/reference/jsdgam-8.png b/docs/reference/jsdgam-8.png new file mode 100644 index 00000000..7ec77b86 Binary files /dev/null and b/docs/reference/jsdgam-8.png differ diff --git a/docs/reference/jsdgam-9.png b/docs/reference/jsdgam-9.png new file mode 100644 index 00000000..8cd7bce8 Binary files /dev/null and b/docs/reference/jsdgam-9.png differ diff --git a/docs/reference/jsdgam.html b/docs/reference/jsdgam.html index 83602f06..e2b1d787 100644 --- a/docs/reference/jsdgam.html +++ b/docs/reference/jsdgam.html @@ -9,7 +9,7 @@ specification is extremely flexible, allowing users to include spatial, temporal or any other type of predictor effects to more efficiently capture unmodelled residual associations, while the observation model can also be highly flexible (including all smooth, GP and other effects that -mvgam can handle)"> @@ -62,7 +62,7 @@ -@@ -174,6 +174,31 @@Fit Joint Species Distribution Models in mvgam
+Fit Joint Species Distribution Models in mvgam
Source:R/jsdgam.R
jsdgam.Rd
Argumentspoisson(). See
mvgam_families
for more details +- unit
+- + + +
The unquoted name of the variable that represents the unit of analysis in
data
over +which latent residuals should be correlated. This variable should be either a +numeric
orinteger
variable in the supplieddata
. +Defaults totime
to be consistent with other functionalities +in mvgam, though note that the data need not be time series in this case. See examples below +for further details and explanations- subgr
+- + +
A subgrouping
factor
variable specifying which element indata
represents the +different observational units. Defaults toseries
to be consistent with other functionalities +in mvgam, though note that the data need not be time series in this case. +But note that models that use the hierarchical correlations (by supplying a value forgr
) +should not include aseries
element indata
. Rather, this element will be created internally based +on the supplied variables forgr
andsubgr
. For example, if you are modelling +counts for a group of species (labelled asspecies
in the data) across sampling sites +(labelled assite
in the data) in three +different geographical regions (labelled asregion
), and you would like the residuals to be correlated +within regions, then you should specify +unit = site
,gr = region
, andsubgr = species
. Internally,mvgam()
will appropriately order +the data byunit
(in this case, bysite
) and create +theseries
element for the data using something like:series = as.factor(paste0(group, '_', subgroup))
- share_obs_params
- +
logical
. IfTRUE
and thefamily
has additional family-specific observation parameters (e.g. variance components in @@ -293,19 +318,19 @@Argumentsmvgam()
Other arguments to pass to mvgam
Value
-A
list
object of classesjsdgam
andmvgam
containing model output, +A
+Uselist
object of classmvgam
containing model output, the text representation of the model file, the mgcv model output (for easily generating simulations at unsampled covariate values), Dunn-Smyth residuals for each series and key information needed for other functions in the package. Seemvgam-class
for details. -Usemethods(class = "mvgam")
andmethods(class = "jsdgam")
for an overview on available methodsmethods(class = "mvgam")
for an overview on available methods+Details
@@ -313,11 +338,22 @@Details learned hierarchically, whereby responses to environmental variables in
formula
can be partially pooled and any latent, unmodelled residual associations can also be learned. In mvgam, both of these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched -flexibility to model full communities of species +flexibility to model full communities of species. When calling jsdgam, an initial State-Space model using +trend = 'None'
is set up and then modified to include the latent factors and their linear predictors. +Consequently, you can inspect priors for these models using get_mvgam_priors by supplying the relevant +formula
,factor_formula
,data
andfamily
arguments and usingtrend = 'None'
++References
+Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series. +Methods in Ecology and Evolution. 14:3, 771-784. +David I Warton, F Guillaume Blanchet, Robert B O’Hara, Otso Ovaskainen, Sara Taskinen, Steven C +Walker & Francis KC Hui (2015). So many variables: joint modeling in community ecology. +Trends in Ecology & Evolution 30:12, 766-779.
Author
@@ -326,7 +362,7 @@Author<
Examples
-if (FALSE) { +
# \donttest{ # Simulate latent count data for 500 spatial locations and 10 species set.seed(0) N_points <- 500 @@ -352,6 +388,7 @@
Examples# The design matrix for this smooth is in the 'X' slot des_mat <- sm$X dim(des_mat) +#> [1] 500 25 # Function to generate a random covariance matrix where all variables # have unit variance (i.e. diagonals are all 1) @@ -416,17 +453,59 @@
Examplesggplot(dat, aes(x = count)) + geom_histogram() + facet_wrap(~ species, scales = 'free') +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`. +
ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) + geom_point(size = 2.25) + facet_wrap(~ species, scales = 'free') + viridis::scale_color_viridis() + theme_classic() +
+ +# Inspect default priors for the underlying model +priors <- get_mvgam_priors(formula = count ~ + # Environmental model includes species-level intercepts + # and random slopes for a linear effect of temperature + species + + s(species, bs = 're', by = temperature), + + # Each factor estimates a different nonlinear spatial process, using + # 'by = trend' as in other mvgam State-Space models + factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1, + + # The data + data = dat, + + # Poisson observations + family = poisson()) +head(priors) +#> param_name param_length param_info +#> 1 (Intercept) 1 (Intercept) +#> 2 speciesspecies_2 1 speciesspecies_2 fixed effect +#> 3 speciesspecies_3 1 speciesspecies_3 fixed effect +#> 4 speciesspecies_4 1 speciesspecies_4 fixed effect +#> 5 speciesspecies_5 1 speciesspecies_5 fixed effect +#> 6 speciesspecies_6 1 speciesspecies_6 fixed effect +#> prior example_change +#> 1 (Intercept) ~ student_t(3, 2.1, 2.5); (Intercept) ~ normal(0, 1); +#> 2 speciesspecies_2 ~ student_t(3, 0, 2); speciesspecies_2 ~ normal(0, 1); +#> 3 speciesspecies_3 ~ student_t(3, 0, 2); speciesspecies_3 ~ normal(0, 1); +#> 4 speciesspecies_4 ~ student_t(3, 0, 2); speciesspecies_4 ~ normal(0, 1); +#> 5 speciesspecies_5 ~ student_t(3, 0, 2); speciesspecies_5 ~ normal(0, 1); +#> 6 speciesspecies_6 ~ student_t(3, 0, 2); speciesspecies_6 ~ normal(0, 1); +#> new_lowerbound new_upperbound +#> 1 <NA> <NA> +#> 2 <NA> <NA> +#> 3 <NA> <NA> +#> 4 <NA> <NA> +#> 5 <NA> <NA> +#> 6 <NA> <NA> # Fit a JSDM that estimates a hierarchical temperature responses -# and that uses three latent spatial factors +# and that uses four latent spatial factors mod <- jsdgam(formula = count ~ - # Environmental model includes species-level interecepts + # Environmental model includes species-level intercepts # and random slopes for a linear effect of temperature species + s(species, bs = 're', by = temperature), @@ -434,7 +513,10 @@
Examples # Each factor estimates a different nonlinear spatial process, using # 'by = trend' as in other mvgam State-Space models factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1, - n_lv = 3, + n_lv = 4, + + # Change default priors for fixed effect betas to standard normal + priors = prior(std_normal(), class = b), # The data and the grouping variables data = dat, @@ -442,28 +524,47 @@
Examples subgr = species, # Poisson observations - family = poisson()) + family = poisson(), + chains = 2, + silent = 2) +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpIzSX2o/model-acb842615ba.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory # Plot species-level intercept estimates plot_predictions(mod, condition = 'species', type = 'link') +
# Plot species' hierarchical responses to temperature plot_predictions(mod, condition = c('temperature', 'species', 'species'), type = 'link') +
+ +# Plot posterior median estimates of the latent spatial factors +plot(mod, type = 'smooths', trend_effects = TRUE) +
-# Plotting species' average geographical predictions against the observed -# data gives some idea of whether more flexibility in the spatial process -# may be needed -plot_predictions(mod, condition = c('lat', 'species', 'species'), - points = 0.5) -plot_predictions(mod, condition = c('lon', 'species', 'species'), - points = 0.5) +# Or using gratia, if you have it installed +if(requireNamespace('gratia', quietly = TRUE)){ + gratia::draw(mod, trend_effects = TRUE) +} +
# Calculate (median) residual spatial correlations med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'), 2, median), - nrow = N_species, ncol = 3) + nrow = N_species, ncol = 4) cormat <- cov2cor(tcrossprod(med_loadings)) rownames(cormat) <- colnames(cormat) <- levels(dat$species) @@ -472,12 +573,54 @@
Examples corrplot::corrplot(cormat, type = 'lower', tl.col = 'black', tl.srt = 45) } +
# Posterior predictive checks and ELPD-LOO can ascertain model fit pp_check(mod, type = "ecdf_overlay_grouped", - group = "species", ndraws = 50) + group = "species", ndraws = 100) +
loo(mod) -} +#> Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details. +#> +#> Computed from 1000 by 600 log-likelihood matrix +#> +#> Estimate SE +#> elpd_loo -1942.0 41.6 +#> p_loo 351.5 23.9 +#> looic 3884.0 83.1 +#> ------ +#> Monte Carlo SE of elpd_loo is NA. +#> +#> Pareto k diagnostic values: +#> Count Pct. Min. n_eff +#> (-Inf, 0.5] (good) 292 48.7% 81 +#> (0.5, 0.7] (ok) 171 28.5% 32 +#> (0.7, 1] (bad) 109 18.2% 14 +#> (1, Inf) (very bad) 28 4.7% 2 +#> See help('pareto-k-diagnostic') for details. + +# Forecast log(counts) for entire region (site value doesn't matter as long +# as each spatial location has a different and unique site identifier); +# note this calculation takes a few minutes because of the need to calculate +# draws from the stochastic latent factors +newdata <- st_process %>% + dplyr::mutate(species = factor(species, + levels = paste0('species_', + 1:N_species))) %>% + dplyr::group_by(lat, lon) %>% + dplyr::mutate(site = dplyr::cur_group_id()) %>% + dplyr::ungroup() +preds <- predict(mod, newdata = newdata) + +# Plot the median log(count) predictions on a grid +newdata$log_count <- preds[,1] +ggplot(newdata, aes(x = lat, y = lon, col = log_count)) + + geom_point(size = 1.5) + + facet_wrap(~ species, scales = 'free') + + viridis::scale_color_viridis() + + theme_classic() +
+# }