diff --git a/R/jsdgam.R b/R/jsdgam.R index ee2f28ba..7abdcb97 100644 --- a/R/jsdgam.R +++ b/R/jsdgam.R @@ -9,13 +9,16 @@ #' #'@inheritParams mvgam #'@inheritParams ZMVN -#'@param factor_formula A \code{character} string specifying the linear predictor -#'effects for the latent factors. These are exactly like the formula +#'@param formula A \code{character} string specifying the GAM observation model formula. These are exactly like the formula #'for a GLM except that smooth terms, `s()`, `te()`, `ti()`, `t2()`, as well as time-varying #'`dynamic()` terms and nonparametric `gp()` terms, can be added to the right hand side #'to specify that the linear predictor depends on smooth functions of predictors #'(or linear functionals of these). Details of the formula syntax used by \pkg{mvgam} #'can be found in \code{\link{mvgam_formulae}} +#'@param factor_formula A \code{character} string specifying the linear predictor +#'effects for the latent factors. Use `by = trend` within calls to functional terms +#'(i.e. `s()`, `te()`, `ti()`, `t2()`, `dynamic()`, or `gp()`) to ensure that each factor +#'captures a different axis of variation. See the example below as an illustration #'@param factor_knots An optional \code{list} containing user specified knot values to #' be used for basis construction of any smooth terms in `factor_formula`. #'For most bases the user simply supplies the knots to be used, which must match up with the `k` value supplied @@ -39,6 +42,11 @@ #' of trials #' \item`beta_binomial()` as for `binomial()` but allows for overdispersion} #'Default is `poisson()`. See \code{\link{mvgam_families}} for more details +#' @param species An unquoted string representing the `factor` variable that indexes +#' the different outcome variables in `data` (usually `'species'` in a JSDM). +#' Defaults to `series` to be consistent with other `mvgam` models +#'@param n_lv \code{integer} the number of latent dynamic factors to use if \code{use_lv == TRUE}. +#'Cannot be `n_species`. Defaults arbitrarily to `2` #'@param ... Other arguments to pass to [mvgam] #'@author Nicholas J Clark #'@details Joint Species Distribution Models allow for responses of multiple species to be @@ -48,7 +56,29 @@ #'flexibility to model full communities of species. When calling [jsdgam], an initial State-Space model using #'`trend = 'None'` is set up and then modified to include the latent factors and their linear predictors. #'Consequently, you can inspect priors for these models using [get_mvgam_priors] by supplying the relevant -#'`formula`, `factor_formula`, `data` and `family` arguments and using `trend = 'None'` +#'`formula`, `factor_formula`, `data` and `family` arguments and keeping the default `trend = 'None'`. +#' +#' In a JSDGAM, the expectation of response \eqn{Y_{ij}} is modelled with +#' +#' \deqn{g(\mu_{ij}) = X_i'\beta + u_i'\theta_j,} +#' +#' where \eqn{g(.)} is a known link function, +#' \eqn{X_i'} is a design matrix of linear predictors (with associated \eqn{\beta} coefficients), +#' \eqn{u_i} are \eqn{n_{lv}}-variate latent factors +#' (\eqn{n_{lv}}<<\eqn{n_{species}}) and +#' \eqn{\theta_j} are species-specific loadings on the latent factors, respectively. The design matrix +#' \eqn{X} and \eqn{\beta} coefficients are constructed and modelled using `formula` and can contain +#' any of `mvgam`'s predictor effects, including random intercepts and slopes, multidimensional penalized +#' smooths, GP effects etc... The factor loadings \eqn{\theta_j} are constrained for identifiability but can +#' be used to reconstruct an estimate of the species' residual variance-covariance matrix (see the example below +#' for an illustration of this). The latent factors are further modelled using: +#'\deqn{ +#'u_i \sim \text{Normal}(Q_i\beta_{factor}, 1) \quad +#'} +#'where the second design matrix \eqn{Q} and associated \eqn{\beta_{factor}} coefficients are +#'constructed and modelled using `factor_formula`. Again, the effects that make up this linear +#'predictor can contain any of `mvgam`'s allowed predictor effects, providing enormous flexibility for +#'modelling species' communities. #'@seealso [mvgam] #'@references Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series. #'Methods in Ecology and Evolution. 14:3, 771-784. @@ -58,7 +88,7 @@ #'@return A \code{list} object of class \code{mvgam} containing model output, #'the text representation of the model file, #'the mgcv model output (for easily generating simulations at -#'unsampled covariate values), Dunn-Smyth residuals for each series and key information needed +#'unsampled covariate values), Dunn-Smyth residuals for each species and key information needed #'for other functions in the package. See \code{\link{mvgam-class}} for details. #'Use `methods(class = "mvgam")` for an overview on available methods #'@examples @@ -195,7 +225,7 @@ #' # The data and the grouping variables #' data = dat, #' unit = site, -#' subgr = species, +#' species = species, #' #' # Poisson observations #' family = poisson(), @@ -261,7 +291,7 @@ jsdgam = function(formula, newdata, family = poisson(), unit = time, - subgr = series, + species = series, share_obs_params = FALSE, priors, n_lv = 2, @@ -286,7 +316,7 @@ jsdgam = function(formula, # Prep the trend so that the data can be structured in the usual # mvgam fashion (with 'time' and 'series' variables) unit <- deparse0(substitute(unit)) - subgr <- deparse0(substitute(subgr)) + subgr <- deparse0(substitute(species)) prepped_trend <- prep_jsdgam_trend(unit = unit, subgr = subgr, data = data) @@ -617,7 +647,7 @@ prep_jsdgam_trend = function(data, unit, subgr){ #' @noRd prep_jsdgam_trendmap = function(data, n_lv){ if(n_lv > nlevels(data$series)){ - stop('Number of factors must be <= number of levels in subgr', + stop('Number of factors must be <= number of levels in species', call. = FALSE) } data.frame(trend = rep(1:n_lv, diff --git a/R/mvgam.R b/R/mvgam.R index 6a9ceb3a..59521d1b 100644 --- a/R/mvgam.R +++ b/R/mvgam.R @@ -100,9 +100,9 @@ #'@param share_obs_params \code{logical}. If \code{TRUE} and the \code{family} #'has additional family-specific observation parameters (e.g. variance components in #'`student_t()` or `gaussian()`, or dispersion parameters in `nb()` or `betar()`), -#'these parameters will be shared across all series. This is handy if you have multiple -#'time series that you believe share some properties, such as being from the same -#'species over different spatial units. Default is \code{FALSE}. +#'these parameters will be shared across all outcome variables. This is handy if you have multiple +#'outcomes (time series in most `mvgam` models) that you believe share some properties, +#'such as being from the same species over different spatial units. Default is \code{FALSE}. #'@param use_lv \code{logical}. If \code{TRUE}, use dynamic factors to estimate series' #'latent trends in a reduced dimension format. Only available for #'`RW()`, `AR()` and `GP()` trend models. Defaults to \code{FALSE} @@ -167,9 +167,9 @@ #'up by any other means. Only available for some families(`poisson()`, `nb()`, `gaussian()`) and #'when using \code{Cmdstan} as the backend #'@param priors An optional \code{data.frame} with prior -#'definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of -#'class `brmsprior` (see. \code{\link[brms]{prior}} for details). See [get_mvgam_priors] and -#''Details' for more information on changing default prior distributions +#'definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +#' objects of class `brmsprior` (see. \code{\link[brms]{prior}} for details). +#' See [get_mvgam_priors] and Details' for more information on changing default prior distributions #'@param refit Logical indicating whether this is a refit, called using [update.mvgam]. Users should leave #'as `FALSE` #'@param lfo Logical indicating whether this is part of a call to [lfo_cv.mvgam]. Returns a diff --git a/docs/reference/jsdgam.html b/docs/reference/jsdgam.html index 49f3bcaa..d0eb1ad8 100644 --- a/docs/reference/jsdgam.html +++ b/docs/reference/jsdgam.html @@ -87,7 +87,7 @@

Usage newdata, family = poisson(), unit = time, - subgr = series, + species = series, share_obs_params = FALSE, priors, n_lv = 2, @@ -115,19 +115,15 @@

Argumentss(), te(), ti(), t2(), as well as time-varying dynamic() terms and nonparametric gp() terms, can be added to the right hand side to specify that the linear predictor depends on smooth functions of predictors -(or linear functionals of these). In nmix() family models, the formula is used to -set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam +(or linear functionals of these). Details of the formula syntax used by mvgam can be found in mvgam_formulae

factor_formula

A character string specifying the linear predictor -effects for the latent factors. These are exactly like the formula -for a GLM except that smooth terms, s(), te(), ti(), t2(), as well as time-varying -dynamic() terms and nonparametric gp() terms, can be added to the right hand side -to specify that the linear predictor depends on smooth functions of predictors -(or linear functionals of these). Details of the formula syntax used by mvgam -can be found in mvgam_formulae

+effects for the latent factors. Use by = trend within calls to functional terms +(i.e. s(), te(), ti(), t2(), dynamic(), or gp()) to ensure that each factor +captures a different axis of variation. See the example below as an illustration

knots
@@ -183,41 +179,31 @@

Argumentsmvgam, though note that the data need not be time series in this case. -But note that models that use the hierarchical correlations (by supplying a value for gr) -should not include a series element in data. Rather, this element will be created internally based -on the supplied variables for gr and subgr. For example, if you are modelling -counts for a group of species (labelled as species in the data) across sampling sites -(labelled as site in the data) in three -different geographical regions (labelled as region), and you would like the residuals to be correlated -within regions, then you should specify -unit = site, gr = region, and subgr = species. Internally, mvgam() will appropriately order -the data by unit (in this case, by site) and create -the series element for the data using something like: series = as.factor(paste0(group, '_', subgroup))

+
species
+

An unquoted string representing the factor variable that indexes +the different outcome variables in data (usually 'species' in a JSDM). +Defaults to series to be consistent with other mvgam models

share_obs_params

logical. If TRUE and the family has additional family-specific observation parameters (e.g. variance components in student_t() or gaussian(), or dispersion parameters in nb() or betar()), -these parameters will be shared across all series. This is handy if you have multiple -time series that you believe share some properties, such as being from the same -species over different spatial units. Default is FALSE.

+these parameters will be shared across all outcome variables. This is handy if you have multiple +outcomes (time series in most mvgam models) that you believe share some properties, +such as being from the same species over different spatial units. Default is FALSE.

priors

An optional data.frame with prior -definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of -class brmsprior (see. prior for details). See get_mvgam_priors and -'Details' for more information on changing default prior distributions

+definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +objects of class brmsprior (see. prior for details). +See get_mvgam_priors and Details' for more information on changing default prior distributions

n_lv

integer the number of latent dynamic factors to use if use_lv == TRUE. -Cannot be > n_series. Defaults arbitrarily to min(2, floor(n_series / 2))

+Cannot be n_species. Defaults arbitrarily to 2

chains
@@ -328,7 +314,7 @@

Value

A list object of class mvgam containing model output, the text representation of the model file, the mgcv model output (for easily generating simulations at -unsampled covariate values), Dunn-Smyth residuals for each series and key information needed +unsampled covariate values), Dunn-Smyth residuals for each species and key information needed for other functions in the package. See mvgam-class for details. Use methods(class = "mvgam") for an overview on available methods

@@ -341,7 +327,26 @@

Detailstrend = 'None' is set up and then modified to include the latent factors and their linear predictors. Consequently, you can inspect priors for these models using get_mvgam_priors by supplying the relevant -formula, factor_formula, data and family arguments and using trend = 'None'

+formula, factor_formula, data and family arguments and keeping the default trend = 'None'.

+

In a JSDGAM, the expectation of response \(Y_{ij}\) is modelled with

+

$$g(\mu_{ij}) = X_i'\beta + u_i'\theta_j,$$

+

where \(g(.)\) is a known link function, +\(X_i'\) is a design matrix of linear predictors (with associated \(\beta\) coefficients), +\(u_i\) are \(n_{lv}\)-variate latent factors +(\(n_{lv}\)<<\(n_{species}\)) and +\(\theta_j\) are species-specific loadings on the latent factors, respectively. The design matrix +\(X\) and \(\beta\) coefficients are constructed and modelled using formula and can contain +any of mvgam's predictor effects, including random intercepts and slopes, multidimensional penalized +smooths, GP effects etc... The factor loadings \(\theta_j\) are constrained for identifiability but can +be used to reconstruct an estimate of the species' residual variance-covariance matrix (see the example below +for an illustration of this). The latent factors are further modelled using: +$$ +u_i \sim \text{Normal}(Q_i\beta_{factor}, 1) \quad +$$ +where the second design matrix \(Q\) and associated \(\beta_{factor}\) coefficients are +constructed and modelled using factor_formula. Again, the effects that make up this linear +predictor can contain any of mvgam's allowed predictor effects, providing enormous flexibility for +modelling species' communities.

References

@@ -450,7 +455,6 @@

Examples # View the count distributions for each species library(ggplot2) -#> Warning: package ‘ggplot2’ was built under R version 4.3.3 ggplot(dat, aes(x = count)) + geom_histogram() + facet_wrap(~ species, scales = 'free') @@ -521,7 +525,7 @@

Examples # The data and the grouping variables data = dat, unit = site, - subgr = species, + species = species, # Poisson observations family = poisson(), @@ -534,7 +538,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpWsdp7k/model-521078b22309.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpsDr2XI/model-d73c513b78b6.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) diff --git a/docs/reference/mvgam-6.png b/docs/reference/mvgam-6.png index 94e5107d..200a7b0e 100644 Binary files a/docs/reference/mvgam-6.png and b/docs/reference/mvgam-6.png differ diff --git a/docs/reference/mvgam.html b/docs/reference/mvgam.html index 8b69a43c..683f8d1c 100644 --- a/docs/reference/mvgam.html +++ b/docs/reference/mvgam.html @@ -247,9 +247,9 @@

Argumentsstudent_t() or gaussian(), or dispersion parameters in nb() or betar()), -these parameters will be shared across all series. This is handy if you have multiple -time series that you believe share some properties, such as being from the same -species over different spatial units. Default is FALSE.

+these parameters will be shared across all outcome variables. This is handy if you have multiple +outcomes (time series in most mvgam models) that you believe share some properties, +such as being from the same species over different spatial units. Default is FALSE.

use_lv
@@ -350,9 +350,9 @@

Argumentsprior for details). See get_mvgam_priors and -'Details' for more information on changing default prior distributions

+definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +objects of class brmsprior (see. prior for details). +See get_mvgam_priors and Details' for more information on changing default prior distributions

refit
@@ -746,7 +746,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c5167b0e.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc6b87dbb.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -772,8 +772,8 @@

Examples#> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) -#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) +#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) @@ -784,14 +784,14 @@

Examples#> #> Both chains finished successfully. #> Mean chain execution time: 0.9 seconds. -#> Total execution time: 1.0 seconds. +#> Total execution time: 1.1 seconds. #> # Extract the model summary summary(mod1) #> GAM formula: #> y ~ s(season, bs = "cc", k = 6) -#> <environment: 0x0000022096fc2b20> +#> <environment: 0x000001b209a819f0> #> #> Family: #> poisson @@ -844,7 +844,7 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 01 3:27:18 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Sat Nov 02 2:09:36 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -870,7 +870,7 @@

Examplesstr(fc) #> List of 16 #> $ call :Class 'formula' language y ~ s(season, bs = "cc", k = 6) -#> .. ..- attr(*, ".Environment")=<environment: 0x0000022096fc2b20> +#> .. ..- attr(*, ".Environment")=<environment: 0x000001b209a819f0> #> $ trend_call : NULL #> $ family : chr "poisson" #> $ family_pars : NULL @@ -912,13 +912,13 @@

Examples#> .. .. ..$ : NULL #> .. .. ..$ : chr [1:60] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ... #> $ forecasts :List of 3 -#> ..$ series_1: int [1:1000, 1:20] 7 9 5 3 7 6 3 19 4 12 ... -#> ..$ series_2: int [1:1000, 1:20] 15 9 7 8 14 10 1 8 5 5 ... -#> ..$ series_3: int [1:1000, 1:20] 8 7 6 8 21 8 7 26 5 13 ... +#> ..$ series_1: int [1:1000, 1:20] 2 10 9 35 8 8 11 9 31 15 ... +#> ..$ series_2: int [1:1000, 1:20] 15 4 10 13 7 11 11 12 8 19 ... +#> ..$ series_3: int [1:1000, 1:20] 2 11 5 7 18 6 9 7 11 5 ... #> - attr(*, "class")= chr "mvgam_forecast" plot(fc) #> Out of sample DRPS: -#> 54.012951 +#> 54.003704 # Plot the estimated seasonal smooth function @@ -1023,7 +1023,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c39680a.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc6f536d12.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -1035,9 +1035,9 @@

Examples#> Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup) #> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup) +#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) -#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) @@ -1049,11 +1049,11 @@

Examples#> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) -#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) -#> Chain 1 finished in 1.1 seconds. #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) +#> Chain 1 finished in 1.2 seconds. #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) @@ -1223,7 +1223,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c49b505.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc7acf1b6a.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -1237,9 +1237,9 @@

Examples#> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) -#> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) +#> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) @@ -1251,9 +1251,9 @@

Examples#> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) -#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 1 finished in 0.8 seconds. @@ -1268,7 +1268,7 @@

Examplessummary(mod) #> GAM formula: #> out ~ gp(time, by = temp, c = 5/4, k = 40, scale = FALSE) -#> <environment: 0x0000022096fc2b20> +#> <environment: 0x000001b209a819f0> #> #> Family: #> gaussian @@ -1353,7 +1353,7 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 01 3:28:33 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Sat Nov 02 2:11:19 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -1399,7 +1399,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c13fdc66.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc46896bd7.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -1414,8 +1414,8 @@

Examples#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) -#> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) +#> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) @@ -1424,20 +1424,20 @@

Examples#> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) -#> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) -#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) +#> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) -#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) -#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) -#> Chain 1 finished in 3.7 seconds. +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 2 finished in 3.8 seconds. +#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 1 finished in 4.1 seconds. #> #> Both chains finished successfully. -#> Mean chain execution time: 3.7 seconds. -#> Total execution time: 3.9 seconds. +#> Mean chain execution time: 3.9 seconds. +#> Total execution time: 4.2 seconds. #> # Inspect the model file to see the modification to the linear predictor @@ -1621,7 +1621,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c2ff22082.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc34d76735.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -1635,8 +1635,8 @@

Examples#> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) -#> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) +#> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) @@ -1664,7 +1664,7 @@

Examplessummary(mod) #> GAM formula: #> cbind(y, ntrials) ~ series + s(x, by = series) -#> <environment: 0x0000022096fc2b20> +#> <environment: 0x000001b209a819f0> #> #> Family: #> binomial @@ -1724,7 +1724,7 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 01 3:29:34 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Sat Nov 02 2:12:34 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) diff --git a/docs/reference/update.mvgam-1.png b/docs/reference/update.mvgam-1.png index c636889a..f9adfc82 100644 Binary files a/docs/reference/update.mvgam-1.png and b/docs/reference/update.mvgam-1.png differ diff --git a/docs/reference/update.mvgam-2.png b/docs/reference/update.mvgam-2.png index 2c75b286..ef1f5a28 100644 Binary files a/docs/reference/update.mvgam-2.png and b/docs/reference/update.mvgam-2.png differ diff --git a/docs/reference/update.mvgam-3.png b/docs/reference/update.mvgam-3.png index 38e51387..2ad057a7 100644 Binary files a/docs/reference/update.mvgam-3.png and b/docs/reference/update.mvgam-3.png differ diff --git a/docs/reference/update.mvgam.html b/docs/reference/update.mvgam.html index be4d25a1..c136b98d 100644 --- a/docs/reference/update.mvgam.html +++ b/docs/reference/update.mvgam.html @@ -212,16 +212,16 @@

Argumentsstudent_t() or gaussian(), or dispersion parameters in nb() or betar()), -these parameters will be shared across all series. This is handy if you have multiple -time series that you believe share some properties, such as being from the same -species over different spatial units. Default is FALSE.

+these parameters will be shared across all outcome variables. This is handy if you have multiple +outcomes (time series in most mvgam models) that you believe share some properties, +such as being from the same species over different spatial units. Default is FALSE.

priors

An optional data.frame with prior -definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of -class brmsprior (see. prior for details). See get_mvgam_priors and -'Details' for more information on changing default prior distributions

+definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +objects of class brmsprior (see. prior for details). +See get_mvgam_priors and Details' for more information on changing default prior distributions

chains
@@ -298,22 +298,35 @@

Examples chains = 2) #> Compiling Stan program using cmdstanr #> +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc5c134cd2.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory #> Start sampling #> Running MCMC with 2 parallel chains... #> #> Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup) #> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) -#> Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup) -#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) -#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) +#> Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup) #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) +#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) +#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) @@ -323,19 +336,19 @@

Examples#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) -#> Chain 1 finished in 0.3 seconds. #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 1 finished in 0.3 seconds. #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 2 finished in 0.4 seconds. #> #> Both chains finished successfully. #> Mean chain execution time: 0.4 seconds. -#> Total execution time: 0.5 seconds. +#> Total execution time: 0.6 seconds. #> summary(mod) #> GAM formula: #> y ~ s(season, bs = "cc") -#> <environment: 0x00000220a4ce1640> +#> <environment: 0x000001b214d3f240> #> #> Family: #> poisson @@ -359,27 +372,27 @@

Examples#> #> #> GAM coefficient (beta) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -0.84 -0.46 -0.11 1 472 -#> s(season).1 -1.30 -0.53 0.04 1 578 -#> s(season).2 -2.00 -1.20 -0.45 1 566 -#> s(season).3 -2.30 -1.40 -0.56 1 566 -#> s(season).4 -1.90 -1.10 -0.31 1 759 -#> s(season).5 -0.97 -0.32 0.39 1 618 -#> s(season).6 -0.45 0.14 0.73 1 588 -#> s(season).7 -0.05 0.56 1.10 1 756 -#> s(season).8 0.72 1.20 1.70 1 512 +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept) 0.550 0.75 0.930 1 859 +#> s(season).1 -0.890 -0.48 -0.094 1 860 +#> s(season).2 -0.730 -0.24 0.230 1 804 +#> s(season).3 0.002 0.41 0.820 1 908 +#> s(season).4 0.390 0.76 1.100 1 701 +#> s(season).5 0.370 0.72 1.100 1 776 +#> s(season).6 0.310 0.71 1.100 1 849 +#> s(season).7 0.190 0.61 1.000 1 787 +#> s(season).8 -0.530 -0.13 0.240 1 530 #> #> Approximate significance of GAM smooths: #> edf Ref.df Chi.sq p-value -#> s(season) 3.69 8 55.4 9e-05 *** +#> s(season) 4.69 8 23.5 0.00064 *** #> --- #> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 #> #> Latent trend parameter AR estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] -0.84 0.17 0.91 1 646 -#> sigma[1] 0.01 0.21 0.62 1 257 +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] -0.99 -0.50 0.21 1.01 250 +#> sigma[1] 0.22 0.43 0.70 1.00 341 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters @@ -388,7 +401,7 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 01 3:31:30 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Sat Nov 02 2:13:11 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -407,7 +420,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c71b95a48.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc357d2ff8.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -418,26 +431,26 @@

Examples#> #> Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup) #> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup) -#> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup) -#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) -#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) +#> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) +#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) +#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) -#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) -#> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) -#> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) +#> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 1 finished in 0.4 seconds. @@ -445,12 +458,12 @@

Examples#> #> Both chains finished successfully. #> Mean chain execution time: 0.4 seconds. -#> Total execution time: 0.5 seconds. +#> Total execution time: 0.6 seconds. #> summary(updated_mod) #> GAM formula: #> y ~ s(season, bs = "cc") -#> <environment: 0x00000220a4ce1640> +#> <environment: 0x000001b214d3f240> #> #> Family: #> poisson @@ -474,28 +487,28 @@

Examples#> #> #> GAM coefficient (beta) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -0.850 -0.46 -0.160 1 736 -#> s(season).1 -1.200 -0.52 0.055 1 996 -#> s(season).2 -2.000 -1.20 -0.440 1 845 -#> s(season).3 -2.300 -1.40 -0.620 1 710 -#> s(season).4 -1.900 -1.10 -0.360 1 845 -#> s(season).5 -0.960 -0.32 0.350 1 842 -#> s(season).6 -0.480 0.13 0.780 1 974 -#> s(season).7 -0.058 0.56 1.100 1 687 -#> s(season).8 0.720 1.20 1.800 1 637 +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept) 0.560 0.76 0.94 1 665 +#> s(season).1 -0.900 -0.48 -0.12 1 844 +#> s(season).2 -0.740 -0.23 0.23 1 699 +#> s(season).3 0.012 0.40 0.80 1 647 +#> s(season).4 0.390 0.76 1.20 1 458 +#> s(season).5 0.300 0.70 1.10 1 607 +#> s(season).6 0.290 0.71 1.10 1 623 +#> s(season).7 0.250 0.62 1.10 1 585 +#> s(season).8 -0.520 -0.12 0.25 1 758 #> #> Approximate significance of GAM smooths: -#> edf Ref.df Chi.sq p-value -#> s(season) 4.11 8 55.5 1e-04 *** +#> edf Ref.df Chi.sq p-value +#> s(season) 4.24 8 25.3 0.0082 ** #> --- #> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 #> #> Latent trend AR parameter estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] -0.900 0.096 0.94 1 951 -#> ar2[1] -0.930 0.042 0.95 1 884 -#> sigma[1] 0.014 0.170 0.52 1 377 +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] -0.96 -0.50 0.19 1.01 216 +#> ar2[1] -0.64 0.23 0.94 1.02 156 +#> sigma[1] 0.21 0.39 0.65 1.00 312 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters @@ -504,7 +517,7 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 01 3:32:00 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Sat Nov 02 2:13:56 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -528,7 +541,7 @@

Examples#> from stan/lib/stan_math/stan/math/rev.hpp:16, #> from stan/lib/stan_math/stan/math.hpp:19, #> from stan/src/stan/model/model_header.hpp:4, -#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpARA4r4/model-1333c4af798e.hpp:2: +#> from C:/Users/uqnclar2/AppData/Local/Temp/RtmpSm0PAw/model-167fc7bf96e.hpp:2: #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': #> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers #> 194 | if (cdf_n < 0.0) @@ -555,23 +568,23 @@

Examples#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) -#> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) -#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 1 finished in 0.4 seconds. #> Chain 2 finished in 0.4 seconds. #> #> Both chains finished successfully. #> Mean chain execution time: 0.4 seconds. -#> Total execution time: 0.5 seconds. +#> Total execution time: 0.6 seconds. #> summary(updated_mod) #> GAM formula: #> cbind(y, trials) ~ s(season, bs = "cc") -#> <environment: 0x00000220a4ce1640> +#> <environment: 0x000001b214d3f240> #> #> Family: #> binomial @@ -595,27 +608,27 @@

Examples#> #> #> GAM coefficient (beta) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) -3.80 -3.40 -3.1000 1.00 620 -#> s(season).1 -1.30 -0.54 0.0071 1.00 544 -#> s(season).2 -2.00 -1.20 -0.4600 1.00 558 -#> s(season).3 -2.30 -1.40 -0.5600 1.01 364 -#> s(season).4 -1.90 -1.10 -0.3400 1.01 576 -#> s(season).5 -1.00 -0.29 0.4300 1.00 637 -#> s(season).6 -0.50 0.14 0.7800 1.00 776 -#> s(season).7 -0.08 0.55 1.2000 1.00 894 -#> s(season).8 0.64 1.30 1.8000 1.01 199 +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept) -2.800 -2.50 -2.400 1.00 447 +#> s(season).1 -0.930 -0.52 -0.099 1.00 632 +#> s(season).2 -0.740 -0.25 0.240 1.00 626 +#> s(season).3 -0.031 0.43 0.900 1.00 480 +#> s(season).4 0.430 0.85 1.300 1.01 341 +#> s(season).5 0.380 0.81 1.200 1.00 453 +#> s(season).6 0.360 0.79 1.200 1.00 749 +#> s(season).7 0.200 0.63 1.200 1.00 456 +#> s(season).8 -0.620 -0.14 0.230 1.01 441 #> #> Approximate significance of GAM smooths: -#> edf Ref.df Chi.sq p-value -#> s(season) 3.59 8 59.7 0.00013 *** +#> edf Ref.df Chi.sq p-value +#> s(season) 4.32 8 28.9 0.0045 ** #> --- #> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 #> #> Latent trend parameter AR estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] -0.880 0.18 0.95 1.01 543 -#> sigma[1] 0.011 0.25 0.69 1.02 220 +#> 2.5% 50% 97.5% Rhat n_eff +#> ar1[1] -0.95 -0.48 0.039 1.00 254 +#> sigma[1] 0.33 0.56 0.830 1.01 233 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters @@ -624,7 +637,7 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 01 3:32:28 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Sat Nov 02 2:14:41 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) diff --git a/man/jsdgam.Rd b/man/jsdgam.Rd index 1523f169..1434c292 100644 --- a/man/jsdgam.Rd +++ b/man/jsdgam.Rd @@ -13,7 +13,7 @@ jsdgam( newdata, family = poisson(), unit = time, - subgr = series, + species = series, share_obs_params = FALSE, priors, n_lv = 2, @@ -38,17 +38,13 @@ jsdgam( for a GLM except that smooth terms, \code{s()}, \code{te()}, \code{ti()}, \code{t2()}, as well as time-varying \code{dynamic()} terms and nonparametric \code{gp()} terms, can be added to the right hand side to specify that the linear predictor depends on smooth functions of predictors -(or linear functionals of these). In \code{nmix()} family models, the \code{formula} is used to -set up a linear predictor for the detection probability. Details of the formula syntax used by \pkg{mvgam} +(or linear functionals of these). Details of the formula syntax used by \pkg{mvgam} can be found in \code{\link{mvgam_formulae}}} \item{factor_formula}{A \code{character} string specifying the linear predictor -effects for the latent factors. These are exactly like the formula -for a GLM except that smooth terms, \code{s()}, \code{te()}, \code{ti()}, \code{t2()}, as well as time-varying -\code{dynamic()} terms and nonparametric \code{gp()} terms, can be added to the right hand side -to specify that the linear predictor depends on smooth functions of predictors -(or linear functionals of these). Details of the formula syntax used by \pkg{mvgam} -can be found in \code{\link{mvgam_formulae}}} +effects for the latent factors. Use \code{by = trend} within calls to functional terms +(i.e. \code{s()}, \code{te()}, \code{ti()}, \code{t2()}, \code{dynamic()}, or \code{gp()}) to ensure that each factor +captures a different axis of variation. See the example below as an illustration} \item{knots}{An optional \code{list} containing user specified knot values to be used for basis construction. For most bases the user simply supplies the knots to be used, which must match up with the \code{k} value supplied @@ -93,34 +89,24 @@ Defaults to \code{time} to be consistent with other functionalities in \pkg{mvgam}, though note that the data need not be time series in this case. See examples below for further details and explanations} -\item{subgr}{A subgrouping \code{factor} variable specifying which element in \code{data} represents the -different observational units. Defaults to \code{series} to be consistent with other functionalities -in \pkg{mvgam}, though note that the data need not be time series in this case. -But note that models that use the hierarchical correlations (by supplying a value for \code{gr}) -\emph{should not} include a \code{series} element in \code{data}. Rather, this element will be created internally based -on the supplied variables for \code{gr} and \code{subgr}. For example, if you are modelling -counts for a group of species (labelled as \code{species} in the data) across sampling sites -(labelled as \code{site} in the data) in three -different geographical regions (labelled as \code{region}), and you would like the residuals to be correlated -within regions, then you should specify -\code{unit = site}, \code{gr = region}, and \code{subgr = species}. Internally, \code{mvgam()} will appropriately order -the data by \code{unit} (in this case, by \code{site}) and create -the \code{series} element for the data using something like: \code{series = as.factor(paste0(group, '_', subgroup))}} +\item{species}{An unquoted string representing the \code{factor} variable that indexes +the different outcome variables in \code{data} (usually \code{'species'} in a JSDM). +Defaults to \code{series} to be consistent with other \code{mvgam} models} \item{share_obs_params}{\code{logical}. If \code{TRUE} and the \code{family} has additional family-specific observation parameters (e.g. variance components in \code{student_t()} or \code{gaussian()}, or dispersion parameters in \code{nb()} or \code{betar()}), -these parameters will be shared across all series. This is handy if you have multiple -time series that you believe share some properties, such as being from the same -species over different spatial units. Default is \code{FALSE}.} +these parameters will be shared across all outcome variables. This is handy if you have multiple +outcomes (time series in most \code{mvgam} models) that you believe share some properties, +such as being from the same species over different spatial units. Default is \code{FALSE}.} \item{priors}{An optional \code{data.frame} with prior -definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of -class \code{brmsprior} (see. \code{\link[brms]{prior}} for details). See \link{get_mvgam_priors} and -'Details' for more information on changing default prior distributions} +definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +objects of class \code{brmsprior} (see. \code{\link[brms]{prior}} for details). +See \link{get_mvgam_priors} and Details' for more information on changing default prior distributions} \item{n_lv}{\code{integer} the number of latent dynamic factors to use if \code{use_lv == TRUE}. -Cannot be \code{> n_series}. Defaults arbitrarily to \code{min(2, floor(n_series / 2))}} +Cannot be \code{n_species}. Defaults arbitrarily to \code{2}} \item{chains}{\code{integer} specifying the number of parallel chains for the model. Ignored if \code{algorithm \%in\% c('meanfield', 'fullrank', 'pathfinder', 'laplace')}} @@ -199,7 +185,7 @@ the size of the returned object, unless \code{run_model == FALSE}} A \code{list} object of class \code{mvgam} containing model output, the text representation of the model file, the mgcv model output (for easily generating simulations at -unsampled covariate values), Dunn-Smyth residuals for each series and key information needed +unsampled covariate values), Dunn-Smyth residuals for each species and key information needed for other functions in the package. See \code{\link{mvgam-class}} for details. Use \code{methods(class = "mvgam")} for an overview on available methods } @@ -219,7 +205,29 @@ these effects can be modelled with the full power of latent factor Hierarchical flexibility to model full communities of species. When calling \link{jsdgam}, an initial State-Space model using \code{trend = 'None'} is set up and then modified to include the latent factors and their linear predictors. Consequently, you can inspect priors for these models using \link{get_mvgam_priors} by supplying the relevant -\code{formula}, \code{factor_formula}, \code{data} and \code{family} arguments and using \code{trend = 'None'} +\code{formula}, \code{factor_formula}, \code{data} and \code{family} arguments and keeping the default \code{trend = 'None'}. + +In a JSDGAM, the expectation of response \eqn{Y_{ij}} is modelled with + +\deqn{g(\mu_{ij}) = X_i'\beta + u_i'\theta_j,} + +where \eqn{g(.)} is a known link function, +\eqn{X_i'} is a design matrix of linear predictors (with associated \eqn{\beta} coefficients), +\eqn{u_i} are \eqn{n_{lv}}-variate latent factors +(\eqn{n_{lv}}<<\eqn{n_{species}}) and +\eqn{\theta_j} are species-specific loadings on the latent factors, respectively. The design matrix +\eqn{X} and \eqn{\beta} coefficients are constructed and modelled using \code{formula} and can contain +any of \code{mvgam}'s predictor effects, including random intercepts and slopes, multidimensional penalized +smooths, GP effects etc... The factor loadings \eqn{\theta_j} are constrained for identifiability but can +be used to reconstruct an estimate of the species' residual variance-covariance matrix (see the example below +for an illustration of this). The latent factors are further modelled using: +\deqn{ +u_i \sim \text{Normal}(Q_i\beta_{factor}, 1) \quad +} +where the second design matrix \eqn{Q} and associated \eqn{\beta_{factor}} coefficients are +constructed and modelled using \code{factor_formula}. Again, the effects that make up this linear +predictor can contain any of \code{mvgam}'s allowed predictor effects, providing enormous flexibility for +modelling species' communities. } \examples{ \donttest{ @@ -355,7 +363,7 @@ mod <- jsdgam(formula = count ~ # The data and the grouping variables data = dat, unit = site, - subgr = species, + species = species, # Poisson observations family = poisson(), diff --git a/man/mvgam.Rd b/man/mvgam.Rd index f2769e68..72f8ce14 100644 --- a/man/mvgam.Rd +++ b/man/mvgam.Rd @@ -148,9 +148,9 @@ See \code{\link{mvgam_families}} for more details} \item{share_obs_params}{\code{logical}. If \code{TRUE} and the \code{family} has additional family-specific observation parameters (e.g. variance components in \code{student_t()} or \code{gaussian()}, or dispersion parameters in \code{nb()} or \code{betar()}), -these parameters will be shared across all series. This is handy if you have multiple -time series that you believe share some properties, such as being from the same -species over different spatial units. Default is \code{FALSE}.} +these parameters will be shared across all outcome variables. This is handy if you have multiple +outcomes (time series in most \code{mvgam} models) that you believe share some properties, +such as being from the same species over different spatial units. Default is \code{FALSE}.} \item{use_lv}{\code{logical}. If \code{TRUE}, use dynamic factors to estimate series' latent trends in a reduced dimension format. Only available for @@ -228,9 +228,9 @@ up by any other means. Only available for some families(\code{poisson()}, \code{ when using \code{Cmdstan} as the backend} \item{priors}{An optional \code{data.frame} with prior -definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of -class \code{brmsprior} (see. \code{\link[brms]{prior}} for details). See \link{get_mvgam_priors} and -'Details' for more information on changing default prior distributions} +definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +objects of class \code{brmsprior} (see. \code{\link[brms]{prior}} for details). +See \link{get_mvgam_priors} and Details' for more information on changing default prior distributions} \item{refit}{Logical indicating whether this is a refit, called using \link{update.mvgam}. Users should leave as \code{FALSE}} diff --git a/man/update.mvgam.Rd b/man/update.mvgam.Rd index bcbb1439..f9a1eaed 100644 --- a/man/update.mvgam.Rd +++ b/man/update.mvgam.Rd @@ -137,14 +137,14 @@ See \code{\link{mvgam_families}} for more details} \item{share_obs_params}{\code{logical}. If \code{TRUE} and the \code{family} has additional family-specific observation parameters (e.g. variance components in \code{student_t()} or \code{gaussian()}, or dispersion parameters in \code{nb()} or \code{betar()}), -these parameters will be shared across all series. This is handy if you have multiple -time series that you believe share some properties, such as being from the same -species over different spatial units. Default is \code{FALSE}.} +these parameters will be shared across all outcome variables. This is handy if you have multiple +outcomes (time series in most \code{mvgam} models) that you believe share some properties, +such as being from the same species over different spatial units. Default is \code{FALSE}.} \item{priors}{An optional \code{data.frame} with prior -definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of -class \code{brmsprior} (see. \code{\link[brms]{prior}} for details). See \link{get_mvgam_priors} and -'Details' for more information on changing default prior distributions} +definitions (in JAGS or Stan syntax) or, preferentially, If using Stan, a vector containing +objects of class \code{brmsprior} (see. \code{\link[brms]{prior}} for details). +See \link{get_mvgam_priors} and Details' for more information on changing default prior distributions} \item{chains}{\code{integer} specifying the number of parallel chains for the model. Ignored if \code{algorithm \%in\% c('meanfield', 'fullrank', 'pathfinder', 'laplace')}} diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index a406cdbb..f10ab246 100644 Binary files a/tests/testthat/Rplots.pdf and b/tests/testthat/Rplots.pdf differ diff --git a/tests/testthat/test-jsdgam.R b/tests/testthat/test-jsdgam.R index dc1f25f4..09920e8b 100644 --- a/tests/testthat/test-jsdgam.R +++ b/tests/testthat/test-jsdgam.R @@ -321,7 +321,7 @@ test_that("family must be correctly specified", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = 'banana'), 'family not recognized') @@ -337,7 +337,7 @@ test_that("response variable must be specified", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = nb()), 'Not sure how to deal with this response variable specification') @@ -353,14 +353,14 @@ test_that("unit must exist in data", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = banana, - subgr = taxon, + species = taxon, n_lv = 3, family = nb()), 'Variable "banana" not found in data') }) -test_that("subgr must exist in data", { +test_that("species must exist in data", { expect_error(jsdgam(formula = abundance ~ # Environmental model includes species-level interecepts # and random slopes for a linear effect of reflection @@ -370,13 +370,13 @@ test_that("subgr must exist in data", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = site, - subgr = banana, + species = banana, n_lv = 3, family = nb()), 'Variable "banana" not found in data') }) -test_that("subgr must be a factor in data", { +test_that("species must be a factor in data", { spiderdat$taxon <- as.numeric(spiderdat$taxon) expect_error(jsdgam(formula = abundance ~ # Environmental model includes species-level interecepts @@ -387,7 +387,7 @@ test_that("subgr must be a factor in data", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = nb()), 'Variable "taxon" must be a factor type') @@ -404,13 +404,13 @@ test_that("unit must be a numeric / integer in data", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = nb()), 'Variable "site" must be either numeric or integer type') }) -test_that("n_lv must be <= number of subgroups", { +test_that("n_lv must be <= number of species", { expect_error(jsdgam(formula = abundance ~ # Environmental model includes species-level interecepts # and random slopes for a linear effect of reflection @@ -420,10 +420,10 @@ test_that("n_lv must be <= number of subgroups", { factor_formula = ~ s(soil.dry, k = 5, by = trend) - 1, data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 15, family = nb()), - 'Number of factors must be <= number of levels in subgr') + 'Number of factors must be <= number of levels in species') }) test_that("knots must be a list", { @@ -440,7 +440,7 @@ test_that("knots must be a list", { length.out = 4), data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = nb(), run_model = FALSE), @@ -461,7 +461,7 @@ test_that("errors about knot lengths should be propagated from mgcv", { length.out = 4)), data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = nb(), run_model = FALSE), @@ -497,7 +497,7 @@ test_that("jsdgam should initiate correctly", { length.out = 5)), data = spiderdat, unit = site, - subgr = taxon, + species = taxon, n_lv = 3, family = nb(), run_model = FALSE)