Skip to content

Commit

Permalink
add update and residual_cor functions for jsdgams
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasjclark committed Nov 4, 2024
1 parent 9d1f5e8 commit 2e49488
Show file tree
Hide file tree
Showing 86 changed files with 2,754 additions and 2,000 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Suggests:
tweedie,
splines2,
extraDistr,
corpcor,
wrswoR,
xts,
lubridate,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ S3method(print,mvgam)
S3method(print,mvgam_conditional_effects)
S3method(print,mvgam_prefit)
S3method(print,mvgammodel)
S3method(residual_cor,jsdgam)
S3method(residuals,mvgam)
S3method(rhat,mvgam)
S3method(score,mvgam_forecast)
Expand All @@ -68,6 +69,7 @@ S3method(stancode,mvgam_prefit)
S3method(standata,mvgam_prefit)
S3method(summary,mvgam)
S3method(summary,mvgam_prefit)
S3method(update,jsdgam)
S3method(update,mvgam)
S3method(variables,mvgam)
export("%>%")
Expand Down Expand Up @@ -142,6 +144,7 @@ export(predictions)
export(prior)
export(prior_)
export(prior_string)
export(residual_cor)
export(rhat)
export(roll_eval_mvgam)
export(s)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# mvgam 1.1.4 (development version; not yet on CRAN)
## New functionalities
* Added function `jsdgam()` to estimate Joint Species Distribution Models in which both the latent factors and the observation model components can include any of mvgam's complex linear predictor effects. See `?mvgam::jsdgam` for details
* Added function `jsdgam()` to estimate Joint Species Distribution Models in which both the latent factors and the observation model components can include any of mvgam's complex linear predictor effects. Also added a function `residual_cor()` to compute residual correlation, covariance and precision matrices from `jsdgam` models. See `?mvgam::jsdgam` and `?mvgam::residual_cor` for details
* Added a `stability.mvgam()` method to compute stability metrics from models fit with Vector Autoregressive dynamics (#21 and #76)
* Added functionality to estimate hierarchical error correlations when using multivariate latent process models and when the data are nested among levels of a relevant grouping factor (#75); see `?mvgam::AR` for an example
* Added `ZMVN()` error models for estimating Zero-Mean Multivariate Normal errors; convenient for working with non time-series data where latent residuals are expected to be correlated (such as when fitting Joint Species Distribution Models); see `?mvgam::ZMVN` for examples
Expand All @@ -9,6 +9,7 @@
## Bug fixes
* Fixed a minor bug in the way `trend_map` recognises levels of the `series` factor
* Bug fix to ensure `lfo_cv` recognises the actual times in `time`, just in case the user supplies data that doesn't start at `t = 1`. Also updated documentation to better reflect this
* Bug fix to ensure `update.mvgam` captures any `knots` or `trend_knots` arguments that were passed to the original model call

# mvgam 1.1.3
## New functionalities
Expand Down
34 changes: 21 additions & 13 deletions R/jsdgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
#'\code{Stan}'s `reduce_sum` function and have a slow running model that cannot be sped
#'up by any other means. Currently works for all families when using \code{Cmdstan}
#'as the backend
#'@param priors An optional \code{data.frame} with prior
#'definitions (in Stan syntax) or, preferentially, a vector containing
#' objects of class `brmsprior` (see. \code{\link[brms]{prior}} for details).
#' See [get_mvgam_priors] and for more information on changing default prior distributions
#'@param ... Other arguments to pass to [mvgam]
#'@author Nicholas J Clark
#'@details Joint Species Distribution Models allow for responses of multiple species to be
Expand All @@ -77,19 +81,20 @@
#' any of `mvgam`'s predictor effects, including random intercepts and slopes, multidimensional penalized
#' smooths, GP effects etc... The factor loadings \eqn{\theta_j} are constrained for identifiability but can
#' be used to reconstruct an estimate of the species' residual variance-covariance matrix
#' using \eqn{\theta \theta'} (see the example below
#' for an illustration of this). The latent factors are further modelled using:
#' using \eqn{\Theta \Theta'} (see the example below and [residual_cor()] for details).
#' The latent factors are further modelled using:
#'\deqn{
#'u_i \sim \text{Normal}(Q_i\beta_{factor}, 1) \quad
#'}
#'where the second design matrix \eqn{Q} and associated \eqn{\beta_{factor}} coefficients are
#'constructed and modelled using `factor_formula`. Again, the effects that make up this linear
#'predictor can contain any of `mvgam`'s allowed predictor effects, providing enormous flexibility for
#'modelling species' communities.
#'@seealso [mvgam]
#'@seealso [mvgam()], [residual_cor()]
#'@references Nicholas J Clark & Konstans Wells (2020). Dynamic generalised additive models (DGAMs) for forecasting discrete ecological time series.
#'Methods in Ecology and Evolution. 14:3, 771-784.
#'
#' \cr
#' \cr
#'David I Warton, F Guillaume Blanchet, Robert B O’Hara, Otso Ovaskainen, Sara Taskinen, Steven C
#'Walker & Francis KC Hui (2015). So many variables: joint modeling in community ecology.
#'Trends in Ecology & Evolution 30:12, 766-779.
Expand Down Expand Up @@ -255,15 +260,18 @@
#' gratia::draw(mod, trend_effects = TRUE)
#' }
#'
#' # Calculate (median) residual spatial correlations
#' med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'),
#' 2, median),
#' nrow = N_species, ncol = 4)
#' cormat <- cov2cor(tcrossprod(med_loadings))
#' rownames(cormat) <- colnames(cormat) <- levels(dat$species)
#'
#' # A quick and dirty correlation matrix plot
#' image(cormat)
#' # Calculate residual spatial correlations
#' post_cors <- residual_cor(mod)
#' names(post_cors)

#' # Look at lower and upper credible interval estimates for
#' # some of the estimated correlations
#' post_cors$cor[1:5, 1:5]
#' post_cors$cor_upper[1:5, 1:5]
#' post_cors$cor_lower[1:5, 1:5]

#' # A quick and dirty plot of the posterior median correlations
#' image(post_cors$cor)
#'
#' # Posterior predictive checks and ELPD-LOO can ascertain model fit
#' pp_check(mod, type = "ecdf_overlay_grouped",
Expand Down
8 changes: 4 additions & 4 deletions R/monotonic.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,10 @@ smooth.construct.moi.smooth.spec <- function(object, data, knots){

# Generate basis functions
i_spline_basis <- splines2::iSpline(x,
knots = k,
degree = nk,
Boundary.knots = boundary,
intercept = TRUE)
knots = k,
degree = nk,
Boundary.knots = boundary,
intercept = TRUE)

nbasis <- dim(i_spline_basis)[2]

Expand Down
17 changes: 10 additions & 7 deletions R/mvgam_setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,22 @@ mvgam_setup <- function(formula,
maxit = 5) {

if(missing(knots)){
init_gam(formula(formula),
data = dat,
family = family)
out <- init_gam(formula(formula),
data = dat,
family = family)
attr(out, 'knots') <- NULL
} else {
if(!is.list(knots)){
stop('all "knot" arguments must be supplied as lists',
call. = FALSE)
}
init_gam(formula(formula),
data = dat,
family = family,
knots = knots)
out <- init_gam(formula(formula),
data = dat,
family = family,
knots = knots)
attr(out, 'knots') <- knots
}
out
}

#' Generic JAGAM setup function
Expand Down
185 changes: 185 additions & 0 deletions R/residual_cor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#' Extract residual correlations based on latent factors from a fitted jsdgam
#'
#' Compute residual correlation estimates from Joint Species Distribution
#' \code{jsdgam} models using latent factor loadings
#'
#' @name residual_cor.jsdgam
#' @inheritParams brms::residuals.brmsfit
#' @param object \code{list} object of class \code{mvgam} resulting from a call to [jsdgam()]
#' @param robust If `FALSE` (the default) the mean is used as a measure of central tendency.
#' If `TRUE`, the median is used instead. Only used if `summary` is `TRUE`
#' @param ... ignored
#' @return If `summary = TRUE`, a `list` with the following components:
#' \itemize{
#' \item{cor, cor_lower, cor_upper}{: A set of \eqn{p \times p} correlation matrices,
#' containing either the posterior median or mean estimate, plus lower and upper limits
#' of the corresponding credible intervals supplied to `probs`}
#' \item{sig_cor}{: A \eqn{p \times p} correlation matrix containing only those correlations whose credible
#' interval does not contain zero. All other correlations are set to zero}
#' \item{prec, prec_lower, prec_upper}{: A set of \eqn{p \times p} precision matrices,
#' containing either the posterior median or mean estimate, plus lower and upper limits
#' of the corresponding credible intervals supplied to `probs`}
#' \item{sig_prec}{: A \eqn{p \times p} precision matrix containing only those precisions whose credible
#' interval does not contain zero. All other precisions are set to zero}
#' \item{cov}{: A \eqn{p \times p} posterior median or mean covariance matrix}
#' \item{trace}{: The median/mean point estimator of the trace (sum of the diagonal elements)
#' of the residual covariance matrix `cov`}
#' }
#' If `summary = FALSE`, this function returns a `list` containing the following components:
#' \itemize{
#' \item{all_cormat}{: A \eqn{n_{draws} \times p \times p} `array` of posterior
#' residual correlation matrix draws}
#' \item{all_covmat}{: A \eqn{n_{draws} \times p \times p} `array` of posterior
#' residual covariance matrix draws}
#' \item{all_presmat}{: A \eqn{n_{draws} \times p \times p} `array` of posterior
#' residual precision matrix draws}
#' \item{all_trace}{: A \eqn{n_{draws}} `vector` of posterior covariance trace draws}
#' }
#' @details
#' Hui (2016) provides an excellent description of the quantities that this function calculates, so this passage
#' is heavily paraphrased from his associated `boral` package.
#'
#' In Joint Species Distribution Models, the residual covariance matrix is calculated
#' based on the matrix of latent factor loading matrix \eqn{\Theta}, where the residual covariance
#' matrix \eqn{\Sigma = \Theta\Theta'}. A strong residual covariance/correlation matrix
#' between two species can be interpreted as evidence of species interaction (e.g.,
#' facilitation or competition),
#' missing covariates, as well as any additional species correlation not accounted for by shared
#' environmental captured in `formula`.
#'
#' The residual precision matrix (also known as partial correlation matrix, Ovaskainen et al., 2016)
#' is defined as the inverse of the residual correlation matrix. The precision matrix is often used to
#' identify direct or causal relationships between two species e.g., two species can have a zero
#' precision but still be correlated, which can be interpreted as saying that two species are not
#' directly associated, but they are still correlated *through* other species. In other words, they
#' are conditionally independent given the other species. It is important that the precision matrix
#' does not exhibit the exact same properties of the correlation e.g., the diagonal elements are
#' not equal to 1. Nevertheless, relatively larger values of precision imply a stronger
#' direct relationships between two species.
#'
#' In addition to the residual correlation and precision matrices, the median or mean point estimator
#' of trace of the residual covariance matrix is returned,
#' \eqn{\sum\limits_{j=1}^p [\Theta\Theta']_{jj}}. Often used in other areas of multivariate
#' statistics, the trace may be interpreted as the amount of covariation explained by the latent factors.
#' One situation where the trace may be useful is when comparing a pure latent factor model
#' (where no terms are suppled to `formula`) versus a model with latent
#' factors and some additional predictors in `formula` -- the proportional difference in trace
#' between these two models may be interpreted as the proportion of covariation between species explained
#' by the predictors in `formula`. Of course, the trace itself is random due to the MCMC sampling, and so it
#' is not always guaranteed to produce sensible answers.
#' @author Nicholas J Clark
#' @references
#' Francis KC Hui (2016). BORAL - Bayesian ordination and regression analysis of
#' multivariate abundance data in R. Methods in Ecology and Evolution. 7, 744-750.
#' \cr
#' \cr
#' Otso Ovaskainen et al. (2016). Using latent variable models to identify large networks of
#' species-to-species associations at different spatial scales. Methods in Ecology and Evolution,
#' 7, 549-555.
#' @seealso [jsdgam()]
#' @export
residual_cor <- function(object, ...){
UseMethod("residual_cor", object)
}

#' @rdname residual_cor.jsdgam
#' @method residual_cor jsdgam
#' @export
residual_cor.jsdgam <- function(object,
summary = TRUE,
robust = FALSE,
probs = c(0.025, 0.975),
...) {

insight::check_if_installed("corpcor")

# Take draws of factor loadings
n_lv <- object$n_lv
p <- NCOL(object$ytimes)
sp_names <- levels(object$obs_data$series)
loadings <- as.matrix(object$model_output, 'lv_coefs')

# Initiate array to store all posterior correlation and covariance matrices
all_cormat <- all_covmat <- all_precmat <- array(0, dim = c(nrow(loadings), p, p))
all_trace_rescor <- numeric(NROW(loadings))

# Calculate posterior covariance, correlation, precision and trace estimates
for(i in 1:NROW(loadings)){
lv_coefs <- matrix(loadings[i, ], nrow = p, ncol = n_lv)

lambdalambdaT <- tcrossprod(lv_coefs)
all_covmat[i, , ] <- lambdalambdaT
all_trace_rescor[i] <- sum(diag(lambdalambdaT))
all_cormat[i, , ] <- cov2cor(lambdalambdaT)
all_precmat[i, , ] <- corpcor::cor2pcor(lambdalambdaT)
}

if(!summary){
out <- list(all_cormat = all_cormat,
all_covmat = all_covmat,
all_precmat = all_precmat,
all_trace = all_trace_rescor)
} else {

#### If summary, calculate summary statistics ####
# Initiate summary correlation and covariance matrices
sig_cormat <- cormat <- cormat_lower <- cormat_upper <-
sig_precmat <- precmat <- precmat_lower <- precmat_upper <-
covmat <- matrix(0, nrow = p, ncol = p)

rownames(cormat) <- rownames(cormat_lower) <- rownames(cormat_upper) <-
rownames(sig_cormat) <- rownames(precmat) <- rownames(precmat_lower) <-
rownames(precmat_upper) <- rownames(sig_precmat) <- rownames(covmat) <-
colnames(cormat) <- colnames(cormat_lower) <- colnames(cormat_upper) <-
colnames(sig_cormat) <- colnames(precmat) <- colnames(precmat_lower) <-
colnames(precmat_upper) <- colnames(sig_precmat) <- colnames(covmat) <-
sp_names

# Calculate posterior summaries
for (j in 1:p) {
for (j2 in 1:p) {
if (robust) {
covmat[j, j2] <- median(all_covmat[, j, j2])
cormat[j, j2] <- median(all_cormat[, j, j2])
precmat[j, j2] <- median(all_precmat[, j, j2])
} else {
covmat[j, j2] <- mean(all_covmat[, j, j2])
cormat[j, j2] <- mean(all_cormat[, j, j2])
precmat[j, j2] <- mean(all_precmat[, j, j2])
}

sig_cormat[j, j2] <- cormat[j, j2]
cormat_lower[j, j2] <- quantile(all_cormat[, j, j2], probs = min(probs), na.rm = TRUE)
cormat_upper[j, j2] <- quantile(all_cormat[, j, j2], probs = max(probs), na.rm = TRUE)
if (0 > cormat_lower[j, j2] & 0 < cormat_upper[j, j2]) {
sig_cormat[j, j2] <- 0
}

sig_precmat[j, j2] <- precmat[j, j2]
precmat_lower[j, j2] <- quantile(all_precmat[, j, j2], probs = min(probs), na.rm = TRUE)
precmat_upper[j, j2] <- quantile(all_precmat[, j, j2], probs = max(probs), na.rm = TRUE)
if (0 > precmat_lower[j, j2] & 0 < precmat_upper[j, j2]) {
sig_precmat[j, j2] <- 0
}
}
}

if (robust) {
final_trace <- median(all_trace_rescor)
} else {
final_trace <- mean(all_trace_rescor)
}

out <- list(cor = cormat,
cor_lower = cormat_lower,
cor_upper = cormat_upper,
sig_cor = sig_cormat,
cov = covmat,
prec = precmat,
prec_lower = precmat_lower,
prec_upper = precmat_upper,
sig_prec = sig_precmat,
trace = final_trace)
}
return(out)
}
1 change: 1 addition & 0 deletions R/residuals.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#'
#' @inheritParams brms::residuals.brmsfit
#' @param object An object of class `mvgam`
#' @param ... ignored
#' @details This method gives residuals as Dunn-Smyth (randomized quantile) residuals. Any
#' observations that were missing (i.e. `NA`) in the original data will have missing values
#' in the residuals
Expand Down
Loading

0 comments on commit 2e49488

Please sign in to comment.