fix offset problem in glmms

biodiverse · Feb 23, 2025 · 13ed9e9 · 13ed9e9
1 parent a49ef64
commit 13ed9e9
Show file tree

Hide file tree

Showing 22 changed files with 450 additions and 151 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: spAbundance
 Type: Package
 Title: Univariate and Multivariate Spatial Modeling of Species Abundance 
-Version: 0.2.1
+Version: 0.2.2
 Authors@R: c(person("Jeffrey", "Doser", role=c("aut", "cre"), email="[email protected]"), person("Andrew", "Finley", role = c("aut")))
 Description: Fits single-species (univariate) and multi-species (multivariate) non-spatial and spatial abundance models in a Bayesian framework using Markov Chain Monte Carlo (MCMC). Spatial models are fit using Nearest Neighbor Gaussian Processes (NNGPs). Details on NNGP models are given in Datta, Banerjee, Finley, and Gelfand (2016) <doi:10.1080/01621459.2015.1044091> and Finley, Datta, and Banerjee (2022) <doi:10.18637/jss.v103.i05>. Fits single-species and multi-species spatial and non-spatial versions of generalized linear mixed models (Gaussian, Poisson, Negative Binomial), N-mixture models (Royle 2004 <doi:10.1111/j.0006-341X.2004.00142.x>) and hierarchical distance sampling models (Royle, Dawson, Bates (2004) <doi:10.1890/03-3127>). Multi-species spatial models are fit using a spatial factor modeling approach with NNGPs for computational efficiency. 
 License: GPL (>= 3)

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(simMsAbund)
 export(simMsNMix)
 export(simDS)
 export(simMsDS)
+export(simDynAbund)
 export(NMix)
 export(spNMix)
 export(msNMix)
@@ -25,6 +26,8 @@ export(waicAbund)
 export(ppcAbund)
 export(svcAbund)
 export(svcMsAbund)
+export(dynAbund)
+export(spDynAbund)
 
 S3method("predict", "NMix")
 S3method("print", "NMix")
@@ -130,6 +133,18 @@ S3method("fitted", "sfMsDS")
 S3method("plot", "sfMsDS")
 S3method("summary", "sfMsDS")
 
+S3method("predict", "dynAbund")
+S3method("print", "dynAbund")
+S3method("fitted", "dynAbund")
+S3method("plot", "dynAbund")
+S3method("summary", "dynAbund")
+
+# S3method("predict", "spDynAbund")
+S3method("print", "spDynAbund")
+S3method("fitted", "spDynAbund")
+# S3method("plot", "spDynAbund")
+S3method("summary", "spDynAbund")
+
 importFrom("stats", "dist", "dnbinom", "rbinom", "rnorm", "rnbinom", "rpois", "coefficients", "glm", "is.empty.model", "model.matrix", "model.response", "terms", "runif", "quantile", "dbinom", "var", "rgamma", "sd", "integrate", "rmultinom")
 importFrom("coda", "mcmc", "gelman.diag", "mcmc.list", "effectiveSize")
 importFrom("abind", "abind")

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# spAbundance 0.3.0
+
++ Added in the `loadings.diag` argument to `simMsAbund()` to allow for simulating data where the diagonal of the factor loadings matrix is set to 1 (i.e., the default and what was done in previous versions) or to random positive values. 
++ Updated `ppcAbund()` to work with `svcAbund()` models (Poisson and NB).
++ Fixed a bug in all GLMM models that prevented the models from running when a site-level offset was supplied into the model. Thanks to Sonia Illanas for brining this to my attention.
+
 # spAbundance 0.2.1
 
 + Fixed a `C++` memory issue in `predict.spAbund()` that could result in crashes under certain situations.

diff --git a/R/abund.R b/R/abund.R
@@ -1,8 +1,8 @@
 abund <- function(formula, data, inits, priors, tuning,
-		  n.batch, batch.length, accept.rate = 0.43, family = 'Poisson',
-		  n.omp.threads = 1, verbose = TRUE,
-		  n.report = 100, n.burn = round(.10 * n.batch * batch.length), n.thin = 1,
-		  n.chains = 1, save.fitted = TRUE, ...) {
+                  n.batch, batch.length, accept.rate = 0.43, family = 'Poisson',
+                  n.omp.threads = 1, verbose = TRUE,
+                  n.report = 100, n.burn = round(.10 * n.batch * batch.length), n.thin = 1,
+                  n.chains = 1, save.fitted = TRUE, ...) {
 
   ptm <- proc.time()
 
@@ -59,7 +59,7 @@ abund <- function(formula, data, inits, priors, tuning,
       offset <- data$offset
       if (length(offset) == 1) {
         offset <- matrix(offset, nrow(y), ncol(y))
-      } else if (length(dim(offset)) == 1) { # Value for each site
+      } else if (length(dim(offset)) == 0 & length(offset) > 1) { # Value for each site
         if (length(offset) != nrow(y)) {
           stop(paste0("offset must be a single value, vector of length ", nrow(y), " or a matrix with ",
                      nrow(y), " rows and ", ncol(y), " columns."))

diff --git a/R/generics.R b/R/generics.R
@@ -17,12 +17,12 @@ summary.ppcAbund <- function(object, level = 'both',
   cat(paste("Number of Chains: ", n.chains, "\n", sep = ""))
   cat(paste("Total Posterior Samples: ",n.post * n.chains,"\n\n", sep=""))
 
-  if (object$class %in% c('NMix', 'spNMix', 'abund', 'spAbund', 'DS', 'spDS')) {
+  if (object$class %in% c('NMix', 'spNMix', 'abund', 'spAbund', 'DS', 'spDS', 'svcAbund', 'dynAbund')) {
     cat("Bayesian p-value: ", round(mean(object$fit.y.rep > object$fit.y), digits), "\n")
     cat("Fit statistic: ", object$fit.stat, "\n")
   }
 
-  if (object$class %in% c('msAbund', 'spMsAbund', 'lfMsAbund', 'sfMsAbund',
+  if (object$class %in% c('msAbund', 'spMsAbund', 'lfMsAbund', 'sfMsAbund', 'svcMsAbund',
 			  'msNMix', 'spMsNMix', 'lfMsNMix', 'sfMsNMix',
 			  'msDS', 'lfMsDS', 'sfMsDS')) {
 

diff --git a/R/lfMsAbund.R b/R/lfMsAbund.R
@@ -113,7 +113,7 @@ lfMsAbund <- function(formula, data, inits, priors,
       offset <- data$offset
       if (length(offset) == 1) {
         offset <- matrix(offset, ncol(y), dim(y)[3])
-      } else if (length(dim(offset)) == 1) { # Value for each site
+      } else if (length(dim(offset)) == 0 & length(offset) > 1) { # Value for each site
         if (length(offset) != ncol(y)) {
           stop(paste0("offset must be a single value, vector of length ", ncol(y), " or a matrix with ",
                      ncol(y), " rows and ", dim(y)[3], " columns."))

diff --git a/R/msAbund.R b/R/msAbund.R
@@ -100,7 +100,7 @@ msAbund <- function(formula, data, inits, priors, tuning,
       offset <- data$offset
       if (length(offset) == 1) {
         offset <- matrix(offset, ncol(y), dim(y)[3])
-      } else if (length(dim(offset)) == 1) { # Value for each site
+      } else if (length(dim(offset)) == 0 & length(offset) > 1) { # Value for each site
         if (length(offset) != ncol(y)) {
           stop(paste0("offset must be a single value, vector of length ", ncol(y), " or a matrix with ",
                      ncol(y), " rows and ", dim(y)[3], " columns."))

diff --git a/R/plot-generics.R b/R/plot-generics.R
@@ -5,7 +5,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   indx <- 1:n.post
   if (param == 'beta.comm') {
     if (class(x) %in% c('abund', 'spAbund', 'svcAbund', 'NMix', 
-			     'spNMix', 'DS', 'spDS')) {
+			     'spNMix', 'DS', 'spDS', 'dynAbund')) {
       stop("beta.comm is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -15,7 +15,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'tau.sq.beta') {
     if (class(x) %in% c('abund', 'spAbund', 'svcAbund', 'NMix', 
-			     'spNMix', 'DS', 'spDS')) {
+			     'spNMix', 'DS', 'spDS', 'dynAbund')) {
       stop("tau.sq.beta is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -25,7 +25,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'lambda') {
     if (class(x) %in% c('abund', 'spAbund', 'svcAbund', 'NMix', 
-			     'spNMix', 'DS', 'spDS', 'msAbund', 'msNMix', 'msDS')) {
+			     'spNMix', 'DS', 'spDS', 'msAbund', 'msNMix', 'msDS', 'dynAbund')) {
       stop("lambda is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -35,7 +35,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'tau.sq') {
     if (!(class(x) %in% c('abund', 'spAbund', 'svcAbund', 'svcMsAbund', 
-			     'msAbund', 'lfMsAbund', 'sfMsAbund'))) {
+			     'msAbund', 'lfMsAbund', 'sfMsAbund', 'dynAbund'))) {
       stop("tau.sq is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -45,7 +45,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'alpha.comm') {
     if (class(x) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund', 'sfMsAbund', 
-			     'svcAbund', 'svcMsAbund', 'NMix', 'spNMix', 'DS', 'spDS')) {
+			     'svcAbund', 'svcMsAbund', 'NMix', 'spNMix', 'DS', 'spDS', 'dynAbund')) {
       stop("alpha.comm is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -55,7 +55,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'tau.sq.alpha') {
     if (class(x) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund', 'sfMsAbund', 
-			     'svcAbund', 'svcMsAbund', 'NMix', 'spNMix', 'DS', 'spDS')) {
+			     'svcAbund', 'svcMsAbund', 'NMix', 'spNMix', 'DS', 'spDS', 'dynAbund')) {
       stop("tau.sq.alpha is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -83,7 +83,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'alpha') {
     if (class(x) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund', 'sfMsAbund', 
-			     'svcAbund', 'svcMsAbund')) {
+			     'svcAbund', 'svcMsAbund', 'dynAbund')) {
       stop("alpha is not a parameter in the fitted model")
     }
     for (i in 1:n.chains) {
@@ -102,8 +102,8 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'sigma.sq.p') {
     if (class(x) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund', 'sfMsAbund', 
-			     'svcAbund', 'svcMsAbund')) {
-      stop("N is not a parameter in the fitted model")
+			     'svcAbund', 'svcMsAbund', 'dynAbund')) {
+      stop("sigma.sq.p is not a parameter in the fitted model")
     }
     if (!x$pRE) {
       stop("sigma.sq.p is not a parameter in the fitted model")
@@ -113,22 +113,6 @@ overallPlot <- function(x, param, density = TRUE, ...) {
       indx <- (n.post * i + 1):(n.post * (i + 1))
     }
   }
-  # if (param == 'N') {
-  #   if (class(x) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund', 'sfMsAbund', 
-  #       		     'svcAbund', 'svcMsAbund')) {
-  #     stop("N is not a parameter in the fitted model")
-  #   }
-  #   for (i in 1:n.chains) {
-  #     curr.samples[[i]] <- coda::mcmc(x$N.samples[indx, , drop = FALSE])
-  #     indx <- (n.post * i + 1):(n.post * (i + 1))
-  #   }
-  # }
-  # if (param == 'mu') {
-  #   for (i in 1:n.chains) {
-  #     curr.samples[[i]] <- coda::mcmc(x$mu.samples[indx, , drop = FALSE])
-  #     indx <- (n.post * i + 1):(n.post * (i + 1))
-  #   }
-  # }
   if (param == 'beta.star') {
     if (!x$muRE) {
       stop("the model was not fit with any abundance random effects (beta.star)")
@@ -140,7 +124,7 @@ overallPlot <- function(x, param, density = TRUE, ...) {
   }
   if (param == 'alpha.star') {
     if (class(x) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund', 'sfMsAbund', 
-			     'svcAbund', 'svcMsAbund')) {
+			     'svcAbund', 'svcMsAbund', 'dynAbund')) {
       stop("alpha.star is not a parameter in the fitted model")
     }
     if (!x$pRE) {
@@ -209,3 +193,6 @@ plot.svcMsAbund <- function(x, param, density = TRUE, ...) {
 plot.svcAbund <- function(x, param, density = TRUE, ...) {
   overallPlot(x, param, density)
 }
+plot.dynAbund <- function(x, param, density = TRUE, ...) {
+  overallPlot(x, param, density)
+}
diff --git a/R/ppcAbund.R b/R/ppcAbund.R
@@ -18,8 +18,9 @@ ppcAbund <- function(object, fit.stat, group, type = 'marginal', ...) {
   if (!(class(object) %in% c('NMix', 'spNMix', 'abund', 'spAbund', 
 			     'msAbund', 'lfMsAbund', 'sfMsAbund', 
 			     'msNMix', 'spNMix', 'lfMsNMix', 'sfMsNMix', 
-			     'DS', 'spDS', 'msDS', 'lfMsDS', 'sfMsDS'))) {
-    stop("error: object must be one of the following classes: NMix, spNMix, abund, spAbund, msAbund, lfMsAbund, sfMsAbund, msNMix, lfMsNMix, sfMsNMix, DS, spDS, msDS, lfMsDS, sfMsDS")
+			     'DS', 'spDS', 'msDS', 'lfMsDS', 'sfMsDS', 
+           'svcMsAbund', 'svcAbund', 'dynAbund'))) {
+    stop("error: object must be one of the following classes: NMix, spNMix, abund, spAbund, msAbund, lfMsAbund, sfMsAbund, msNMix, lfMsNMix, sfMsNMix, DS, spDS, msDS, lfMsDS, sfMsDS, svcMsAbund, svcAbund, dynAbund")
   }
   # Fit statistic ---------------------
   if (missing(fit.stat)) {
@@ -40,7 +41,8 @@ ppcAbund <- function(object, fit.stat, group, type = 'marginal', ...) {
   if (!(group %in% c(0, 1, 2))) {
     stop("error: group must be 0 (raw data), 1 (sites), or 2 (replicates)")
   }
-  if (group != 0 & class(object) %in% c('abund', 'spAbund', 'msAbund', 'lfMsAbund')) {
+  if (group != 0 & class(object) %in% c('abund', 'spAbund', 'svcAbund', 'msAbund', 'lfMsAbund', 
+                                        'sfMsAbund', 'svcMsAbund', 'dynAbund')) {
     stop("error: group must be 0 (raw data) for abundance GLM models")
   }
 
@@ -314,8 +316,68 @@ ppcAbund <- function(object, fit.stat, group, type = 'marginal', ...) {
     out$n.chains <- object$n.chains
   } 
 
+  # Single-species dynamic models -----------------------------------------
+  if (class(object) %in% c('dynAbund', 'spDynAbund')) {
+    y <- object$y
+    J <- nrow(y)
+    y.rep.samples <- fitted.dynAbund(object)
+    mu.samples <- object$mu.samples
+    n.samples <- object$n.post * object$n.chains
+    fit.y <- rep(NA, n.samples)
+    fit.y.rep <- rep(NA, n.samples)
+    n.time <- apply(y, 1, function(a) sum(!is.na(a)))
+    time.indx <- vector(mode = 'list', length = J)
+    for (j in 1:J) {
+      time.indx[[j]] <- which(!is.na(y[j, ]))
+    }
+    n.time.max <- ncol(object$y)
+    e <- 0.0001
+    if (group == 0) {
+      if (fit.stat %in% c('chi-squared', 'chi-square')) {
+        fit.big.y.rep <- array(NA, dim = c(J, n.time.max, n.samples))
+        fit.big.y <- array(NA, dim = c(J, n.time.max, n.samples))
+        for (i in 1:n.samples) {
+          for (j in 1:J) {
+            E <- mu.samples[i, j, time.indx[[j]]] * object$offset[j, time.indx[[j]]]
+            fit.big.y.rep[j, time.indx[[j]], i] <- (y.rep.samples[i, j, time.indx[[j]]] - E)^2 / (E + e)
+            fit.big.y[j, time.indx[[j]], i] <- (y[j, time.indx[[j]]] - E)^2 / (E + e)
+	        } # j
+          fit.y[i] <- sum(fit.big.y[, , i], na.rm = TRUE)
+          fit.y.rep[i] <- sum(fit.big.y.rep[, , i], na.rm = TRUE)
+        } # i
+      } else if (fit.stat == 'freeman-tukey') {
+        fit.big.y.rep <- array(NA, dim = c(J, n.time.max, n.samples))
+        fit.big.y <- array(NA, dim = c(J, n.time.max, n.samples))
+        for (i in 1:n.samples) {
+          for (j in 1:J) {
+            E <- mu.samples[i, j, time.indx[[j]]] * object$offset[j, time.indx[[j]]]
+            fit.big.y.rep[j, time.indx[[j]], i] <- (sqrt(y.rep.samples[i, j, time.indx[[j]]]) - sqrt(E))^2
+            fit.big.y[j, time.indx[[j]], i] <- (sqrt(y[j, time.indx[[j]]]) - sqrt(E))^2
+          } # j
+          fit.y[i] <- sum(fit.big.y[, , i], na.rm = TRUE)
+          fit.y.rep[i] <- sum(fit.big.y.rep[, , i], na.rm = TRUE)
+        } # i
+      }
+    }
+    out$fit.y <- fit.y
+    out$fit.y.rep <- fit.y.rep
+    out$fit.y.group.quants <- apply(fit.big.y, c(1, 2), quantile, c(0.025, 0.25, 0.5, 0.75, 0.975), na.rm = TRUE)
+    out$fit.y.rep.group.quants <- apply(fit.big.y.rep, c(1, 2), quantile, c(0.025, 0.25, 0.5, 0.75, 0.975), na.rm = TRUE)
+    # For summaries
+    out$group <- group
+    out$fit.stat <- fit.stat
+    out$class <- class(object)
+    out$call <- cl
+    out$n.samples <- object$n.samples
+    out$n.burn <- object$n.burn
+    out$n.thin <- object$n.thin
+    out$n.post <- object$n.post
+    out$n.chains <- object$n.chains
+  } 
+
+
   # Multi-species abundance models ----------------------------------------
-  if (class(object) %in% c('msAbund', 'lfMsAbund', 'sfMsAbund')) {
+  if (class(object) %in% c('msAbund', 'lfMsAbund', 'sfMsAbund', 'svcMsAbund')) {
     if (object$dist %in% c('Gaussian', 'zi-Gaussian')) {
       stop("ppcAbund is not supported for Gaussian or zi-Gaussian GLM(M)s. These are linear (mixed) models, and classic tools for residual diagnostics can be applied using object$y and object$y.rep.samples to generate residuals")
     }