diff --git a/DESCRIPTION b/DESCRIPTION index 24b7b1b..420ffab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,9 +11,9 @@ Depends: methods, stats, mvtnorm, - R (>= 4.0.0) + R (>= 4.4.0) LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Suggests: knitr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index 1f0c96b..6617ec4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,5 +19,6 @@ importFrom(stats,model.matrix) importFrom(stats,model.offset) importFrom(stats,optim) importFrom(stats,plogis) +importFrom(stats,pnorm) importFrom(stats,qnorm) importFrom(stats,quantile) diff --git a/R/auto_occ.R b/R/auto_occ.R index 7229aeb..95f0007 100644 --- a/R/auto_occ.R +++ b/R/auto_occ.R @@ -122,7 +122,7 @@ #' @export #' @importFrom stats as.formula #' @importFrom stats optim -#' @importFrom stats qnorm +#' @importFrom stats qnorm pnorm #' @importFrom methods new #' #' @returns diff --git a/R/compare_models.R b/R/compare_models.R index 92dc4d7..8681f6a 100644 --- a/R/compare_models.R +++ b/R/compare_models.R @@ -25,7 +25,7 @@ #' order they are input (e.g., \code{m1 == model_list[[1]])}). The default #' data.frame includes: #' -#' \itemize{ +#' \describe{ #' \item{model}{The model names} #' \item{npar}{The number of parameters in the model} #' \item{AIC}{AIC score of a fitted model} diff --git a/R/format_y.R b/R/format_y.R index 6f0d1bd..3a8b266 100644 --- a/R/format_y.R +++ b/R/format_y.R @@ -30,7 +30,7 @@ #' \code{\link{auto_occ}}. The classes of the associated columns in \code{x} may vary somewhat #' \code{format_y}. Mor specifically: #' -#' \itemize{ +#' \describe{ #' \item{The site column}{Should preferably be either of class \code{character} or #' \code{factor}. If this column is either a \code{numeric} or \code{integer}, #' \code{format_y} will provide a warning that it is converting the column diff --git a/R/predict.R b/R/predict.R index bdd33a2..c0ff2aa 100644 --- a/R/predict.R +++ b/R/predict.R @@ -15,7 +15,10 @@ #' #' @param level Tolerance / confidence level for predictions. Defaults to \code{0.95}. #' -#' @param nsim The number of parmater simulations to be made via the +#' @param condition The type of estimate you want to generate if you are estimating +#' occupancy. Can either be \code{'unconditional'}, \code{'z0'}, or \code{'z1'}. +#' Defaults to \code{'unconditional'}, see details for additional information. +#' @param nsim The number of parameter simulations to be made via the #' models estimated parameters, variance covariance matrix, #' and \code{\link[mvtnorm]{rmvnorm}}. Defaults to 3000. #' @@ -45,16 +48,30 @@ #' model. This method was chosen as it creates functionally similar uncertainty #' estimates as a Bayesian autologistic occupancy model. #' -#' For occupancy, each set of simulated parameters is used to -#' generate two logit-linear predictions for the supplied covariates: +#' Predictions can be returned as either *unconditional* or *conditional*: +#' \itemize{ +#' \item *Unconditional (steady-state)*: the long-term expected occupancy +#' probability under a given set of covariates, without conditioning on the +#' actual occupancy state of a site. This is analogous to the equilibrium +#' expectation of a dynamic occupancy model (\eqn{\gamma / (\gamma + \epsilon)}) and +#' can be returned by including \code{condition = 'unconditional'} as an arguement. +#' \item *Conditional*: the occupancy probability given a site’s occupancy +#' state in the previous time interval. These predictions depend on whether +#' a site was occupied (\code{condition = 'z1'}) or unoccupied (\code{condition = 'z0'}) at time \eqn{t}, and so they are more +#' specific to site history. +#' } +#' +#' For unconditional occupancy predictions, each set of simulated parameters is used to +#' generate two logit-linear predictors: #' #' \deqn{\Large \mathrm{logit}(\psi_a) = \beta_0 + \beta_1 \times x_1 + \dots + \beta_n \times x_n} #' -#'and +#' and #' -#'\deqn{\Large \mathrm{logit}(\psi_b) = \beta_0 + \beta_1 \times x_1 + \dots + \beta_n \times x_n + \theta} +#' \deqn{\Large \mathrm{logit}(\psi_b) = \beta_0 + \beta_1 \times x_1 + \dots + \beta_n \times x_n + \theta} #' -#' where \eqn{\theta} is the estimated autologistic term. Following this, the expected occupancy of an autologisitic occupancy model is +#' where \eqn{\theta} is the estimated autologistic term. Following this, the +#' unconditional expected occupancy is: #' #' \deqn{\huge #' \frac{ @@ -63,10 +80,7 @@ #' \mathrm{ilogit}(\psi_a) + (1 - \mathrm{ilogit}(\psi_b)) #' } #'} -#' which is similar to the expected occupancy of a dynamic occupancy model (\eqn{\gamma \div (\gamma + \epsilon)}) -#' where \eqn{\gamma} is colonization and \eqn{\epsilon} is extinction. Following this calculation for all simuated -#' parameter estimates and covariate values, the median estimate and confidence intervals are collected across -#' simulations for each covariate value. +#' #' #' Detection predictions are more straight-forward given there is no need to #' derive the expected value. Simulations are still carried out to create to @@ -76,7 +90,7 @@ #' #' If \code{newdata} is supplied, this function will return a \code{data.frame} #' with a number of rows equal to \code{newdata} and three columns. -#' \itemize{ +#' \describe{ #' \item{estimate}{Median estimate from the model} #' \item{lower}{Lower confidence interval, based on confidence level supplied to \code{level}} #' \item{upper}{Upper confidence interval, based on confidence level supplied to \code{level}} @@ -195,7 +209,7 @@ #' lines(opo_income$upper ~ income_real$Income, lwd = 2, lty = 2) -predict.auto_occ_fit <- function(object,type, newdata = NULL,level = 0.95, nsim = 3000, seed = NULL,...){ +predict.auto_occ_fit <- function(object,type, newdata = NULL,level = 0.95, nsim = 3000, condition = c("unconditional", "z0", "z1"), seed = NULL,...){ if(!inherits(object,"auto_occ_fit")){ stop("model object must be of class auto_occ_fit") } @@ -205,6 +219,12 @@ predict.auto_occ_fit <- function(object,type, newdata = NULL,level = 0.95, nsim if(!type %in% c("psi","rho")){ stop("type must be either' psi' for occupancy or 'rho' for detection." ) } + if(length(condition)>1){ + condition <- "unconditional" + } + if(!condition %in% c("unconditional", "z0", "z1")){ + stop("condition must be either 'unconditional', 'z0', or z1'") + } if(!is.numeric(level)){ stop("level must be a numeric") } @@ -363,8 +383,22 @@ predict.auto_occ_fit <- function(object,type, newdata = NULL,level = 0.95, nsim # logit-predictions with theta e2 <- cbind(X,1) %*% t(mvn_samples) e2 <- sweep(e2, 1, offset, FUN = "+") - # calculate expected occupancy - e3 <- plogis(e1) / (plogis(e1) + (1 - plogis(e2))) + # now decide based on condition + if (condition == "z0") { + # colonization-only predictions + e3 <- plogis(e1) + } + if (condition == "z1") { + # persistence-only predictions + e3 <- plogis(e2) + } + if(condition == "unconditional"){ + # unconditional / steady-state + e3 <- plogis(e1) / (plogis(e1) + (1 - plogis(e2))) + } + { + + } } if(type == "rho"){ e2 <- X %*% t(mvn_samples) @@ -385,6 +419,18 @@ predict.auto_occ_fit <- function(object,type, newdata = NULL,level = 0.95, nsim lower = predictions[,1], upper = predictions[,3] ) + # add on the prediction type + if(type == "psi"){ + if(condition == "unconditional"){ + pred_frame$pred_type <- "psi-unconditional" + } else if(condition == "z0"){ + pred_frame$pred_type <- "psi-z0" + } else if(condition == "z1"){ + pred_frame$pred_type <- "psi-z1" + } + } else if(type == "rho"){ + pred_frame$pred_type <- "rho" + } if(tack_on){ pred_frame <- cbind.data.frame( list( diff --git a/man/compare_models.Rd b/man/compare_models.Rd index 53a17b5..4f4a33e 100644 --- a/man/compare_models.Rd +++ b/man/compare_models.Rd @@ -24,7 +24,7 @@ A data.frame with a number of rows equal to the number of models in order they are input (e.g., \code{m1 == model_list[[1]])}). The default data.frame includes: -\itemize{ +\describe{ \item{model}{The model names} \item{npar}{The number of parameters in the model} \item{AIC}{AIC score of a fitted model} diff --git a/man/format_y.Rd b/man/format_y.Rd index 01b6740..85c4e7e 100644 --- a/man/format_y.Rd +++ b/man/format_y.Rd @@ -36,7 +36,7 @@ of a species detection history get converted into what would be accepted by \code{\link{auto_occ}}. The classes of the associated columns in \code{x} may vary somewhat \code{format_y}. Mor specifically: - \itemize{ + \describe{ \item{The site column}{Should preferably be either of class \code{character} or \code{factor}. If this column is either a \code{numeric} or \code{integer}, \code{format_y} will provide a warning that it is converting the column diff --git a/man/predict.auto_occ_fit.Rd b/man/predict.auto_occ_fit.Rd index a4e8ef3..7213792 100644 --- a/man/predict.auto_occ_fit.Rd +++ b/man/predict.auto_occ_fit.Rd @@ -11,6 +11,7 @@ newdata = NULL, level = 0.95, nsim = 3000, + condition = c("unconditional", "z0", "z1"), seed = NULL, ... ) @@ -26,10 +27,14 @@ If omitted, the fitted linear predictors are used.} \item{level}{Tolerance / confidence level for predictions. Defaults to \code{0.95}.} -\item{nsim}{The number of parmater simulations to be made via the +\item{nsim}{The number of parameter simulations to be made via the models estimated parameters, variance covariance matrix, and \code{\link[mvtnorm]{rmvnorm}}. Defaults to 3000.} +\item{condition}{The type of estimate you want to generate if you are estimating +occupancy. Can either be \code{'unconditional'}, \code{'z0'}, or \code{'z1'}. +Defaults to \code{'unconditional'}, see details for additional information.} + \item{seed}{The random seed to set for simulations, defaults to \code{NULL}.} \item{...}{additional arguments. Not used yet.} @@ -37,7 +42,7 @@ and \code{\link[mvtnorm]{rmvnorm}}. Defaults to 3000.} \value{ If \code{newdata} is supplied, this function will return a \code{data.frame} with a number of rows equal to \code{newdata} and three columns. -\itemize{ +\describe{ \item{estimate}{Median estimate from the model} \item{lower}{Lower confidence interval, based on confidence level supplied to \code{level}} \item{upper}{Upper confidence interval, based on confidence level supplied to \code{level}} @@ -63,8 +68,21 @@ and a variance covariance matrix supplied by the \code{\link[autoOcc]{auto_occ}} model. This method was chosen as it creates functionally similar uncertainty estimates as a Bayesian autologistic occupancy model. -For occupancy, each set of simulated parameters is used to -generate two logit-linear predictions for the supplied covariates: +Predictions can be returned as either *unconditional* or *conditional*: +\itemize{ + \item *Unconditional (steady-state)*: the long-term expected occupancy + probability under a given set of covariates, without conditioning on the + actual occupancy state of a site. This is analogous to the equilibrium + expectation of a dynamic occupancy model (\eqn{\gamma / (\gamma + \epsilon)}) and + can be returned by including \code{condition = 'unconditional'} as an arguement. + \item *Conditional*: the occupancy probability given a site’s occupancy + state in the previous time interval. These predictions depend on whether + a site was occupied (\code{condition = 'z1'}) or unoccupied (\code{condition = 'z0'}) at time \eqn{t}, and so they are more + specific to site history. +} + +For unconditional occupancy predictions, each set of simulated parameters is used to +generate two logit-linear predictors: \deqn{\Large \mathrm{logit}(\psi_a) = \beta_0 + \beta_1 \times x_1 + \dots + \beta_n \times x_n} @@ -72,7 +90,8 @@ and \deqn{\Large \mathrm{logit}(\psi_b) = \beta_0 + \beta_1 \times x_1 + \dots + \beta_n \times x_n + \theta} -where \eqn{\theta} is the estimated autologistic term. Following this, the expected occupancy of an autologisitic occupancy model is +where \eqn{\theta} is the estimated autologistic term. Following this, the +unconditional expected occupancy is: \deqn{\huge \frac{ @@ -81,10 +100,7 @@ where \eqn{\theta} is the estimated autologistic term. Following this, the expec \mathrm{ilogit}(\psi_a) + (1 - \mathrm{ilogit}(\psi_b)) } } -which is similar to the expected occupancy of a dynamic occupancy model (\eqn{\gamma \div (\gamma + \epsilon)}) -where \eqn{\gamma} is colonization and \eqn{\epsilon} is extinction. Following this calculation for all simuated -parameter estimates and covariate values, the median estimate and confidence intervals are collected across -simulations for each covariate value. + Detection predictions are more straight-forward given there is no need to derive the expected value. Simulations are still carried out to create to