diff --git a/DESCRIPTION b/DESCRIPTION index 6961131..ff9ec6c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,16 +12,23 @@ Description: This package fits models to bycatch data, and can expand to fleet-w License: GPL-3 Encoding: UTF-8 Depends: - R (>= 4.0.0), Rcpp (>= 1.0.5), methods + R (>= 3.4.0) Imports: - rstan (>= 2.21.2), - rstantools (>= 2.1.1), ggplot2, loo (>= 2.4.1), dplyr (>= 1.0.2), - rlang (>= 0.4.1) -LinkingTo: StanHeaders (>= 2.21.0.7), rstan (>= 2.21.2), BH (>= 1.75.0.0), - Rcpp (>= 1.0.5), RcppEigen (>= 0.3.3.9.1) + rlang (>= 0.4.1), + methods, + Rcpp (>= 0.12.0), + RcppParallel (>= 5.0.1), + rstan (>= 2.18.1), + rstantools (>= 2.3.0) +LinkingTo: BH (>= 1.66.0), + Rcpp (>= 0.12.0), + RcppEigen (>= 0.3.3.3.0), + RcppParallel (>= 5.0.1), + rstan (>= 2.18.1), + StanHeaders (>= 2.18.0) Suggests: testthat, parallel, diff --git a/NAMESPACE b/NAMESPACE index 9504a1f..b1781c7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,7 +9,6 @@ export(plot_fitted) import(Rcpp) import(ggplot2) import(methods) -import(rstantools) importFrom(rstan,extract) importFrom(rstan,sampling) importFrom(rstan,vb) diff --git a/R/bycatch-package.R b/R/bycatch-package.R index b519343..e5a4923 100644 --- a/R/bycatch-package.R +++ b/R/bycatch-package.R @@ -8,10 +8,9 @@ #' @useDynLib bycatch, .registration = TRUE #' @import methods #' @import Rcpp -#' @import rstantools #' @importFrom rstan sampling #' #' @references -#' Stan Development Team (2018). RStan: the R interface to Stan. R package version 2.18.2. http://mc-stan.org +#' Stan Development Team (2023). RStan: the R interface to Stan. R package version 2.21.8. https://mc-stan.org #' NULL diff --git a/R/stanmodels.R b/R/stanmodels.R index 11277a3..2d6e93f 100644 --- a/R/stanmodels.R +++ b/R/stanmodels.R @@ -1,43 +1,25 @@ -# Part of the bayesdfa package for estimating model parameters -# Copyright (C) 2015, 2016, 2017 Trustees of Columbia University -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 3 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# Generated by rstantools. Do not edit by hand. -# This file is only intended to be used during the installation process -# nocov start -MODELS_HOME <- "src" -if (!file.exists(MODELS_HOME)) MODELS_HOME <- sub("R$", "src", getwd()) +# names of stan models +stanmodels <- c("bycatch") -stan_files <- dir(file.path(MODELS_HOME, "stan_files"), - pattern = "stan$", full.names = TRUE -) -stanmodels <- lapply(stan_files, function(f) { - model_cppname <- sub("\\.stan$", "", basename(f)) - stanfit <- rstan::stanc(f, - allow_undefined = TRUE, - obfuscate_model_name = FALSE - ) - stanfit$model_cpp <- list( - model_cppname = stanfit$model_name, - model_cppcode = stanfit$cppcode - ) - return(do.call(methods::new, args = c(stanfit[-(1:3)], - Class = "stanmodel", - mk_cppmodule = function(x) get(paste0("model_", model_cppname)) - ))) +# load each stan module +Rcpp::loadModule("stan_fit4bycatch_mod", what = TRUE) + +# instantiate each stanmodel object +stanmodels <- sapply(stanmodels, function(model_name) { + # create C++ code for stan model + stan_file <- if(dir.exists("stan")) "stan" else file.path("inst", "stan") + stan_file <- file.path(stan_file, paste0(model_name, ".stan")) + stanfit <- rstan::stanc_builder(stan_file, + allow_undefined = TRUE, + obfuscate_model_name = FALSE) + stanfit$model_cpp <- list(model_cppname = stanfit$model_name, + model_cppcode = stanfit$cppcode) + # create stanmodel object + methods::new(Class = "stanmodel", + model_name = stanfit$model_name, + model_code = stanfit$model_code, + model_cpp = stanfit$model_cpp, + mk_cppmodule = function(x) get(paste0("rstantools_model_", model_name))) }) -names(stanmodels) <- sub("\\.stan$", "", basename(stan_files)) -rm(MODELS_HOME) -# nocov end diff --git a/R/zzz.R b/R/zzz.R deleted file mode 100644 index ab4f6b4..0000000 --- a/R/zzz.R +++ /dev/null @@ -1,4 +0,0 @@ -.onLoad <- function(libname, pkgname) { - modules <- paste0("stan_fit4", names(stanmodels), "_mod") - for (m in modules) loadModule(m, what = TRUE) -} diff --git a/bycatch.Rproj b/bycatch.Rproj index cba1b6b..69fafd4 100644 --- a/bycatch.Rproj +++ b/bycatch.Rproj @@ -14,6 +14,7 @@ LaTeX: pdfLaTeX AutoAppendNewline: Yes StripTrailingWhitespace: Yes +LineEndingConversion: Posix BuildType: Package PackageUseDevtools: Yes diff --git a/docs/404.html b/docs/404.html index 105968f..69b5c6a 100644 --- a/docs/404.html +++ b/docs/404.html @@ -11,8 +11,7 @@ - - + - - + -// v0.0.1 -// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. - -document.addEventListener('DOMContentLoaded', function() { - const codeList = document.getElementsByClassName("sourceCode"); - for (var i = 0; i < codeList.length; i++) { - var linkList = codeList[i].getElementsByTagName('a'); - for (var j = 0; j < linkList.length; j++) { - if (linkList[j].innerHTML === "") { - linkList[j].setAttribute('aria-hidden', 'true'); - } - } - } -}); diff --git a/docs/articles/a01_overview_files/header-attrs-2.9/header-attrs.js b/docs/articles/a01_overview_files/header-attrs-2.9/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/a01_overview_files/header-attrs-2.9/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/a02_fitting_models.html b/docs/articles/a02_fitting_models.html index f30af88..f4096a1 100644 --- a/docs/articles/a02_fitting_models.html +++ b/docs/articles/a02_fitting_models.html @@ -11,8 +11,7 @@ - - + -// v0.0.1 -// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. - -document.addEventListener('DOMContentLoaded', function() { - const codeList = document.getElementsByClassName("sourceCode"); - for (var i = 0; i < codeList.length; i++) { - var linkList = codeList[i].getElementsByTagName('a'); - for (var j = 0; j < linkList.length; j++) { - if (linkList[j].innerHTML === "") { - linkList[j].setAttribute('aria-hidden', 'true'); - } - } - } -}); diff --git a/docs/articles/a02_fitting_models_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/a02_fitting_models_files/figure-html/unnamed-chunk-16-1.png deleted file mode 100644 index c1e5d43..0000000 Binary files a/docs/articles/a02_fitting_models_files/figure-html/unnamed-chunk-16-1.png and /dev/null differ diff --git a/docs/articles/a02_fitting_models_files/header-attrs-2.9/header-attrs.js b/docs/articles/a02_fitting_models_files/header-attrs-2.9/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/a02_fitting_models_files/header-attrs-2.9/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/a03_expanding_estimates.html b/docs/articles/a03_expanding_estimates.html index d6bdd71..2f029ce 100644 --- a/docs/articles/a03_expanding_estimates.html +++ b/docs/articles/a03_expanding_estimates.html @@ -11,8 +11,7 @@ - - + -// v0.0.1 -// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. - -document.addEventListener('DOMContentLoaded', function() { - const codeList = document.getElementsByClassName("sourceCode"); - for (var i = 0; i < codeList.length; i++) { - var linkList = codeList[i].getElementsByTagName('a'); - for (var j = 0; j < linkList.length; j++) { - if (linkList[j].innerHTML === "") { - linkList[j].setAttribute('aria-hidden', 'true'); - } - } - } -}); diff --git a/docs/articles/a03_expanding_estimates_files/header-attrs-2.9/header-attrs.js b/docs/articles/a03_expanding_estimates_files/header-attrs-2.9/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/a03_expanding_estimates_files/header-attrs-2.9/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/a04_diagnosing_problems.html b/docs/articles/a04_diagnosing_problems.html index ab60faa..d605d68 100644 --- a/docs/articles/a04_diagnosing_problems.html +++ b/docs/articles/a04_diagnosing_problems.html @@ -11,8 +11,7 @@ - - + -
- - - - -vignettes/background.Rmd
- background.Rmd
Previous authors, including Gardner et al. (2008) and Martin et al. (2015) have assumed that rare even bycatch follows a Poisson process, where the observed bycatch events \(y_{t}\) are modeled according to some estimated bycatch rate \(\lambda\),
-\[p(y_{t}|\lambda) = e^{-\lambda}\frac{\lambda^{y_{t}}}{y_{t}!}\] where \(\lambda\) is the mean of the Poisson distribution. The mean rate parameter \(\lambda\) can be further decomposed into a per - event (e.g. set) parameter and the number of observed sets \(n_{t}\), \(\lambda = \theta * n_{t}\). In a GLM setting, the log-link function is often used to model effects of covariates, e.g. \(log(\lambda) = log(\theta) + b_{1}*x_{1} + b_{2}*x_{2}...\), and in this setting the known number of sets \(n_{t}\) are treated as an offset, \(log(\lambda) = log(\theta) + b_{1}*x_{1} + b_{2}*x_{2} + log(n_{t})\).
-Estimation of \(\lambda\) or \(\theta\) can be done in a maximum likelihood or Bayesian setting, and here we use Bayesian methods to estimate the posterior distribution of the parameters given observed bycatch data,
-\[p(\theta|\underline{\mathbf{y}}) \propto p(\theta)p(\underline{\mathbf{y}}|\theta)\]
-In addition to the Poisson model described here, we include the Negative Binomial model, which allows for the variance to be greater than the mean. We use the ‘nbinom2’ alternative parameterization of the Negative Binomial, where \(Var(Y) = u + \frac{u^2}{\tau}\), where \(\tau\) controls the degree of overdispersion. Two additional extensions to these models are the zero-inflated extensions. We adopt hurdle models to model extra zeros, so that \(p(y_{t}=0) = \phi\) and \(p(y_{t}>0) = 1-\phi\).
-As described above, any type of fixed effects can be included as a predictors of bycatch in the formula interface. Coefficients are estimated for all coefficients using the log-link, \(log(\lambda) = log(\theta) + b_{1}*x_{1} + b_{2}*x_{2} + log(n_{t})\), where \(x_{1}\) and \(x_{2}\) are covariates.
-In some cases, such as when covariates may be missing or not completely observed, it may be useful to include time effects as time - varying. We have implemented these as random effects, in link (log) space. We can modify the predictions of bycatch rate to be \(log(\lambda) = log(\theta) + b_{1}*x_{1} + b_{2}*x_{2} + log(n_{t}) + \omega_{t}\), where \(\omega_{t}\) is the latent time effect. We constrain the time effects to be a random walk, so that \(\omega_{t} \sim Normal(\omega_{t-1},\sigma_{\omega})\). Extensions of this model could include autoregressive coefficients to help aid in stationarity.
-A major goal of bycatch analyses with partially observed datasets is estimating the total unobserved bycatch. Because the total numbers of effort (sets, trips, etc) is known, and the number of observed units of effort is known, the numbers of unobserved units is also known, \(N_{t}-n_{t}\).
-Using the Bayesian framework, we can generate samples from the posterior predictive distribution of unobserved sets, using the same distribution that is assumed for the observed sets (Poisson, Negative Binomial, etc). This posterior predictive approach assumes that the distribution of unobserved bycatch is
-\[P(Y_{t}-y_{t} | \theta, N_{t}, n_{t}) = \int_{\theta} p(Y_{t}-y_{t} | \theta, N_{t} - n_{t})p(\theta | \underline{\mathbf{y}})d\theta\]
-The integration includes a product of two quantities. The first is the probability of individual bycatch values (e.g. 0, 1, 2, …) conditioned on the estimated parameters \(\theta\) and number of unobserved units \(N_{t}-n_{t}\), and the second represents the posterior distribution of the parameters given all data, \(p(\theta | \underline{\mathbf{y}})\). By generating large numbers of samples from the posterior predictive distribution, we can generate credible intervals and other summary statistics on the distribution.
-vignettes/expanding.Rmd
- expanding.Rmd
-#library(devtools)
-#devtools::install_github("ericward-noaa/bycatch")
-library(bycatch)
-set.seed(123)
Previous authors, including Gardner et al. (2008) and Martin et al. (2015) have assumed that rare even bycatch follows a Poisson process, where the observed bycatch events \(y_{t}\) are modeled according to some estimated bycatch rate \(\lambda\),
-\[p(y_{t}|\lambda) = e^{-\lambda}\frac{\lambda^{y_{t}}}{y_{t}!}\] where \(\lambda\) is the mean of the Poisson distribution. The mean rate parameter \(\lambda\) can be further decomposed into a per - event (e.g. set) parameter and the number of observed sets \(n_{t}\), \(\lambda = \theta * n_{t}\). In a GLM setting, the log-link function is often used to model effects of covariates, e.g. \(log(\lambda) = log(\theta) + B_{1}*x_{1} + B_{2}*x_{2}...\), and in this setting the known number of sets \(n_{t}\) are treated as an offset, \(log(\lambda) = log(\theta) + B_{1}*x_{1} + B_{2}*x_{2} + log(n_{t})\).
-The reported effort (here, sets) represents the total events that are observed. Focusing on a single year of data, the expansion rate, \(p_{obs}\), represents the fraction (n / 100) of the sets that are observed. The total sets are also assumed known, \(N = e_{obs} + e_{unobs}\). So we are interested in estimating \(e_{unobs}\) given \(N\) and \(e_{obs}\), where \(N = e_{obs} / p_{obs}\) and \(e_{unobs} = e_{obs} * (1/p_{obs} - 1)\).
-
-# replace this with your own data frame
-d = data.frame("Year"= 2002:2014,
- "Takes" = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0),
- "expansionRate" = c(24, 22, 14, 32, 28, 25, 30, 7, 26, 21, 22, 23, 27),
- "Sets" = c(391, 340, 330, 660, 470, 500, 330, 287, 756, 673, 532, 351, 486))
We’ll start by fitting a model with constant bycatch rate,
-
-fit = fit_bycatch(Takes ~ 1, data=d, time="Year", effort="Sets", family="poisson",
- time_varying = FALSE)
Using our example above, the observer coverage for that dataset was less than 100% and so our estimates need to be expanded to the fleetwide level. There are some important control
arguments here that are left at defaults, but should maybe be changed for huge numbers of bycatch.
-expanded = expand(fit, coverage = d$expansionRate)
And we can then plot these estimates. Like the previous function we can specify whether to include the raw points or not.
-
-plot_expanded(fitted_model=fit, expanded_estimates = expanded, xlab="Year", ylab = "Fleet-level bycatch", include_points = TRUE)
We can also do things like summarize the expanded estimates in table form
-
-df = data.frame("time" = d[,"Year"],
- "mean" = apply(expanded, 2, mean),
- "median" = apply(expanded, 2, quantile, 0.5),
- "lower95" = apply(expanded, 2, quantile, 0.025),
- "upper95" = apply(expanded, 2, quantile, 0.975))
-
-write.table(df, "estimated_bycatch.csv", row.names=F, col.names=T, sep=",")
Stan Development Team (2018). RStan: the R interface to Stan. R package version 2.18.2. http://mc-stan.org
+Stan Development Team (2023). RStan: the R interface to Stan. R package version 2.21.8. https://mc-stan.org