Skip to content

Commit

Permalink
remove pfilter files; update examples
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Clark committed Apr 22, 2024
1 parent 48e6121 commit 5e33da0
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 1,280 deletions.
16 changes: 9 additions & 7 deletions R/add_nmixture.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ add_nmixture = function(model_file,
data_train,
data_test = NULL,
trend_map = NULL,
nmix_trendmap = FALSE,
nmix_trendmap = TRUE,
orig_trend_model){

insight::check_if_installed("extraDistr",
Expand Down Expand Up @@ -152,7 +152,7 @@ add_nmix_data = function(model_data,
data_train,
data_test,
trend_map,
nmix_trendmap){
nmix_trendmap = TRUE){
model_data$ytimes_array <- as.vector(model_data$ytimes)

#### Perform necessary checks on 'cap' (positive integers, no missing values) ####
Expand Down Expand Up @@ -481,7 +481,9 @@ add_nmix_posterior = function(model_output,
obs_data,
test_data,
mgcv_model,
n_lv, Z, K_inds){
n_lv,
Z,
K_inds){

# Function to add samples to the 'sim' slot of a stanfit object
add_samples = function(model_output, names, samples, nsamples, nchains,
Expand Down Expand Up @@ -574,15 +576,15 @@ add_nmix_posterior = function(model_output,
dplyr::mutate(min_cap = max(truth, na.rm = TRUE)) %>%
dplyr::pull(min_cap)
}
if(is.null(K_inds)){
K_inds <- matrix(1:length(truth), ncol = 1)
}

# K_inds was originally supplied in series, time order
# so the corresponding truth must be supplied that way
truth_df %>%
dplyr::arrange(series, time) %>%
dplyr::pull(y) -> orig_y
if(is.null(K_inds)){
K_inds <- matrix(1:length(orig_y), ncol = 1)
}
min_cap <- suppressWarnings(get_min_cap(orig_y, K_inds))
min_cap[!is.finite(min_cap)] <- 0

Expand Down Expand Up @@ -703,7 +705,7 @@ add_nmix_posterior = function(model_output,
function(x) paste0('lv_coefs[',
x[1], ',',
x[2], ']'))
lv_coef_samps <- t(replicate(NROW(ps), as.vector(t(Z))))
lv_coef_samps <- as.matrix(replicate(NROW(ps), as.vector(t(Z))))
model_output <- add_samples(model_output = model_output,
names = lv_coef_names,
samples = lv_coef_samps,
Expand Down
223 changes: 97 additions & 126 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,144 +67,115 @@ student_t = function(link = 'identity'){
#' @rdname mvgam_families
#' @examples
#' \donttest{
#' # An N-mixture example using family nmix()
#' # Simulate data from a Poisson-Binomial N-Mixture model
#' # True abundance is predicted by a single nonlinear function of temperature
#' # as well as a nonlinear long-term trend (as a Gaussian Process function)
#' set.seed(123)
#' gamdat <- gamSim(n = 80); N <- NROW(gamdat)
#' abund_linpred <- gamdat$y; temperature <- gamdat$x2
#' # Example showing how to set up N-mixture models
#' set.seed(999)
#'# Simulate observations for species 1, which shows a declining trend and 0.7 detection probability
#'data.frame(site = 1,
#' # five replicates per year; six years
#' replicate = rep(1:5, 6),
#' time = sort(rep(1:6, 5)),
#' species = 'sp_1',
#' # true abundance declines nonlinearly
#' truth = c(rep(28, 5),
#' rep(26, 5),
#' rep(23, 5),
#' rep(16, 5),
#' rep(14, 5),
#' rep(14, 5)),
#' # observations are taken with detection prob = 0.7
#' obs = c(rbinom(5, 28, 0.7),
#' rbinom(5, 26, 0.7),
#' rbinom(5, 23, 0.7),
#' rbinom(5, 15, 0.7),
#' rbinom(5, 14, 0.7),
#' rbinom(5, 14, 0.7))) %>%
#' # add 'series' information, which is an identifier of site, replicate and species
#' dplyr::mutate(series = paste0('site_', site,
#' '_', species,
#' '_rep_', replicate),
#' time = as.numeric(time),
#' # add a 'cap' variable that defines the maximum latent N to
#' # marginalize over when estimating latent abundance; in other words
#' # how large do we realistically think the true abundance could be?
#' cap = 100) %>%
#' dplyr::select(- replicate) -> testdat
#'
#' # A function to simulate from a squared exponential Gaussian Process
#' sim_gp = function(N, c, alpha, rho){
#' Sigma <- alpha ^ 2 *
#' exp(-0.5 * ((outer(1:N, 1:N, "-") / rho) ^ 2)) +
#' diag(1e-9, N)
#' c + mgcv::rmvn(1,
#' mu = rep(0, N),
#' V = Sigma)
#' }
#' trend <- sim_gp(alpha = 3,
#' rho = 16,
#' c = 0,
#' N = N)
#' true_abund <- floor(10 + abund_linpred + trend)
#'
#' # Detection probability increases linearly with decreasing rainfall
#' rainfall <- rnorm(N)
#' detect_linpred <- 0.4 + -0.55 * rainfall
#' detect_prob <- plogis(detect_linpred)
#'
#' # Simulate observed counts
#' obs_abund <- rbinom(N, size = true_abund, prob = detect_prob)
#'
#' # Plot true and observed time series
#' plot(true_abund,
#' type = 'l',
#' ylab = 'Abundance',
#' xlab = 'Time',
#' ylim = c(0, max(true_abund)),
#' bty = 'l',
#' lwd = 2)
#' lines(obs_abund, col = 'darkred', lwd = 2)
#' title(main = 'True = black; observed = red')
#'# Now add another species that has a different temporal trend and a smaller
#'# detection probability (0.45 for this species)
#'testdat = testdat %>%
#' dplyr::bind_rows(data.frame(site = 1,
#' replicate = rep(1:5, 6),
#' time = sort(rep(1:6, 5)),
#' species = 'sp_2',
#' truth = c(rep(4, 5),
#' rep(7, 5),
#' rep(15, 5),
#' rep(16, 5),
#' rep(19, 5),
#' rep(18, 5)),
#' obs = c(rbinom(5, 4, 0.45),
#' rbinom(5, 7, 0.45),
#' rbinom(5, 15, 0.45),
#' rbinom(5, 16, 0.45),
#' rbinom(5, 19, 0.45),
#' rbinom(5, 18, 0.45))) %>%
#' dplyr::mutate(series = paste0('site_', site,
#' '_', species,
#' '_rep_', replicate),
#' time = as.numeric(time),
#' cap = 50) %>%
#' dplyr::select(-replicate))
#'
#' # Gather data into a dataframe suitable for mvgam modelling;
#' # This will require a 'cap' variable specifying the maximum K to marginalise
#' # over when estimating latent abundance (it does NOT have to be a fixed value)
#' model_dat <- data.frame(obs_abund,
#' temperature,
#' rainfall,
#' cap = max(obs_abund) + 20,
#' time = 1:N,
#' series = as.factor('series1'))
#' # series identifiers
#' testdat$species <- factor(testdat$species,
#' levels = unique(testdat$species))
#' testdat$series <- factor(testdat$series,
#' levels = unique(testdat$series))
#'
#' # Training and testing folds
#' data_train <- model_dat %>% dplyr::filter(time <= 74)
#' data_test <- model_dat %>% dplyr::filter(time > 74)
#'
#' # Fit a model with informative priors on the two intercept parameters
#' # and on the length scale of the GP temporal trend parameter
#' # Note that the 'trend_formula' applies to the latent count process
#' # (a Poisson process with log-link), while the 'formula' applies to the
#' # detection probability (logit link)
#' mod <- mvgam(formula = obs_abund ~ rainfall,
#' trend_formula = ~ s(temperature, k = 5) +
#' gp(time, k = 10, c = 5/4, scale = FALSE),
#' family = nmix(),
#' data = data_train,
#' newdata = data_test,
#' priors = c(prior(std_normal(), class = '(Intercept)'),
#' prior(normal(2, 2), class = '(Intercept)_trend'),
#' prior(normal(15, 5), class = 'rho_gp_trend(time)')))
#'
#' # Model summary and diagnostics
#' summary(mod)
#' plot(mod, type = 'residuals')
#' # The trend_map to state how replicates are structured
#' testdat %>%
#' # each unique combination of site*species is a separate process
#'dplyr::mutate(trend = as.numeric(factor(paste0(site, species)))) %>%
#' dplyr::select(trend, series) %>%
#' dplyr::distinct() -> trend_map
#'trend_map
#'
#' # Intercept parameters
#' mcmc_plot(mod,
#' variable = "Intercept",
#' regex = TRUE,
#' type = 'hist')
#' # Fit a model
#' mod <- mvgam(
#' # the observation formula sets up linear predictors for
#' # detection probability on the logit scale
#' formula = obs ~ species - 1,
#'
#' # Hindcasts and forecasts of latent abundance (with truth overlain)
#' fc <- forecast(mod, type = 'latent_N')
#' plot(fc); points(true_abund, pch = 16, cex = 0.8)
#' # the trend_formula sets up the linear predictors for
#' # the latent abundance processes on the log scale
#' trend_formula = ~ s(time, by = trend, k = 4) + species,
#'
#' # Latent abundance predictions are restricted based on 'cap'
#' max(model_dat$cap); range(fc$forecasts[[1]])
#' # the trend_map takes care of the mapping
#' trend_map = trend_map,
#'
#' # Hindcasts and forecasts of detection probability (with truth overlain)
#' fc <- forecast(mod, type = 'detection')
#' plot(fc); points(detect_prob, pch = 16, cex = 0.8)
#' # nmix() family and data
#' family = nmix(),
#' data = testdat,
#'
#' # Hindcasts and forecasts of observations
#' # (after accounting for detection error)
#' fc <- forecast(mod, type = 'response')
#' plot(fc)
#' # priors can be set in the usual way
#' priors = c(prior(std_normal(), class = b),
#' prior(normal(1, 1.5), class = Intercept_trend)),
#' samples = 1000)
#'
#' # Hindcasts and forecasts of response expectations
#' # (with truth overlain)
#' fc <- forecast(object = mod, type = 'expected')
#' plot(fc); points(detect_prob * true_abund, pch = 16, cex = 0.8)
#' # The usual diagnostics
#' summary(mod)
#' code(mod)
#' loo(mod)
#'
#' # Plot conditional effects
#' # Plotting conditional effects
#' library(ggplot2)
#'
#' # Effects on true abundance can be visualised using type = 'link'
#' abund_plots <- plot(conditional_effects(mod,
#' type = 'link',
#' effects = c('temperature', 'time')),
#' plot = FALSE)
#'
#' # Effect of temperature on abundance
#' abund_plots[[1]] +
#' ylab('Latent abundance')
#'
#' # Long-term trend in abundance
#' abund_plots[[2]] +
#' ylab('Latent abundance')
#'
#' # Effect of rainfall on detection probability
#' det_plot <- plot(conditional_effects(mod,
#' type = 'detection',
#' effects = 'rainfall'),
#' plot = FALSE)
#' det_plot[[1]] +
#' ylab('Pr(detection)')
#'
#' # More targeted plots can use marginaleffects capabilities;
#' # Here visualise how response predictions might change
#' # if we considered different possible 'cap' limits on latent
#' # abundance and different rainfall measurements
#' plot_predictions(mod, condition = list('temperature',
#' cap = c(15, 20, 40),
#' rainfall = c(-1, 1)),
#' type = 'response',
#' conf_level = 0.5) +
#' ylab('Observed abundance') +
#' theme_classic()
#' plot_predictions(mod, condition = 'species',
#' type = 'detection') +
#' ylab('Pr(detection)') +
#' ylim(c(0, 1)) +
#' theme_classic() +
#' theme(legend.position = 'none')
#'
#' # Example showcasing how cbind() is needed for Binomial observations
#' # Simulate two time series of Binomial trials
Expand Down
20 changes: 0 additions & 20 deletions R/mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -329,26 +329,6 @@
#' # View the model code in Stan language
#' code(mod1)
#'
#' # Inspect the data objects needed to condition the model
#' str(mod1$model_data)
#'
#' # The following code can be used to run the model outside of mvgam; first using rstan
#' model_data <- mod1$model_data
#' library(rstan)
#' fit <- stan(model_code = mod1$model_file,
#' data = model_data)
#'
#' # Now using cmdstanr
#' library(cmdstanr)
#' model_data <- mod1$model_data
#' cmd_mod <- cmdstan_model(write_stan_file(mod1$model_file),
#' stanc_options = list('canonicalize=deprecations,braces,parentheses'))
#' cmd_mod$print()
#' fit <- cmd_mod$sample(data = model_data,
#' chains = 4,
#' parallel_chains = 4,
#' refresh = 100)
#'
#' # Now fit the model using mvgam with the Stan backend
#' mod1 <- mvgam(formula = y ~ s(season, bs = 'cc'),
#' data = dat$data_train,
Expand Down
Loading

0 comments on commit 5e33da0

Please sign in to comment.