diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 0eb001e0..7a3c9298 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -61,5 +61,5 @@ jobs: shell: Rscript {0} - name: Test coverage - run: covr::codecov() + run: covr::codecov(line_exclusions = "R/stan_utils.R") shell: Rscript {0} diff --git a/NAMESPACE b/NAMESPACE index 16a8d14e..c76dad26 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -109,6 +109,7 @@ export(gp) export(hindcast) export(hypotheses) export(irf) +export(jsdgam) export(lfo_cv) export(lognormal) export(loo) diff --git a/NEWS.md b/NEWS.md index ceafd6cb..9418880e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,10 @@ # mvgam 1.1.4 (development version; not yet on CRAN) ## New functionalities -* Added a `stability.mvgam` method to compute stability metrics from models fit with Vector Autoregressive dynamics (#21) +* Added function `jsdgam()` to estimate Joint Species Distribution Models in which both the latent factors and the observation model components can include any of mvgam's complex linear predictor effects. See `?mvgam::jsdgam` for details +* Added a `stability.mvgam()` method to compute stability metrics from models fit with Vector Autoregressive dynamics (#21 and #76) * Added functionality to estimate hierarchical error correlations when using multivariate latent process models and when the data are nested among levels of a relevant grouping factor (#75); see `?mvgam::AR` for an example * Added `ZMVN()` error models for estimating Zero-Mean Multivariate Normal errors; convenient for working with non time-series data where latent residuals are expected to be correlated (such as when fitting Joint Species Distribution Models); see `?mvgam::ZMVN` for examples -======= -* Added a `fevd.mvgam` method to compute forecast error variance decompositions from models fit with Vector Autoregressive dynamics +* Added a `fevd.mvgam()` method to compute forecast error variance decompositions from models fit with Vector Autoregressive dynamics (#21 and #76) ## Bug fixes * Fixed a minor bug in the way `trend_map` recognises levels of the `series` factor @@ -15,8 +15,8 @@ * Allow intercepts to be included in process models when `trend_formula` is supplied. This breaks the assumption that the process has to be zero-centred, adding more modelling flexibility but also potentially inducing nonidentifiabilities with respect to any observation model intercepts. Thoughtful priors are a must for these models * Added `standata.mvgam_prefit`, `stancode.mvgam` and `stancode.mvgam_prefit` methods for better alignment with 'brms' workflows * Added 'gratia' to *Enhancements* to allow popular methods such as `draw()` to be used for 'mvgam' models if 'gratia' is already installed -* Added an `ensemble.mvgam_forecast` method to generate evenly weighted combinations of probabilistic forecast distributions -* Added an `irf.mvgam` method to compute Generalized and Orthogonalized Impulse Response Functions (IRFs) from models fit with Vector Autoregressive dynamics +* Added an `ensemble.mvgam_forecast()` method to generate evenly weighted combinations of probabilistic forecast distributions +* Added an `irf.mvgam()` method to compute Generalized and Orthogonalized Impulse Response Functions (IRFs) from models fit with Vector Autoregressive dynamics ## Deprecations * The `drift` argument has been deprecated. It is now recommended for users to include parametric fixed effects of "time" in their respective GAM formulae to capture any expected drift effects diff --git a/R/add_MACor.R b/R/add_MACor.R index 824d8186..1c94505f 100644 --- a/R/add_MACor.R +++ b/R/add_MACor.R @@ -1893,7 +1893,7 @@ add_MaCor = function(model_file, model_file[grep("// posterior predictions", model_file, fixed = TRUE)] <- paste0('// computed (full) error covariance matrix\n', - 'matrix[n_lv, n_lv] Sigma;\n', + 'matrix[n_lv, n_lv] Sigma;\n', 'Sigma = rep_matrix(0, n_lv, n_lv);\n', 'for (g in 1 : n_groups){\n', 'Sigma[group_inds[g], group_inds[g]] = multiply_lower_tri_self_transpose(L_Sigma_group[g]);\n', @@ -1904,7 +1904,7 @@ add_MaCor = function(model_file, model_file[grep("// posterior predictions", model_file, fixed = TRUE)] <- paste0('// computed (full) error covariance matrix\n', - 'matrix[n_series, n_series] Sigma;\n', + 'matrix[n_series, n_series] Sigma;\n', 'Sigma = rep_matrix(0, n_series, n_series);\n', 'for (g in 1 : n_groups){\n', 'Sigma[group_inds[g], group_inds[g]] = multiply_lower_tri_self_transpose(L_Sigma_group[g]);\n', diff --git a/R/forecast.mvgam.R b/R/forecast.mvgam.R index c9e849bb..c0bf46b8 100644 --- a/R/forecast.mvgam.R +++ b/R/forecast.mvgam.R @@ -84,17 +84,24 @@ forecast.mvgam = function(object, type <- match.arg(arg = type, choices = c("link", "response", "trend", "expected", "detection", "latent_N")) + + if(inherits(object, 'jsdgam')){ + orig_trend_model <- attr(object$model_data, 'prepped_trend_model') + } else { + orig_trend_model <- object$trend_model + } + data_train <- validate_series_time(object$obs_data, - trend_model = object$trend_model) + trend_model = orig_trend_model) n_series <- NCOL(object$ytimes) # Check whether a forecast has already been computed forecasts_exist <- FALSE if(!is.null(object$test_data) && !missing(data_test)){ object$test_data <- validate_series_time(object$test_data, - trend_model = object$trend_model) + trend_model = orig_trend_model) data_test <- validate_series_time(data_test, - trend_model = object$trend_model) + trend_model = orig_trend_model) if(max(data_test$index..time..index) <= max(object$test_data$index..time..index)){ forecasts_exist <- TRUE @@ -126,7 +133,7 @@ forecast.mvgam = function(object, if(is.null(object$test_data)){ data_test <- validate_series_time(data_test, name = 'newdata', - trend_model = object$trend_model) + trend_model = orig_trend_model) data.frame(series = object$obs_data$series, time = object$obs_data$time) %>% dplyr::group_by(series) %>% @@ -176,7 +183,7 @@ forecast.mvgam = function(object, data_test$y <- rep(NA, NROW(data_test)) } data_test <- validate_series_time(data_test, name = 'newdata', - trend_model = object$trend_model) + trend_model = orig_trend_model) } # Generate draw-specific forecasts @@ -198,7 +205,7 @@ forecast.mvgam = function(object, # Extract hindcasts data_train <- validate_series_time(object$obs_data, - trend_model = object$trend_model) + trend_model = orig_trend_model) ends <- seq(0, dim(mcmc_chains(object$model_output, 'ypred'))[2], length.out = NCOL(object$ytimes) + 1) starts <- ends + 1 @@ -330,12 +337,12 @@ forecast.mvgam = function(object, } else { # If forecasts already exist, simply extract them data_test <- validate_series_time(object$test_data, - trend_model = object$trend_model) + trend_model = orig_trend_model) last_train <- max(object$obs_data$index..time..index) - (min(object$obs_data$index..time..index) - 1) data_train <- validate_series_time(object$obs_data, - trend_model = object$trend_model) + trend_model = orig_trend_model) ends <- seq(0, dim(mcmc_chains(object$model_output, 'ypred'))[2], length.out = NCOL(object$ytimes) + 1) starts <- ends + 1 @@ -593,8 +600,13 @@ forecast_draws = function(object, # Check arguments validate_pos_integer(n_cores) + if(inherits(object, 'jsdgam')){ + orig_trend_model <- attr(object$model_data, 'prepped_trend_model') + } else { + orig_trend_model <- object$trend_model + } data_test <- validate_series_time(data_test, name = 'newdata', - trend_model = object$trend_model) + trend_model = orig_trend_model) data_test <- sort_data(data_test) n_series <- NCOL(object$ytimes) use_lv <- object$use_lv @@ -695,7 +707,7 @@ forecast_draws = function(object, # No need to compute in parallel if there was no trend model nmix_notrend <- FALSE - if(!inherits(object$trend_model, 'mvgam_trend') & + if(!inherits(orig_trend_model, 'mvgam_trend') & object$family == 'nmix'){ nmix_notrend <- TRUE } diff --git a/R/globals.R b/R/globals.R index e90a1c69..7c47f8b3 100644 --- a/R/globals.R +++ b/R/globals.R @@ -19,4 +19,5 @@ utils::globalVariables(c("y", "year", "smooth_vals", "smooth_num", "time_lag", "dis_time", "maxt", "orig_rows", "matches", "time.", "file_name", ".data", "horizon", "target", "Series", "evd", "mean_evd", - "total_evd")) + "total_evd", "smooth_label", "by_variable", + "gr", "tot_subgrs", "subgr")) diff --git a/R/index-mvgam.R b/R/index-mvgam.R index cbf69a55..b6b33ac0 100644 --- a/R/index-mvgam.R +++ b/R/index-mvgam.R @@ -48,10 +48,13 @@ variables.mvgam = function(x, ...){ # Linear predictor parameters observation_linpreds <- data.frame(orig_name = parnames[grepl('mus[', parnames, - fixed = TRUE)], + fixed = TRUE) & + !grepl('trend_mus[', + parnames, + fixed = TRUE)], alias = NA) - if(!is.null(x$trend_call)){ + if(!is.null(x$trend_call) & !inherits(x, 'jsdgam')){ trend_linpreds <- data.frame(orig_name = parnames[grepl('trend_mus[', parnames, fixed = TRUE)], @@ -71,7 +74,7 @@ variables.mvgam = function(x, ...){ mgcv_names <- names(coef(x$mgcv_model)) observation_betas <- data.frame(orig_name = b_names, alias = mgcv_names) - if(!is.null(x$trend_call)){ + if(!is.null(x$trend_call) & !inherits(x, 'jsdgam')){ b_names <- colnames(mcmc_chains(x$model_output, 'b_trend')) mgcv_names <- gsub('series', 'trend', paste0(names(coef(x$trend_mgcv_model)), '_trend')) @@ -97,7 +100,7 @@ variables.mvgam = function(x, ...){ } trend_re_params <- NULL - if(!is.null(x$trend_call)){ + if(!is.null(x$trend_call) & !inherits(x, 'jsdgam')){ if(any(unlist(purrr::map(x$trend_mgcv_model$smooth, inherits, 'random.effect')))){ re_labs <- unlist(lapply(purrr::map(x$trend_mgcv_model$smooth, 'term'), paste, collapse = ','))[ @@ -125,7 +128,7 @@ variables.mvgam = function(x, ...){ observation_smoothpars <- NULL } - if(any(grepl('rho_trend[', parnames, fixed = TRUE))){ + if(any(grepl('rho_trend[', parnames, fixed = TRUE)) & !inherits(x, 'jsdgam')){ trend_smoothpars <- data.frame(orig_name = parnames[grepl('rho_trend[', parnames, fixed = TRUE)], @@ -136,7 +139,8 @@ variables.mvgam = function(x, ...){ # Trend state parameters if(any(grepl('trend[', parnames, fixed = TRUE) & - !grepl('_trend[', parnames, fixed = TRUE))){ + !grepl('_trend[', parnames, fixed = TRUE)) & + !inherits(x, 'jsdgam')){ trend_states <- grepl('trend[', parnames, fixed = TRUE) & !grepl('_trend[', parnames, fixed = TRUE) trends <- data.frame(orig_name = parnames[trend_states], diff --git a/R/jsdgam.R b/R/jsdgam.R new file mode 100644 index 00000000..b3e8ee06 --- /dev/null +++ b/R/jsdgam.R @@ -0,0 +1,585 @@ +#'Fit Joint Species Distribution Models in \pkg{mvgam} +#' +#'This function sets up a Joint Species Distribution Model whereby the residual associations among +#'species can be modelled in a reduced-rank format using a set of latent factors. The factor +#'specification is extremely flexible, allowing users to include spatial, temporal or any other type +#'of predictor effects to more efficiently capture unmodelled residual associations, while the +#'observation model can also be highly flexible (including all smooth, GP and other effects that +#'\pkg{mvgam} can handle) +#' +#'@inheritParams mvgam +#'@inheritParams ZMVN +#'@param factor_formula A \code{character} string specifying the linear predictor +#'effects for the latent factors. These are exactly like the formula +#'for a GLM except that smooth terms, `s()`, `te()`, `ti()`, `t2()`, as well as time-varying +#'`dynamic()` terms and nonparametric `gp()` terms, can be added to the right hand side +#'to specify that the linear predictor depends on smooth functions of predictors +#'(or linear functionals of these). Details of the formula syntax used by \pkg{mvgam} +#'can be found in \code{\link{mvgam_formulae}} +#'@param factor_knots An optional \code{list} containing user specified knot values to +#' be used for basis construction of any smooth terms in `factor_formula`. +#'For most bases the user simply supplies the knots to be used, which must match up with the `k` value supplied +#'(note that the number of knots is not always just `k`). Different terms can use different numbers of knots, +#'unless they share a covariate +#'@param data A \code{dataframe} or \code{list} containing the model response variable and covariates +#'required by the GAM \code{formula} and \code{factor_formula} objects +#'@param family \code{family} specifying the exponential observation family for the series. Currently supported +#'families are: +#'\itemize{ +#' \item`gaussian()` for real-valued data +#' \item`betar()` for proportional data on `(0,1)` +#' \item`lognormal()` for non-negative real-valued data +#' \item`student_t()` for real-valued data +#' \item`Gamma()` for non-negative real-valued data +#' \item`bernoulli()` for binary data +#' \item`poisson()` for count data +#' \item`nb()` for overdispersed count data +#' \item`binomial()` for count data with imperfect detection when the number of trials is known; +#' note that the `cbind()` function must be used to bind the discrete observations and the discrete number +#' of trials +#' \item`beta_binomial()` as for `binomial()` but allows for overdispersion} +#'Default is `poisson()`. See \code{\link{mvgam_families}} for more details +#'@param ... Other arguments to pass to [mvgam()] +#'@author Nicholas J Clark +#'@details Joint Species Distribution Models allow for responses of multiple species to be +#'learned hierarchically, whereby responses to environmental variables in `formula` can be partially +#'pooled and any latent, unmodelled residual associations can also be learned. In \pkg{mvgam}, both of +#'these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched +#'flexibility to model full communities of species +#'@seealso [mvgam] +#'@return A \code{list} object of classes \code{jsdgam} and \code{mvgam} containing model output, +#'the text representation of the model file, +#'the mgcv model output (for easily generating simulations at +#'unsampled covariate values), Dunn-Smyth residuals for each series and key information needed +#'for other functions in the package. See \code{\link{mvgam-class}} for details. +#'Use `methods(class = "mvgam")` and `methods(class = "jsdgam")` for an overview on available methods +#'@examples +#'\dontrun{ +#' # Simulate latent count data for 500 spatial locations and 10 species +#' set.seed(0) +#' N_points <- 500 +#' N_species <- 10 +#' +#' # Species-level intercepts (on the log scale) +#' alphas <- runif(N_species, 2, 2.25) +#' +#' # Simulate a covariate and species-level responses to it +#' temperature <- rnorm(N_points) +#' betas <- runif(N_species, -0.5, 0.5) +#' +#' # Simulate background spatial points uniformly over a space +#' lon <- runif(N_points, min = 150, max = 155) +#' lat <- runif(N_points, min = -20, max = -19) +#' +#' # Set up spatial basis functions as a tensor product of lat and lon; +#' # keep this number small so there are noticeable spatiotemporal 'clusters' +#' sm <- mgcv::smoothCon(mgcv::te(lon, lat, k = 5), +#' data = data.frame(lon, lat), +#' knots = NULL)[[1]] +#' +#' # The design matrix for this smooth is in the 'X' slot +#' des_mat <- sm$X +#' dim(des_mat) +#' +#' # Function to generate a random covariance matrix where all variables +#' # have unit variance (i.e. diagonals are all 1) +#' random_Sigma = function(N){ +#' L_Omega <- matrix(0, N, N); +#' L_Omega[1, 1] <- 1; +#' for (i in 2 : N) { +#' bound <- 1; +#' for (j in 1 : (i - 1)) { +#' L_Omega[i, j] <- runif(1, -sqrt(bound), sqrt(bound)); +#' bound <- bound - L_Omega[i, j] ^ 2; +#' } +#' L_Omega[i, i] <- sqrt(bound); +#' } +#' Sigma <- L_Omega %*% t(L_Omega); +#' return(Sigma) +#' } +#' +#' # Simulate a variance-covariance matrix for the correlations among +#' # basis coefficients +#' Sigma <- random_Sigma(N = NCOL(des_mat)) +#' +#' # Now simulate the species-level basis coefficients hierarchically, where +#' # spatial basis function correlations are a convex sum of a base correlation +#' # matrix and a species-level correlation matrix +#' basis_coefs <- matrix(NA, nrow = N_species, ncol = NCOL(Sigma)) +#' base_field <- mgcv::rmvn(1, mu = rep(0, NCOL(Sigma)), V = Sigma) +#' for(t in 1:N_species){ +#' corOmega <- (cov2cor(Sigma) * 0.7) + +#' (0.3 * cov2cor(random_Sigma(N = NCOL(des_mat)))) +#' basis_coefs[t, ] <- mgcv::rmvn(1, mu = rep(0, NCOL(Sigma)), V = corOmega) +#' } +#' +#' # Simulate the latent spatiotemporal processes +#' st_process <- do.call(rbind, lapply(seq_len(N_species), function(t){ +#' data.frame(lat = lat, +#' lon = lon, +#' species = paste0('species_', t), +#' temperature = temperature, +#' process = alphas[t] + +#' betas[t] * temperature + +#' des_mat %*% basis_coefs[t,]) +#' })) +#' +#' # Now take noisy observations at some of the points (60) +#' obs_points <- sample(1:N_points, size = 60, replace = FALSE) +#' obs_points <- data.frame(lat = lat[obs_points], +#' lon = lon[obs_points], +#' site = 1:60) +#' +#' # Keep only the process data at these points +#' st_process %>% +#' dplyr::inner_join(obs_points, by = c('lat', 'lon')) %>% +#' # now take noisy Poisson observations of the process +#' dplyr::mutate(count = rpois(NROW(.), lambda = exp(process))) %>% +#' dplyr::mutate(species = factor(species, +#' levels = paste0('species_', 1:N_species))) %>% +#' dplyr::group_by(lat, lon) -> dat +#' +#' # View the count distributions for each species +#' library(ggplot2) +#' ggplot(dat, aes(x = count)) + +#' geom_histogram() + +#' facet_wrap(~ species, scales = 'free') +#' +#' ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) + +#' geom_point(size = 2.25) + +#' facet_wrap(~ species, scales = 'free') + +#' viridis::scale_color_viridis() + +#' theme_classic() +#' +#' # Fit a JSDM that estimates a hierarchical temperature responses +#' # and that uses three latent spatial factors +#' mod <- jsdgam(formula = count ~ +#' # Environmental model includes species-level interecepts +#' # and random slopes for a linear effect of temperature +#' species + +#' s(species, bs = 're', by = temperature), +#' +#' # Each factor estimates a different nonlinear spatial process, using +#' # 'by = trend' as in other mvgam State-Space models +#' factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1, +#' n_lv = 3, +#' +#' # The data and the grouping variables +#' data = dat, +#' unit = site, +#' subgr = species, +#' +#' # Poisson observations +#' family = poisson()) +#' +#' # Plot species-level intercept estimates +#' plot_predictions(mod, condition = 'species', +#' type = 'link') +#' +#' # Plot species' hierarchical responses to temperature +#' plot_predictions(mod, condition = c('temperature', 'species', 'species'), +#' type = 'link') +#' +#' # Plotting species' average geographical predictions against the observed +#' # data gives some idea of whether more flexibility in the spatial process +#' # may be needed +#' plot_predictions(mod, condition = c('lat', 'species', 'species'), +#' points = 0.5) +#' plot_predictions(mod, condition = c('lon', 'species', 'species'), +#' points = 0.5) +#' +#' # Calculate (median) residual spatial correlations +#' med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'), +#' 2, median), +#' nrow = N_species, ncol = 3) +#' cormat <- cov2cor(tcrossprod(med_loadings)) +#' rownames(cormat) <- colnames(cormat) <- levels(dat$species) +#' +#' # Plot it using corrplot (if installed) +#' if(requireNamespace('corrplot', quietly = TRUE)){ +#' corrplot::corrplot(cormat, type = 'lower', tl.col = 'black', +#' tl.srt = 45) +#' } +#' +#' # Posterior predictive checks and ELPD-LOO can ascertain model fit +#' pp_check(mod, type = "ecdf_overlay_grouped", +#' group = "species", ndraws = 50) +#' loo(mod) +#'} +#'@export +jsdgam = function(formula, + factor_formula = ~ -1, + knots, + factor_knots, + data, + newdata, + family = poisson(), + unit = time, + subgr = series, + share_obs_params = FALSE, + priors, + n_lv = 2, + chains = 4, + burnin = 500, + samples = 500, + thin = 1, + parallel = TRUE, + threads = 1, + silent = 1, + max_treedepth = 12, + adapt_delta = 0.85, + backend = getOption("brms.backend", "cmdstanr"), + algorithm = getOption("brms.algorithm", "sampling"), + run_model = TRUE, + return_model_data = FALSE, + ...){ + + #### Validate arguments and initialise the model skeleton #### + validate_pos_integer(n_lv) + + # Prep the trend so that the data can be structured in the usual + # mvgam fashion (with 'time' and 'series' variables) + unit <- deparse0(substitute(unit)) + subgr <- deparse0(substitute(subgr)) + prepped_trend <- prep_jsdgam_trend(unit = unit, + subgr = subgr, + data = data) + data_train <- validate_series_time(data = data, + trend_model = prepped_trend) + + # Set up a simple trend_map to get the model dimensions correct; + # this requires that we only have n_lv trends and that each series + # only maps to one distinct trend, resulting in a loading matrix of + # the correct size (n_series x n_lv) + trend_map <- prep_jsdgam_trendmap(data_train, n_lv) + + # Set up the model structure but leave autoformat off so that the + # model file can be easily modified + mod <- mvgam(formula = formula, + trend_formula = factor_formula, + knots = knots, + trend_knots = factor_knots, + family = family, + share_obs_params = share_obs_params, + priors = priors, + trend_model = 'None', + trend_map = trend_map, + data = data_train, + noncentred = TRUE, + run_model = FALSE, + autoformat = FALSE, + backend = backend, + ...) + model_file <- mod$model_file + + #### Modify model data and model file #### + # Remove Z from supplied data + model_file <- model_file[-grep("matrix[n_series, n_lv] Z; // matrix mapping series to latent states", + model_file, fixed = TRUE)] + + # Update transformed data + if(any(grepl('transformed data {', model_file, fixed = TRUE))){ + model_file[grep('transformed data {', model_file, fixed = TRUE)] <- + paste0('transformed data {\n', + '// Ensures identifiability of the model - no rotation of factors\n', + 'int M;\n', + 'M = n_lv * (n_series - n_lv) + n_lv * (n_lv - 1) / 2 + n_lv;') + } else { + model_file[grep('parameters {', model_file, fixed = TRUE)[1]] <- + paste0('// Ensures identifiability of the model - no rotation of factors\n', + 'int M;\n', + 'M = n_lv * (n_series - n_lv) + n_lv * (n_lv - 1) / 2 + n_lv;', + '}\nparameters {') + } + model_file <- readLines(textConnection(model_file), n = -1) + + # Update parameters + model_file <- model_file[-grep("// latent state SD terms", + model_file, fixed = TRUE)] + model_file <- model_file[-grep("vector[n_lv] sigma;", + model_file, fixed = TRUE)] + model_file[grep("matrix[n, n_lv] LV_raw;", + model_file, fixed = TRUE)] <- paste0( + "matrix[n, n_lv] LV_raw;\n\n", + "// factor lower triangle loading coefficients\n", + "vector[M] L;") + model_file <- readLines(textConnection(model_file), n = -1) + + # Update transformed parameters + model_file <- model_file[-grep("// latent states", + model_file, fixed = TRUE)] + model_file <- model_file[-grep("lv_coefs = Z;", + model_file, fixed = TRUE)] + model_file <- model_file[-grep("matrix[n, n_lv] LV;", + model_file, fixed = TRUE)] + model_file <- model_file[-grep("trend_mus = X_trend * b_trend;", + model_file, fixed = TRUE)] + model_file[grep("matrix[n_series, n_lv] lv_coefs;", + model_file, fixed = TRUE)] <- paste0( + "matrix[n_series, n_lv] lv_coefs = rep_matrix(0, n_series, n_lv);\n", + 'matrix[n, n_lv] LV;\n') + + starts <- grep("LV = LV_raw .* rep_matrix(sigma', rows(LV_raw));", + model_file, fixed = TRUE) + ends <- starts + 5 + model_file <- model_file[-(starts:ends)] + model_file[grep("// latent process linear predictors", + model_file, fixed = TRUE)] <- paste0( + "// latent process linear predictors\n", + "trend_mus = X_trend * b_trend;\n\n", + "// constraints allow identifiability of loadings\n", + "{\n", + "int index;\n", + "index = 0;\n", + "for (j in 1 : n_lv) {\n", + "for (i in j : n_series) {\n", + "index = index + 1;\n", + "lv_coefs[i, j] = L[index];\n", + "}\n", + "}\n", + "}\n\n", + "// raw latent factors (with linear predictors)\n", + "for (j in 1 : n_lv) {\n", "for (i in 1 : n) {\n", + "LV[i, j] = trend_mus[ytimes_trend[i, j]] + LV_raw[i, j];\n", + "}\n}\n") + + model_file <- model_file[-grep("// derived latent states", + model_file, fixed = TRUE)] + model_file <- readLines(textConnection(model_file), n = -1) + + # Update model block + sigma_prior <- grep("// priors for latent state SD parameters", + model_file, fixed = TRUE) + 1 + model_file <- model_file[-sigma_prior] + model_file[grep("// priors for latent state SD parameters", + model_file, fixed = TRUE)] <- paste0( + "// priors for factor loading coefficients\n", + "L ~ std_normal();") + model_file <- readLines(textConnection(model_file), n = -1) + + # Update generated quantities + model_file[grep('matrix[n, n_series] mus;', + model_file, + fixed = TRUE)] <- paste0( + 'matrix[n, n_series] mus;\n', + 'vector[n_lv] sigma;') + model_file[grep("penalty = 1.0 / (sigma .* sigma);", + model_file, fixed = TRUE)] <- paste0( + "penalty = rep_vector(1.0, n_lv);\n", + "sigma = rep_vector(1.0, n_lv);") + model_file <- readLines(textConnection(model_file), n = -1) + model_file <- sanitise_modelfile(model_file) + + # Remove Z from model_data as it is no longer needed + model_data <- mod$model_data + model_data$Z <- NULL + + #### Autoformat the Stan code #### + if(requireNamespace('cmdstanr', quietly = TRUE) & backend == 'cmdstanr'){ + if(requireNamespace('cmdstanr') & + cmdstanr::cmdstan_version() >= "2.29.0") { + model_file <- .autoformat(model_file, + overwrite_file = FALSE, + backend = 'cmdstanr', + silent = silent >= 1L) + } + model_file <- readLines(textConnection(model_file), + n = -1) + } else { + model_file <- .autoformat(model_file, + overwrite_file = FALSE, + backend = 'rstan', + silent = silent >= 1L) + model_file <- readLines(textConnection(model_file), + n = -1) + } + + # Remove lp__ from monitor params if VB is to be used + param <- unique(c(mod$monitor_pars, 'Sigma', 'LV')) + if(algorithm %in% c('meanfield', 'fullrank', 'pathfinder', 'laplace')){ + param <- param[!param %in% 'lp__'] + } + + #### Determine what to return #### + if(!run_model){ + mod$model_file <- model_file + mod$monitor_pars <- param + attr(model_data, 'trend_model') <- 'None' + attr(model_data, 'prepped_trend_model') <- prepped_trend + attr(model_data, 'noncentred') <- NULL + attr(model_data, 'threads') <- threads + mod$model_data <- model_data + out <- mod + } else { + # Check if cmdstan is accessible; if not, use rstan + if(backend == 'cmdstanr'){ + if(!requireNamespace('cmdstanr', quietly = TRUE)){ + warning('cmdstanr library not found. Defaulting to rstan') + use_cmdstan <- FALSE + } else { + use_cmdstan <- TRUE + if(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE))){ + warning('cmdstanr library found but Cmdstan not found. Defaulting to rstan') + use_cmdstan <- FALSE + } + } + } + + #### Run the model #### + if(use_cmdstan){ + + # Prepare threading and generate the model + cmd_mod <- .model_cmdstanr(model_file, + threads = threads, + silent = silent) + + # Condition the model using Cmdstan + out_gam_mod <- .sample_model_cmdstanr(model = cmd_mod, + algorithm = algorithm, + prior_simulation = FALSE, + data = model_data, + chains = chains, + parallel = parallel, + silent = silent, + max_treedepth = max_treedepth, + adapt_delta = adapt_delta, + threads = threads, + burnin = burnin, + samples = samples, + param = param, + save_all_pars = FALSE, + ...) + + } else { + # Condition the model using rstan + requireNamespace('rstan', quietly = TRUE) + out_gam_mod <- .sample_model_rstan(model = model_file, + algorithm = algorithm, + prior_simulation = FALSE, + data = model_data, + chains = chains, + parallel = parallel, + silent = silent, + max_treedepth = max_treedepth, + adapt_delta = adapt_delta, + threads = threads, + burnin = burnin, + samples = samples, + thin = thin, + ...) + } + + # After modeling (add a new class to make predictions and other post-processing + # simpler) + out1 <- mod + out1$model_output <- out_gam_mod + class(out1) <- c('mvgam') + mod_residuals <- dsresids_vec(out1) + rm(out1) + + # Add the posterior median coefficients to the mgcv objects + ss_gam <- mod$mgcv_model + V <- cov(mcmc_chains(out_gam_mod, 'b')) + ss_gam$Vp <- ss_gam$Vc <- V + p <- mcmc_summary(out_gam_mod, 'b', + variational = algorithm %in% c('meanfield', + 'fullrank', + 'pathfinder', + 'laplace'))[,c(4)] + names(p) <- names(ss_gam$coefficients) + ss_gam$coefficients <- p + + trend_mgcv_model <- mod$trend_mgcv_model + V <- cov(mcmc_chains(out_gam_mod, 'b_trend')) + trend_mgcv_model$Vp <- trend_mgcv_model$Vc <- V + p <- mcmc_summary(out_gam_mod, 'b_trend', + variational = algorithm %in% c('meanfield', + 'fullrank', + 'pathfinder', + 'laplace'))[,c(4)] + names(p) <- names(trend_mgcv_model$coefficients) + trend_mgcv_model$coefficients <- p + + #### Return the output as class mvgam #### + trim_data <- list() + attr(trim_data, 'threads') <- threads + attr(trim_data, 'noncentred') <- NULL + attr(trim_data, 'trend_model') <- 'None' + attr(trim_data, 'prepped_trend_model') <- prepped_trend + + out <- structure(list(call = mod$call, + trend_call = factor_formula, + family = mod$family, + share_obs_params = mod$share_obs_params, + trend_model = 'None', + trend_map = trend_map, + drift = FALSE, + priors = mod$priors, + model_output = out_gam_mod, + model_file = model_file, + model_data = if(return_model_data){ + model_data + } else { + trim_data + }, + inits = NULL, + monitor_pars = param, + sp_names = mod$sp_names, + trend_sp_names = mod$trend_sp_names, + mgcv_model = ss_gam, + trend_mgcv_model = trend_mgcv_model, + ytimes = mod$ytimes, + resids = mod_residuals, + use_lv = TRUE, + n_lv = n_lv, + upper_bounds = mod$upper_bounds, + obs_data = mod$obs_data, + test_data = mod$test_data, + fit_engine = 'stan', + backend = backend, + algorithm = algorithm, + max_treedepth = max_treedepth, + adapt_delta = adapt_delta), + class = c('mvgam', 'jsdgam')) + } + + return(out) +} + +#' Prep trend for jsdgam +#' @noRd +prep_jsdgam_trend = function(data, unit, subgr){ + + unit <- as_one_character(unit) + subgr <- as_one_character(subgr) + validate_var_exists(data = data, + variable = unit, + type = 'num/int', + name = 'data', + trend_char = 'ZMVN') + validate_var_exists(data = data, + variable = subgr, + type = 'factor', + name = 'data', + trend_char = 'ZMVN') + out <- structure(list(trend_model = 'ZMVN', + ma = FALSE, + cor = TRUE, + unit = unit, + gr = "NA", + subgr = subgr, + label = NULL), + class = 'mvgam_trend') +} + +#' @noRd +prep_jsdgam_trendmap = function(data, n_lv){ + if(n_lv > nlevels(data$series)){ + stop('Number of factors must be <= number of levels in subgr', + call. = FALSE) + } + data.frame(trend = rep(1:n_lv, + nlevels(data$series))[1:nlevels(data$series)], + series = factor(levels(data$series), + levels = levels(data$series))) +} diff --git a/R/marginaleffects.mvgam.R b/R/marginaleffects.mvgam.R index 68059f5a..6c9c706a 100644 --- a/R/marginaleffects.mvgam.R +++ b/R/marginaleffects.mvgam.R @@ -171,11 +171,10 @@ get_data.mvgam = function (x, source = "environment", verbose = TRUE, ...) { unique(x$trend_map$trend))) # Trend-level data, before any slicing that took place - trend_level_data <- data.frame(trend_series = trend_indicators, - series = orig_dat$series, - time = orig_dat$time, - y = orig_dat$y, - row_num = 1:length(x$obs_data$index..time..index)) + orig_dat %>% + dplyr::bind_cols(data.frame(trend_series = trend_indicators, + row_num = 1:length(x$obs_data$index..time..index))) -> + trend_level_data # # We only kept one time observation per trend trend_level_data %>% @@ -373,6 +372,36 @@ find_predictors.mvgam = function(x, effects = c('fixed', } } + # Any other required variables, needed for grouped models + if(!inherits(attr(x$model_data, 'trend_model'), 'mvgam_trend')){ + trend_model <- list(trend_model = attr(x$model_data, 'trend_model'), + unit = 'time', + gr = 'NA', + subgr = 'series') + } else { + trend_model <- attr(x$model_data, 'trend_model') + } + other_vars <- c(trend_model$unit, + trend_model$gr, + trend_model$subgr) + if(!is.null(attr(x$model_data, 'prepped_trend_model'))){ + prepped_model <- attr(x$model_data, 'prepped_trend_model') + other_vars <- c(other_vars, + c(prepped_model$unit, + prepped_model$gr, + prepped_model$subgr)) + } + + if(flatten){ + other_vars <- setdiff(unique(other_vars), + c('NA', preds)) + preds <- c(preds, other_vars) + } else { + other_vars <- setdiff(unique(other_vars), + c('NA', preds$conditional)) + preds$conditional <- c(preds$conditional, other_vars) + } + return(preds) } @@ -422,6 +451,36 @@ find_predictors.mvgam_prefit = function(x, effects = c('fixed', } } + # Any other required variables, needed for grouped models + if(!inherits(attr(x$model_data, 'trend_model'), 'mvgam_trend')){ + trend_model <- list(trend_model = attr(x$model_data, 'trend_model'), + unit = 'time', + gr = 'NA', + subgr = 'series') + } else { + trend_model <- attr(x$model_data, 'trend_model') + } + other_vars <- c(trend_model$unit, + trend_model$gr, + trend_model$subgr) + if(!is.null(attr(x$model_data, 'prepped_trend_model'))){ + prepped_model <- attr(x$model_data, 'prepped_trend_model') + other_vars <- c(other_vars, + c(prepped_model$unit, + prepped_model$gr, + prepped_model$subgr)) + } + + if(flatten){ + other_vars <- setdiff(unique(other_vars), + c('NA', preds)) + preds <- c(preds, other_vars) + } else { + other_vars <- setdiff(unique(other_vars), + c('NA', preds$conditional)) + preds$conditional <- c(preds$conditional, other_vars) + } + return(preds) } diff --git a/R/model.frame.mvgam.R b/R/model.frame.mvgam.R index 22ec6659..d8eefb48 100644 --- a/R/model.frame.mvgam.R +++ b/R/model.frame.mvgam.R @@ -39,7 +39,38 @@ model.frame.mvgam = function(formula, trend_effects = FALSE, ...){ if(formula$family == 'nmix'){ out$cap <- formula$obs_data$cap } + + # Any other required variables, needed for grouped models + if(!inherits(attr(formula$model_data, 'trend_model'), 'mvgam_trend')){ + trend_model <- list(trend_model = attr(formula$model_data, 'trend_model'), + unit = 'time', + gr = 'NA', + subgr = 'series') + } else { + trend_model <- attr(formula$model_data, 'trend_model') + } + other_vars <- c(trend_model$unit, + trend_model$gr, + trend_model$subgr) + if(!is.null(attr(formula$model_data, 'prepped_trend_model'))){ + prepped_model <- attr(formula$model_data, 'prepped_trend_model') + other_vars <- c(other_vars, + c(prepped_model$unit, + prepped_model$gr, + prepped_model$subgr)) + } + other_vars <- setdiff(unique(other_vars), + c('NA', colnames(out))) + + if(length(other_vars)){ + orig_names <- colnames(out) + for(i in 1:length(other_vars)){ + out <- cbind(out, formula$obs_data[[other_vars[i]]]) + } + colnames(out) <- c(orig_names, other_vars) + } } + return(out) } @@ -71,11 +102,41 @@ model.frame.mvgam_prefit = function(formula, trend_effects = FALSE, ...){ # Now add the observed response, in case there are any # NAs there that need to be updated out[,resp] <- formula$obs_data$y - } - # Ensure 'cap' is included if this is an N-mixture model - if(formula$family == 'nmix'){ - out$cap <- formula$obs_data$cap + # Ensure 'cap' is included if this is an N-mixture model + if(formula$family == 'nmix'){ + out$cap <- formula$obs_data$cap + } + + # Any other required variables, needed for grouped models + if(!inherits(attr(formula$model_data, 'trend_model'), 'mvgam_trend')){ + trend_model <- list(trend_model = attr(formula$model_data, 'trend_model'), + unit = 'time', + gr = 'NA', + subgr = 'series') + } else { + trend_model <- attr(formula$model_data, 'trend_model') + } + other_vars <- c(trend_model$unit, + trend_model$gr, + trend_model$subgr) + if(!is.null(attr(formula$model_data, 'prepped_trend_model'))){ + prepped_model <- attr(formula$model_data, 'prepped_trend_model') + other_vars <- c(other_vars, + c(prepped_model$unit, + prepped_model$gr, + prepped_model$subgr)) + } + other_vars <- setdiff(unique(other_vars), + c('NA', colnames(out))) + + if(length(other_vars)){ + orig_names <- colnames(out) + for(i in 1:length(other_vars)){ + out <- cbind(out, formula$obs_data[[other_vars[i]]]) + } + colnames(out) <- c(orig_names, other_vars) + } } return(out) diff --git a/R/mvgam.R b/R/mvgam.R index eb191c86..08a266ac 100644 --- a/R/mvgam.R +++ b/R/mvgam.R @@ -13,7 +13,7 @@ #'@importFrom rlang missing_arg #'@param formula A \code{character} string specifying the GAM observation model formula. These are exactly like the formula #'for a GLM except that smooth terms, `s()`, `te()`, `ti()`, `t2()`, as well as time-varying -#'`dynamic()` terms, can be added to the right hand side +#'`dynamic()` terms and nonparametric `gp()` terms, can be added to the right hand side #'to specify that the linear predictor depends on smooth functions of predictors #'(or linear functionals of these). In `nmix()` family models, the `formula` is used to #'set up a linear predictor for the detection probability. Details of the formula syntax used by \pkg{mvgam} @@ -31,14 +31,14 @@ #'for both the observation mode (captured by `formula`) and the process model (captured by `trend_formula`). #'Users are recommended to drop one of these using the `- 1` convention in the formula right hand side. #'@param knots An optional \code{list} containing user specified knot values to be used for basis construction. -#'For most bases the user simply supplies the knots to be used, which must match up with the k value supplied +#'For most bases the user simply supplies the knots to be used, which must match up with the `k` value supplied #'(note that the number of knots is not always just `k`). Different terms can use different numbers of knots, #'unless they share a covariate #'@param trend_knots As for `knots` above, this is an optional \code{list} of knot values for smooth #'functions within the `trend_formula` #'@param data A \code{dataframe} or \code{list} containing the model response variable and covariates #'required by the GAM \code{formula} and optional \code{trend_formula}. Most models should include columns: -#'#'\itemize{ +#'\itemize{ #' \item`series` (a \code{factor} index of the series IDs; the number of levels should be identical #' to the number of unique series labels (i.e. `n_series = length(levels(data$series))`)) #' \item`time` (\code{numeric} or \code{integer} index of the time point for each observation). diff --git a/R/predict.mvgam.R b/R/predict.mvgam.R index 0261a75d..a35341a7 100644 --- a/R/predict.mvgam.R +++ b/R/predict.mvgam.R @@ -106,10 +106,16 @@ predict.mvgam = function(object, # newdata needs to have a 'series' indicator in it for integrating # over the trend uncertainties - if(!'series' %in% names(newdata)){ - newdata$series <- factor(rep(levels(object$obs_data$series)[1], - length(newdata[[1]])), - levels = levels(object$obs_data$series)) + if(inherits(object, 'jsdgam')){ + newdata <- validate_series_time(data = newdata, + trend_model = attr(object$model_data, 'prepped_trend_model'), + check_levels = FALSE, + check_times = FALSE) + } else { + newdata <- validate_series_time(data = newdata, + trend_model = object$trend_model, + check_levels = FALSE, + check_times = FALSE) } type <- match.arg(arg = type, choices = c("link", @@ -151,8 +157,8 @@ predict.mvgam = function(object, # If a linear predictor was supplied for the latent process models, calculate - # predictions by assuming the trend is stationary (this is basically what brms) - # does when predicting for autocor() models + # predictions by assuming the trend is stationary (this is basically what brms + # does when predicting for autocor() models) if(!is.null(object$trend_call)){ # Linear predictor matrix for the latent process models @@ -215,21 +221,67 @@ predict.mvgam = function(object, }) names(family_extracts) <- names(family_pars) - # Pre-multiply the linear predictors - all_linpreds <- as.matrix(as.vector(t(apply(as.matrix(betas), 1, - function(row) Xp %*% row + - attr(Xp, 'model.offset'))))) - attr(all_linpreds, 'model.offset') <- 0 - # Trend stationary predictions if(!process_error){ family_extracts <- list(sigma_obs = .Machine$double.eps) } - trend_predictions <- mvgam_predict(family = 'gaussian', - Xp = all_linpreds, - type = 'response', - betas = 1, - family_pars = family_extracts) + if(inherits(object, 'jsdgam')){ + # JSDMs should generate one set of predictions per latent variable and then + # create a weighted set of predictions based on the loading estimates + lv_coefs <- mcmc_chains(object$model_output, 'lv_coefs') + n_draws <- dim(mcmc_chains(object$model_output, 'b'))[1] + series_ind <- as.numeric(newdata$series) + all_linpreds <- as.matrix(as.vector(t(apply(as.matrix(betas), 1, + function(row) Xp %*% row + + attr(Xp, 'model.offset'))))) + attr(all_linpreds, 'model.offset') <- 0 + trend_predictions_raw <- lapply(1:object$n_lv, function(x){ + pred_vec <- mvgam_predict(family = 'gaussian', + Xp = all_linpreds, + type = 'response', + betas = 1, + family_pars = family_extracts) + matrix(pred_vec, nrow = NROW(betas)) + }) + + # Create weighted set of predictions using the loadings + weighted_mat = function(pred_matrices, + weights, + draw = 1, + obs = 1){ + lv_draws <- unlist(lapply(pred_matrices, + function(x) x[draw, obs]), + use.names = FALSE) + as.vector(lv_draws %*% weights) + } + + trend_predictions <- matrix(NA, nrow = n_draws, + ncol = length(newdata[[1]])) + n_lv <- object$n_lv + for(i in 1:n_draws){ + for(x in 1:length(newdata[[1]])){ + trend_predictions[i, x] <- weighted_mat(trend_predictions_raw, + matrix(lv_coefs[i,], + nrow = n_lv)[,series_ind[x]], + draw = i, + obs = x) + } + } + trend_predictions <- as.vector(trend_predictions) + + } else { + # Pre-multiply the linear predictors + all_linpreds <- as.matrix(as.vector(t(apply(as.matrix(betas), 1, + function(row) Xp %*% row + + attr(Xp, 'model.offset'))))) + attr(all_linpreds, 'model.offset') <- 0 + trend_predictions <- mvgam_predict(family = 'gaussian', + Xp = all_linpreds, + type = 'response', + betas = 1, + family_pars = family_extracts) + } + } else if(attr(object$model_data, 'trend_model') != 'None' & process_error){ # If no linear predictor for the trends but a dynamic trend model was used, diff --git a/R/stan_utils.R b/R/stan_utils.R index db411492..0fc64b8e 100644 --- a/R/stan_utils.R +++ b/R/stan_utils.R @@ -24,7 +24,7 @@ #' str(sdata) #' code = function(object){ - if(!class(object) %in% c('mvgam', 'mvgam_prefit')){ + if(!inherits(object, c('mvgam', 'mvgam_prefit'))){ stop('argument "object" must be of class "mvgam" or "mvgam_prefit"') } @@ -3380,7 +3380,7 @@ check_energy <- function(fit, quiet=FALSE, sampler_params) { #' @param quiet Logical (verbose or not?) #' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/) #' @noRd -check_n_eff <- function(fit, quiet=FALSE, fit_summary) { +check_n_eff <- function(fit, quiet=FALSE, fit_summary, ignore_b_trend = FALSE) { if(missing(fit_summary)){ fit_summary <- rstan::summary(fit, probs = c(0.5))$summary } @@ -3388,7 +3388,24 @@ check_n_eff <- function(fit, quiet=FALSE, fit_summary) { if(any(grep('LV', rownames(fit_summary)))){ fit_summary <- fit_summary[-grep('LV', rownames(fit_summary)), ] fit_summary <- fit_summary[-grep('lv_coefs', rownames(fit_summary)), ] + + if(any(grepl('L[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('L[', rownames(fit_summary), fixed = TRUE), ] + } + if(any(grepl('LV_raw[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('LV_raw[', rownames(fit_summary), fixed = TRUE), ] + } + } + + if(ignore_b_trend){ + if(any(grepl('b_trend[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('b_trend[', rownames(fit_summary), fixed = TRUE), ] + } + if(any(grepl('trend_mus[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('trend_mus[', rownames(fit_summary), fixed = TRUE), ] + } } + N <- dim(fit_summary)[[1]] iter <- dim(rstan::extract(fit)[[1]])[[1]] @@ -3415,7 +3432,7 @@ check_n_eff <- function(fit, quiet=FALSE, fit_summary) { #' @param quiet Logical (verbose or not?) #' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/) #' @noRd -check_rhat <- function(fit, quiet=FALSE, fit_summary) { +check_rhat <- function(fit, quiet=FALSE, fit_summary, ignore_b_trend = FALSE) { if(missing(fit_summary)){ fit_summary <- rstan::summary(fit, probs = c(0.5))$summary } @@ -3423,7 +3440,24 @@ check_rhat <- function(fit, quiet=FALSE, fit_summary) { if(any(grep('LV', rownames(fit_summary)))){ fit_summary <- fit_summary[-grep('LV', rownames(fit_summary)), ] fit_summary <- fit_summary[-grep('lv_coefs', rownames(fit_summary)), ] + + if(any(grepl('L[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('L[', rownames(fit_summary), fixed = TRUE), ] + } + if(any(grepl('LV_raw[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('LV_raw[', rownames(fit_summary), fixed = TRUE), ] + } + } + + if(ignore_b_trend){ + if(any(grepl('b_trend[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('b_trend[', rownames(fit_summary), fixed = TRUE), ] + } + if(any(grepl('trend_mus[', rownames(fit_summary), fixed = TRUE))){ + fit_summary <- fit_summary[-grep('trend_mus[', rownames(fit_summary), fixed = TRUE), ] + } } + N <- dim(fit_summary)[[1]] no_warning <- TRUE @@ -3447,11 +3481,11 @@ check_rhat <- function(fit, quiet=FALSE, fit_summary) { #' @param quiet Logical (verbose or not?) #' @details Utility function written by Michael Betancourt (https://betanalpha.github.io/) #' @noRd -check_all_diagnostics <- function(fit, max_treedepth = 10) { +check_all_diagnostics <- function(fit, max_treedepth = 10, ignore_b_trend = FALSE) { sampler_params <- rstan::get_sampler_params(fit, inc_warmup=FALSE) fit_summary <- rstan::summary(fit, probs = c(0.5))$summary - check_n_eff(fit, fit_summary = fit_summary) - check_rhat(fit, fit_summary = fit_summary) + check_n_eff(fit, fit_summary = fit_summary, ignore_b_trend = ignore_b_trend) + check_rhat(fit, fit_summary = fit_summary, ignore_b_trend = ignore_b_trend) check_div(fit, sampler_params = sampler_params) check_treedepth(fit, max_depth = max_treedepth, sampler_params = sampler_params) diff --git a/R/summary.mvgam.R b/R/summary.mvgam.R index 3460b395..f86638a9 100644 --- a/R/summary.mvgam.R +++ b/R/summary.mvgam.R @@ -55,8 +55,8 @@ summary.mvgam = function(object, 'sigma_raw', conservative = trend_model == 'None') - if(!is.null(object$trend_call)){ - object$trend_mgcv_model <- mvgam:::compute_edf(object$trend_mgcv_model, + if(!is.null(object$trend_call) & !inherits(object, 'jsdgam')){ + object$trend_mgcv_model <- compute_edf(object$trend_mgcv_model, object, 'rho_trend', 'sigma_raw_trend') @@ -348,7 +348,7 @@ if(object$use_lv){ } else { trend_model <- object$trend_model } - if(trend_model == 'None' & object$family == 'nmix'){ + if(trend_model == 'None' & object$family == 'nmix' | inherits(object, 'jsdgam')){ } else { cat("\nProcess error parameter estimates:\n") @@ -652,7 +652,7 @@ if(grepl('hiercor', validate_trend_model(object$trend_model))){ } -if(!is.null(object$trend_call)){ +if(!is.null(object$trend_call) & !inherits(object, 'jsdgam')){ if(include_betas){ cat("\nGAM process model coefficient (beta) estimates:\n") coef_names <- paste0(names(object$trend_mgcv_model$coefficients), '_trend') @@ -763,8 +763,14 @@ if(!is.null(object$trend_call)){ if(object$fit_engine == 'stan' & object$algorithm == 'sampling'){ cat('\nStan MCMC diagnostics:\n') + if(inherits(object, 'jsdgam')){ + ignore_b_trend <- TRUE + } else { + ignore_b_trend <- FALSE + } check_all_diagnostics(object$model_output, - max_treedepth = object$max_treedepth) + max_treedepth = object$max_treedepth, + ignore_b_trend = ignore_b_trend) sampler <- attr(object$model_output@sim$samples[[1]], "args")$sampler_t cat("\nSamples were drawn using ", sampler, " at ", object$model_output@date, ".\n", diff --git a/R/validations.R b/R/validations.R index 36c9a919..7f130a6b 100644 --- a/R/validations.R +++ b/R/validations.R @@ -3,7 +3,9 @@ #'@noRd validate_series_time = function(data, name = 'data', - trend_model){ + trend_model, + check_levels = TRUE, + check_times = TRUE){ # First validation requires the full trend_model object if(!inherits(trend_model, 'mvgam_trend')){ @@ -104,34 +106,39 @@ validate_series_time = function(data, data$index..time..index <- times } - # Series factor must have all unique levels present - if(!all(levels(data$series) %in% unique(data$series))){ - stop(paste0('Mismatch between factor levels of "series" and unique values of "series"', - '\n', - 'Use\n `setdiff(levels(data$series), unique(data$series))` \nand', - '\n', - ' `intersect(levels(data$series), unique(data$series))`\nfor guidance'), - call. = FALSE) + # Series factor must have all unique levels present if this is a + # forecast check + if(check_levels){ + if(!all(levels(data$series) %in% unique(data$series))){ + stop(paste0('Mismatch between factor levels of "series" and unique values of "series"', + '\n', + 'Use\n `setdiff(levels(data$series), unique(data$series))` \nand', + '\n', + ' `intersect(levels(data$series), unique(data$series))`\nfor guidance'), + call. = FALSE) + } } # Ensure each series has an observation, even if NA, for each # unique timepoint (only for trend models that require discrete time with # regularly spaced sampling intervals) - all_times_avail = function(time, min_time, max_time){ - identical(as.numeric(sort(time)), - as.numeric(seq.int(from = min_time, to = max_time))) - } - min_time <- as.numeric(min(data$index..time..index)) - max_time <- as.numeric(max(data$index..time..index)) - data.frame(series = data$series, - time = data$index..time..index) %>% - dplyr::group_by(series) %>% - dplyr::summarise(all_there = all_times_avail(time, - min_time, - max_time)) -> checked_times - if(any(checked_times$all_there == FALSE)){ - stop("One or more series in ", name, " is missing observations for one or more timepoints", - call. = FALSE) + if(check_times){ + all_times_avail = function(time, min_time, max_time){ + identical(as.numeric(sort(time)), + as.numeric(seq.int(from = min_time, to = max_time))) + } + min_time <- as.numeric(min(data$index..time..index)) + max_time <- as.numeric(max(data$index..time..index)) + data.frame(series = data$series, + time = data$index..time..index) %>% + dplyr::group_by(series) %>% + dplyr::summarise(all_there = all_times_avail(time, + min_time, + max_time)) -> checked_times + if(any(checked_times$all_there == FALSE)){ + stop("One or more series in ", name, " is missing observations for one or more timepoints", + call. = FALSE) + } } return(data) @@ -203,16 +210,20 @@ validate_series_groups = function(data, trend_model, name = 'data'){ subgr = data[[trend_model$subgr]]) # Check that each level of gr contains all possible levels of subgr - if(stats::var(gr_dat %>% - dplyr::group_by(gr) %>% - dplyr::summarise(tot_subgrs = length(unique(subgr))) %>% - dplyr::ungroup() %>% - dplyr::pull(tot_subgrs)) != 0){ - stop(paste0('Some levels of "', trend_model$gr, '" do not contain all\n', - 'unique levels of "', trend_model$subgr, '"', - " in ", name), - call. = FALSE) + gr_total_levels <- gr_dat %>% + dplyr::group_by(gr) %>% + dplyr::summarise(tot_subgrs = length(unique(subgr))) %>% + dplyr::ungroup() %>% + dplyr::pull(tot_subgrs) + if(length(gr_total_levels) > 1){ + if(stats::var(gr_total_levels) != 0){ + stop(paste0('Some levels of "', trend_model$gr, '" do not contain all\n', + 'unique levels of "', trend_model$subgr, '"', + " in ", name), + call. = FALSE) + } } + gr_dat %>% dplyr::mutate(series = interaction(gr, subgr, drop = TRUE, sep = '_', lex.order = TRUE)) -> gr_dat @@ -259,6 +270,11 @@ validate_var_exists = function(data, } } +#'@noRd +deparse_variable = function(...){ + deparse0(substitute(...)) +} + #'@noRd as_one_logical = function (x, allow_na = FALSE) { s <- substitute(x) diff --git a/docs/news/index.html b/docs/news/index.html index 6dcb4a88..c019db17 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -10,7 +10,7 @@ mvgam - 1.1.3 + 1.1.4 + + + + + +
+
+
+ +
+

This function sets up a Joint Species Distribution Model whereby the residual associations among +species can be modelled in a reduced-rank format using a set of latent factors. The factor +specification is extremely flexible, allowing users to include spatial, temporal or any other type +of predictor effects to more efficiently capture unmodelled residual associations, while the +observation model can also be highly flexible (including all smooth, GP and other effects that +mvgam can handle)

+
+ +
+

Usage

+
jsdgam(
+  formula,
+  factor_formula = ~-1,
+  knots,
+  factor_knots,
+  data,
+  newdata,
+  family = poisson(),
+  unit = time,
+  subgr = series,
+  share_obs_params = FALSE,
+  priors,
+  n_lv = 2,
+  chains = 4,
+  burnin = 500,
+  samples = 500,
+  thin = 1,
+  parallel = TRUE,
+  threads = 1,
+  silent = 1,
+  max_treedepth = 12,
+  adapt_delta = 0.85,
+  backend = getOption("brms.backend", "cmdstanr"),
+  algorithm = getOption("brms.algorithm", "sampling"),
+  run_model = TRUE,
+  return_model_data = FALSE,
+  ...
+)
+
+ +
+

Arguments

+
formula
+

A character string specifying the GAM observation model formula. These are exactly like the formula +for a GLM except that smooth terms, s(), te(), ti(), t2(), as well as time-varying +dynamic() terms and nonparametric gp() terms, can be added to the right hand side +to specify that the linear predictor depends on smooth functions of predictors +(or linear functionals of these). In nmix() family models, the formula is used to +set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam +can be found in mvgam_formulae

+ + +
factor_formula
+

A character string specifying the linear predictor +effects for the latent factors. These are exactly like the formula +for a GLM except that smooth terms, s(), te(), ti(), t2(), as well as time-varying +dynamic() terms and nonparametric gp() terms, can be added to the right hand side +to specify that the linear predictor depends on smooth functions of predictors +(or linear functionals of these). Details of the formula syntax used by mvgam +can be found in mvgam_formulae

+ + +
knots
+

An optional list containing user specified knot values to be used for basis construction. +For most bases the user simply supplies the knots to be used, which must match up with the k value supplied +(note that the number of knots is not always just k). Different terms can use different numbers of knots, +unless they share a covariate

+ + +
factor_knots
+

An optional list containing user specified knot values to +be used for basis construction of any smooth terms in factor_formula. +For most bases the user simply supplies the knots to be used, which must match up with the k value supplied +(note that the number of knots is not always just k). Different terms can use different numbers of knots, +unless they share a covariate

+ + +
data
+

A dataframe or list containing the model response variable and covariates +required by the GAM formula and factor_formula objects

+ + +
newdata
+

Optional dataframe or list of test data containing the same variables +as in data. If included, the +observations in variable y will be set to NA when fitting the model so that posterior +simulations can be obtained

+ + +
family
+

family specifying the exponential observation family for the series. Currently supported +families are:

Default is poisson(). See mvgam_families for more details

+ + +
share_obs_params
+

logical. If TRUE and the family +has additional family-specific observation parameters (e.g. variance components in +student_t() or gaussian(), or dispersion parameters in nb() or betar()), +these parameters will be shared across all series. This is handy if you have multiple +time series that you believe share some properties, such as being from the same +species over different spatial units. Default is FALSE.

+ + +
priors
+

An optional data.frame with prior +definitions (in JAGS or Stan syntax). if using Stan, this can also be an object of +class brmsprior (see. prior for details). See get_mvgam_priors and +'Details' for more information on changing default prior distributions

+ + +
n_lv
+

integer the number of latent dynamic factors to use if use_lv == TRUE. +Cannot be > n_series. Defaults arbitrarily to min(2, floor(n_series / 2))

+ + +
chains
+

integer specifying the number of parallel chains for the model. Ignored +if algorithm %in% c('meanfield', 'fullrank', 'pathfinder', 'laplace')

+ + +
burnin
+

integer specifying the number of warmup iterations of the Markov chain to run +to tune sampling algorithms. Ignored +if algorithm %in% c('meanfield', 'fullrank', 'pathfinder', 'laplace')

+ + +
samples
+

integer specifying the number of post-warmup iterations of the Markov chain to run for +sampling the posterior distribution

+ + +
thin
+

Thinning interval for monitors. Ignored +if algorithm %in% c('meanfield', 'fullrank', 'pathfinder', 'laplace')

+ + +
parallel
+

logical specifying whether multiple cores should be used for +generating MCMC simulations in parallel. If TRUE, the number of cores to use will be +min(c(chains, parallel::detectCores() - 1))

+ + +
threads
+

integer Experimental option to use multithreading for within-chain +parallelisation in Stan. We recommend its use only if you are experienced with +Stan's reduce_sum function and have a slow running model that cannot be sped +up by any other means. Only available for some families(poisson(), nb(), gaussian()) and +when using Cmdstan as the backend

+ + +
silent
+

Verbosity level between 0 and 2. If 1 (the default), most of the informational +messages of compiler and sampler are suppressed. If 2, even more messages are suppressed. The +actual sampling progress is still printed. Set refresh = 0 to turn this off as well. If using +backend = "rstan" you can also set open_progress = FALSE to prevent opening additional +progress bars.

+ + +
max_treedepth
+

positive integer placing a cap on the number of simulation steps evaluated during each iteration when +use_stan == TRUE. Default is 12. Increasing this value can sometimes help with exploration of complex +posterior geometries, but it is rarely fruitful to go above a max_treedepth of 14

+ + +
adapt_delta
+

positive numeric between 0 and 1 defining the target average proposal acceptance probability +during Stan's adaptation period, if use_stan == TRUE. Default is 0.8. In general you should not need to change adapt_delta +unless you see a warning message about divergent transitions, in which case you can increase adapt_delta from the default +to a value closer to 1 (e.g. from 0.95 to 0.99, or from 0.99 to 0.999, etc). +The step size used by the numerical integrator is a function of adapt_delta in that increasing +adapt_delta will result in a smaller step size and fewer divergences. Increasing adapt_delta will +typically result in a slower sampler, but it will always lead to a more robust sampler

+ + +
backend
+

Character string naming the package to use as the backend for fitting +the Stan model (if use_stan = TRUE). Options are "cmdstanr" (the default) or "rstan". Can be set globally +for the current R session via the "brms.backend" option (see options). Details on +the rstan and cmdstanr packages are available at https://mc-stan.org/rstan/ and +https://mc-stan.org/cmdstanr/, respectively

+ + +
algorithm
+

Character string naming the estimation approach to use. +Options are "sampling" for MCMC (the default), "meanfield" for +variational inference with factorized normal distributions, +"fullrank" for variational inference with a multivariate normal +distribution, "laplace" for a Laplace approximation (only available +when using cmdstanr as the backend) or "pathfinder" for the pathfinder +algorithm (only currently available when using cmdstanr as the backend). +Can be set globally for the current R session via the +"brms.algorithm" option (see options). Limited testing +suggests that "meanfield" performs best out of the non-MCMC approximations for +dynamic GAMs, possibly because of the difficulties estimating covariances among the +many spline parameters and latent trend parameters. But rigorous testing has not +been carried out

+ + +
run_model
+

logical. If FALSE, the model is not fitted but instead the function will +return the model file and the data / initial values that are needed to fit the model outside of mvgam

+ + +
return_model_data
+

logical. If TRUE, the list of data that is needed to fit the +model is returned, along with the initial values for smooth and AR parameters, once the model is fitted. +This will be helpful if users wish to modify the model file to add +other stochastic elements that are not currently available in mvgam. +Default is FALSE to reduce +the size of the returned object, unless run_model == FALSE

+ + +
...
+

Other arguments to pass to mvgam()

+ +
+
+

Value

+ + +

A list object of classes jsdgam and mvgam containing model output, +the text representation of the model file, +the mgcv model output (for easily generating simulations at +unsampled covariate values), Dunn-Smyth residuals for each series and key information needed +for other functions in the package. See mvgam-class for details. +Use methods(class = "mvgam") and methods(class = "jsdgam") for an overview on available methods

+
+
+

Details

+

Joint Species Distribution Models allow for responses of multiple species to be +learned hierarchically, whereby responses to environmental variables in formula can be partially +pooled and any latent, unmodelled residual associations can also be learned. In mvgam, both of +these effects can be modelled with the full power of latent factor Hierarchical GAMs, providing unmatched +flexibility to model full communities of species

+
+
+

See also

+

mvgam

+
+
+

Author

+

Nicholas J Clark

+
+ +
+

Examples

+
if (FALSE) {
+# Simulate latent count data for 500 spatial locations and 10 species
+set.seed(0)
+N_points <- 500
+N_species <- 10
+
+# Species-level intercepts (on the log scale)
+alphas <- runif(N_species, 2, 2.25)
+
+# Simulate a covariate and species-level responses to it
+temperature <- rnorm(N_points)
+betas <- runif(N_species, -0.5, 0.5)
+
+# Simulate background spatial points uniformly over a space
+lon <- runif(N_points, min = 150, max = 155)
+lat <- runif(N_points, min = -20, max = -19)
+
+# Set up spatial basis functions as a tensor product of lat and lon;
+# keep this number small so there are noticeable spatiotemporal 'clusters'
+sm <- mgcv::smoothCon(mgcv::te(lon, lat, k = 5),
+                      data = data.frame(lon, lat),
+                      knots = NULL)[[1]]
+
+# The design matrix for this smooth is in the 'X' slot
+des_mat <- sm$X
+dim(des_mat)
+
+# Function to generate a random covariance matrix where all variables
+# have unit variance (i.e. diagonals are all 1)
+random_Sigma = function(N){
+  L_Omega <- matrix(0, N, N);
+  L_Omega[1, 1] <- 1;
+  for (i in 2 : N) {
+    bound <- 1;
+    for (j in 1 : (i - 1)) {
+      L_Omega[i, j] <- runif(1, -sqrt(bound), sqrt(bound));
+      bound <- bound - L_Omega[i, j] ^ 2;
+    }
+    L_Omega[i, i] <- sqrt(bound);
+  }
+  Sigma <- L_Omega %*% t(L_Omega);
+  return(Sigma)
+}
+
+# Simulate a variance-covariance matrix for the correlations among
+# basis coefficients
+Sigma <- random_Sigma(N = NCOL(des_mat))
+
+# Now simulate the species-level basis coefficients hierarchically, where
+# spatial basis function correlations are a convex sum of a base correlation
+# matrix and a species-level correlation matrix
+basis_coefs <- matrix(NA, nrow = N_species, ncol = NCOL(Sigma))
+base_field <- mgcv::rmvn(1, mu = rep(0, NCOL(Sigma)), V = Sigma)
+for(t in 1:N_species){
+  corOmega <- (cov2cor(Sigma) * 0.7) +
+                      (0.3 * cov2cor(random_Sigma(N = NCOL(des_mat))))
+  basis_coefs[t, ] <- mgcv::rmvn(1, mu = rep(0, NCOL(Sigma)), V = corOmega)
+}
+
+# Simulate the latent spatiotemporal processes
+st_process <- do.call(rbind, lapply(seq_len(N_species), function(t){
+  data.frame(lat = lat,
+             lon = lon,
+             species = paste0('species_', t),
+             temperature = temperature,
+             process = alphas[t] +
+               betas[t] * temperature +
+               des_mat %*% basis_coefs[t,])
+}))
+
+# Now take noisy observations at some of the points (60)
+obs_points <- sample(1:N_points, size = 60, replace = FALSE)
+obs_points <- data.frame(lat = lat[obs_points],
+                         lon = lon[obs_points],
+                         site = 1:60)
+
+# Keep only the process data at these points
+st_process %>%
+  dplyr::inner_join(obs_points, by = c('lat', 'lon')) %>%
+  # now take noisy Poisson observations of the process
+  dplyr::mutate(count = rpois(NROW(.), lambda = exp(process))) %>%
+  dplyr::mutate(species = factor(species,
+                                 levels = paste0('species_', 1:N_species))) %>%
+  dplyr::group_by(lat, lon) -> dat
+
+# View the count distributions for each species
+library(ggplot2)
+ggplot(dat, aes(x = count)) +
+  geom_histogram() +
+  facet_wrap(~ species, scales = 'free')
+
+ggplot(dat, aes(x = lat, y = lon, col = log(count + 1))) +
+  geom_point(size = 2.25) +
+  facet_wrap(~ species, scales = 'free') +
+  viridis::scale_color_viridis() +
+  theme_classic()
+
+# Fit a JSDM that estimates a hierarchical temperature responses
+# and that uses three latent spatial factors
+mod <- jsdgam(formula = count ~
+                # Environmental model includes species-level interecepts
+                # and random slopes for a linear effect of temperature
+                species +
+                s(species, bs = 're', by = temperature),
+
+              # Each factor estimates a different nonlinear spatial process, using
+              # 'by = trend' as in other mvgam State-Space models
+              factor_formula = ~ te(lat, lon, k = 5, by = trend) - 1,
+              n_lv = 3,
+
+              # The data and the grouping variables
+              data = dat,
+              unit = site,
+              subgr = species,
+
+              # Poisson observations
+              family = poisson())
+
+# Plot species-level intercept estimates
+plot_predictions(mod, condition = 'species',
+                 type = 'link')
+
+# Plot species' hierarchical responses to temperature
+plot_predictions(mod, condition = c('temperature', 'species', 'species'),
+                 type = 'link')
+
+# Plotting species' average geographical predictions against the observed
+# data gives some idea of whether more flexibility in the spatial process
+# may be needed
+plot_predictions(mod, condition = c('lat', 'species', 'species'),
+                 points = 0.5)
+plot_predictions(mod, condition = c('lon', 'species', 'species'),
+                 points = 0.5)
+
+# Calculate (median) residual spatial correlations
+med_loadings <- matrix(apply(as.matrix(mod$model_output, 'lv_coefs'),
+                             2, median),
+                       nrow = N_species, ncol = 3)
+cormat <- cov2cor(tcrossprod(med_loadings))
+rownames(cormat) <- colnames(cormat) <- levels(dat$species)
+
+# Plot it using corrplot (if installed)
+if(requireNamespace('corrplot', quietly = TRUE)){
+  corrplot::corrplot(cormat, type = 'lower', tl.col = 'black',
+                     tl.srt = 45)
+}
+
+# Posterior predictive checks and ELPD-LOO can ascertain model fit
+pp_check(mod, type = "ecdf_overlay_grouped",
+         group = "species", ndraws = 50)
+loo(mod)
+}
+
+
+
+ + +
+ + + + + + + diff --git a/docs/reference/mvgam-6.png b/docs/reference/mvgam-6.png index 158e2a72..d5117cd6 100644 Binary files a/docs/reference/mvgam-6.png and b/docs/reference/mvgam-6.png differ diff --git a/docs/reference/mvgam.html b/docs/reference/mvgam.html index d4e4a268..580c5b33 100644 --- a/docs/reference/mvgam.html +++ b/docs/reference/mvgam.html @@ -129,7 +129,7 @@

Argumentss(), te(), ti(), t2(), as well as time-varying -dynamic() terms, can be added to the right hand side +dynamic() terms and nonparametric gp() terms, can be added to the right hand side to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these). In nmix() family models, the formula is used to set up a linear predictor for the detection probability. Details of the formula syntax used by mvgam @@ -153,7 +153,7 @@

ArgumentsArgumentsExamples chains = 2) #> Compiling Stan program using cmdstanr #> +<<<<<<< HEAD +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/Rtmp2bnpq5/model-3cd048a27de0.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory +======= +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Start sampling #> Running MCMC with 2 parallel chains... #> @@ -761,7 +781,20 @@

Examples#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) +#> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) +<<<<<<< HEAD +#> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 2 finished in 0.9 seconds. +#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 1 finished in 0.9 seconds. +#> +#> Both chains finished successfully. +#> Mean chain execution time: 0.9 seconds. +#> Total execution time: 1.2 seconds. +======= #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) @@ -773,13 +806,18 @@

Examples#> Both chains finished successfully. #> Mean chain execution time: 1.8 seconds. #> Total execution time: 1.9 seconds. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> # Extract the model summary summary(mod1) #> GAM formula: #> y ~ s(season, bs = "cc", k = 6) +<<<<<<< HEAD +#> <environment: 0x000001b48de84448> +======= #> <environment: 0x000001f1510c0cc8> +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> #> Family: #> poisson @@ -832,7 +870,11 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> +<<<<<<< HEAD +#> Samples were drawn using NUTS(diag_e) at Wed Oct 30 3:09:16 PM 2024. +======= #> Samples were drawn using NUTS(diag_e) at Mon Oct 28 6:56:55 PM 2024. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -858,7 +900,11 @@

Examplesstr(fc) #> List of 16 #> $ call :Class 'formula' language y ~ s(season, bs = "cc", k = 6) +<<<<<<< HEAD +#> .. ..- attr(*, ".Environment")=<environment: 0x000001b48de84448> +======= #> .. ..- attr(*, ".Environment")=<environment: 0x000001f1510c0cc8> +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> $ trend_call : NULL #> $ family : chr "poisson" #> $ family_pars : NULL @@ -900,6 +946,15 @@

Examples#> .. .. ..$ : NULL #> .. .. ..$ : chr [1:60] "ypred[1,3]" "ypred[2,3]" "ypred[3,3]" "ypred[4,3]" ... #> $ forecasts :List of 3 +<<<<<<< HEAD +#> ..$ series_1: int [1:1000, 1:20] 5 3 0 21 4 9 10 9 10 9 ... +#> ..$ series_2: int [1:1000, 1:20] 1 3 4 23 4 6 11 7 18 13 ... +#> ..$ series_3: int [1:1000, 1:20] 5 20 6 4 21 13 7 26 24 5 ... +#> - attr(*, "class")= chr "mvgam_forecast" +plot(fc) +#> Out of sample DRPS: +#> 55.198255 +======= #> ..$ series_1: int [1:1000, 1:20] 9 39 3 7 2 4 19 11 23 14 ... #> ..$ series_2: int [1:1000, 1:20] 11 8 8 9 19 11 33 7 41 10 ... #> ..$ series_3: int [1:1000, 1:20] 15 37 6 15 16 6 5 13 13 7 ... @@ -907,6 +962,7 @@

Examplesplot(fc) #> Out of sample DRPS: #> 54.788292 +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f # Plot the estimated seasonal smooth function @@ -1004,26 +1060,62 @@

Examples chains = 2) #> Compiling Stan program using cmdstanr #> +<<<<<<< HEAD +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/Rtmp2bnpq5/model-3cd02ffc2afc.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory +======= +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Start sampling #> Running MCMC with 2 parallel chains... #> #> Chain 1 Iteration: 1 / 1000 [ 0%] (Warmup) #> Chain 2 Iteration: 1 / 1000 [ 0%] (Warmup) +<<<<<<< HEAD +#> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) +#> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) +#> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) +#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) +#> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) +#> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) +#> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) +#> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) +#> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) +======= #> Chain 1 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 2 Iteration: 100 / 1000 [ 10%] (Warmup) #> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) +<<<<<<< HEAD +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) +#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) +#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 1 finished in 1.1 seconds. +======= #> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) @@ -1035,8 +1127,13 @@

Examples#> Chain 2 finished in 3.6 seconds. #> #> Both chains finished successfully. +<<<<<<< HEAD +#> Mean chain execution time: 1.6 seconds. +#> Total execution time: 2.2 seconds. +======= #> Mean chain execution time: 3.5 seconds. #> Total execution time: 3.7 seconds. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> # The mapping matrix is now supplied as data to the model in the 'Z' element @@ -1191,6 +1288,22 @@

Examples chains = 2) #> Compiling Stan program using cmdstanr #> +<<<<<<< HEAD +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/Rtmp2bnpq5/model-3cd02a5f713a.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory +======= +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Start sampling #> Running MCMC with 2 parallel chains... #> @@ -1214,23 +1327,32 @@

Examples#> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) -#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) +#> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 1 finished in 1.3 seconds. #> Chain 2 finished in 1.3 seconds. #> #> Both chains finished successfully. +<<<<<<< HEAD +#> Mean chain execution time: 0.8 seconds. +#> Total execution time: 0.9 seconds. +======= #> Mean chain execution time: 1.3 seconds. #> Total execution time: 1.4 seconds. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> # Inspect the model summary, forecast and time-varying coefficient distribution summary(mod) #> GAM formula: #> out ~ gp(time, by = temp, c = 5/4, k = 40, scale = FALSE) +<<<<<<< HEAD +#> <environment: 0x000001b48de84448> +======= #> <environment: 0x000001f1510c0cc8> +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> #> Family: #> gaussian @@ -1316,7 +1438,11 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> +<<<<<<< HEAD +#> Samples were drawn using NUTS(diag_e) at Wed Oct 30 3:10:33 PM 2024. +======= #> Samples were drawn using NUTS(diag_e) at Mon Oct 28 6:58:47 PM 2024. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -1355,6 +1481,22 @@

Examples chains = 2) #> Compiling Stan program using cmdstanr #> +<<<<<<< HEAD +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/Rtmp2bnpq5/model-3cd069b136c.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory +======= +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Start sampling #> Running MCMC with 2 parallel chains... #> @@ -1372,6 +1514,12 @@

Examples#> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) #> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) +<<<<<<< HEAD +#> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) +#> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) +#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) +======= +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) @@ -1381,6 +1529,15 @@

Examples#> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) +<<<<<<< HEAD +#> Chain 2 finished in 3.7 seconds. +#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 1 finished in 4.1 seconds. +#> +#> Both chains finished successfully. +#> Mean chain execution time: 3.9 seconds. +#> Total execution time: 4.2 seconds. +======= #> Chain 2 finished in 13.9 seconds. #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 1 finished in 14.9 seconds. @@ -1388,6 +1545,7 @@

Examples#> Both chains finished successfully. #> Mean chain execution time: 14.4 seconds. #> Total execution time: 15.0 seconds. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> # Inspect the model file to see the modification to the linear predictor @@ -1564,6 +1722,22 @@

Examples#> This warning is displayed once per session. #> Compiling Stan program using cmdstanr #> +<<<<<<< HEAD +#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5, +#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4, +#> from stan/lib/stan_math/stan/math/prim/prob.hpp:359, +#> from stan/lib/stan_math/stan/math/prim.hpp:16, +#> from stan/lib/stan_math/stan/math/rev.hpp:16, +#> from stan/lib/stan_math/stan/math.hpp:19, +#> from stan/src/stan/model/model_header.hpp:4, +#> from C:/Users/uqnclar2/AppData/Local/Temp/Rtmp2bnpq5/model-3cd0368e4f93.hpp:2: +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)': +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers +#> 194 | if (cdf_n < 0.0) +#> | +#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory +======= +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Start sampling #> Running MCMC with 2 parallel chains... #> @@ -1574,6 +1748,20 @@

Examples#> Chain 1 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) #> Chain 1 Iteration: 300 / 1000 [ 30%] (Warmup) +<<<<<<< HEAD +#> Chain 2 Iteration: 200 / 1000 [ 20%] (Warmup) +#> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) +#> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) +#> Chain 2 Iteration: 400 / 1000 [ 40%] (Warmup) +#> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) +#> Chain 1 Iteration: 501 / 1000 [ 50%] (Sampling) +#> Chain 2 Iteration: 500 / 1000 [ 50%] (Warmup) +#> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) +#> Chain 1 Iteration: 600 / 1000 [ 60%] (Sampling) +#> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) +#> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) +#> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) +======= #> Chain 1 Iteration: 400 / 1000 [ 40%] (Warmup) #> Chain 2 Iteration: 300 / 1000 [ 30%] (Warmup) #> Chain 1 Iteration: 500 / 1000 [ 50%] (Warmup) @@ -1584,15 +1772,23 @@

Examples#> Chain 2 Iteration: 501 / 1000 [ 50%] (Sampling) #> Chain 1 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 600 / 1000 [ 60%] (Sampling) +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> Chain 1 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 2 Iteration: 700 / 1000 [ 70%] (Sampling) #> Chain 2 Iteration: 800 / 1000 [ 80%] (Sampling) #> Chain 1 Iteration: 900 / 1000 [ 90%] (Sampling) +<<<<<<< HEAD +#> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) +#> Chain 1 finished in 1.1 seconds. +#> Chain 2 finished in 1.1 seconds. +======= #> Chain 2 Iteration: 900 / 1000 [ 90%] (Sampling) #> Chain 1 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 1 finished in 3.0 seconds. #> Chain 2 Iteration: 1000 / 1000 [100%] (Sampling) #> Chain 2 finished in 3.1 seconds. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> #> Both chains finished successfully. #> Mean chain execution time: 3.1 seconds. @@ -1601,7 +1797,11 @@

Examplessummary(mod) #> GAM formula: #> cbind(y, ntrials) ~ series + s(x, by = series) +<<<<<<< HEAD +#> <environment: 0x000001b48de84448> +======= #> <environment: 0x000001f1510c0cc8> +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> #> Family: #> binomial @@ -1661,7 +1861,11 @@

Examples#> 0 of 1000 iterations saturated the maximum tree depth of 12 (0%) #> E-FMI indicated no pathological behavior #> +<<<<<<< HEAD +#> Samples were drawn using NUTS(diag_e) at Wed Oct 30 3:11:36 PM 2024. +======= #> Samples were drawn using NUTS(diag_e) at Mon Oct 28 7:00:34 PM 2024. +>>>>>>> 79e31b766e09ac39e22b93cb199f3eb4fda5e08f #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) diff --git a/docs/reference/pairs.mvgam-1.png b/docs/reference/pairs.mvgam-1.png index f718d406..669817df 100644 Binary files a/docs/reference/pairs.mvgam-1.png and b/docs/reference/pairs.mvgam-1.png differ diff --git a/docs/reference/pairs.mvgam-2.png b/docs/reference/pairs.mvgam-2.png index 5809b0a9..3e93aee8 100644 Binary files a/docs/reference/pairs.mvgam-2.png and b/docs/reference/pairs.mvgam-2.png differ diff --git a/docs/reference/pairs.mvgam.html b/docs/reference/pairs.mvgam.html index b2b21c16..023dc254 100644 --- a/docs/reference/pairs.mvgam.html +++ b/docs/reference/pairs.mvgam.html @@ -12,7 +12,7 @@ mvgam - 1.1.3 + 1.1.4