diff --git a/NAMESPACE b/NAMESPACE index 871b7259..3c8ad740 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,12 +92,17 @@ importFrom(bayesplot,color_scheme_set) importFrom(bayesplot,log_posterior) importFrom(bayesplot,neff_ratio) importFrom(bayesplot,nuts_params) +importFrom(brms,dstudent_t) importFrom(brms,get_prior) importFrom(brms,logm1) importFrom(brms,lognormal) importFrom(brms,mcmc_plot) importFrom(brms,ndraws) importFrom(brms,prior_string) +importFrom(brms,pstudent_t) +importFrom(brms,qstudent_t) +importFrom(brms,rstudent_t) +importFrom(brms,student) importFrom(grDevices,devAskNewPage) importFrom(grDevices,hcl.colors) importFrom(grDevices,rgb) @@ -164,7 +169,6 @@ importFrom(stats,dlnorm) importFrom(stats,dnbinom) importFrom(stats,dnorm) importFrom(stats,dpois) -importFrom(stats,dt) importFrom(stats,ecdf) importFrom(stats,formula) importFrom(stats,frequency) @@ -187,12 +191,10 @@ importFrom(stats,poisson) importFrom(stats,ppois) importFrom(stats,predict) importFrom(stats,printCoefmat) -importFrom(stats,pt) importFrom(stats,qcauchy) importFrom(stats,qnorm) importFrom(stats,qqline) importFrom(stats,qqnorm) -importFrom(stats,qt) importFrom(stats,quantile) importFrom(stats,rbeta) importFrom(stats,reformulate) @@ -201,7 +203,6 @@ importFrom(stats,rlnorm) importFrom(stats,rnbinom) importFrom(stats,rnorm) importFrom(stats,rpois) -importFrom(stats,rt) importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,setNames) diff --git a/R/compute_edf.R b/R/compute_edf.R index 2de5d945..1bd7ae35 100644 --- a/R/compute_edf.R +++ b/R/compute_edf.R @@ -71,19 +71,16 @@ compute_edf = function(mgcv_model, object, rho_names, sigma_raw_names){ mu <- fitted(object, summary = FALSE)[best_draw, 1:length(eta)] } - # Because some mvgam families do not (yet) have simple functions to compute - # prediction variance, we will resort to brute force sampling - preds <- do.call(rbind, lapply(1:75, function(x){ - # luckily the predict function is vectorized!! - predict(object, process_error = FALSE, type = 'response')[best_draw,] - })) - pred_variance <- apply(preds, 2, var) - if(any(pred_variance == 0)){ - pred_variance[which(pred_variance == 0)] <- - mu[which(pred_variance == 0)] + # Calculate variance using family's mean-variance relationship + mu_variance <- predict(object, + process_error = FALSE, + type = 'variance')[best_draw, 1:length(eta)] + if(any(mu_variance == 0)){ + mu_variance[which(mu_variance == 0)] <- + mu[which(mu_variance == 0)] } - w <- as.numeric(mgcv_model$family$mu.eta(eta) / pred_variance) + w <- as.numeric(mgcv_model$family$mu.eta(eta) / mu_variance) XWX <- t(X) %*% (w * X) rho <- mgcv_model$sp lambda <- exp(rho) diff --git a/R/evaluate_mvgams.R b/R/evaluate_mvgams.R index 8b3a0246..b47a9e42 100644 --- a/R/evaluate_mvgams.R +++ b/R/evaluate_mvgams.R @@ -667,7 +667,7 @@ drps_score <- function(truth, fc, interval_width = 0.9, } #nsum <- 1000 - nsum <- max(c(truth, fc)) + 1000 + nsum <- max(c(truth, fc), na.rm = TRUE) + 1000 Fy = ecdf(fc) ysum <- 0:nsum indicator <- ifelse(ysum - truth >= 0, 1, 0) @@ -714,6 +714,9 @@ sis_score <- function(truth, fc, interval_width = 0.9, #' @importFrom scoringRules es_sample #' @noRd energy_score <- function(truth, fc, log = FALSE) { + # es_sample can't handle any NAs + has_nas <- apply(fc, 2, function(x) any(is.na(x))) + fc <- fc[,!has_nas] if(log){ truth <- log(truth + 0.001) fc <- log(fc + 0.001) diff --git a/R/families.R b/R/families.R index dbeea197..734cf212 100644 --- a/R/families.R +++ b/R/families.R @@ -1,6 +1,6 @@ #' Supported mvgam families #' @importFrom stats make.link dgamma pgamma rgamma qnorm plnorm runif pbeta dlnorm dpois pnorm ppois plogis gaussian poisson Gamma dnbinom rnbinom dnorm dbeta -#' @importFrom brms lognormal +#' @importFrom brms lognormal student rstudent_t qstudent_t dstudent_t pstudent_t #' @param link a specification for the family link function. At present these cannot #' be changed #' @details \code{mvgam} currently supports the following standard observation families: @@ -20,12 +20,14 @@ #'\itemize{ #' \item \code{\link[brms]{lognormal}} for non-negative real-valued data #' \item \code{tweedie} for count data (power parameter `p` fixed at `1.5`) -#' \item \code{student-t} for real-valued data +#' \item `student_t()` (or \code{\link[brms]{student}}) for real-valued data #' } #'Note that only `poisson()`, `nb()`, and `tweedie()` are available if #'using `JAGS`. All families, apart from `tweedie()`, are supported if #'using `Stan`. #' @name mvgam_families +#' @details Note that currently it is not possible to change the default link +#' functions in `mvgam`, so any call to change these will be silently ignored #' @author Nicholas J Clark NULL @@ -36,7 +38,7 @@ tweedie = function(link = 'log'){ structure(list(family = "tweedie", link = 'log', linkfun = linktemp$linkfun, linkinv = linktemp$linkinv, mu.eta = linktemp$mu.eta, valideta = linktemp$valideta), - class = c("extended.family","family")) + class = c("extended.family", "family")) } #' @rdname mvgam_families @@ -46,7 +48,7 @@ student_t = function(link = 'identity'){ structure(list(family = "student", link = 'identity', linkfun = linktemp$linkfun, linkinv = linktemp$linkinv, mu.eta = linktemp$mu.eta, valideta = linktemp$valideta), - class = c("extended.family","family")) + class = c("extended.family", "family")) } #### Non-exported functions for performing family-specific tasks #### @@ -71,57 +73,20 @@ beta_shapes = function(mu, phi) { shape2 = (1 - mu) * phi)) } -# Scaled Student's t distribution -# Original author: mjskay (https://github.com/mjskay/ggdist/blob/master/R/student_t.R) -#' -#' Density, distribution function, quantile function and random generation for the -#' scaled Student's t distribution, parameterized by degrees of freedom (`df`), -#' location (`mu`), and scale (`sigma`). -#' -#' @inheritParams stats::dt -#' @param mu Location parameter (median) -#' @param sigma Scale parameter -#' -#' @name student_t -#' @importFrom stats dt pt qt rt -#' @noRd -dstudent_t = function(x, df, mu = 0, sigma = 1, log = FALSE) { - if (log) { - dt((x - mu)/sigma, df = df, log = TRUE) - log(sigma) - } - else { - dt((x - mu)/sigma, df = df) / sigma - } -} - -#' @noRd -pstudent_t = function(q, df, mu = 0, sigma = 1, lower.tail = TRUE, log.p = FALSE) { - pt((q - mu)/sigma, df = df, lower.tail = lower.tail, log.p = log.p) -} - -#' @noRd -qstudent_t = function(p, df, mu = 0, sigma = 1, lower.tail = TRUE, log.p = FALSE) { - qt(p, df = df, lower.tail = lower.tail, log.p = log.p)*sigma + mu -} - -#' @noRd -rstudent_t = function(n, df, mu = 0, sigma = 1) { - as.vector(rt(n, df = df)*sigma + mu) -} - #' Generic prediction function #' @importFrom stats predict #' @param Xp A `mgcv` linear predictor matrix #' @param family \code{character}. The `family` slot of the model's family argument #' @param betas Vector of regression coefficients of length `NCOL(Xp)` -#' @param type Either `link`, `expected` or `response` +#' @param type Either `link`, `expected`, `response` or `variance` #' @param family_pars Additional arguments for each specific observation process (i.e. #' overdispersion parameter if `family == "nb"`) #' @param density logical. Rather than calculating a prediction, evaluate the log-likelihood. #' Use this option when particle filtering #' @param truth Observation to use for evaluating the likelihood (if `density == TRUE`) #' @details A generic prediction function that will make it easier to add new -#' response distributions in future +#' response distributions in future. Use `type = variance` for computing family-level +#' variance as a function of the mean #' @noRd mvgam_predict = function(Xp, family, betas, type = 'link', @@ -140,6 +105,8 @@ mvgam_predict = function(Xp, family, betas, log = TRUE) } + } else if(type == 'variance') { + out <- rep.int(1, NROW(Xp)) } else { out <- rnorm(n = NROW(Xp), mean = ((matrix(Xp, ncol = NCOL(Xp)) %*% @@ -166,6 +133,12 @@ mvgam_predict = function(Xp, family, betas, betas)) + attr(Xp, 'model.offset'), sdlog = as.vector(family_pars$sigma_obs)) + } else if(type == 'variance'){ + mu <- ((matrix(Xp, ncol = NCOL(Xp)) %*% + betas)) + attr(Xp, 'model.offset') + sd <- as.vector(family_pars$sigma_obs) + out <- (exp((sd) ^ 2) - 1) * exp((2 * mu + sd ^ 2)) + } else { mu <- as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% betas) + attr(Xp, 'model.offset')) @@ -180,12 +153,16 @@ mvgam_predict = function(Xp, family, betas, betas) + attr(Xp, 'model.offset')) if(density){ out <- dstudent_t(truth, - df = family_pars$nu, + df = as.vector(family_pars$nu), mu = out, sigma = as.vector(family_pars$sigma_obs), log = TRUE) } + } else if(type == 'variance') { + out <- as.vector(family_pars$nu) / + (pmax(2.01, as.vector(family_pars$nu)) - 2) + } else { out <- rstudent_t(n = NROW(Xp), df = family_pars$nu, @@ -211,7 +188,10 @@ mvgam_predict = function(Xp, family, betas, lambda = exp(((matrix(Xp, ncol = NCOL(Xp)) %*% betas)) + attr(Xp, 'model.offset'))) - } else { + } else if(type == 'variance'){ + out <- exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% + betas) + attr(Xp, 'model.offset'))) + } else { out <- exp(((matrix(Xp, ncol = NCOL(Xp)) %*% betas)) + attr(Xp, 'model.offset')) @@ -236,6 +216,10 @@ mvgam_predict = function(Xp, family, betas, betas)) + attr(Xp, 'model.offset')), size = as.vector(family_pars$phi)) + } else if(type == 'variance'){ + mu <- exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% + betas) + attr(Xp, 'model.offset'))) + out <- mu + mu^2 / as.vector(family_pars$phi) } else { out <- exp(((matrix(Xp, ncol = NCOL(Xp)) %*% betas)) + @@ -263,7 +247,11 @@ mvgam_predict = function(Xp, family, betas, out <- rbeta(n = NROW(Xp), shape1 = shape_pars$shape1, shape2 = shape_pars$shape2) - } else { + } else if(type == 'variance'){ + mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% + betas) + attr(Xp, 'model.offset'))) + out <- mu * (1 - mu) / (1 + exp(as.vector(family_pars$phi))) + } else { out <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% betas) + attr(Xp, 'model.offset'))) } @@ -288,7 +276,10 @@ mvgam_predict = function(Xp, family, betas, exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% betas) + attr(Xp, 'model.offset'))), shape = as.vector(family_pars$shape)) - } else { + } else if(type == 'variance'){ + out <- as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% + betas) + attr(Xp, 'model.offset')) ^ 2 + } else { out <- as.vector(family_pars$shape) / (as.vector(family_pars$shape) / exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*% betas) + attr(Xp, 'model.offset')))) @@ -319,7 +310,12 @@ mvgam_predict = function(Xp, family, betas, # Power parameter is fixed p = 1.5, phi = as.vector(family_pars$phi))) - } else { + } else if(type == 'variance'){ + out <- (exp(((matrix(Xp, ncol = NCOL(Xp)) %*% + betas)) + + attr(Xp, 'model.offset')) ^ 1.5) * + as.vector(family_pars$phi) + } else { out <- exp(((matrix(Xp, ncol = NCOL(Xp)) %*% betas)) + attr(Xp, 'model.offset')) @@ -353,9 +349,9 @@ family_to_mgcvfam = function(family){ } else if(family$family == 'student'){ gaussian() } else if(family$family == 'lognormal'){ - mgcv::tw() + mgcv::Tweedie(p = 1.5, link = 'identity') } else if(family$family == 'tweedie'){ - mgcv::Tweedie(p=1.5) + mgcv::Tweedie(p = 1.5, link = 'log') } else { family } diff --git a/R/predict.mvgam.R b/R/predict.mvgam.R index e24b271e..a85fce3b 100644 --- a/R/predict.mvgam.R +++ b/R/predict.mvgam.R @@ -10,7 +10,8 @@ #'If \code{expected} is used, predictions reflect the expectation of the response (the mean) #'but ignore uncertainty in the observation process. When \code{response} is used, #'the predictions take uncertainty in the observation process into account to return -#'predictions on the outcome scale +#'predictions on the outcome scale. When \code{variance} is used, the variance of the response +#'with respect to the mean (mean-variance relationship) is returned #'@param process_error Logical. If \code{TRUE} and a dynamic trend model was fit, #'expected uncertainty in the process model is accounted for by using draws #'from the latent trend SD parameters. If \code{FALSE}, uncertainty in the latent trend @@ -54,7 +55,8 @@ predict.mvgam = function(object, newdata, type <- match.arg(arg = type, choices = c("link", "expected", - "response")) + "response", + "variance")) # If a linear predictor was supplied for the latent process models, calculate # predictions by assuming the trend is stationary (this is basically what brms) diff --git a/R/sim_mvgam.R b/R/sim_mvgam.R index b02d3d65..028f1863 100644 --- a/R/sim_mvgam.R +++ b/R/sim_mvgam.R @@ -29,7 +29,7 @@ #'@param trend_rel Depracated. Use `prop_trend` instead #'@param freq \code{integer}. The seasonal frequency of the series #'@param family \code{family} specifying the exponential observation family for the series. Currently supported -#'families are: `nb()`, `poisson()`, `tweedie()`, `gaussian()`, `betar()`, `lognormal()`, `student_t()` and `Gamma()` +#'families are: `nb()`, `poisson()`, `tweedie()`, `gaussian()`, `betar()`, `lognormal()`, `student()` and `Gamma()` #'@param phi \code{vector} of dispersion parameters for the series (i.e. `size` for Negative Binomial or #'`phi` for Tweedie or Beta). If \code{length(phi) < n_series}, the first element of `phi` will #'be replicated `n_series` times. Defaults to \code{5} for Negative Binomial and Tweedie; \code{10} for diff --git a/R/update.mvgam.R b/R/update.mvgam.R index b5f82883..9daa28a2 100644 --- a/R/update.mvgam.R +++ b/R/update.mvgam.R @@ -66,6 +66,52 @@ update.mvgam = function(object, formula, lfo = FALSE, ...){ + dots <- list(...) + + # Extract sampling parameters + if("backend" %in% names(dots)){ + backend <- dots$backend + } else { + backend <- getOption("brms.backend", "cmdstanr") + } + + total_iter <- object$model_output@sim$iter + burnin <- object$model_output@sim$warmup + samples <- total_iter - burnin + max_treedepth <- object$max_treedepth + adapt_delta <- object$adapt_delta + chains <- object$model_output@sim$chains + + if("chains" %in% names(dots)){ + chains <- dots$chains + } else { + chains <- object$model_output@sim$chains + } + + if("burnin" %in% names(dots)){ + burnin <- dots$burnin + } else { + burnin <- burnin + } + + if("samples" %in% names(dots)){ + samples <- dots$samples + } else { + samples <- samples + } + + if("max_treedepth" %in% names(dots)){ + max_treedepth <- dots$max_treedepth + } else { + max_treedepth <- max_treedepth + } + + if("adapt_delta" %in% names(dots)){ + adapt_delta <- dots$adapt_delta + } else { + adapt_delta <- adapt_delta + } + if(missing(formula)){ formula <- object$call } @@ -164,6 +210,12 @@ update.mvgam = function(object, formula, lfo = lfo, use_stan = ifelse(object$fit_engine == 'stan', TRUE, FALSE), + backend = backend, + chains = chains, + burnin = burnin, + samples = samples, + adapt_delta = adapt_delta, + max_treedepth = max_treedepth, ...) } else { updated_mod <- mvgam(formula = formula, @@ -178,6 +230,12 @@ update.mvgam = function(object, formula, lfo = lfo, use_stan = ifelse(object$fit_engine == 'stan', TRUE, FALSE), + backend = backend, + chains = chains, + burnin = burnin, + samples = samples, + adapt_delta = adapt_delta, + max_treedepth = max_treedepth, ...) } diff --git a/R/validations.R b/R/validations.R index e1bbf14f..dfee3a11 100644 --- a/R/validations.R +++ b/R/validations.R @@ -113,7 +113,7 @@ validate_family_resrictions = function(response, family){ } } - # negatives and zeros not allowed for several families + # negatives and/or zeros not allowed for several families if(family$family %in% c('lognormal', 'Gamma')){ if(any(response<= 0)){ stop(paste0('Values <= 0 not allowed for ', family$family, ' responses'), diff --git a/_pkgdown.yml b/_pkgdown.yml index 6090a3bb..cd48e79b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -6,5 +6,5 @@ authors: template: bootstrap: 5 - bootswatch: flatly + bootswatch: sandstone diff --git a/docs/404.html b/docs/404.html index a3901a0a..2f20e8b8 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,6 +39,7 @@