add variance as a prediction type
nicholasjclark committed Sep 20, 2023
1 parent 32a6d5e commit e6748e4
Showing 89 changed files with 1,748 additions and 451 deletions.
9 changes: 5 additions & 4 deletions NAMESPACE
@@ -92,12 +92,17 @@ importFrom(bayesplot,color_scheme_set)
importFrom(bayesplot,log_posterior)
importFrom(bayesplot,neff_ratio)
importFrom(bayesplot,nuts_params)
importFrom(brms,dstudent_t)
importFrom(brms,get_prior)
importFrom(brms,logm1)
importFrom(brms,lognormal)
importFrom(brms,mcmc_plot)
importFrom(brms,ndraws)
importFrom(brms,prior_string)
importFrom(brms,pstudent_t)
importFrom(brms,qstudent_t)
importFrom(brms,rstudent_t)
importFrom(brms,student)
importFrom(grDevices,devAskNewPage)
importFrom(grDevices,hcl.colors)
importFrom(grDevices,rgb)
@@ -164,7 +169,6 @@ importFrom(stats,dlnorm)
importFrom(stats,dnbinom)
importFrom(stats,dnorm)
importFrom(stats,dpois)
importFrom(stats,dt)
importFrom(stats,ecdf)
importFrom(stats,formula)
importFrom(stats,frequency)
@@ -187,12 +191,10 @@ importFrom(stats,poisson)
importFrom(stats,ppois)
importFrom(stats,predict)
importFrom(stats,printCoefmat)
importFrom(stats,pt)
importFrom(stats,qcauchy)
importFrom(stats,qnorm)
importFrom(stats,qqline)
importFrom(stats,qqnorm)
importFrom(stats,qt)
importFrom(stats,quantile)
importFrom(stats,rbeta)
importFrom(stats,reformulate)
@@ -201,7 +203,6 @@ importFrom(stats,rlnorm)
importFrom(stats,rnbinom)
importFrom(stats,rnorm)
importFrom(stats,rpois)
importFrom(stats,rt)
importFrom(stats,runif)
importFrom(stats,sd)
importFrom(stats,setNames)
19 changes: 8 additions & 11 deletions R/compute_edf.R
@@ -71,19 +71,16 @@ compute_edf = function(mgcv_model, object, rho_names, sigma_raw_names){
mu <- fitted(object, summary = FALSE)[best_draw, 1:length(eta)]
}

# Because some mvgam families do not (yet) have simple functions to compute
# prediction variance, we will resort to brute force sampling
preds <- do.call(rbind, lapply(1:75, function(x){
# luckily the predict function is vectorized!!
predict(object, process_error = FALSE, type = 'response')[best_draw,]
}))
pred_variance <- apply(preds, 2, var)
if(any(pred_variance == 0)){
pred_variance[which(pred_variance == 0)] <-
mu[which(pred_variance == 0)]
# Calculate variance using family's mean-variance relationship
mu_variance <- predict(object,
process_error = FALSE,
type = 'variance')[best_draw, 1:length(eta)]
if(any(mu_variance == 0)){
mu_variance[which(mu_variance == 0)] <-
mu[which(mu_variance == 0)]
}

w <- as.numeric(mgcv_model$family$mu.eta(eta) / pred_variance)
w <- as.numeric(mgcv_model$family$mu.eta(eta) / mu_variance)
XWX <- t(X) %*% (w * X)
rho <- mgcv_model$sp
lambda <- exp(rho)
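The change above replaces brute-force sampling of prediction variance with the family's analytic mean-variance relationship when building the weights for the effective-degrees-of-freedom calculation. A minimal standalone sketch of that weighting step, using made-up inputs and a negative binomial mean-variance relationship in place of the package's own `predict()` call:

```r
# Minimal sketch (not package code): the weights for the penalized working
# model now come from an analytic mean-variance relationship rather than
# repeated sampling. All inputs below are made up for illustration.
set.seed(1)
eta <- rnorm(10)                       # linear predictor values
mu  <- exp(eta)                        # fitted means under a log link
phi <- 5                               # hypothetical NB dispersion

mu_variance <- mu + mu^2 / phi         # negative binomial variance
mu_variance[mu_variance == 0] <- mu[mu_variance == 0]  # guard against zeros

mu_eta <- mu                           # d(mu)/d(eta) for the log link
w <- as.numeric(mu_eta / mu_variance)  # used downstream in t(X) %*% (w * X)
```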
5 changes: 4 additions & 1 deletion R/evaluate_mvgams.R
@@ -667,7 +667,7 @@ drps_score <- function(truth, fc, interval_width = 0.9,
}

#nsum <- 1000
nsum <- max(c(truth, fc)) + 1000
nsum <- max(c(truth, fc), na.rm = TRUE) + 1000
Fy = ecdf(fc)
ysum <- 0:nsum
indicator <- ifelse(ysum - truth >= 0, 1, 0)
@@ -714,6 +714,9 @@ sis_score <- function(truth, fc, interval_width = 0.9,
#' @importFrom scoringRules es_sample
#' @noRd
energy_score <- function(truth, fc, log = FALSE) {
# es_sample can't handle any NAs
has_nas <- apply(fc, 2, function(x) any(is.na(x)))
fc <- fc[,!has_nas]
if(log){
truth <- log(truth + 0.001)
fc <- log(fc + 0.001)
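The energy-score fix above drops any forecast draw containing missing values before scoring, because `scoringRules::es_sample()` cannot handle `NA`s. A standalone sketch of that filtering step with made-up data:

```r
library(scoringRules)

# Made-up example: a 3-dimensional observation and 500 forecast draws
# (one column per draw), a few of which contain NAs
set.seed(1)
truth <- c(2.1, 0.4, 5.7)
fc <- matrix(rnorm(3 * 500, mean = truth), nrow = 3)
fc[2, c(10, 50)] <- NA

# es_sample() can't handle any NAs, so drop draws (columns) that contain them
has_nas <- apply(fc, 2, function(x) any(is.na(x)))
fc <- fc[, !has_nas, drop = FALSE]

es_sample(y = truth, dat = fc)
```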
98 changes: 47 additions & 51 deletions R/families.R
@@ -1,6 +1,6 @@
#' Supported mvgam families
#' @importFrom stats make.link dgamma pgamma rgamma qnorm plnorm runif pbeta dlnorm dpois pnorm ppois plogis gaussian poisson Gamma dnbinom rnbinom dnorm dbeta
#' @importFrom brms lognormal
#' @importFrom brms lognormal student rstudent_t qstudent_t dstudent_t pstudent_t
#' @param link a specification for the family link function. At present these cannot
#' be changed
#' @details \code{mvgam} currently supports the following standard observation families:
@@ -20,12 +20,14 @@
#'\itemize{
#' \item \code{\link[brms]{lognormal}} for non-negative real-valued data
#' \item \code{tweedie} for count data (power parameter `p` fixed at `1.5`)
#' \item \code{student-t} for real-valued data
#' \item `student_t()` (or \code{\link[brms]{student}}) for real-valued data
#' }
#'Note that only `poisson()`, `nb()`, and `tweedie()` are available if
#'using `JAGS`. All families, apart from `tweedie()`, are supported if
#'using `Stan`.
#' @name mvgam_families
#' @details Note that currently it is not possible to change the default link
#' functions in `mvgam`, so any call to change these will be silently ignored
#' @author Nicholas J Clark
NULL

@@ -36,7 +38,7 @@ tweedie = function(link = 'log'){
structure(list(family = "tweedie", link = 'log', linkfun = linktemp$linkfun,
linkinv = linktemp$linkinv, mu.eta = linktemp$mu.eta,
valideta = linktemp$valideta),
class = c("extended.family","family"))
class = c("extended.family", "family"))
}

#' @rdname mvgam_families
@@ -46,7 +48,7 @@ student_t = function(link = 'identity'){
structure(list(family = "student", link = 'identity', linkfun = linktemp$linkfun,
linkinv = linktemp$linkinv, mu.eta = linktemp$mu.eta,
valideta = linktemp$valideta),
class = c("extended.family","family"))
class = c("extended.family", "family"))
}

#### Non-exported functions for performing family-specific tasks ####
@@ -71,57 +73,20 @@ beta_shapes = function(mu, phi) {
shape2 = (1 - mu) * phi))
}

# Scaled Student's t distribution
# Original author: mjskay (https://github.com/mjskay/ggdist/blob/master/R/student_t.R)
#'
#' Density, distribution function, quantile function and random generation for the
#' scaled Student's t distribution, parameterized by degrees of freedom (`df`),
#' location (`mu`), and scale (`sigma`).
#'
#' @inheritParams stats::dt
#' @param mu Location parameter (median)
#' @param sigma Scale parameter
#'
#' @name student_t
#' @importFrom stats dt pt qt rt
#' @noRd
dstudent_t = function(x, df, mu = 0, sigma = 1, log = FALSE) {
if (log) {
dt((x - mu)/sigma, df = df, log = TRUE) - log(sigma)
}
else {
dt((x - mu)/sigma, df = df) / sigma
}
}

#' @noRd
pstudent_t = function(q, df, mu = 0, sigma = 1, lower.tail = TRUE, log.p = FALSE) {
pt((q - mu)/sigma, df = df, lower.tail = lower.tail, log.p = log.p)
}

#' @noRd
qstudent_t = function(p, df, mu = 0, sigma = 1, lower.tail = TRUE, log.p = FALSE) {
qt(p, df = df, lower.tail = lower.tail, log.p = log.p)*sigma + mu
}

#' @noRd
rstudent_t = function(n, df, mu = 0, sigma = 1) {
as.vector(rt(n, df = df)*sigma + mu)
}

#' Generic prediction function
#' @importFrom stats predict
#' @param Xp A `mgcv` linear predictor matrix
#' @param family \code{character}. The `family` slot of the model's family argument
#' @param betas Vector of regression coefficients of length `NCOL(Xp)`
#' @param type Either `link`, `expected` or `response`
#' @param type Either `link`, `expected`, `response` or `variance`
#' @param family_pars Additional arguments for each specific observation process (i.e.
#' overdispersion parameter if `family == "nb"`)
#' @param density logical. Rather than calculating a prediction, evaluate the log-likelihood.
#' Use this option when particle filtering
#' @param truth Observation to use for evaluating the likelihood (if `density == TRUE`)
#' @details A generic prediction function that will make it easier to add new
#' response distributions in future
#' response distributions in future. Use `type = variance` for computing family-level
#' variance as a function of the mean
#' @noRd
mvgam_predict = function(Xp, family, betas,
type = 'link',
@@ -140,6 +105,8 @@ mvgam_predict = function(Xp, family, betas,
log = TRUE)
}

} else if(type == 'variance') {
out <- rep.int(1, NROW(Xp))
} else {
out <- rnorm(n = NROW(Xp),
mean = ((matrix(Xp, ncol = NCOL(Xp)) %*%
@@ -166,6 +133,12 @@ mvgam_predict = function(Xp, family, betas,
betas)) +
attr(Xp, 'model.offset'),
sdlog = as.vector(family_pars$sigma_obs))
} else if(type == 'variance'){
mu <- ((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) + attr(Xp, 'model.offset')
sd <- as.vector(family_pars$sigma_obs)
out <- (exp((sd) ^ 2) - 1) * exp((2 * mu + sd ^ 2))

} else {
mu <- as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset'))
@@ -180,12 +153,16 @@ mvgam_predict = function(Xp, family, betas,
betas) + attr(Xp, 'model.offset'))
if(density){
out <- dstudent_t(truth,
df = family_pars$nu,
df = as.vector(family_pars$nu),
mu = out,
sigma = as.vector(family_pars$sigma_obs),
log = TRUE)
}

} else if(type == 'variance') {
out <- as.vector(family_pars$nu) /
(pmax(2.01, as.vector(family_pars$nu)) - 2)

} else {
out <- rstudent_t(n = NROW(Xp),
df = family_pars$nu,
@@ -211,7 +188,10 @@ mvgam_predict = function(Xp, family, betas,
lambda = exp(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
attr(Xp, 'model.offset')))
} else {
} else if(type == 'variance'){
out <- exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
} else {
out <- exp(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
attr(Xp, 'model.offset'))
@@ -236,6 +216,10 @@ mvgam_predict = function(Xp, family, betas,
betas)) +
attr(Xp, 'model.offset')),
size = as.vector(family_pars$phi))
} else if(type == 'variance'){
mu <- exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
out <- mu + mu^2 / as.vector(family_pars$phi)
} else {
out <- exp(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
@@ -263,7 +247,11 @@ mvgam_predict = function(Xp, family, betas,
out <- rbeta(n = NROW(Xp),
shape1 = shape_pars$shape1,
shape2 = shape_pars$shape2)
} else {
} else if(type == 'variance'){
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
out <- mu * (1 - mu) / (1 + exp(as.vector(family_pars$phi)))
} else {
out <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
}
@@ -288,7 +276,10 @@ mvgam_predict = function(Xp, family, betas,
exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset'))),
shape = as.vector(family_pars$shape))
} else {
} else if(type == 'variance'){
out <- as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')) ^ 2
} else {
out <- as.vector(family_pars$shape) /
(as.vector(family_pars$shape) / exp(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset'))))
@@ -319,7 +310,12 @@ mvgam_predict = function(Xp, family, betas,
# Power parameter is fixed
p = 1.5,
phi = as.vector(family_pars$phi)))
} else {
} else if(type == 'variance'){
out <- (exp(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
attr(Xp, 'model.offset')) ^ 1.5) *
as.vector(family_pars$phi)
} else {
out <- exp(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
attr(Xp, 'model.offset'))
@@ -353,9 +349,9 @@ family_to_mgcvfam = function(family){
} else if(family$family == 'student'){
gaussian()
} else if(family$family == 'lognormal'){
mgcv::tw()
mgcv::Tweedie(p = 1.5, link = 'identity')
} else if(family$family == 'tweedie'){
mgcv::Tweedie(p=1.5)
mgcv::Tweedie(p = 1.5, link = 'log')
} else {
family
}
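The `variance` branches added throughout `mvgam_predict()` encode standard mean-variance relationships for each observation family. A compact, hypothetical helper (illustration only, not part of the package) collecting a subset of those relationships, with dispersion parameters assumed on their natural scale; note that `mu` is on the log (linear predictor) scale for the lognormal case and that the Student-t branch, as in the diff, returns only the degrees-of-freedom factor:

```r
# Hypothetical helper (illustration only): the mean-variance relationships
# used in the 'variance' branches above. `mu` is the fitted mean, except
# for 'lognormal' where it is the log-scale (linear predictor) mean;
# `phi`, `sigma` and `nu` are dispersion / scale / df parameters.
family_variance <- function(family, mu, phi = NULL, sigma = NULL, nu = NULL){
  switch(family,
         gaussian  = rep.int(1, length(mu)),                  # unit variance
         poisson   = mu,                                      # Var = mu
         nb        = mu + mu^2 / phi,                         # Var = mu + mu^2 / phi
         lognormal = (exp(sigma^2) - 1) * exp(2 * mu + sigma^2),
         beta      = mu * (1 - mu) / (1 + phi),
         tweedie   = mu^1.5 * phi,                            # power fixed at 1.5
         student   = nu / (pmax(2.01, nu) - 2))               # df factor only, as above
}

# Example: negative binomial variance at mu = c(1, 5, 20) with phi = 5
family_variance('nb', mu = c(1, 5, 20), phi = 5)
```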
6 changes: 4 additions & 2 deletions R/predict.mvgam.R
@@ -10,7 +10,8 @@
#'If \code{expected} is used, predictions reflect the expectation of the response (the mean)
#'but ignore uncertainty in the observation process. When \code{response} is used,
#'the predictions take uncertainty in the observation process into account to return
#'predictions on the outcome scale
#'predictions on the outcome scale. When \code{variance} is used, the family-level variance
#'of the response, as determined by the mean-variance relationship, is returned
#'@param process_error Logical. If \code{TRUE} and a dynamic trend model was fit,
#'expected uncertainty in the process model is accounted for by using draws
#'from the latent trend SD parameters. If \code{FALSE}, uncertainty in the latent trend
@@ -54,7 +55,8 @@ predict.mvgam = function(object, newdata,

type <- match.arg(arg = type, choices = c("link",
"expected",
"response"))
"response",
"variance"))

# If a linear predictor was supplied for the latent process models, calculate
# predictions by assuming the trend is stationary (this is basically what brms)
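With the new option registered in `match.arg()`, requesting the family-level variance from a fitted model is just another `type` value. A hedged usage sketch, assuming `fit` is an already-fitted `mvgam` object (not something created in this commit):

```r
# Hypothetical usage, assuming `fit` is a fitted mvgam object.
# 'variance' returns the family-level variance implied by each draw's
# expected value and dispersion parameters (the mean-variance relationship)
var_preds <- predict(fit, type = 'variance', process_error = FALSE)

# For comparison, predictions on the expectation scale
mu_preds <- predict(fit, type = 'expected', process_error = FALSE)
```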
2 changes: 1 addition & 1 deletion R/sim_mvgam.R
@@ -29,7 +29,7 @@
#'@param trend_rel Deprecated. Use `prop_trend` instead
#'@param freq \code{integer}. The seasonal frequency of the series
#'@param family \code{family} specifying the exponential observation family for the series. Currently supported
#'families are: `nb()`, `poisson()`, `tweedie()`, `gaussian()`, `betar()`, `lognormal()`, `student_t()` and `Gamma()`
#'families are: `nb()`, `poisson()`, `tweedie()`, `gaussian()`, `betar()`, `lognormal()`, `student()` and `Gamma()`
#'@param phi \code{vector} of dispersion parameters for the series (i.e. `size` for Negative Binomial or
#'`phi` for Tweedie or Beta). If \code{length(phi) < n_series}, the first element of `phi` will
#'be replicated `n_series` times. Defaults to \code{5} for Negative Binomial and Tweedie; \code{10} for
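Since the documentation above now lists the Student-t family among the supported observation families, a hedged simulation sketch (only parameters named in the documentation above are set explicitly; every other argument is assumed to keep its default):

```r
library(mvgam)

# Hypothetical usage sketch: simulate series with a Student-t observation
# process; `prop_trend` and `freq` are named in the documentation above,
# all other arguments are left at their assumed defaults
sim_dat <- sim_mvgam(family = student_t(),
                     prop_trend = 0.4,
                     freq = 12)
str(sim_dat, max.level = 1)
```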