diff --git a/DESCRIPTION b/DESCRIPTION index 11606e5d..f6165343 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,11 @@ Package: mvgam Title: Multivariate (Dynamic) Generalized Additive Models Version: 1.1.4 -Date: 2024-11-25 +Date: 2024-12-02 Authors@R: c(person("Nicholas J", "Clark", , "nicholas.j.clark1214@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7131-3301")), + person("Sarah", "Heaps", role = c("ctb"), + comment = c("VARMA parameterisations", ORCID = "0000-0002-5543-037X")), person("Scott", "Pease", role = c("ctb"), comment = c("broom enhancements", ORCID = "0009-0006-8977-9285")), person("Matthijs", "Hollanders", role = c("ctb"), diff --git a/NEWS.md b/NEWS.md index caf9ee0d..e1ed5511 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # mvgam 1.1.4 (development version; not yet on CRAN) ## New functionalities * Improved various plotting functions by returning `ggplot` objects in place of base plots (thanks to @mhollanders #38) +* Added the brier score (`score = 'brier'`) as an option for scoring forecasts of binary variables when using `family = bernoulli()` * Added `augment()` function to add residuals and fitted values to an mvgam object's observed data (thanks to @swpease #83) * Added support for approximate `gp()` effects with more than one covariate and with different kernel functions (#79) * Added function `jsdgam()` to estimate Joint Species Distribution Models in which both the latent factors and the observation model components can include any of mvgam's complex linear predictor effects. Also added a function `residual_cor()` to compute residual correlation, covariance and precision matrices from `jsdgam` models. See `?mvgam::jsdgam` and `?mvgam::residual_cor` for details diff --git a/R/evaluate_mvgams.R b/R/evaluate_mvgams.R index 7c04b012..04ac1184 100644 --- a/R/evaluate_mvgams.R +++ b/R/evaluate_mvgams.R @@ -274,7 +274,7 @@ eval_mvgam = function(object, }) # Calculate score and interval coverage per series - if(object$family %in% c('poisson', 'negative binomial')){ + if(object$family %in% c('poisson', 'negative binomial', 'binomial', 'beta_binomial')){ series_score <- lapply(seq_len(n_series), function(series){ DRPS <- data.frame(drps_mcmc_object(as.vector(as.matrix(series_truths[[series]])), series_fcs[[series]], @@ -615,7 +615,7 @@ crps_edf <- function(y, dat, w = NULL) { sapply(y, f) } -# Calculate out of sample CRPS +# Compute CRPS # code borrowed from scoringRules: https://github.com/FK83/scoringRules/blob/master/R/scores_sample_univ.R #' @noRd crps_score <- function(truth, fc, method = "edf", w = NULL, @@ -642,7 +642,7 @@ crps_score <- function(truth, fc, method = "edf", w = NULL, } -# Calculate out of sample DRPS +# Compute DRPS #' @noRd drps_score <- function(truth, fc, interval_width = 0.9, log = FALSE){ @@ -668,7 +668,7 @@ drps_score <- function(truth, fc, interval_width = 0.9, return(c(score, in_interval)) } -# Calculate out of sample scaled interval score +# Compute the scaled interval score #' @noRd sis_score <- function(truth, fc, interval_width = 0.9, log = FALSE){ @@ -697,6 +697,20 @@ sis_score <- function(truth, fc, interval_width = 0.9, return(c(score, in_interval)) } +# Compute the Brier score +#' @noRd +brier_score <- function(truth, + fc, + interval_width = 0.9){ + + score <- (truth - fc) ^ 2 + score <- sum(score) / length(score) + + # Cannot evaluate coverage for binary truths + in_interval <- NA + return(c(score, in_interval)) +} + #' Compute the multivariate energy score #' @noRd energy_score <- function(truth, fc, log = FALSE) { @@ -714,24 +728,6 @@ energy_score <- function(truth, fc, log = FALSE) { return(es) } -#' Wrapper to calculate energy score on all observations in fc_horizon -#' @noRd -energy_mcmc_object <- function(truths, fcs, log = FALSE, - weights){ - fc_horizon <- length(fcs[[1]][1,]) - fcs_per_horizon <- lapply(seq_len(fc_horizon), function(horizon){ - do.call(rbind, lapply(seq_along(fcs), function(fc){ - fcs[[fc]][,horizon] - })) - }) - - unlist(lapply(seq_len(fc_horizon), function(horizon){ - energy_score(truth = truths[,horizon], - fc = fcs_per_horizon[[horizon]], - log = log) - })) -} - #' Compute the variogram score, using the median pairwise difference #' from the forecast distribution (scoringRules::vs_sample uses the #' mean, which is not appropriate for skewed distributions) @@ -770,6 +766,45 @@ variogram_score = function(truth, fc, log = FALSE, weights){ } +#' Compute the energy score on all observations in fc_horizon +#' @noRd +energy_mcmc_object <- function(truths, fcs, log = FALSE, + weights){ + fc_horizon <- length(fcs[[1]][1,]) + fcs_per_horizon <- lapply(seq_len(fc_horizon), function(horizon){ + do.call(rbind, lapply(seq_along(fcs), function(fc){ + fcs[[fc]][,horizon] + })) + }) + + unlist(lapply(seq_len(fc_horizon), function(horizon){ + energy_score(truth = truths[,horizon], + fc = fcs_per_horizon[[horizon]], + log = log) + })) +} + +#' Compute the Brier score on all observations in fc_horizon +#' @noRd +brier_mcmc_object <- function(truth, + fc, + log = FALSE, + weights){ + + indices_keep <- which(!is.na(truth)) + if(length(indices_keep) == 0){ + scores = data.frame('brier' = rep(NA, length(truth)), + 'interval' = rep(NA, length(truth))) + } else { + scores <- matrix(NA, nrow = length(truth), ncol = 2) + for(i in indices_keep){ + scores[i,] <- mvgam:::brier_score(truth = as.vector(truth)[i], + fc = fc[,i]) + } + } + scores +} + #' Wrapper to calculate variogram score on all observations in fc_horizon #' @noRd variogram_mcmc_object <- function(truths, fcs, log = FALSE, diff --git a/R/plot_mvgam_fc.R b/R/plot_mvgam_fc.R index 72c124f8..de2f9846 100644 --- a/R/plot_mvgam_fc.R +++ b/R/plot_mvgam_fc.R @@ -416,7 +416,8 @@ plot_mvgam_fc = function(object, series = 1, newdata, data_test, score <- NULL message('No non-missing values in data_test$y; cannot calculate forecast score') } else { - if(object$family %in% c('poisson', 'negative binomial', 'tweedie')){ + if(object$family %in% c('poisson', 'negative binomial', 'tweedie', + 'binomial', 'beta_binomial')){ if(max(fc, na.rm = TRUE) > 50000){ score <- sum(crps_mcmc_object(as.vector(truth), fc)[,1], na.rm = TRUE) @@ -683,7 +684,7 @@ plot.mvgam_forecast = function(x, series = 1, } } - if(type == 'response'){ + if(type == 'response' || c(type == 'expected' & object$family == 'bernoulli')){ # Plot training and testing points points(c(object$train_observations[[s_name]], object$test_observations[[s_name]]), pch = 16, col = "white", cex = 0.8) @@ -698,7 +699,8 @@ plot.mvgam_forecast = function(x, series = 1, score <- NULL message(paste0('No non-missing values in test_observations; cannot calculate forecast score\n')) } else { - if(object$family %in% c('poisson', 'negative binomial', 'tweedie')){ + if(object$family %in% c('poisson', 'negative binomial', 'tweedie', + 'binomial', 'beta_binomial')){ if(max(fc, na.rm = TRUE) > 50000){ score <- sum(crps_mcmc_object(as.vector(truth), fc)[,1], na.rm = TRUE) @@ -709,7 +711,12 @@ plot.mvgam_forecast = function(x, series = 1, message(paste0('Out of sample DRPS:\n', score)) } - } else { + } else if(object$family == 'bernoulli'){ + score <- sum(brier_mcmc_object(as.vector(truth), + fc)[,1], na.rm = TRUE) + message(paste0('Out of sample Brier:\n', score)) + + } else { score <- sum(crps_mcmc_object(as.vector(truth), fc)[,1], na.rm = TRUE) message(paste0('Out of sample CRPS:\n', score)) diff --git a/R/score.mvgam_forecast.R b/R/score.mvgam_forecast.R index 258d9e22..f8155a2c 100644 --- a/R/score.mvgam_forecast.R +++ b/R/score.mvgam_forecast.R @@ -7,26 +7,29 @@ #'@param score \code{character} specifying the type of proper scoring rule to use for evaluation. Options are: #'`sis` (i.e. the Scaled Interval Score), `energy`, `variogram`, `elpd` #'(i.e. the Expected log pointwise Predictive Density), -#'`drps` (i.e. the Discrete Rank Probability Score) or `crps` (the Continuous Rank Probability Score). +#'`drps` (i.e. the Discrete Rank Probability Score), `crps` (the Continuous Rank Probability Score) +#'or `brier` (the latter of which is only applicable for `bernoulli` models. #'Note that when choosing `elpd`, the supplied object must have forecasts on the `link` scale so that -#'expectations can be calculated prior to scoring. For all other scores, forecasts should be supplied +#'expectations can be calculated prior to scoring. If choosing `brier`, the object must have forecasts +#'on the `expected` scale (i.e. probability predictions). For all other scores, forecasts should be supplied #'on the `response` scale (i.e. posterior predictions) #'@param log \code{logical}. Should the forecasts and truths be logged prior to scoring? #'This is often appropriate for comparing -#'performance of models when series vary in their observation ranges +#'performance of models when series vary in their observation ranges. Ignored if `score = 'brier'` #'@param weights optional \code{vector} of weights (where \code{length(weights) == n_series}) #'for weighting pairwise correlations when evaluating the variogram score for multivariate #'forecasts. Useful for down-weighting series that have larger magnitude observations or that #'are of less interest when forecasting. Ignored if \code{score != 'variogram'} #'@param interval_width proportional value on `[0.05,0.95]` defining the forecast interval -#'for calculating coverage and, if `score = 'sis'`, for calculating the interval score +#'for calculating coverage and, if `score = 'sis'`, for calculating the interval score. +#'Ignored if `score = 'brier'` #'@param n_cores \code{integer} specifying number of cores for calculating scores in parallel #'@param ... Ignored #'@return a \code{list} containing scores and interval coverages per forecast horizon. -#'If \code{score %in% c('drps', 'crps', 'elpd')}, +#'If \code{score %in% c('drps', 'crps', 'elpd', 'brier')}, #'the list will also contain return the sum of all series-level scores per horizon. If #'\code{score %in% c('energy','variogram')}, no series-level scores are computed and the only score returned -#'will be for all series. For all scores apart from `elpd`, the `in_interval` column in each series-level +#'will be for all series. For all scores apart from `elpd` and `brier`, the `in_interval` column in each series-level #'slot is a binary indicator of whether or not the true value was within the forecast's corresponding #'posterior empirical quantiles. Intervals are not calculated when using `elpd` because forecasts #'will only contain the linear predictors @@ -43,16 +46,36 @@ #' #'# Extract forecasts into a 'mvgam_forecast' object #'fc <- forecast(mod) +#'plot(fc) #' #'# Compute Discrete Rank Probability Scores and 0.90 interval coverages #'fc_scores <- score(fc, score = 'drps') #'str(fc_scores) +#' +#'# An example using binary data +#'data <- sim_mvgam(family = bernoulli()) +#'mod <- mvgam(y ~ s(season, bs = 'cc', k = 6), +#' trend_model = AR(), +#' data = data$data_train, +#' newdata = data$data_test, +#' family = bernoulli(), +#' chains = 2) +#' +#'# Extract forecasts on the expectation (probability) scale +#'fc <- forecast(mod, type = 'expected') +#'plot(fc) +#' +#'# Compute Brier scores +#'fc_scores <- score(fc, score = 'brier') +#'str(fc_scores) #'} #'@method score mvgam_forecast #'@seealso \code{\link{forecast.mvgam}}, \code{\link{ensemble}} #'@export -score.mvgam_forecast = function(object, score = 'crps', - log = FALSE, weights, +score.mvgam_forecast = function(object, + score = 'crps', + log = FALSE, + weights, interval_width = 0.9, n_cores = 1, ...){ @@ -60,6 +83,7 @@ score.mvgam_forecast = function(object, score = 'crps', score <- match.arg(arg = score, choices = c('crps', 'drps', + 'brier', 'elpd', 'sis', 'energy', @@ -75,6 +99,13 @@ score.mvgam_forecast = function(object, score = 'crps', call. = FALSE) } + if(object$type != 'expected' & score == 'brier'){ + stop('cannot evaluate brier scores unless probability predictions are supplied. Use "type == expected" when forecasting instead', + call. = FALSE) + } + + validate_pos_integer(n_cores) + validate_proportional(interval_width) if(interval_width < 0.05 || interval_width > 0.95){ stop('interval width must be between 0.05 and 0.95, inclusive') } @@ -219,6 +250,28 @@ score.mvgam_forecast = function(object, score = 'crps', series_score$all_series <- all_scores } + if(score == 'brier'){ + if(object$family != 'bernoulli'){ + stop('brier score only applicable for Bernoulli forecasts', + call. = FALSE) + } + series_score <- lapply(seq_len(n_series), function(series){ + BRIER <- data.frame(brier_mcmc_object(truths[series,], + object$forecasts[[series]], + log = log)) + colnames(BRIER) <- c('score','in_interval') + BRIER$interval_width <- interval_width + BRIER$eval_horizon <- seq(1, NCOL(object$forecasts[[1]])) + BRIER$score_type <- 'brier' + BRIER + }) + names(series_score) <- object$series_names + all_scores <- data.frame(score = rowSums(do.call(cbind, lapply(seq_len(n_series), function(series){ + series_score[[series]]$score + }))), eval_horizon = seq(1, NCOL(object$forecasts[[1]])), score_type = 'sum_brier') + series_score$all_series <- all_scores + } + series_score } diff --git a/man/mvgam-package.Rd b/man/mvgam-package.Rd index 0452626a..97510c69 100644 --- a/man/mvgam-package.Rd +++ b/man/mvgam-package.Rd @@ -23,6 +23,7 @@ Useful links: Other contributors: \itemize{ + \item Sarah Heaps (\href{https://orcid.org/0000-0002-5543-037X}{ORCID}) (VARMA parameterisations) [contributor] \item Scott Pease (\href{https://orcid.org/0009-0006-8977-9285}{ORCID}) (broom enhancements) [contributor] \item Matthijs Hollanders (\href{https://orcid.org/0000-0003-0796-1018}{ORCID}) (ggplot visualizations) [contributor] } diff --git a/man/score.mvgam_forecast.Rd b/man/score.mvgam_forecast.Rd index d6a04278..d7e43e9e 100644 --- a/man/score.mvgam_forecast.Rd +++ b/man/score.mvgam_forecast.Rd @@ -23,14 +23,16 @@ score(object, ...) \item{score}{\code{character} specifying the type of proper scoring rule to use for evaluation. Options are: \code{sis} (i.e. the Scaled Interval Score), \code{energy}, \code{variogram}, \code{elpd} (i.e. the Expected log pointwise Predictive Density), -\code{drps} (i.e. the Discrete Rank Probability Score) or \code{crps} (the Continuous Rank Probability Score). +\code{drps} (i.e. the Discrete Rank Probability Score), \code{crps} (the Continuous Rank Probability Score) +or \code{brier} (the latter of which is only applicable for \code{bernoulli} models. Note that when choosing \code{elpd}, the supplied object must have forecasts on the \code{link} scale so that -expectations can be calculated prior to scoring. For all other scores, forecasts should be supplied +expectations can be calculated prior to scoring. If choosing \code{brier}, the object must have forecasts +on the \code{expected} scale (i.e. probability predictions). For all other scores, forecasts should be supplied on the \code{response} scale (i.e. posterior predictions)} \item{log}{\code{logical}. Should the forecasts and truths be logged prior to scoring? This is often appropriate for comparing -performance of models when series vary in their observation ranges} +performance of models when series vary in their observation ranges. Ignored if \code{score = 'brier'}} \item{weights}{optional \code{vector} of weights (where \code{length(weights) == n_series}) for weighting pairwise correlations when evaluating the variogram score for multivariate @@ -38,7 +40,8 @@ forecasts. Useful for down-weighting series that have larger magnitude observati are of less interest when forecasting. Ignored if \code{score != 'variogram'}} \item{interval_width}{proportional value on \verb{[0.05,0.95]} defining the forecast interval -for calculating coverage and, if \code{score = 'sis'}, for calculating the interval score} +for calculating coverage and, if \code{score = 'sis'}, for calculating the interval score. +Ignored if \code{score = 'brier'}} \item{n_cores}{\code{integer} specifying number of cores for calculating scores in parallel} @@ -46,10 +49,10 @@ for calculating coverage and, if \code{score = 'sis'}, for calculating the inter } \value{ a \code{list} containing scores and interval coverages per forecast horizon. -If \code{score \%in\% c('drps', 'crps', 'elpd')}, +If \code{score \%in\% c('drps', 'crps', 'elpd', 'brier')}, the list will also contain return the sum of all series-level scores per horizon. If \code{score \%in\% c('energy','variogram')}, no series-level scores are computed and the only score returned -will be for all series. For all scores apart from \code{elpd}, the \code{in_interval} column in each series-level +will be for all series. For all scores apart from \code{elpd} and \code{brier}, the \code{in_interval} column in each series-level slot is a binary indicator of whether or not the true value was within the forecast's corresponding posterior empirical quantiles. Intervals are not calculated when using \code{elpd} because forecasts will only contain the linear predictors @@ -70,10 +73,28 @@ mod <- mvgam(y ~ 1, # Extract forecasts into a 'mvgam_forecast' object fc <- forecast(mod) +plot(fc) # Compute Discrete Rank Probability Scores and 0.90 interval coverages fc_scores <- score(fc, score = 'drps') str(fc_scores) + +# An example using binary data +data <- sim_mvgam(family = bernoulli()) +mod <- mvgam(y ~ s(season, bs = 'cc', k = 6), + trend_model = AR(), + data = data$data_train, + newdata = data$data_test, + family = bernoulli(), + chains = 2) + +# Extract forecasts on the expectation (probability) scale +fc <- forecast(mod, type = 'expected') +plot(fc) + +# Compute Brier scores +fc_scores <- score(fc, score = 'brier') +str(fc_scores) } } \seealso{ diff --git a/src/RcppExports.o b/src/RcppExports.o index 5cf5ae21..03b01119 100644 Binary files a/src/RcppExports.o and b/src/RcppExports.o differ diff --git a/src/trend_funs.o b/src/trend_funs.o index e09f58e3..1c0d5a8f 100644 Binary files a/src/trend_funs.o and b/src/trend_funs.o differ diff --git a/tests/testthat/test-binomial.R b/tests/testthat/test-binomial.R index a32dcfdf..ccc42a45 100644 --- a/tests/testthat/test-binomial.R +++ b/tests/testthat/test-binomial.R @@ -125,6 +125,16 @@ test_that("binomial() post-processing works", { expect_true(inherits(fc, 'mvgam_forecast')) expect_no_error(plot(fc)) expect_no_error(plot(fc, realisations = TRUE)) + expect_list(score(fc, score = 'drps')) + expect_error( + score(fc, score = 'brier'), + 'cannot evaluate brier scores unless probability predictions are supplied. Use "type == expected" when forecasting instead' + ) + fc <- forecast(mod, newdata = dat_test, type = 'expected') + expect_error( + score(fc, score = 'brier'), + 'brier score only applicable for Bernoulli forecasts' + ) expect_no_error(SW(plot(mod, type = 'smooths', trend_effects = TRUE))) expect_no_error(plot(mod, type = 'smooths',