diff --git a/DESCRIPTION b/DESCRIPTION
index 11606e5d..f6165343 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,9 +1,11 @@
 Package: mvgam
 Title: Multivariate (Dynamic) Generalized Additive Models
 Version: 1.1.4
-Date: 2024-11-25
+Date: 2024-12-02
 Authors@R: c(person("Nicholas J", "Clark", , "nicholas.j.clark1214@gmail.com", 
                     role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7131-3301")),
+             person("Sarah", "Heaps", role = c("ctb"),
+                    comment = c("VARMA parameterisations", ORCID = "0000-0002-5543-037X")),
              person("Scott", "Pease", role = c("ctb"),
                     comment = c("broom enhancements", ORCID = "0009-0006-8977-9285")),
              person("Matthijs", "Hollanders", role = c("ctb"),
diff --git a/NEWS.md b/NEWS.md
index caf9ee0d..e1ed5511 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # mvgam 1.1.4 (development version; not yet on CRAN)
 ## New functionalities
 * Improved various plotting functions by returning `ggplot` objects in place of base plots (thanks to @mhollanders #38)
+* Added the brier score (`score = 'brier'`) as an option for scoring forecasts of binary variables when using `family = bernoulli()`
 * Added `augment()` function to add residuals and fitted values to an mvgam object's observed data (thanks to @swpease #83)
 * Added support for approximate `gp()` effects with more than one covariate and with different kernel functions (#79) 
 * Added function `jsdgam()` to estimate Joint Species Distribution Models in which both the latent factors and the observation model components can include any of mvgam's complex linear predictor effects. Also added a function `residual_cor()` to compute residual correlation, covariance and precision matrices from `jsdgam` models. See `?mvgam::jsdgam` and `?mvgam::residual_cor` for details
diff --git a/R/evaluate_mvgams.R b/R/evaluate_mvgams.R
index 7c04b012..04ac1184 100644
--- a/R/evaluate_mvgams.R
+++ b/R/evaluate_mvgams.R
@@ -274,7 +274,7 @@ eval_mvgam = function(object,
     })
 
     # Calculate score and interval coverage per series
-    if(object$family %in% c('poisson', 'negative binomial')){
+    if(object$family %in% c('poisson', 'negative binomial', 'binomial', 'beta_binomial')){
       series_score <- lapply(seq_len(n_series), function(series){
         DRPS <- data.frame(drps_mcmc_object(as.vector(as.matrix(series_truths[[series]])),
                                               series_fcs[[series]],
@@ -615,7 +615,7 @@ crps_edf <- function(y, dat, w = NULL) {
   sapply(y, f)
 }
 
-# Calculate out of sample CRPS
+# Compute CRPS
 # code borrowed from scoringRules: https://github.com/FK83/scoringRules/blob/master/R/scores_sample_univ.R
 #' @noRd
 crps_score <- function(truth, fc, method = "edf", w = NULL,
@@ -642,7 +642,7 @@ crps_score <- function(truth, fc, method = "edf", w = NULL,
 }
 
 
-# Calculate out of sample DRPS
+# Compute DRPS
 #' @noRd
 drps_score <- function(truth, fc, interval_width = 0.9,
                        log = FALSE){
@@ -668,7 +668,7 @@ drps_score <- function(truth, fc, interval_width = 0.9,
   return(c(score, in_interval))
 }
 
-# Calculate out of sample scaled interval score
+# Compute the scaled interval score
 #' @noRd
 sis_score <- function(truth, fc, interval_width = 0.9,
                        log = FALSE){
@@ -697,6 +697,20 @@ sis_score <- function(truth, fc, interval_width = 0.9,
   return(c(score, in_interval))
 }
 
+# Compute the Brier score
+#' @noRd
+brier_score <- function(truth,
+                        fc,
+                        interval_width = 0.9){
+
+  score <- (truth - fc) ^ 2
+  score <- sum(score) / length(score)
+
+  # Cannot evaluate coverage for binary truths
+  in_interval <- NA
+  return(c(score, in_interval))
+}
+
 #' Compute the multivariate energy score
 #' @noRd
 energy_score <- function(truth, fc, log = FALSE) {
@@ -714,24 +728,6 @@ energy_score <- function(truth, fc, log = FALSE) {
   return(es)
 }
 
-#' Wrapper to calculate energy score on all observations in fc_horizon
-#' @noRd
-energy_mcmc_object <- function(truths, fcs, log = FALSE,
-                                  weights){
-  fc_horizon <- length(fcs[[1]][1,])
-  fcs_per_horizon <- lapply(seq_len(fc_horizon), function(horizon){
-    do.call(rbind, lapply(seq_along(fcs), function(fc){
-      fcs[[fc]][,horizon]
-    }))
-  })
-
-  unlist(lapply(seq_len(fc_horizon), function(horizon){
-    energy_score(truth = truths[,horizon],
-                 fc = fcs_per_horizon[[horizon]],
-                 log = log)
-  }))
-}
-
 #' Compute the variogram score, using the median pairwise difference
 #' from the forecast distribution (scoringRules::vs_sample uses the
 #' mean, which is not appropriate for skewed distributions)
@@ -770,6 +766,45 @@ variogram_score = function(truth, fc, log = FALSE, weights){
 
 }
 
+#' Compute the energy score on all observations in fc_horizon
+#' @noRd
+energy_mcmc_object <- function(truths, fcs, log = FALSE,
+                                  weights){
+  fc_horizon <- length(fcs[[1]][1,])
+  fcs_per_horizon <- lapply(seq_len(fc_horizon), function(horizon){
+    do.call(rbind, lapply(seq_along(fcs), function(fc){
+      fcs[[fc]][,horizon]
+    }))
+  })
+
+  unlist(lapply(seq_len(fc_horizon), function(horizon){
+    energy_score(truth = truths[,horizon],
+                 fc = fcs_per_horizon[[horizon]],
+                 log = log)
+  }))
+}
+
+#' Compute the Brier score on all observations in fc_horizon
+#' @noRd
+brier_mcmc_object <- function(truth,
+                              fc,
+                              log = FALSE,
+                              weights){
+
+  indices_keep <- which(!is.na(truth))
+  if(length(indices_keep) == 0){
+    scores = data.frame('brier' = rep(NA, length(truth)),
+                        'interval' = rep(NA, length(truth)))
+  } else {
+    scores <- matrix(NA, nrow = length(truth), ncol = 2)
+    for(i in indices_keep){
+      scores[i,] <- mvgam:::brier_score(truth = as.vector(truth)[i],
+                                fc = fc[,i])
+    }
+  }
+  scores
+}
+
 #' Wrapper to calculate variogram score on all observations in fc_horizon
 #' @noRd
 variogram_mcmc_object <- function(truths, fcs, log = FALSE,
diff --git a/R/plot_mvgam_fc.R b/R/plot_mvgam_fc.R
index 72c124f8..de2f9846 100644
--- a/R/plot_mvgam_fc.R
+++ b/R/plot_mvgam_fc.R
@@ -416,7 +416,8 @@ plot_mvgam_fc = function(object, series = 1, newdata, data_test,
       score <- NULL
       message('No non-missing values in data_test$y; cannot calculate forecast score')
     } else {
-      if(object$family %in% c('poisson', 'negative binomial', 'tweedie')){
+      if(object$family %in% c('poisson', 'negative binomial', 'tweedie',
+                              'binomial', 'beta_binomial')){
         if(max(fc, na.rm = TRUE) > 50000){
           score <- sum(crps_mcmc_object(as.vector(truth),
                                         fc)[,1], na.rm = TRUE)
@@ -683,7 +684,7 @@ plot.mvgam_forecast = function(x, series = 1,
     }
   }
 
-  if(type == 'response'){
+  if(type == 'response' || c(type == 'expected' & object$family == 'bernoulli')){
     # Plot training and testing points
     points(c(object$train_observations[[s_name]],
              object$test_observations[[s_name]]), pch = 16, col = "white", cex = 0.8)
@@ -698,7 +699,8 @@ plot.mvgam_forecast = function(x, series = 1,
       score <- NULL
       message(paste0('No non-missing values in test_observations; cannot calculate forecast score\n'))
     } else {
-      if(object$family %in% c('poisson', 'negative binomial', 'tweedie')){
+      if(object$family %in% c('poisson', 'negative binomial', 'tweedie',
+                              'binomial', 'beta_binomial')){
         if(max(fc, na.rm = TRUE) > 50000){
           score <- sum(crps_mcmc_object(as.vector(truth),
                                         fc)[,1], na.rm = TRUE)
@@ -709,7 +711,12 @@ plot.mvgam_forecast = function(x, series = 1,
           message(paste0('Out of sample DRPS:\n', score))
         }
 
-      } else {
+      } else if(object$family == 'bernoulli'){
+        score <- sum(brier_mcmc_object(as.vector(truth),
+                                      fc)[,1], na.rm = TRUE)
+        message(paste0('Out of sample Brier:\n', score))
+
+        } else {
         score <- sum(crps_mcmc_object(as.vector(truth),
                                       fc)[,1], na.rm = TRUE)
         message(paste0('Out of sample CRPS:\n', score))
diff --git a/R/score.mvgam_forecast.R b/R/score.mvgam_forecast.R
index 258d9e22..f8155a2c 100644
--- a/R/score.mvgam_forecast.R
+++ b/R/score.mvgam_forecast.R
@@ -7,26 +7,29 @@
 #'@param score \code{character} specifying the type of proper scoring rule to use for evaluation. Options are:
 #'`sis` (i.e. the Scaled Interval Score), `energy`, `variogram`, `elpd`
 #'(i.e. the Expected log pointwise Predictive Density),
-#'`drps` (i.e. the Discrete Rank Probability Score) or `crps` (the Continuous Rank Probability Score).
+#'`drps` (i.e. the Discrete Rank Probability Score), `crps` (the Continuous Rank Probability Score)
+#'or `brier` (the latter of which is only applicable for `bernoulli` models.
 #'Note that when choosing `elpd`, the supplied object must have forecasts on the `link` scale so that
-#'expectations can be calculated prior to scoring. For all other scores, forecasts should be supplied
+#'expectations can be calculated prior to scoring. If choosing `brier`, the object must have forecasts
+#'on the `expected` scale (i.e. probability predictions). For all other scores, forecasts should be supplied
 #'on the `response` scale (i.e. posterior predictions)
 #'@param log \code{logical}. Should the forecasts and truths be logged prior to scoring?
 #'This is often appropriate for comparing
-#'performance of models when series vary in their observation ranges
+#'performance of models when series vary in their observation ranges. Ignored if `score = 'brier'`
 #'@param weights optional \code{vector} of weights (where \code{length(weights) == n_series})
 #'for weighting pairwise correlations when evaluating the variogram score for multivariate
 #'forecasts. Useful for down-weighting series that have larger magnitude observations or that
 #'are of less interest when forecasting. Ignored if \code{score != 'variogram'}
 #'@param interval_width proportional value on `[0.05,0.95]` defining the forecast interval
-#'for calculating coverage and, if `score = 'sis'`, for calculating the interval score
+#'for calculating coverage and, if `score = 'sis'`, for calculating the interval score.
+#'Ignored if `score = 'brier'`
 #'@param n_cores \code{integer} specifying number of cores for calculating scores in parallel
 #'@param ... Ignored
 #'@return a \code{list} containing scores and interval coverages per forecast horizon.
-#'If \code{score %in% c('drps', 'crps', 'elpd')},
+#'If \code{score %in% c('drps', 'crps', 'elpd', 'brier')},
 #'the list will also contain return the sum of all series-level scores per horizon. If
 #'\code{score %in% c('energy','variogram')}, no series-level scores are computed and the only score returned
-#'will be for all series. For all scores apart from `elpd`, the `in_interval` column in each series-level
+#'will be for all series. For all scores apart from `elpd` and `brier`, the `in_interval` column in each series-level
 #'slot is a binary indicator of whether or not the true value was within the forecast's corresponding
 #'posterior empirical quantiles. Intervals are not calculated when using `elpd` because forecasts
 #'will only contain the linear predictors
@@ -43,16 +46,36 @@
 #'
 #'# Extract forecasts into a 'mvgam_forecast' object
 #'fc <- forecast(mod)
+#'plot(fc)
 #'
 #'# Compute Discrete Rank Probability Scores and 0.90 interval coverages
 #'fc_scores <- score(fc, score = 'drps')
 #'str(fc_scores)
+#'
+#'# An example using binary data
+#'data <- sim_mvgam(family = bernoulli())
+#'mod <- mvgam(y ~ s(season, bs = 'cc', k = 6),
+#'             trend_model = AR(),
+#'             data = data$data_train,
+#'             newdata = data$data_test,
+#'             family = bernoulli(),
+#'             chains = 2)
+#'
+#'# Extract forecasts on the expectation (probability) scale
+#'fc <- forecast(mod, type = 'expected')
+#'plot(fc)
+#'
+#'# Compute Brier scores
+#'fc_scores <- score(fc, score = 'brier')
+#'str(fc_scores)
 #'}
 #'@method score mvgam_forecast
 #'@seealso \code{\link{forecast.mvgam}}, \code{\link{ensemble}}
 #'@export
-score.mvgam_forecast = function(object, score = 'crps',
-                                log = FALSE, weights,
+score.mvgam_forecast = function(object,
+                                score = 'crps',
+                                log = FALSE,
+                                weights,
                                 interval_width = 0.9,
                                 n_cores = 1,
                                 ...){
@@ -60,6 +83,7 @@ score.mvgam_forecast = function(object, score = 'crps',
   score <- match.arg(arg = score,
                            choices = c('crps',
                                        'drps',
+                                       'brier',
                                        'elpd',
                                        'sis',
                                        'energy',
@@ -75,6 +99,13 @@ score.mvgam_forecast = function(object, score = 'crps',
          call. = FALSE)
   }
 
+  if(object$type != 'expected' & score == 'brier'){
+    stop('cannot evaluate brier scores unless probability predictions are supplied. Use "type == expected" when forecasting instead',
+         call. = FALSE)
+  }
+
+  validate_pos_integer(n_cores)
+  validate_proportional(interval_width)
   if(interval_width < 0.05 || interval_width > 0.95){
     stop('interval width must be between 0.05 and 0.95, inclusive')
   }
@@ -219,6 +250,28 @@ score.mvgam_forecast = function(object, score = 'crps',
     series_score$all_series <- all_scores
   }
 
+  if(score == 'brier'){
+    if(object$family != 'bernoulli'){
+      stop('brier score only applicable for Bernoulli forecasts',
+           call. = FALSE)
+    }
+    series_score <- lapply(seq_len(n_series), function(series){
+      BRIER <- data.frame(brier_mcmc_object(truths[series,],
+                                          object$forecasts[[series]],
+                                          log = log))
+      colnames(BRIER) <- c('score','in_interval')
+      BRIER$interval_width <- interval_width
+      BRIER$eval_horizon <- seq(1, NCOL(object$forecasts[[1]]))
+      BRIER$score_type <- 'brier'
+      BRIER
+    })
+    names(series_score) <- object$series_names
+    all_scores <- data.frame(score = rowSums(do.call(cbind, lapply(seq_len(n_series), function(series){
+      series_score[[series]]$score
+    }))), eval_horizon = seq(1, NCOL(object$forecasts[[1]])), score_type = 'sum_brier')
+    series_score$all_series <- all_scores
+  }
+
   series_score
 }
 
diff --git a/man/mvgam-package.Rd b/man/mvgam-package.Rd
index 0452626a..97510c69 100644
--- a/man/mvgam-package.Rd
+++ b/man/mvgam-package.Rd
@@ -23,6 +23,7 @@ Useful links:
 
 Other contributors:
 \itemize{
+  \item Sarah Heaps (\href{https://orcid.org/0000-0002-5543-037X}{ORCID}) (VARMA parameterisations) [contributor]
   \item Scott Pease (\href{https://orcid.org/0009-0006-8977-9285}{ORCID}) (broom enhancements) [contributor]
   \item Matthijs Hollanders (\href{https://orcid.org/0000-0003-0796-1018}{ORCID}) (ggplot visualizations) [contributor]
 }
diff --git a/man/score.mvgam_forecast.Rd b/man/score.mvgam_forecast.Rd
index d6a04278..d7e43e9e 100644
--- a/man/score.mvgam_forecast.Rd
+++ b/man/score.mvgam_forecast.Rd
@@ -23,14 +23,16 @@ score(object, ...)
 \item{score}{\code{character} specifying the type of proper scoring rule to use for evaluation. Options are:
 \code{sis} (i.e. the Scaled Interval Score), \code{energy}, \code{variogram}, \code{elpd}
 (i.e. the Expected log pointwise Predictive Density),
-\code{drps} (i.e. the Discrete Rank Probability Score) or \code{crps} (the Continuous Rank Probability Score).
+\code{drps} (i.e. the Discrete Rank Probability Score), \code{crps} (the Continuous Rank Probability Score)
+or \code{brier} (the latter of which is only applicable for \code{bernoulli} models.
 Note that when choosing \code{elpd}, the supplied object must have forecasts on the \code{link} scale so that
-expectations can be calculated prior to scoring. For all other scores, forecasts should be supplied
+expectations can be calculated prior to scoring. If choosing \code{brier}, the object must have forecasts
+on the \code{expected} scale (i.e. probability predictions). For all other scores, forecasts should be supplied
 on the \code{response} scale (i.e. posterior predictions)}
 
 \item{log}{\code{logical}. Should the forecasts and truths be logged prior to scoring?
 This is often appropriate for comparing
-performance of models when series vary in their observation ranges}
+performance of models when series vary in their observation ranges. Ignored if \code{score = 'brier'}}
 
 \item{weights}{optional \code{vector} of weights (where \code{length(weights) == n_series})
 for weighting pairwise correlations when evaluating the variogram score for multivariate
@@ -38,7 +40,8 @@ forecasts. Useful for down-weighting series that have larger magnitude observati
 are of less interest when forecasting. Ignored if \code{score != 'variogram'}}
 
 \item{interval_width}{proportional value on \verb{[0.05,0.95]} defining the forecast interval
-for calculating coverage and, if \code{score = 'sis'}, for calculating the interval score}
+for calculating coverage and, if \code{score = 'sis'}, for calculating the interval score.
+Ignored if \code{score = 'brier'}}
 
 \item{n_cores}{\code{integer} specifying number of cores for calculating scores in parallel}
 
@@ -46,10 +49,10 @@ for calculating coverage and, if \code{score = 'sis'}, for calculating the inter
 }
 \value{
 a \code{list} containing scores and interval coverages per forecast horizon.
-If \code{score \%in\% c('drps', 'crps', 'elpd')},
+If \code{score \%in\% c('drps', 'crps', 'elpd', 'brier')},
 the list will also contain return the sum of all series-level scores per horizon. If
 \code{score \%in\% c('energy','variogram')}, no series-level scores are computed and the only score returned
-will be for all series. For all scores apart from \code{elpd}, the \code{in_interval} column in each series-level
+will be for all series. For all scores apart from \code{elpd} and \code{brier}, the \code{in_interval} column in each series-level
 slot is a binary indicator of whether or not the true value was within the forecast's corresponding
 posterior empirical quantiles. Intervals are not calculated when using \code{elpd} because forecasts
 will only contain the linear predictors
@@ -70,10 +73,28 @@ mod <- mvgam(y ~ 1,
 
 # Extract forecasts into a 'mvgam_forecast' object
 fc <- forecast(mod)
+plot(fc)
 
 # Compute Discrete Rank Probability Scores and 0.90 interval coverages
 fc_scores <- score(fc, score = 'drps')
 str(fc_scores)
+
+# An example using binary data
+data <- sim_mvgam(family = bernoulli())
+mod <- mvgam(y ~ s(season, bs = 'cc', k = 6),
+            trend_model = AR(),
+            data = data$data_train,
+            newdata = data$data_test,
+            family = bernoulli(),
+            chains = 2)
+
+# Extract forecasts on the expectation (probability) scale
+fc <- forecast(mod, type = 'expected')
+plot(fc)
+
+# Compute Brier scores
+fc_scores <- score(fc, score = 'brier')
+str(fc_scores)
 }
 }
 \seealso{
diff --git a/src/RcppExports.o b/src/RcppExports.o
index 5cf5ae21..03b01119 100644
Binary files a/src/RcppExports.o and b/src/RcppExports.o differ
diff --git a/src/trend_funs.o b/src/trend_funs.o
index e09f58e3..1c0d5a8f 100644
Binary files a/src/trend_funs.o and b/src/trend_funs.o differ
diff --git a/tests/testthat/test-binomial.R b/tests/testthat/test-binomial.R
index a32dcfdf..ccc42a45 100644
--- a/tests/testthat/test-binomial.R
+++ b/tests/testthat/test-binomial.R
@@ -125,6 +125,16 @@ test_that("binomial() post-processing works", {
   expect_true(inherits(fc, 'mvgam_forecast'))
   expect_no_error(plot(fc))
   expect_no_error(plot(fc, realisations = TRUE))
+  expect_list(score(fc, score = 'drps'))
+  expect_error(
+    score(fc, score = 'brier'),
+    'cannot evaluate brier scores unless probability predictions are supplied. Use "type == expected" when forecasting instead'
+  )
+  fc <- forecast(mod, newdata = dat_test, type = 'expected')
+  expect_error(
+    score(fc, score = 'brier'),
+    'brier score only applicable for Bernoulli forecasts'
+  )
 
   expect_no_error(SW(plot(mod, type = 'smooths', trend_effects = TRUE)))
   expect_no_error(plot(mod, type = 'smooths',