From 4adaf0c3c17af5934accb1bbd852b2593f5fa47e Mon Sep 17 00:00:00 2001
From: Matthijs Hollanders <matthijs.hollanders@gmail.com>
Date: Thu, 24 Oct 2024 15:05:59 +1100
Subject: [PATCH 1/3] ggplot2 plot_mvgam_resids()

---
 R/plot_mvgam_resids.R | 575 ++++++++++++++----------------------------
 1 file changed, 194 insertions(+), 381 deletions(-)

diff --git a/R/plot_mvgam_resids.R b/R/plot_mvgam_resids.R
index ad769576..b4315f93 100644
--- a/R/plot_mvgam_resids.R
+++ b/R/plot_mvgam_resids.R
@@ -26,7 +26,7 @@
 #'@return A series of base \code{R} plots
 #'@export
 plot_mvgam_resids = function(object, series = 1,
-                             newdata, data_test){
+                             newdata, data_test) {
 
   # Check arguments
   if (!(inherits(object, "mvgam"))) {
@@ -47,405 +47,218 @@ plot_mvgam_resids = function(object, series = 1,
                                                          'trend_model'))
   }
 
-# Plotting colours
-probs <- c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95)
-c_light <- c("#DCBCBC")
-c_light_highlight <- c("#C79999")
-c_mid <- c("#B97C7C")
-c_mid_highlight <- c("#A25050")
-c_dark <- c("#8F2727")
-c_dark_highlight <- c("#7C0000")
-
-# Prediction indices for the particular series
-data_train <- validate_series_time(object$obs_data,
-                                   trend_model = attr(object$model_data,
-                                                      'trend_model'))
-ends <- seq(0, dim(mcmc_chains(object$model_output, 'ypred'))[2],
-            length.out = NCOL(object$ytimes) + 1)
-starts <- ends + 1
-starts <- c(1, starts[-c(1, (NCOL(object$ytimes)+1))])
-ends <- ends[-1]
-
-# Pull out series' residuals
-series_residuals <- object$resids[[series]]
-
-# Get indices of training horizon
-if(class(data_train)[1] == 'list'){
-  data_train_df <- data.frame(time = data_train$index..time..index,
-                              y = data_train$y,
-                              series = data_train$series)
-  obs_length <- length(data_train_df %>%
-                         dplyr::filter(series == !!(levels(data_train_df$series)[series])) %>%
-                         dplyr::select(time, y) %>%
-                         dplyr::distinct() %>%
-                         dplyr::arrange(time) %>%
-                         dplyr::pull(y))
-} else {
-  obs_length <- length(data_train %>%
-                         dplyr::filter(series == !!(levels(data_train$series)[series])) %>%
-                         dplyr::select(index..time..index, y) %>%
-                         dplyr::distinct() %>%
-                         dplyr::arrange(index..time..index) %>%
-                         dplyr::pull(y))
-}
-
-if(missing(data_test)){
-
-} else {
-
-  if(object$fit_engine == 'stan'){
-    linkfun <- family_invlinks(object$family)
-    preds <- linkfun(mcmc_chains(object$model_output, 'mus')[,seq(series,
-                                                                    dim(mcmc_chains(object$model_output, 'mus'))[2],
-                                                                    by = NCOL(object$ytimes))])
+  # Plotting colours
+  probs <- c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95)
+  c_light <- c("#DCBCBC")
+  c_light_highlight <- c("#C79999")
+  c_mid <- c("#B97C7C")
+  c_mid_highlight <- c("#A25050")
+  c_dark <- c("#8F2727")
+  c_dark_highlight <- c("#7C0000")
+
+  # Prediction indices for the particular series
+  data_train <- validate_series_time(object$obs_data,
+                                     trend_model = attr(object$model_data,
+                                                        'trend_model'))
+  ends <- seq(0, dim(mcmc_chains(object$model_output, 'ypred'))[2],
+              length.out = NCOL(object$ytimes) + 1)
+  starts <- ends + 1
+  starts <- c(1, starts[-c(1, (NCOL(object$ytimes)+1))])
+  ends <- ends[-1]
+
+  # Pull out series' residuals
+  series_residuals <- object$resids[[series]]
+
+  # Get indices of training horizon
+  if(class(data_train)[1] == 'list'){
+    data_train_df <- data.frame(time = data_train$index..time..index,
+                                y = data_train$y,
+                                series = data_train$series)
+    obs_length <- length(data_train_df %>%
+                           dplyr::filter(series == !!(levels(data_train_df$series)[series])) %>%
+                           dplyr::select(time, y) %>%
+                           dplyr::distinct() %>%
+                           dplyr::arrange(time) %>%
+                           dplyr::pull(y))
   } else {
-    preds <- mcmc_chains(object$model_output, 'mus')[,starts[series]:ends[series]]
+    obs_length <- length(data_train %>%
+                           dplyr::filter(series == !!(levels(data_train$series)[series])) %>%
+                           dplyr::select(index..time..index, y) %>%
+                           dplyr::distinct() %>%
+                           dplyr::arrange(index..time..index) %>%
+                           dplyr::pull(y))
   }
 
-  # Add variables to data_test if missing
-  s_name <- levels(data_train$series)[series]
-  if(!missing(data_test)){
-
-    if(!'y' %in% names(data_test)){
-      data_test$y <- rep(NA, NROW(data_test))
-    }
+  if(missing(data_test)){
 
-    if(class(data_test)[1] == 'list'){
-      if(!'time' %in% names(data_test)){
-        stop('data_train does not contain a "time" column')
-      }
-
-      if(!'series' %in% names(data_test)){
-        data_test$series <- factor('series1')
-      }
-
-    } else {
-      if(!'time' %in% colnames(data_test)){
-        stop('data_train does not contain a "time" column')
-      }
-
-      if(!'series' %in% colnames(data_test)){
-        data_test$series <- factor('series1')
-      }
-    }
-    # If the posterior predictions do not already cover the data_test period, the forecast needs to be
-    # generated using the latent trend dynamics; note, this assumes that there is no gap between the training and
-    # testing datasets
-    if(class(data_train)[1] == 'list'){
-      all_obs <- c(data.frame(y = data_train$y,
-                              series = data_train$series,
-                              time = data_train$index..time..index) %>%
-                     dplyr::filter(series == s_name) %>%
-                     dplyr::select(time, y) %>%
-                     dplyr::distinct() %>%
-                     dplyr::arrange(time) %>%
-                     dplyr::pull(y),
-                   data.frame(y = data_test$y,
-                              series = data_test$series,
-                              time = data_test$time) %>%
-                     dplyr::filter(series == s_name) %>%
-                     dplyr::select(time, y) %>%
-                     dplyr::distinct() %>%
-                     dplyr::arrange(time) %>%
-                     dplyr::pull(y))
-    } else {
-      all_obs <- c(data_train %>%
-                     dplyr::filter(series == s_name) %>%
-                     dplyr::select(index..time..index, y) %>%
-                     dplyr::distinct() %>%
-                     dplyr::arrange(index..time..index) %>%
-                     dplyr::pull(y),
-                   data_test %>%
-                     dplyr::filter(series == s_name) %>%
-                     dplyr::select(time, y) %>%
-                     dplyr::distinct() %>%
-                     dplyr::arrange(time) %>%
-                     dplyr::pull(y))
-    }
+  } else {
 
-    if(dim(preds)[2] != length(all_obs)){
+    if(object$fit_engine == 'stan'){
       linkfun <- family_invlinks(object$family)
-      fc_preds <- linkfun(forecast.mvgam(object, series = series,
-                                             data_test = data_test,
-                                 type = 'link'))
-      preds <- cbind(preds, fc_preds)
+      preds <- linkfun(mcmc_chains(object$model_output, 'mus')[,seq(series,
+                                                                    dim(mcmc_chains(object$model_output, 'mus'))[2],
+                                                                    by = NCOL(object$ytimes))])
+    } else {
+      preds <- mcmc_chains(object$model_output, 'mus')[,starts[series]:ends[series]]
     }
 
-    # Calculate out of sample residuals
-    preds <- preds[,tail(1:dim(preds)[2], length(data_test$time))]
-    truth <- data_test$y
-    n_obs <- length(truth)
-
-   if(NROW(preds) > 2000){
-     sample_seq <- sample(1:NROW(preds), 2000, F)
-   } else {
-     sample_seq <- 1:NROW(preds)
-   }
+    # Add variables to data_test if missing
+    s_name <- levels(data_train$series)[series]
+    if(!missing(data_test)){
 
-    series_residuals <- get_forecast_resids(object = object,
-                                            series = series,
-                                            truth = truth,
-                                            preds = preds,
-                                            family = object$family,
-                                            sample_seq = sample_seq)
+      if(!'y' %in% names(data_test)){
+        data_test$y <- rep(NA, NROW(data_test))
+      }
 
-  }
-}
+      if(class(data_test)[1] == 'list'){
+        if(!'time' %in% names(data_test)){
+          stop('data_train does not contain a "time" column')
+        }
 
-# Graphical parameters
-.pardefault <- par(no.readonly=T)
-on.exit(par(.pardefault))
-layout(matrix(1:4, ncol = 2, nrow = 2, byrow = TRUE))
-oldpar <- par(mar=c(2.5, 2.3, 2, 2),
-              oma = c(1, 1, 0, 0),
-              mgp = c(2, 0.5, 0))
+        if(!'series' %in% names(data_test)){
+          data_test$series <- factor('series1')
+        }
 
-# Extract expectation (fitted) values
-preds <- hindcast(object, type = 'expected')$hindcasts[[series]]
-series_residuals <- object$resids[[series]]
-limits <- quantile(preds, probs = c(0.025, 0.975), na.rm = TRUE)
+      } else {
+        if(!'time' %in% colnames(data_test)){
+          stop('data_train does not contain a "time" column')
+        }
 
-# Plot resids vs fitted
-plot(1,
-     xlim = limits,
-     bty = 'L',
-     xlab = '',
-     ylab = '',
-     pch = 16,
-     col = 'white',
-     cex = 1,
-     ylim = range(series_residuals, na.rm = T))
+        if(!'series' %in% colnames(data_test)){
+          data_test$series <- factor('series1')
+        }
+      }
+      # If the posterior predictions do not already cover the data_test period, the forecast needs to be
+      # generated using the latent trend dynamics; note, this assumes that there is no gap between the training and
+      # testing datasets
+      if(class(data_train)[1] == 'list'){
+        all_obs <- c(data.frame(y = data_train$y,
+                                series = data_train$series,
+                                time = data_train$index..time..index) %>%
+                       dplyr::filter(series == s_name) %>%
+                       dplyr::select(time, y) %>%
+                       dplyr::distinct() %>%
+                       dplyr::arrange(time) %>%
+                       dplyr::pull(y),
+                     data.frame(y = data_test$y,
+                                series = data_test$series,
+                                time = data_test$time) %>%
+                       dplyr::filter(series == s_name) %>%
+                       dplyr::select(time, y) %>%
+                       dplyr::distinct() %>%
+                       dplyr::arrange(time) %>%
+                       dplyr::pull(y))
+      } else {
+        all_obs <- c(data_train %>%
+                       dplyr::filter(series == s_name) %>%
+                       dplyr::select(index..time..index, y) %>%
+                       dplyr::distinct() %>%
+                       dplyr::arrange(index..time..index) %>%
+                       dplyr::pull(y),
+                     data_test %>%
+                       dplyr::filter(series == s_name) %>%
+                       dplyr::select(time, y) %>%
+                       dplyr::distinct() %>%
+                       dplyr::arrange(time) %>%
+                       dplyr::pull(y))
+      }
 
-draws <- sample(1:dim(preds)[1],
-                min(150, dim(preds)[1]),
-                replace = FALSE)
-moddata <- vector(mode = 'list')
-for(i in draws){
-  points(x = preds[i,],
-         y = series_residuals[i,],
-         pch = 16,
-         cex = 0.7,
-         col = '#80808020')
-  moddata[[which(draws == i)]] <- data.frame(y = series_residuals[i,],
-                             x = preds[i,])
-}
-title('Resids vs Fitted', line = 0)
-title(ylab = "DS residuals", line = 1.5)
-title(xlab = "Fitted values", line = 1.5)
+      if(dim(preds)[2] != length(all_obs)){
+        linkfun <- family_invlinks(object$family)
+        fc_preds <- linkfun(forecast.mvgam(object, series = series,
+                                           data_test = data_test,
+                                           type = 'link'))
+        preds <- cbind(preds, fc_preds)
+      }
 
-# Plot a fitted line
-moddata <- do.call(rbind, moddata)
+      # Calculate out of sample residuals
+      preds <- preds[,tail(1:dim(preds)[2], length(data_test$time))]
+      truth <- data_test$y
+      n_obs <- length(truth)
 
-predvals <- seq(min(preds[draws, ], na.rm = TRUE),
-                max(preds[draws, ], na.rm = TRUE),
-                length.out = 200)
-medmod <- bam(y ~ s(x), data = moddata, discrete = TRUE)
-medpreds <- predict(medmod, newdata = data.frame(x = predvals),
-                      type = 'response', se.fit = TRUE)
+      if(NROW(preds) > 2000){
+        sample_seq <- sample(1:NROW(preds), 2000, F)
+      } else {
+        sample_seq <- 1:NROW(preds)
+      }
 
-polygon(c(predvals, rev(predvals)), c(medpreds$fit + 3*medpreds$se.fit,
-                                        rev(medpreds$fit - 3*medpreds$se.fit)),
-        col = "#7C000040",
-        border = NA)
-lines(x = predvals,
-      y = medpreds$fit,
-      col = "#7C000060",
-      lwd = 3)
+      series_residuals <- get_forecast_resids(object = object,
+                                              series = series,
+                                              truth = truth,
+                                              preds = preds,
+                                              family = object$family,
+                                              sample_seq = sample_seq)
 
-# Q-Q plot
-coords <- qqnorm(series_residuals[1,], plot.it = F)
-resid_coords_y <- matrix(NA, nrow = NROW(series_residuals), ncol = length(coords$y))
-for(i in 1:NROW(series_residuals)){
-  if(all(is.na(series_residuals[i,]))){
-    resid_coords_y[i,] <- rep(NA, length(coords$y))
-  } else {
-    norm_coords <- qqnorm(series_residuals[i,], plot.it = FALSE)
-    coords_y <- norm_coords$y
-    coords_y[abs(coords_y) > 3.75] <- NA
-    resid_coords_y[i,] <- coords_y[order(norm_coords$x)]
+    }
   }
-}
 
-cred <- sapply(1:NCOL(resid_coords_y),
-               function(n) quantile(resid_coords_y[,n],
-                                    probs = probs,
-                                    na.rm = TRUE))
-pred_vals <- coords$x[order(coords$x)]
-pred_vals <- pred_vals[complete.cases(cred[1,])]
-plot(x = pred_vals,
-     y = cred[5,][complete.cases(cred[1,])],
-     bty = 'L',
-     xlab = '',
-     ylab = '',
-     pch = 16,
-     col = 'white',
-     cex = 1,
-     ylim = range(cred, na.rm = T),
-     tck = -0.04)
-title('Normal Q-Q Plot', line = 0)
-title(ylab = "Sample Quantiles", line = 1.5)
-title(xlab = "Theoretical Quantiles", line = 1.5)
-polygon(c(pred_vals, rev(pred_vals)), c(cred[1,][complete.cases(cred[1,])],
-                                        rev(cred[9,][complete.cases(cred[1,])])),
-        col = c_light, border = NA)
-polygon(c(pred_vals, rev(pred_vals)), c(cred[2,][complete.cases(cred[1,])],
-                                        rev(cred[8,][complete.cases(cred[1,])])),
-        col = c_light_highlight, border = NA)
-polygon(c(pred_vals, rev(pred_vals)), c(cred[3,][complete.cases(cred[1,])],
-                                        rev(cred[7,][complete.cases(cred[1,])])),
-        col = c_mid, border = NA)
-polygon(c(pred_vals, rev(pred_vals)), c(cred[4,][complete.cases(cred[1,])],
-                                        rev(cred[6,][complete.cases(cred[1,])])),
-        col = c_mid_highlight, border = NA)
-lines(pred_vals, cred[5,][complete.cases(cred[1,])], col = c_dark, lwd = 2.5)
-qqline(cred[5,][complete.cases(cred[1,])], col = '#FFFFFF60', lwd = 3)
-qqline(cred[5,][complete.cases(cred[1,])], col = 'black', lwd = 2.5)
-
-# ACF plot
-acf1 <- acf(series_residuals[1,], plot = F,
-            na.action = na.pass)
-resid_acf <- matrix(NA, nrow = NROW(series_residuals),
-                    ncol = length(acf1$acf[,,1]))
-for(i in 1:NROW(series_residuals)){
-  resid_acf[i, ] <- acf(series_residuals[i,], plot = F,
-                        na.action = na.pass)$acf[,,1]
+  # plot predictions and residuals (commented out code is my first attempt per draw...)
+  library(ggplot2)
+  library(patchwork)
+  # preds <- hindcast(object, type = 'expected')$hindcasts[[series]]
+  # resids <- object$resids[[series]]
+  # n_draw <- dim(preds)[[1]]
+  # n_obs <- dim(preds)[[2]]
+  # draws <- sample(1:n_draw, min(150, n_draw),
+  #                 replace = FALSE)
+  # fvr <- data.frame(preds = c(preds),
+  #            resids = c(resids),
+  #            obs = rep(1:n_obs, n_draw),
+  #            .draw = rep(1:n_draw, each = n_obs)) %>%
+  #   dplyr::filter(.draw %in% draws) |>
+  #   ggplot(aes(preds, resids)) +
+  #   geom_point(shape = 16, alpha = 0.2) +
+  #   geom_smooth(method = "gam", colour = "#7C000060", fill = "#7C000040") +
+  #   labs(title = "Resids vs Fitted",
+  #        x = "DS residuals",
+  #        y = "Fitted values")
+  median_resids <- apply(object$resids[[series]], 2, median)
+  fvr_plot <- data.frame(preds =  apply(hindcast(object, type = 'expected')$hindcasts[[series]], 2, median),
+                         resids = median_resids) |>
+    ggplot(aes(preds, resids)) +
+    geom_point(shape = 16, alpha = 2/3) +
+    geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"), colour = "#7C000060", fill = "#7C000040") +
+    labs(title = "Resids vs Fitted",
+         x = "DS residuals",
+         y = "Fitted values")
+
+  # Q-Q plot
+  qq_plot <- data.frame(resids = median_resids) |>
+    ggplot(aes(sample = resids)) +
+    stat_qq(shape = 16, alpha = 2/3) +
+    stat_qq_line(colour = c_dark) +
+    labs(title = "Normal Q-Q Plot",
+         x = "Theoretical Quantiles",
+         y = "Sample Quantiles")
+
+  # ACF plot
+  acf_resids <- acf(median_resids, plot = F, na.action = na.pass)
+  acf_plot <- data.frame(acf = acf_resids$acf[,,1],
+                         lag = acf_resids$lag[,1,1]) |>
+    dplyr::filter(lag > 0) |>
+    ggplot(aes(x = lag, y = 0, yend = acf)) +
+    geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(acf_resids$n.used),
+               linetype = "dashed") +
+    geom_segment(colour = c_dark) +
+    labs(title = "ACF",
+         x = "Lag",
+         y = "Autocorrelation")
+
+
+  # PACF plot
+  pacf_resids <- pacf(median_resids, plot = F, na.action = na.pass)
+  pacf_plot <- data.frame(pacf = pacf_resids$acf[,,1],
+                          lag = pacf_resids$lag[,1,1]) |>
+    dplyr::filter(lag > 0) |>
+    ggplot(aes(x = lag, y = 0, yend = pacf)) +
+    geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(pacf_resids$n.used),
+               linetype = "dashed") +
+    geom_segment(colour = c_dark) +
+    labs(title = "pACF",
+         x = "Lag",
+         y = "Autocorrelation")
+
+  # return
+  suppressWarnings(print((fvr_plot | qq_plot) / (acf_plot | pacf_plot)))
 }
 
-sorted_x <- seq(1:NCOL(resid_acf))
-N <- length(sorted_x)
-idx <- rep(1:N, each = 2)
-repped_x <- rep(sorted_x, each = 2)
-
-x <- sapply(1:length(idx),
-            function(k) if(k %% 2 == 0)
-              repped_x[k] + min(diff(sorted_x))/2 else
-                repped_x[k] - min(diff(sorted_x))/2)
-cred <- sapply(1:NCOL(resid_acf),
-               function(n) quantile(resid_acf[,n],
-                                    probs = probs, na.rm = T))
-cred <- cred[, -1]
-clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used)
-plot(1, type = "n", bty = 'L',
-     xlab = '',
-     ylab = '',
-     xlim = c(1, N-1),
-     xaxt = 'n',
-     ylim = range(c(cred,
-                    -clim - 0.05,
-                    clim + 0.05), na.rm = TRUE))
-axis(1, at = seq(1, NCOL(cred), by = 2))
-title('ACF', line = 0)
-title(ylab = "Autocorrelation", line = 1.5)
-title(xlab = "Lag", line = 1.5)
-
-N <- N - 1
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[9,],
-     ybottom =  cred[1,],
-     col = c_light,
-     border = 'transparent')
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[8,],
-     ybottom =  cred[2,],
-     col = c_light_highlight,
-     border = 'transparent')
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[7,],
-     ybottom =  cred[3,],
-     col = c_mid,
-     border = 'transparent')
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[6,],
-     ybottom =  cred[4,],
-     col = c_mid_highlight,
-     border = 'transparent')
-
-for (k in 1:N) {
-  lines(x = c(x[seq(1, N*2, by = 2)][k],x[seq(2, N*2, by = 2)][k]),
-        y = c(cred[5,k], cred[5,k]),
-        col = c_dark, lwd = 2)
-}
-abline(h = clim,  col = '#FFFFFF60', lwd = 2.85)
-abline(h = clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-abline(h = -clim,  col = '#FFFFFF60', lwd = 2.85)
-abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed')
-
-# PACF plot
-pacf1 <- pacf(series_residuals[1,], plot = F,
-            na.action = na.pass)
-resid_pacf <- matrix(NA, nrow = NROW(series_residuals),
-                    ncol = length(pacf1$acf[,,1]))
-for(i in 1:NROW(series_residuals)){
-  resid_pacf[i, ] <- pacf(series_residuals[i,], plot = F,
-                        na.action = na.pass)$acf[,,1]
-}
-
-sorted_x <- seq(1:NCOL(resid_pacf))
-N <- length(sorted_x)
-idx <- rep(1:N, each = 2)
-repped_x <- rep(sorted_x, each = 2)
-
-x <- sapply(1:length(idx),
-            function(k) if(k %% 2 == 0)
-              repped_x[k] + min(diff(sorted_x))/2 else
-                repped_x[k] - min(diff(sorted_x))/2)
-cred <- sapply(1:NCOL(resid_pacf),
-               function(n) quantile(resid_pacf[,n],
-                                    probs = probs, na.rm = T))
-
-clim <- qnorm((1 + .95)/2)/sqrt(pacf1$n.used)
-plot(1, type = "n", bty = 'L',
-     xlab = '',
-     ylab = '',
-     xlim = c(1, length(sorted_x)),
-     xaxt = 'n',
-     ylim = range(c(cred,
-                    -clim - 0.05,
-                    clim + 0.05), na.rm = TRUE))
-axis(1, at = seq(1, NCOL(cred), by = 2))
-title('pACF', line = 0)
-title(ylab = "Autocorrelation", line = 1.5)
-title(xlab = "Lag", line = 1.5)
-
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[9,],
-     ybottom =  cred[1,],
-     col = c_light,
-     border = 'transparent')
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[8,],
-     ybottom =  cred[2,],
-     col = c_light_highlight,
-     border = 'transparent')
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[7,],
-     ybottom =  cred[3,],
-     col = c_mid,
-     border = 'transparent')
-rect(xleft = x[seq(1, N*2, by = 2)],
-     xright = x[seq(2, N*2, by = 2)],
-     ytop =  cred[6,],
-     ybottom =  cred[4,],
-     col = c_mid_highlight,
-     border = 'transparent')
-
-for (k in 1:N) {
-  lines(x = c(x[seq(1, N*2, by = 2)][k],x[seq(2, N*2, by = 2)][k]),
-        y = c(cred[5,k], cred[5,k]),
-        col = c_dark, lwd = 2)
-}
-abline(h = clim, col = '#FFFFFF60', lwd = 2.85)
-abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed')
-abline(h = -clim, col = '#FFFFFF60', lwd = 2.85)
-abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed')
-
-layout(1)
-
-}

From 1b4177f7d177557e19e02a6580963d36b194dcef Mon Sep 17 00:00:00 2001
From: Matthijs Hollanders <matthijs.hollanders@gmail.com>
Date: Sat, 26 Oct 2024 12:36:45 +1100
Subject: [PATCH 2/3] ggplot2 plot_mvgam_factors()

---
 R/plot_mvgam_factors.R | 106 ++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 55 deletions(-)

diff --git a/R/plot_mvgam_factors.R b/R/plot_mvgam_factors.R
index bf0b0762..9477ec75 100644
--- a/R/plot_mvgam_factors.R
+++ b/R/plot_mvgam_factors.R
@@ -28,7 +28,7 @@
 #'plot_mvgam_factors(mod)
 #'}
 #'@export
-plot_mvgam_factors = function(object, plot = TRUE){
+plot_mvgam_factors = function(object, plot = TRUE) {
 
   # Check arguments
   if (!(inherits(object, "mvgam"))) {
@@ -47,6 +47,7 @@ plot_mvgam_factors = function(object, plot = TRUE){
   starts <- c(1, starts[-c(1, object$n_lv + 1)])
   ends <- ends[-1]
   probs <- c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95)
+  probs <- c(0.2, 0.4, 0.6, 0.9)
 
   # Set up plot environment
   if(plot){
@@ -68,62 +69,57 @@ plot_mvgam_factors = function(object, plot = TRUE){
     }
   }
 
-  # Loop across each lv and calculate probability that the lv was dropped
-  lv_estimates <- do.call(rbind, lapply(1:object$n_lv, function(x){
+  lv_estimates <- lapply(1:object$n_lv,
+                         \(x) {
 
-    if(object$fit_engine == 'stan'){
-      inds_lv <- seq(x, dim(mcmc_chains(object$model_output, 'LV'))[2], by = object$n_lv)
-      preds <- mcmc_chains(object$model_output, 'LV')[,inds_lv]
-    } else {
-      preds <- mcmc_chains(object$model_output, 'LV')[,starts[x]:ends[x]]
-    }
-
-    # Keep only the in-sample observations for testing against the null of white noise
-    preds <- preds[,1:(length(object$obs_data$y) / NCOL(object$ytimes))]
+                           if(object$fit_engine == 'stan'){
+                             inds_lv <- seq(x, dim(mcmc_chains(object$model_output, 'LV'))[2], by = object$n_lv)
+                             preds <- mcmc_chains(object$model_output, 'LV')[,inds_lv]
+                           } else {
+                             preds <- mcmc_chains(object$model_output, 'LV')[,starts[x]:ends[x]]
+                           }
 
-    cred <- sapply(1:NCOL(preds),
-                   function(n) quantile(preds[,n],
-                                        probs = probs))
-    # If plot = TRUE, plot the LVs
-    if(plot){
-      preds_last <- preds[1,]
-      ylim <- range(cred)
-      ylab <- paste0('Factor ', x)
-      pred_vals <- seq(1:length(preds_last))
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Time',
-           ylab = ylab,
-           xlim = c(0, length(preds_last)),
-           ylim = ylim)
-      polygon(c(pred_vals, rev(pred_vals)), c(cred[1,], rev(cred[9,])),
-              col = c_light, border = NA)
-      polygon(c(pred_vals, rev(pred_vals)), c(cred[2,], rev(cred[8,])),
-              col = c_light_highlight, border = NA)
-      polygon(c(pred_vals, rev(pred_vals)), c(cred[3,], rev(cred[7,])),
-              col = c_mid, border = NA)
-      polygon(c(pred_vals, rev(pred_vals)), c(cred[4,], rev(cred[6,])),
-              col = c_mid_highlight, border = NA)
-      lines(pred_vals, cred[5,], col = c_dark, lwd = 2.5)
-      box(bty = 'L', lwd = 2)
-
-    }
+                           # Keep only the in-sample observations for testing against the null of white noise
+                           preds <- preds[,1:(length(object$obs_data$y) / NCOL(object$ytimes))]
+                           n_draw <- nrow(preds)
+                           n_obs <- ncol(preds)
+                           factor_name <- paste0("Factor ", x)
 
-    # Calculate second derivatives of empirical medians and upper / lower intervals;
-    # factors with small second derivatives are moving in roughly a straight line and not
-    # likely contributing much (or at all) to the latent trend estimates
-    meds <- cred[5,]
-    uppers <- cred[8,]
-    lowers <- cred[2,]
-    data.frame('Contribution' = sum(abs(diff(diff(meds)) +
-                                          diff(diff(uppers)) +
-                                          diff(diff(lowers)))))
-  }))
-
-  rownames(lv_estimates) <- paste0('Factor', 1:object$n_lv)
-
-  if(plot){
-    layout(1)
-  }
+                           if (plot) {
+                             library(ggplot2)
+                             fig <- data.frame(preds = c(preds),
+                                               obs = rep(1:n_obs, each = n_draw),
+                                               .draw = rep(1:n_draw, n_obs)) |>
+                               ggplot(aes(obs, preds)) +
+                               ggdist::stat_lineribbon(point_interval = ggdist::median_qi,
+                                                       .width = probs,
+                                                       colour = c_dark,
+                                                       show.legend = FALSE) +
+                               scale_fill_manual(values = c(c_light, c_light_highlight, c_mid, c_mid_highlight)) +
+                               scale_x_continuous(expand = c(0, 0)) +
+                               scale_y_continuous(expand = c(0, 0)) +
+                               labs(x = "Time",
+                                    y = factor_name)
+                           }
 
-  lv_estimates / sum(lv_estimates)
+                           # Calculate second derivatives of empirical medians and upper / lower intervals;
+                           # factors with small second derivatives are moving in roughly a straight line and not
+                           # likely contributing much (or at all) to the latent trend estimates
+                           meds <- apply(preds, 2, median)
+                           uppers <- apply(preds, 2, \(p) quantile(p, 0.8))
+                           lowers <- apply(preds, 2, \(p) quantile(p, 0.2))
+                           contribution <- data.frame('Contribution' = sum(abs(diff(diff(meds)) +
+                                                                                 diff(diff(uppers)) +
+                                                                                 diff(diff(lowers)))),
+                                                      row.names = factor_name)
+                           if (plot) {
+                             list(fig = fig, contribution = contribution)
+                           } else {
+                             list(contribution = contribution)
+                           }
+                         })
+  if (plot)
+    lapply(lv_estimates, \(x) print(x$fig))
+  contributions <- do.call(rbind, lapply(lv_estimates, \(x) x$contribution))
+  contributions / sum(contributions)
 }

From 51ebcbcfa20b6ac873e934ed9d75d501d61da05d Mon Sep 17 00:00:00 2001
From: Matthijs Hollanders <matthijs.hollanders@gmail.com>
Date: Wed, 6 Nov 2024 15:19:23 +1100
Subject: [PATCH 3/3] updated DESCRIPTION and three plotting functions

---
 DESCRIPTION            |   3 +-
 R/plot_mvgam_factors.R |  13 +-
 R/plot_mvgam_resids.R  |  79 +++-----
 R/plot_mvgam_series.R  | 404 +++++++++++------------------------------
 4 files changed, 143 insertions(+), 356 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7c5700b9..b36719c2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -31,7 +31,8 @@ Imports:
     dplyr,
     magrittr,
     Matrix,
-    rlang
+    rlang,
+    patchwork (>= 1.3.0)
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
diff --git a/R/plot_mvgam_factors.R b/R/plot_mvgam_factors.R
index 9477ec75..fdcef672 100644
--- a/R/plot_mvgam_factors.R
+++ b/R/plot_mvgam_factors.R
@@ -86,20 +86,19 @@ plot_mvgam_factors = function(object, plot = TRUE) {
                            factor_name <- paste0("Factor ", x)
 
                            if (plot) {
-                             library(ggplot2)
                              fig <- data.frame(preds = c(preds),
                                                obs = rep(1:n_obs, each = n_draw),
                                                .draw = rep(1:n_draw, n_obs)) |>
-                               ggplot(aes(obs, preds)) +
+                               ggplot2::ggplot(ggplot2::aes(obs, preds)) +
                                ggdist::stat_lineribbon(point_interval = ggdist::median_qi,
                                                        .width = probs,
                                                        colour = c_dark,
                                                        show.legend = FALSE) +
-                               scale_fill_manual(values = c(c_light, c_light_highlight, c_mid, c_mid_highlight)) +
-                               scale_x_continuous(expand = c(0, 0)) +
-                               scale_y_continuous(expand = c(0, 0)) +
-                               labs(x = "Time",
-                                    y = factor_name)
+                               ggplot2::scale_fill_manual(values = c(c_light, c_light_highlight, c_mid, c_mid_highlight)) +
+                               ggplot2::scale_x_continuous(expand = c(0, 0)) +
+                               ggplot2::scale_y_continuous(expand = c(0, 0)) +
+                               ggplot2::labs(x = "Time",
+                                             y = factor_name)
                            }
 
                            # Calculate second derivatives of empirical medians and upper / lower intervals;
diff --git a/R/plot_mvgam_resids.R b/R/plot_mvgam_resids.R
index b4315f93..250bcab1 100644
--- a/R/plot_mvgam_resids.R
+++ b/R/plot_mvgam_resids.R
@@ -193,72 +193,51 @@ plot_mvgam_resids = function(object, series = 1,
   }
 
   # plot predictions and residuals (commented out code is my first attempt per draw...)
-  library(ggplot2)
-  library(patchwork)
-  # preds <- hindcast(object, type = 'expected')$hindcasts[[series]]
-  # resids <- object$resids[[series]]
-  # n_draw <- dim(preds)[[1]]
-  # n_obs <- dim(preds)[[2]]
-  # draws <- sample(1:n_draw, min(150, n_draw),
-  #                 replace = FALSE)
-  # fvr <- data.frame(preds = c(preds),
-  #            resids = c(resids),
-  #            obs = rep(1:n_obs, n_draw),
-  #            .draw = rep(1:n_draw, each = n_obs)) %>%
-  #   dplyr::filter(.draw %in% draws) |>
-  #   ggplot(aes(preds, resids)) +
-  #   geom_point(shape = 16, alpha = 0.2) +
-  #   geom_smooth(method = "gam", colour = "#7C000060", fill = "#7C000040") +
-  #   labs(title = "Resids vs Fitted",
-  #        x = "DS residuals",
-  #        y = "Fitted values")
   median_resids <- apply(object$resids[[series]], 2, median)
   fvr_plot <- data.frame(preds =  apply(hindcast(object, type = 'expected')$hindcasts[[series]], 2, median),
                          resids = median_resids) |>
-    ggplot(aes(preds, resids)) +
-    geom_point(shape = 16, alpha = 2/3) +
-    geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"), colour = "#7C000060", fill = "#7C000040") +
-    labs(title = "Resids vs Fitted",
-         x = "DS residuals",
-         y = "Fitted values")
+    ggplot2::ggplot(ggplot2::aes(preds, resids)) +
+    ggplot2::geom_point(shape = 16, alpha = 2/3) +
+    ggplot2::geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"), colour = "#7C000060", fill = "#7C000040") +
+    ggplot2::labs(title = "Resids vs Fitted",
+                  x = "DS residuals",
+                  y = "Fitted values")
 
   # Q-Q plot
   qq_plot <- data.frame(resids = median_resids) |>
-    ggplot(aes(sample = resids)) +
-    stat_qq(shape = 16, alpha = 2/3) +
-    stat_qq_line(colour = c_dark) +
-    labs(title = "Normal Q-Q Plot",
-         x = "Theoretical Quantiles",
-         y = "Sample Quantiles")
+    ggplot2::ggplot(ggplot2::aes(sample = resids)) +
+    ggplot2::stat_qq(shape = 16, alpha = 2/3) +
+    ggplot2::stat_qq_line(colour = c_dark) +
+    ggplot2::labs(title = "Normal Q-Q Plot",
+                  x = "Theoretical Quantiles",
+                  y = "Sample Quantiles")
 
   # ACF plot
-  acf_resids <- acf(median_resids, plot = F, na.action = na.pass)
+  acf_resids <- acf(median_resids, plot = FALSE, na.action = na.pass)
   acf_plot <- data.frame(acf = acf_resids$acf[,,1],
                          lag = acf_resids$lag[,1,1]) |>
     dplyr::filter(lag > 0) |>
-    ggplot(aes(x = lag, y = 0, yend = acf)) +
-    geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(acf_resids$n.used),
-               linetype = "dashed") +
-    geom_segment(colour = c_dark) +
-    labs(title = "ACF",
-         x = "Lag",
-         y = "Autocorrelation")
-
+    ggplot2::ggplot(ggplot2::aes(x = lag, y = 0, yend = acf)) +
+    ggplot2::geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(acf_resids$n.used),
+                        linetype = "dashed") +
+    ggplot2::geom_segment(colour = c_dark) +
+    ggplot2::labs(title = "ACF",
+                  x = "Lag",
+                  y = "Autocorrelation")
 
   # PACF plot
-  pacf_resids <- pacf(median_resids, plot = F, na.action = na.pass)
+  pacf_resids <- pacf(median_resids, plot = FALSE, na.action = na.pass)
   pacf_plot <- data.frame(pacf = pacf_resids$acf[,,1],
                           lag = pacf_resids$lag[,1,1]) |>
     dplyr::filter(lag > 0) |>
-    ggplot(aes(x = lag, y = 0, yend = pacf)) +
-    geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(pacf_resids$n.used),
-               linetype = "dashed") +
-    geom_segment(colour = c_dark) +
-    labs(title = "pACF",
-         x = "Lag",
-         y = "Autocorrelation")
+    ggplot2::ggplot(ggplot2::aes(x = lag, y = 0, yend = pacf)) +
+    ggplot2::geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(pacf_resids$n.used),
+                        linetype = "dashed") +
+    ggplot2::geom_segment(colour = c_dark) +
+    ggplot2::labs(title = "pACF",
+                  x = "Lag",
+                  y = "Autocorrelation")
 
   # return
-  suppressWarnings(print((fvr_plot | qq_plot) / (acf_plot | pacf_plot)))
+  patchwork::wrap_plots(fvr_plot, qq_plot, acf_plot, pacf_plot, ncol = 2, nrow = 2, byrow = T)
 }
-
diff --git a/R/plot_mvgam_series.R b/R/plot_mvgam_series.R
index a1e2141e..6c545c33 100644
--- a/R/plot_mvgam_series.R
+++ b/R/plot_mvgam_series.R
@@ -180,323 +180,131 @@ plot_mvgam_series = function(object,
     data_test$series <- droplevels(data_test$series)
   }
 
+  # only plot time series
   if(series == 'all'){
-    n_plots <- length(levels(data_train$series))
-    pages <- 1
-
-    if (n_plots > 4) pages <- 2
-    if (n_plots > 8) pages <- 3
-    if (n_plots > 12) pages <- 4
-    if (pages != 0)  {
-      ppp <- n_plots %/% pages
-
-      if (n_plots %% pages != 0) {
-        ppp <- ppp + 1
-        while (ppp * (pages - 1) >= n_plots) pages <- pages - 1
-      }
-
-      # Configure layout matrix
-      c <- r <- trunc(sqrt(ppp))
-      if (c<1) r <- c <- 1
-      if (c*r < ppp) c <- c + 1
-      if (c*r < ppp) r <- r + 1
-
-      .pardefault <- par(no.readonly=T)
-      on.exit(par(.pardefault))
-      oldpar <- par(mfrow = c(r,c))
-
-    } else { ppp <- 1; oldpar <- par()}
-
-    all_ys <- lapply(seq_len(n_plots), function(x){
-      if(log_scale){
-        log(      data.frame(y = data_train$y,
-                             series = data_train$series,
-                             time = data_train$time) %>%
-                    dplyr::filter(series == levels(data_train$series)[x]) %>%
-                    dplyr::pull(y) + 1)
-      } else {
-        data.frame(y = data_train$y,
-                   series = data_train$series,
-                   time = data_train$time) %>%
-          dplyr::filter(series == levels(data_train$series)[x]) %>%
-          dplyr::pull(y)
-      }
-    })
-
-    for(i in 1:n_plots){
-      s_name <- levels(data_train$series)[i]
-
-      truth <- data.frame(y = data_train$y,
-                          series = data_train$series,
-                          time = data_train$time) %>%
-        dplyr::filter(series == s_name) %>%
-        dplyr::select(time, y) %>%
-        dplyr::distinct() %>%
-        dplyr::arrange(time) %>%
-        dplyr::pull(y)
-
-      if(log_scale){
-        truth <- log(truth + 1)
-        ylab <- paste0('log(', y,' + 1)')
-      } else {
-        ylab <- paste0(y)
-      }
 
-      plot(1, type = "n", bty = 'L',
-           xlab = 'Time',
-           ylab = ylab,
-           xaxt = 'n',
-           ylim = range(unlist(all_ys), na.rm = TRUE),
-           xlim = c(0, length(c(truth))))
-      axis(side = 1,
-           at = floor(seq(0, max(data_train$time) -
-                            (min(data_train$time)-1),
-                          length.out = 6)),
-           labels = floor(seq(min(data_train$time),
-                              max(data_train$time),
-                              length.out = 6)))
-      title(s_name, line = 0)
-
-      if(n_plots > 1){
-        if(lines){
-          for(x in 1:n_plots){
-            lines(all_ys[[x]], lwd = 1.85, col = 'grey85')
-          }
-        } else {
-          for(x in 1:n_plots){
-            points(all_ys[[x]], col = 'grey85', pch = 16)
-          }
-        }
+    # create tibble
+    dat <- dplyr::as_tibble(data_train) |>
+      dplyr::distinct(time, y, series)
 
-      }
-
-      if(lines){
-        lines(x = 1:length(truth), y = truth, lwd = 3, col = "white")
-        lines(x = 1:length(truth), y = truth, lwd = 2.5, col = "#8F2727")
-      } else {
-        points(x = 1:length(truth), y = truth, cex = 1.2, col = "white", pch = 16)
-        points(x = 1:length(truth), y = truth, cex = 0.9, col = "#8F2727", pch = 16)
-      }
+    # log transform and y-axis labels
+    if(log_scale){
+      dat$y <- log(dat$y + 1)
+      ylab <- paste0('log(', y,' + 1)')
+    } else {
+      ylab <- paste0(y)
+    }
 
-      box(bty = 'L', lwd = 2)
+    # create faceted time serires
+    plot_ts <- dat |>
+      ggplot2::ggplot(ggplot2::aes(time, y)) +
+      ggplot2::facet_wrap(~ series) +
+      ggplot2::labs(x = "Time",
+           y = ylab)
 
+    if (lines) {
+      plot_ts <- plot_ts + ggplot2::geom_line(colour = "#8F2727")
+    } else {
+      plot_ts <- plot_ts + ggplot2::geom_point(colour = "#8F2727")
     }
-  layout(1)
+    plot_ts
 
+    # multiple plots for one time series
   } else {
+    # series name
     s_name <- levels(data_train$series)[series]
-    truth <- data.frame(y = data_train$y,
-                        series = data_train$series,
-                        time = data_train$time) %>%
-      dplyr::filter(series == s_name) %>%
-      dplyr::select(time, y) %>%
-      dplyr::distinct() %>%
-      dplyr::arrange(time) %>%
-      dplyr::pull(y)
 
-    if(log_scale){
-      truth <- log(truth + 1)
+    # training data
+    dat <- dplyr::as_tibble(data_train) |>
+      dplyr::filter(series == s_name) |>
+      dplyr::distinct(time, y) |>
+      dplyr::mutate(data = "train")
+
+    # optionally bind test data
+    if (!missing(data_test)) {
+      dat <- dplyr::bind_rows(dat,
+                              dplyr::as_tibble(data_test) |>
+                                dplyr::filter(series == s_name) |>
+                                dplyr::distinct(time, y) |>
+                                dplyr::mutate(data = "test"))
+    }
+
+    # log-transform y
+    if (log_scale) {
+      truth$y <- log(truth$y + 1)
       ylab <- paste0('log(', y,' + 1)')
     } else {
       ylab <- paste0(y)
     }
 
-    .pardefault <- par(no.readonly=T)
-    on.exit(par(.pardefault))
-    layout(matrix(1:4, nrow = 2, byrow = TRUE))
-
-    if(!missing(data_test)){
-      test <- data.frame(y = data_test$y,
-                         series = data_test$series,
-                         time = data_test$time) %>%
-        dplyr::filter(series == s_name) %>%
-        dplyr::select(time, y) %>%
-        dplyr::distinct() %>%
-        dplyr::arrange(time) %>%
-        dplyr::pull(y)
-
-      plot(1, type = "n", bty = 'L',
-           xlab = '',
-           ylab = '',
-           xaxt = 'n',
-           ylim = range(c(truth, test), na.rm = TRUE),
-           xlim = c(0, length(c(truth, test))))
-        axis(side = 1,
-             at = floor(seq(0, max(data_test$time) -
-                              (min(data_train$time)-1),
-                            length.out = 6)),
-             labels = floor(seq(min(data_train$time),
-                                max(data_test$time),
-                                length.out = 6)))
-
-      title('Time series', line = 0)
-      title(ylab = ylab, line = 1.85)
-      title(xlab = "Time", line = 1.85)
-
-      if(lines){
-        lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727")
-        lines(x = (length(truth)+1):length(c(truth, test)),
-              y = test, lwd = 2, col = "black")
+    # time series
+    if (!missing(data_test)) {
+      plot_ts <- dat |>
+        ggplot2::ggplot(ggplot2::aes(time, y, colour = data)) +
+        ggplot2::geom_vline(xintercept = dat |>
+                              dplyr::filter(data == "test") |>
+                              dplyr::pull(time) |>
+                              min(),
+                            linetype = "dashed",
+                            colour = "black") +
+        ggplot2::scale_colour_manual(values = c("black", "#8F2727")) +
+        ggplot2::labs(title = "Time series",
+                      x = "Time",
+                      y = ylab)
+      if (lines) {
+        plot_ts <- plot_ts + ggplot2::geom_line(show.legend = F)
       } else {
-        points(x = 1:length(truth), y = truth, pch = 16, col = "#8F2727")
-        points(x = (length(truth)+1):length(c(truth, test)),
-               y = test, pch = 16, col = "black")
-      }
-
-      abline(v = length(truth)+1, col = '#FFFFFF60', lwd = 2.85)
-      abline(v = length(truth)+1, col = 'black', lwd = 2.5, lty = 'dashed')
-      box(bty = 'L', lwd = 2)
-
-      if(missing(n_bins)){
-        n_bins <- max(c(length(hist(c(truth, test), plot = F)$breaks),
-                        20))
-      }
-
-      hist(c(truth, test), border = "#8F2727",
-           lwd = 2,
-           freq = FALSE,
-           breaks = n_bins,
-           col = "#C79999",
-           ylab = '',
-           xlab = '',
-           main = '')
-      title('Histogram', line = 0)
-      title(ylab = 'Density', line = 1.85)
-      title(xlab = paste0(y), line = 1.85)
-
-      acf(c(truth, test),
-          na.action = na.pass, bty = 'L',
-          lwd = 2.5, ci.col = 'black', col = "#8F2727",
-          main = '', ylab = '', xlab = '')
-      acf1 <- acf(c(truth, test), plot = F,
-                  na.action = na.pass)
-      clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used)
-      abline(h = clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      abline(h = -clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = -clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      box(bty = 'L', lwd = 2)
-      title('ACF', line = 0)
-      title(ylab = 'Autocorrelation', line = 1.85)
-      title(xlab = 'Lag', line = 1.85)
-
-      ecdf_plotdat = function(vals, x){
-        func <- ecdf(vals)
-        func(x)
+        plot_ts <- plot_ts + ggplot2::geom_point(show.legend = F)
       }
-
-      plot_x <- seq(from = min(c(truth, test), na.rm = T),
-                    to = max(c(truth, test), na.rm = T),
-                    length.out = 100)
-
-      plot(1, type = "n", bty = 'L',
-           xlab = '',
-           ylab = '',
-           xlim = c(min(plot_x), max(plot_x)),
-           ylim = c(0, 1))
-      title('CDF', line = 0)
-      title(ylab = 'Empirical CDF', line = 1.85)
-      title(xlab = paste0(y), line = 1.85)
-      lines(x = plot_x,
-            y = ecdf_plotdat(c(truth, test),
-                             plot_x),
-            col = "#8F2727",
-            lwd = 2.5)
-      box(bty = 'L', lwd = 2)
-
     } else {
-      plot(1, type = "n", bty = 'L',
-           xlab = '',
-           ylab = '',
-           xaxt = 'n',
-           ylim = range(c(truth), na.rm = TRUE),
-           xlim = c(0, length(c(truth))))
-      if(max(data_train$time < 6)){
-        axis(side = 1,
-             at = 0:(max(data_train$time) - 1),
-             labels = 1:max(data_train$time))
-      } else {
-        axis(side = 1,
-             at = floor(seq(0, max(data_train$time) -
-                              (min(data_train$time)-1),
-                            length.out = 6)),
-             labels = floor(seq(min(data_train$time),
-                                max(data_train$time),
-                                length.out = 6)))
-      }
-
-      title('Time series', line = 0)
-      title(ylab = ylab, line = 1.85)
-      title(xlab = "Time", line = 1.85)
-
-      if(lines){
-        lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727")
-      } else {
-        points(x = 1:length(truth), y = truth, pch = 16, col = "#8F2727")
-      }
-
-      box(bty = 'L', lwd = 2)
-
-      if(missing(n_bins)){
-        n_bins <- max(c(length(hist(c(truth), plot = F)$breaks),
-                        20))
-      }
-
-      hist(c(truth), border = "#8F2727",
-           lwd = 2,
-           freq = FALSE,
-           breaks = n_bins,
-           col = "#C79999",
-           ylab = '',
-           xlab = '',
-           main = '')
-      title('Histogram', line = 0)
-      title(ylab = 'Density', line = 1.85)
-      title(xlab = paste0(y), line = 1.85)
-
-
-      acf(c(truth),
-          na.action = na.pass, bty = 'L',
-          lwd = 2.5, ci.col = 'black', col = "#8F2727",
-          main = '', ylab = '', xlab = '')
-      acf1 <- acf(c(truth), plot = F,
-                  na.action = na.pass)
-      clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used)
-      abline(h = clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      abline(h = -clim,  col = '#FFFFFF', lwd = 2.85)
-      abline(h = -clim,  col = 'black', lwd = 2.5, lty = 'dashed')
-      box(bty = 'L', lwd = 2)
-      title('ACF', line = 0)
-      title(ylab = 'Autocorrelation', line = 1.85)
-      title(xlab = 'Lag', line = 1.85)
-
-
-      ecdf_plotdat = function(vals, x){
-        func <- ecdf(vals)
-        func(x)
-      }
-
-      plot_x <- seq(from = min(truth, na.rm = T),
-                    to = max(truth, na.rm = T),
-                    length.out = 100)
-      plot(1, type = "n", bty = 'L',
-           xlab = '',
-           ylab = '',
-           xlim = c(min(plot_x), max(plot_x)),
-           ylim = c(0, 1))
-      title('CDF', line = 0)
-      title(ylab = 'Empirical CDF', line = 1.85)
-      title(xlab = paste0(y), line = 1.85)
-      lines(x = plot_x,
-            y = ecdf_plotdat(truth,
-                             plot_x),
-            col = "#8F2727",
-            lwd = 2.5)
-      box(bty = 'L', lwd = 2)
+      plot_ts <- dat |>
+        ggplot2::ggplot(ggplot2::aes(time, y)) +
+        ggplot2::labs(title = "Time series",
+                      x = "Time",
+                      y = ylab)
+    }
+    if (lines) {
+      plot_ts <- plot_ts + ggplot2::geom_line(colour = "#8F2727")
+    } else {
+      plot_ts <- plot_ts + ggplot2::geom_point(colour = "#8F2727")
     }
 
+    # histogram
+    if(missing(n_bins)){
+      n_bins <- max(c(length(hist(c(dat$y), plot = F)$breaks),
+                      20))
+    }
+    plot_hist <- dat |>
+      ggplot2::ggplot(ggplot2::aes(y)) +
+      ggplot2::geom_histogram(bins = n_bins, fill = "#C79999") +
+      ggplot2::labs(title = "Histogram",
+                    x = "y",
+                    y = "Count")
+
+    # ACF
+    acf_y <- acf(dat$y, plot = F, na.action = na.pass)
+    plot_acf <- data.frame(acf = acf_y$acf[,,1],
+                           lag = acf_y$lag[,1,1]) |>
+      ggplot2::ggplot(ggplot2::aes(x = lag, y = 0, yend = acf)) +
+      ggplot2::geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(acf_y$n.used),
+                          linetype = "dashed") +
+      ggplot2::geom_segment(colour = "#8F2727") +
+      ggplot2::labs(title = "ACF",
+                    x = "Lag",
+                    y = "Autocorrelation")
+
+    # ECDF plot
+    range_y <- range(dat$y, na.rm = T)
+    plot_ecdf <- data.frame(x = seq(range_y[1], range_y[2], length.out = 100)) |>
+      dplyr::mutate(y = ecdf(truth$y)(x)) |>
+      ggplot2::ggplot(ggplot2::aes(x, y)) +
+      ggplot2::geom_line(colour = "#8F2727") +
+      ggplot2::scale_y_continuous(limits = c(0, 1)) +
+      ggplot2::labs(title = "CDF",
+                    x = "y",
+                    y = "Empirical CDF")
+
+    # plot
+    patchwork::wrap_plots(plot_ts, plot_hist, plot_acf, plot_ecdf, ncol = 2, nrow = 2, byrow = T)
   }
-
-  layout(1)
 }