diff --git a/DESCRIPTION b/DESCRIPTION index 442b0f92..11606e5d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,13 @@ Package: mvgam Title: Multivariate (Dynamic) Generalized Additive Models Version: 1.1.4 -Date: 2024-09-05 +Date: 2024-11-25 Authors@R: c(person("Nicholas J", "Clark", , "nicholas.j.clark1214@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7131-3301")), person("Scott", "Pease", role = c("ctb"), - comment = c("broom enhancements")), + comment = c("broom enhancements", ORCID = "0009-0006-8977-9285")), person("Matthijs", "Hollanders", role = c("ctb"), - comment = c("ggplot visualizations"))) + comment = c("ggplot visualizations", ORCID = "0000-0003-0796-1018"))) Description: Fit Bayesian Dynamic Generalized Additive Models to multivariate observations. Users can build nonlinear State-Space models that can incorporate semiparametric effects in observation and process components, using a wide range of observation families. Estimation is performed using Markov Chain Monte Carlo with Hamiltonian Monte Carlo in the software 'Stan'. References: Clark & Wells (2022) . URL: https://github.com/nicholasjclark/mvgam, https://nicholasjclark.github.io/mvgam/ BugReports: https://github.com/nicholasjclark/mvgam/issues diff --git a/R/plot_mvgam_resids.R b/R/plot_mvgam_resids.R index ea6818be..b504a293 100644 --- a/R/plot_mvgam_resids.R +++ b/R/plot_mvgam_resids.R @@ -7,11 +7,15 @@ #'@importFrom mgcv bam #'@param object \code{list} object returned from \code{mvgam}. See [mvgam()] #'@param series \code{integer} specifying which series in the set is to be plotted +#'@param n_draws \code{integer} specifying the number of posterior residual draws +#'to use for calculating uncertainty in the "ACF" and "pACF" frames. Default is `100` +#'@param n_points \code{integer} specifying the maximum number of points to show in the +#'"Resids vs Fitted" and "Normal Q-Q Plot" frames. Default is `1000` #'@author Nicholas J Clark #'@details A total of four ggplot plots are generated to examine posterior #'Dunn-Smyth residuals for the specified series. Plots include a residuals vs fitted values plot, #'a Q-Q plot, and two plots to check for any remaining temporal autocorrelation in the residuals. -#'Note, all plots use only report statistics from a sample of up to 20 posterior +#'Note, all plots only report statistics from a sample of up to 100 posterior #'draws (to save computational time), so uncertainty in these relationships may not be adequately represented. #'@return A series of facetted ggplot object #'@author Nicholas J Clark and Matthijs Hollanders @@ -31,7 +35,9 @@ #' } #'@export plot_mvgam_resids = function(object, - series = 1){ + series = 1, + n_draws = 100L, + n_points = 1000L){ # Check arguments if (!(inherits(object, "mvgam"))) { @@ -39,6 +45,8 @@ plot_mvgam_resids = function(object, } validate_pos_integer(series) + validate_pos_integer(n_draws) + validate_pos_integer(n_points) if(series > NCOL(object$ytimes)){ stop(paste0('object only contains data / predictions for ', @@ -46,30 +54,41 @@ plot_mvgam_resids = function(object, call. = FALSE) } - # Plotting colours - c_dark <- c("#8F2727") - # Take a sample of posterior draws to compute autocorrelation statistics # This is because acf(posterior_median_residual) can induce spurious patterns # due to the randomness of DS residuals; # rather, we want median(acf(residual_i)), where i indexes all possible draws # But this is computationally expensive for some models so we compromise # by only taking a few draws - n_draws <- NROW(object$resids[[series]]) - n_samps <- min(20, n_draws) + n_total_draws <- NROW(object$resids[[series]]) + n_samps <- min(n_draws, n_total_draws) hcs <- hindcast(object, type = 'expected')$hindcasts[[series]] resids <- object$resids[[series]] resid_df <- do.call(rbind, lapply(seq_len(n_samps), function(x){ data.frame(preds = hcs[x, ], - resids = resids[x, ]) %>% - dplyr::filter(!is.na(resids)) + resids = resids[x, ], + .draw = x) %>% + dplyr::filter(!is.na(resids)) })) - # Plot predictions and residuals + # Plot predictions and residuals (but limit number of points to n_points to + # speed up plotting) + if(NROW(resid_df) > n_points){ + resid_df <- resid_df[sample(1:NROW(resid_df), + n_points, + replace = FALSE), ] + } + fvr_plot <- ggplot2::ggplot(resid_df, ggplot2::aes(preds, resids)) + - ggplot2::geom_point(shape = 16, col = 'white', size = 1.25) + - ggplot2::geom_point(shape = 16, col = 'black', size = 1) + + ggplot2::geom_point(shape = 16, + col = 'white', + size = 1.25, + alpha = 0.4) + + ggplot2::geom_point(shape = 16, + col = 'black', + size = 1, + alpha = 0.4) + ggplot2::geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"), colour = "#7C000060", @@ -81,11 +100,16 @@ plot_mvgam_resids = function(object, # Q-Q plot qq_plot <- ggplot2::ggplot(resid_df, ggplot2::aes(sample = resids)) + - ggplot2::stat_qq_line(colour = c_dark, + ggplot2::stat_qq_line(colour = "#8F2727", linewidth = 1) + - ggplot2::stat_qq(shape = 16, col = 'white', size = 1.25) + - ggplot2::stat_qq(shape = 16, col = 'black', - size = 1) + + ggplot2::stat_qq(shape = 16, + col = 'white', + size = 1.25, + alpha = 0.4) + + ggplot2::stat_qq(shape = 16, + col = 'black', + size = 1, + alpha = 0.4) + ggplot2::labs(title = "Normal Q-Q Plot", x = "Theoretical Quantiles", y = "Sample Quantiles") + @@ -100,16 +124,34 @@ plot_mvgam_resids = function(object, dplyr::filter(lag > 0) })) %>% dplyr::group_by(lag) %>% - dplyr::summarise_all(median) - acf_plot <- ggplot2::ggplot(acf_stats, ggplot2::aes(x = lag, y = 0, yend = acf)) + + dplyr::mutate(ylow = quantile(acf, probs = 0.05), + yqlow = quantile(acf, probs = 0.2), + ymidlow = quantile(acf, probs = 0.25), + ymidhigh = quantile(acf, probs = 0.75), + yqhigh = quantile(acf, probs = 0.8), + yhigh = quantile(acf, probs = 0.95)) %>% + dplyr::distinct() + acf_plot <- ggplot2::ggplot(acf_stats, + ggplot2::aes(x = lag)) + ggplot2::geom_hline(yintercept = c(-1, 1) * - qnorm((1 + 0.95) / 2) / acf_stats$denom[1], + qnorm((1 + 0.95) / 2) / + acf_stats$denom[1], linetype = "dashed") + ggplot2::geom_hline(yintercept = 0, - colour = c_dark, + colour = "#7C0000", linewidth = 0.25) + - ggplot2::geom_segment(colour = c_dark, - linewidth = 1) + + ggplot2::geom_segment(colour = "#DCBCBC", + linewidth = 1.5, + ggplot2::aes(y = ylow, + yend = yhigh)) + + ggplot2::geom_segment(colour = "#B97C7C", + linewidth = 1.5, + ggplot2::aes(y = yqlow, + yend = yqhigh)) + + ggplot2::geom_segment(colour = "#7C0000", + linewidth = 1.5, + ggplot2::aes(y = ymidlow, + yend = ymidhigh)) + ggplot2::labs(title = "ACF", x = "Lag", y = "Autocorrelation") + @@ -124,17 +166,35 @@ plot_mvgam_resids = function(object, dplyr::filter(lag > 0) })) %>% dplyr::group_by(lag) %>% - dplyr::summarise_all(median) + dplyr::mutate(ylow = quantile(pacf, probs = 0.05), + yqlow = quantile(pacf, probs = 0.2), + ymidlow = quantile(pacf, probs = 0.25), + ymidhigh = quantile(pacf, probs = 0.75), + yqhigh = quantile(pacf, probs = 0.8), + yhigh = quantile(pacf, probs = 0.95)) %>% + dplyr::distinct() - pacf_plot <- ggplot2::ggplot(pacf_stats, ggplot2::aes(x = lag, y = 0, yend = pacf)) + + pacf_plot <- ggplot2::ggplot(pacf_stats, + ggplot2::aes(x = lag)) + ggplot2::geom_hline(yintercept = c(-1, 1) * - qnorm((1 + 0.95) / 2) / pacf_stats$denom[1], + qnorm((1 + 0.95) / 2) / + pacf_stats$denom[1], linetype = "dashed") + ggplot2::geom_hline(yintercept = 0, - colour = c_dark, + colour = "#7C0000", linewidth = 0.25) + - ggplot2::geom_segment(colour = c_dark, - linewidth = 1) + + ggplot2::geom_segment(colour = "#DCBCBC", + linewidth = 1.5, + ggplot2::aes(y = ylow, + yend = yhigh)) + + ggplot2::geom_segment(colour = "#B97C7C", + linewidth = 1.5, + ggplot2::aes(y = yqlow, + yend = yqhigh)) + + ggplot2::geom_segment(colour = "#7C0000", + linewidth = 1.5, + ggplot2::aes(y = ymidlow, + yend = ymidhigh)) + ggplot2::labs(title = "pACF", x = "Lag", y = "Partial autocorrelation") + diff --git a/README.Rmd b/README.Rmd index b2e4f6c6..1bed2fef 100644 --- a/README.Rmd +++ b/README.Rmd @@ -44,7 +44,7 @@ A series of [vignettes cover data formatting, forecasting and several extended c * [Phylogenetic smoothing using `mgcv`](https://ecogambler.netlify.app/blog/phylogenetic-smooths-mgcv/){target="_blank"} * [Incorporating time-varying seasonality in forecast models](https://ecogambler.netlify.app/blog/time-varying-seasonality/){target="_blank"} -Please also feel free to use the [`mvgam` Discussion Board](https://github.com/nicholasjclark/mvgam/discussions) to hunt for or post other discussion topics related to the package. +Please also feel free to use the [`mvgam` Discussion Board](https://github.com/nicholasjclark/mvgam/discussions) to hunt for or post other discussion topics related to the package, and do check out the [`mvgam` changelog](https://nicholasjclark.github.io/mvgam/news/index.html) for any updates about recent upgrades that the package has incorporated. ## Installation Install the stable version from `CRAN` using: `install.packages('mvgam')`, or install the development version from `GitHub` using: diff --git a/README.md b/README.md index 029e919e..75265bb1 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,10 @@ been compiled: Please also feel free to use the [`mvgam` Discussion Board](https://github.com/nicholasjclark/mvgam/discussions) to hunt for -or post other discussion topics related to the package. +or post other discussion topics related to the package, and do check out +the [`mvgam` +changelog](https://nicholasjclark.github.io/mvgam/news/index.html) for +any updates about recent upgrades that the package has incorporated. ## Installation @@ -242,29 +245,29 @@ summary(lynx_mvgam) #> #> #> GAM coefficient (beta) estimates: -#> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) 6.4000 6.60 6.900 1.01 722 -#> s(season).1 -0.6500 -0.14 0.360 1.00 1143 -#> s(season).2 0.6800 1.30 1.900 1.01 693 -#> s(season).3 1.3000 1.90 2.600 1.00 725 -#> s(season).4 -0.0730 0.54 1.100 1.00 978 -#> s(season).5 -1.3000 -0.70 -0.140 1.00 997 -#> s(season).6 -1.2000 -0.57 0.097 1.00 894 -#> s(season).7 0.0071 0.72 1.300 1.00 974 -#> s(season).8 0.6200 1.40 2.100 1.00 903 -#> s(season).9 -0.3500 0.22 0.850 1.00 758 -#> s(season).10 -1.4000 -0.85 -0.330 1.00 911 +#> 2.5% 50% 97.5% Rhat n_eff +#> (Intercept) 6.400 6.60 6.900 1 830 +#> s(season).1 -0.620 -0.13 0.390 1 1000 +#> s(season).2 0.750 1.30 1.900 1 1053 +#> s(season).3 1.300 1.90 2.500 1 826 +#> s(season).4 -0.110 0.54 1.100 1 898 +#> s(season).5 -1.300 -0.68 -0.077 1 878 +#> s(season).6 -1.200 -0.54 0.120 1 988 +#> s(season).7 -0.022 0.72 1.400 1 1057 +#> s(season).8 0.640 1.40 2.100 1 795 +#> s(season).9 -0.360 0.23 0.820 1 808 +#> s(season).10 -1.400 -0.88 -0.350 1 1073 #> #> Approximate significance of GAM smooths: #> edf Ref.df Chi.sq p-value -#> s(season) 9.96 10 51.8 <2e-16 *** +#> s(season) 9.97 10 48.9 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> Latent trend parameter AR estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] 0.59 0.82 0.98 1 705 -#> sigma[1] 0.38 0.48 0.60 1 576 +#> ar1[1] 0.60 0.82 0.97 1 592 +#> sigma[1] 0.38 0.48 0.60 1 596 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters @@ -273,7 +276,7 @@ summary(lynx_mvgam) #> 0 of 2000 iterations saturated the maximum tree depth of 10 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 22 6:47:36 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Mon Nov 25 9:36:12 AM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -414,8 +417,8 @@ plot(lynx_mvgam, type = 'forecast', newdata = lynx_test) Plotting forecast distributions using mvgam in R - #> Out of sample CRPS: - #> 2351.96370025 + #> Out of sample DRPS: + #> 2395.89857175 And the estimated latent trend component, again using the more flexible `plot_mvgam_...()` option to show first derivatives of the estimated @@ -574,7 +577,7 @@ summary(mod, include_betas = FALSE) #> 0 of 2000 iterations saturated the maximum tree depth of 10 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 22 6:48:56 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Mon Nov 25 9:37:32 AM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) diff --git a/docs/authors.html b/docs/authors.html index 1281c038..8aa31498 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -63,9 +63,13 @@

Authors

  • -

    Scott Pease. Contributor. +

    Scott Pease. Contributor.
    broom enhancements

  • +
  • +

    Matthijs Hollanders. Contributor. +
    ggplot visualizations

    +
  • diff --git a/docs/index.html b/docs/index.html index 5a4d552a..b26bf243 100644 --- a/docs/index.html +++ b/docs/index.html @@ -132,17 +132,17 @@

    Getting started

    See ??mvgam_families for more information. Below is a simple example for simulating and modelling proportional data with Beta observations over a set of seasonal series with independent Gaussian Process dynamic trends:

    -data <- sim_mvgam(family = betar(),
    +set.seed(100)
    +data <- sim_mvgam(family = betar(),
                       T = 80,
                       trend_model = GP(),
    -                  trend_rel = 0.5, 
    +                  prop_trend = 0.5, 
                       seasonality = 'shared')

    Plot the series to see how they evolve over time

     plot_mvgam_series(data = data$data_train, series = 'all')
    -
    Simulating and analysing multivariate time series with Dynamic Generalized Additive Models

    Fit a State-Space GAM to these series that uses a hierarchical cyclic seasonal smooth term to capture variation in seasonality among series. The model also includes series-specific latent Gaussian Processes with squared exponential covariance functions to capture temporal dynamics

    +

    +

    Fit a State-Space GAM to these series that uses a hierarchical cyclic seasonal smooth term to capture variation in seasonality among series. The model also includes series-specific latent Gaussian Processes with squared exponential covariance functions to capture temporal dynamics

     mod <- mvgam(y ~ s(season, bs = 'cc', k = 7) +
                    s(season, by = series, m = 1, k = 5),
    diff --git a/docs/reference/figures/README-unnamed-chunk-12-1.png b/docs/reference/figures/README-unnamed-chunk-12-1.png
    index f8230545..ba237196 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-12-1.png and b/docs/reference/figures/README-unnamed-chunk-12-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-13-1.png b/docs/reference/figures/README-unnamed-chunk-13-1.png
    index 042e20d4..127034a1 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-13-1.png and b/docs/reference/figures/README-unnamed-chunk-13-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-14-1.png b/docs/reference/figures/README-unnamed-chunk-14-1.png
    index 31b4fa0e..a99aefbf 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-14-1.png and b/docs/reference/figures/README-unnamed-chunk-14-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-15-1.png b/docs/reference/figures/README-unnamed-chunk-15-1.png
    index cf8e947c..18ea06c7 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-15-1.png and b/docs/reference/figures/README-unnamed-chunk-15-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-16-1.png b/docs/reference/figures/README-unnamed-chunk-16-1.png
    index af9d8d2a..12a473f3 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-16-1.png and b/docs/reference/figures/README-unnamed-chunk-16-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-17-1.png b/docs/reference/figures/README-unnamed-chunk-17-1.png
    index 3f41646f..3de4b3bb 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-17-1.png and b/docs/reference/figures/README-unnamed-chunk-17-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-18-1.png b/docs/reference/figures/README-unnamed-chunk-18-1.png
    index 769e9164..f28779ef 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-18-1.png and b/docs/reference/figures/README-unnamed-chunk-18-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-19-1.png b/docs/reference/figures/README-unnamed-chunk-19-1.png
    index 8f3edf8b..ed452b78 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-19-1.png and b/docs/reference/figures/README-unnamed-chunk-19-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-20-1.png b/docs/reference/figures/README-unnamed-chunk-20-1.png
    index e9d1853c..cd9e7c18 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-20-1.png and b/docs/reference/figures/README-unnamed-chunk-20-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-21-1.png b/docs/reference/figures/README-unnamed-chunk-21-1.png
    index e924b817..01a1b184 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-21-1.png and b/docs/reference/figures/README-unnamed-chunk-21-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-22-1.png b/docs/reference/figures/README-unnamed-chunk-22-1.png
    index de9c4d26..c795a5b4 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-22-1.png and b/docs/reference/figures/README-unnamed-chunk-22-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-23-1.png b/docs/reference/figures/README-unnamed-chunk-23-1.png
    index bf101f1f..f1489416 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-23-1.png and b/docs/reference/figures/README-unnamed-chunk-23-1.png differ
    diff --git a/docs/reference/figures/README-unnamed-chunk-24-1.png b/docs/reference/figures/README-unnamed-chunk-24-1.png
    index 265b2bf5..85316a55 100644
    Binary files a/docs/reference/figures/README-unnamed-chunk-24-1.png and b/docs/reference/figures/README-unnamed-chunk-24-1.png differ
    diff --git a/docs/reference/mvgam-package.html b/docs/reference/mvgam-package.html
    index bde7b542..f709bf25 100644
    --- a/docs/reference/mvgam-package.html
    +++ b/docs/reference/mvgam-package.html
    @@ -78,8 +78,8 @@ 

    See also

    Author

    Maintainer: Nicholas J Clark nicholas.j.clark1214@gmail.com (ORCID)

    -

    Other contributors:

    • Scott Pease (broom enhancements) [contributor]

    • -
    • Matthijs Hollanders (ggplot visualizations) [contributor]

    • +

      Other contributors:

      • Scott Pease (ORCID) (broom enhancements) [contributor]

      • +
      • Matthijs Hollanders (ORCID) (ggplot visualizations) [contributor]