improve docs; streamline description

swpease · Jun 5, 2023 · c1bce6a · c1bce6a
1 parent 0f80c03
commit c1bce6a
Show file tree

Hide file tree

Showing 206 changed files with 7,179 additions and 16,475 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -2,8 +2,11 @@
 ^\.Rproj\.user$
 ^LICENSE\.md$
 ^doc$
-^Meta$
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
 ^\.github$
+^README.Rmd
+^index.Rmd
+^index.md
+^tests/testthat/Rplots\.pdf$
diff --git a/.gitignore b/.gitignore
@@ -1,8 +1,7 @@
-.Rproj.user
+*.Rproj*
 .Rhistory
 .RData
 .Ruserdata
-NEON_manuscript/Results/
-manuscript_analysis
 doc
 Meta
+.Rproj.user
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,46 +1,50 @@
 Package: mvgam
 Title: Multivariate (Dynamic) Generalized Additive Models
 Version: 1.0.5
-Date: 2023-03-29
-Authors@R: 
-    person(given = "Nicholas",
-           family = "Clark",
-           role = c("aut", "cre"),
-           email = "[email protected]")
-Description: This package is for fitting Bayesian Generalised Additive Models to sets of discrete time series. 
-             The primary purpose of the package is to build a version of dynamic factor models that incorporates 
-             the flexibility of GAMs in the linear predictor and latent dynamic trend components that are useful 
+Date: 2023-06-03
+Authors@R:
+    person("Nicholas J", "Clark", , "[email protected]", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0001-7131-3301"))
+Description: Fit Bayesian Dynamic Generalised Additive Models to sets of time series. 
+             The primary purpose of the package is to build dynamic nonlinear models that incorporate 
+             the flexibility of GAMs in the linear predictor and latent trend components that are useful 
              for time series analysis and forecasting. Estimation is performed using Markov Chain Monte Carlo (MCMC) with
              either the Gibbs sampling software JAGS or with Hamiltonian Monte Carlo in the software Stan. The package also includes 
              utilities for online updating of  forecast distributions with a recursive particle filter that 
-             uses sequential importance sampling to assimilate new observations as they become available. References: 
-             Clark & Wells (2022) <https://doi.org/10.1111/2041-210X.13974>
-Maintainer: Nicholas J Clark <[email protected]>
+             uses sequential importance sampling to assimilate new observations as they become available. 
+             References: Clark & Wells (2022) <https://doi.org/10.1111/2041-210X.13974>.
+URL: https://github.com/nicholasjclark/mvgam, https://nicholasjclark.github.io/mvgam/
 License: MIT + file LICENSE
-Depends: R (>= 3.6.0), mgcv, parallel, rstan (>= 2.19.2)
+Depends: 
+    R (>= 3.6.0),
+    mgcv (>= 1.8-13),
+    methods
 Imports: 
+    rstan (>= 2.19.2),
+    posterior (>= 1.0.0),
+    loo (>= 2.3.1),
+    parallel,
     pbapply,
     tweedie,
-    lubridate,
     MASS,
     purrr,
     zoo,
+    xts,
+    lubridate,
     smooth,
-    MCMCpack,
-    loo,
     dplyr,
-    magrittr
+    magrittr,
+    Matrix
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
 Suggests: 
+    cmdstanr (>= 0.4.0),
     knitr, 
     rmarkdown, 
-    cmdstanr (>= 0.4.0),
     rjags,
+    coda,
     runjags,
     testthat
 VignetteBuilder: knitr
-URL: https://github.com/nicholasjclark/mvgam,
-    https://nicholasjclark.github.io/mvgam/
diff --git a/NAMESPACE b/NAMESPACE
@@ -47,9 +47,90 @@ export(series_to_mvgam)
 export(sim_mvgam)
 export(student_t)
 export(tweedie)
+importFrom(grDevices,devAskNewPage)
+importFrom(grDevices,hcl.colors)
+importFrom(grDevices,rgb)
+importFrom(graphics,abline)
+importFrom(graphics,axis)
+importFrom(graphics,barplot)
+importFrom(graphics,box)
+importFrom(graphics,boxplot)
+importFrom(graphics,bxp)
+importFrom(graphics,hist)
+importFrom(graphics,layout)
+importFrom(graphics,legend)
+importFrom(graphics,lines)
+importFrom(graphics,par)
+importFrom(graphics,points)
+importFrom(graphics,polygon)
+importFrom(graphics,rect)
+importFrom(graphics,rug)
+importFrom(graphics,title)
 importFrom(magrittr,"%>%")
+importFrom(methods,new)
+importFrom(mgcv,betar)
+importFrom(mgcv,nb)
+importFrom(parallel,clusterEvalQ)
+importFrom(parallel,clusterExport)
+importFrom(parallel,makePSOCKcluster)
+importFrom(parallel,setDefaultCluster)
+importFrom(parallel,stopCluster)
+importFrom(stats,Gamma)
+importFrom(stats,acf)
+importFrom(stats,as.formula)
+importFrom(stats,coef)
+importFrom(stats,complete.cases)
+importFrom(stats,cor)
+importFrom(stats,cov)
+importFrom(stats,cov2cor)
+importFrom(stats,dbeta)
+importFrom(stats,density)
+importFrom(stats,dlnorm)
+importFrom(stats,dnbinom)
+importFrom(stats,dnorm)
+importFrom(stats,dpois)
 importFrom(stats,dt)
+importFrom(stats,ecdf)
+importFrom(stats,formula)
+importFrom(stats,frequency)
+importFrom(stats,gaussian)
+importFrom(stats,is.ts)
+importFrom(stats,logLik)
 importFrom(stats,make.link)
+importFrom(stats,median)
+importFrom(stats,na.pass)
+importFrom(stats,pacf)
+importFrom(stats,pbeta)
+importFrom(stats,plnorm)
+importFrom(stats,plogis)
+importFrom(stats,pnorm)
+importFrom(stats,poisson)
+importFrom(stats,ppois)
+importFrom(stats,predict)
 importFrom(stats,pt)
+importFrom(stats,qnorm)
+importFrom(stats,qqline)
+importFrom(stats,qqnorm)
 importFrom(stats,qt)
+importFrom(stats,quantile)
+importFrom(stats,rbeta)
+importFrom(stats,reformulate)
+importFrom(stats,rgamma)
+importFrom(stats,rlnorm)
+importFrom(stats,rnbinom)
+importFrom(stats,rnorm)
+importFrom(stats,rpois)
 importFrom(stats,rt)
+importFrom(stats,runif)
+importFrom(stats,sd)
+importFrom(stats,start)
+importFrom(stats,stl)
+importFrom(stats,terms)
+importFrom(stats,terms.formula)
+importFrom(stats,time)
+importFrom(stats,ts)
+importFrom(stats,update)
+importFrom(stats,update.formula)
+importFrom(stats,var)
+importFrom(utils,head)
+importFrom(utils,tail)
diff --git a/R/dynamic.R b/R/dynamic.R
@@ -4,15 +4,16 @@
 #' low-rank Gaussian Process smooths are available for estimating the dynamics of the
 #' time-varying coefficient.
 #'
+#' @importFrom stats terms formula reformulate
 #' @param variable The variable that the dynamic smooth will be a function of
 #' @param stationary logical. If \code{TRUE} (the default), the latent Gaussian Process
 #' smooth will not have a linear trend component. If \code{FALSE},
 #' a linear trend in the covariate is added to the Gaussian Process smooth. Leave at \code{TRUE}
 #' if you do not believe the coefficient is evolving with much trend, as the linear component of the
 #' basis functions can be hard to penalize to zero. This sometimes causes divergence issues in `Stan`.
-#' See \code{\link[mcgv]{gp.smooth}} for details
+#' See \code{\link[mgcv]{gp.smooth}} for details
 #' @param rho Positive numeric stating the length scale to be used for approximating the
-#' squared exponential Gaussian Process smooth. See \code{\link[mcgv]{gp.smooth}} for details
+#' squared exponential Gaussian Process smooth. See \code{\link[mgcv]{gp.smooth}} for details
 #' @details \code{mvgam} currently sets up dynamic coefficients as low-rank
 #' squared exponential Gaussian Process smooths via
 #' the call \code{s(time, by = variable, bs = "gp", m = c(2, rho, 2))}. These smooths, if specified with
@@ -48,6 +49,7 @@ dynamic = function(variable, rho = 5, stationary = TRUE){
 
 #' Interpret the formula specified to mvgam and replace any dynamic terms
 #' with the correct Gaussian Process smooth specification
+#' @importFrom stats formula terms as.formula terms.formula
 #' @noRd
 interpret_mvgam = function(formula, N){
 

diff --git a/R/evaluate_mvgams.R b/R/evaluate_mvgams.R
@@ -1,5 +1,9 @@
 #'Evaluate forecasts from fitted mvgam objects
 #'
+#'@importFrom graphics barplot boxplot axis
+#'@importFrom stats quantile ecdf median predict
+#'@importFrom parallel clusterExport stopCluster setDefaultCluster
+#'@importFrom grDevices devAskNewPage
 #'@param object \code{list} object returned from \code{mvgam}
 #'@param n_samples \code{integer} specifying the number of samples to generate from the model's
 #'posterior distribution
@@ -194,19 +198,19 @@ eval_mvgam = function(object,
   }
 
   # Beta coefficients for GAM component
-  betas <- mvgam:::mcmc_chains(object$model_output, 'b')
+  betas <- mcmc_chains(object$model_output, 'b')
 
   # Family-specific parameters
   family <- object$family
-  family_pars <- mvgam:::extract_family_pars(object = object)
+  family_pars <- extract_family_pars(object = object)
 
   # Trend model
   trend_model <- object$trend_model
   use_lv <- object$use_lv
 
   # Trend-specific parameters; keep only the trend / lv estimates
   # up to the specific evaluation timepoint
-  trend_pars <- mvgam:::extract_trend_pars(object = object,
+  trend_pars <- extract_trend_pars(object = object,
                                            keep_all_estimates = FALSE,
                                            ending_time = eval_timepoint)
 
@@ -266,12 +270,12 @@ eval_mvgam = function(object,
       betas <- betas[samp_index, ]
 
       # Sample general trend-specific parameters
-      general_trend_pars <- mvgam:::extract_general_trend_pars(trend_pars = trend_pars,
+      general_trend_pars <- extract_general_trend_pars(trend_pars = trend_pars,
                                                                samp_index = samp_index)
 
       if(use_lv){
         # Propagate the lvs forward using the sampled trend parameters
-        trends <- mvgam:::forecast_trend(trend_model = trend_model,
+        trends <- forecast_trend(trend_model = trend_model,
                                          use_lv = use_lv,
                                          trend_pars = general_trend_pars,
                                          h = fc_horizon)
@@ -281,7 +285,7 @@ eval_mvgam = function(object,
       trend_states <- do.call(cbind, (lapply(seq_len(n_series), function(series){
 
         # Sample series- and trend-specific parameters
-        trend_extracts <- mvgam:::extract_series_trend_pars(series = series,
+        trend_extracts <- extract_series_trend_pars(series = series,
                                                             samp_index = samp_index,
                                                             trend_pars = trend_pars,
                                                             use_lv = use_lv)
@@ -298,7 +302,7 @@ eval_mvgam = function(object,
 
         } else {
           # Propagate the series-specific trends forward
-          out <- mvgam:::forecast_trend(trend_model = trend_model,
+          out <- forecast_trend(trend_model = trend_model,
                                         use_lv = FALSE,
                                         trend_pars = trend_extracts,
                                         h = fc_horizon)
@@ -323,7 +327,7 @@ eval_mvgam = function(object,
         })
         names(family_extracts) <- names(family_pars)
 
-        mvgam:::mvgam_predict(family = family,
+        mvgam_predict(family = family,
                       Xp = Xpmat,
                       type = 'response',
                       betas = c(betas, 1),
@@ -344,12 +348,12 @@ eval_mvgam = function(object,
       betas <- betas[samp_index, ]
 
       # Sample general trend-specific parameters
-      general_trend_pars <- mvgam:::extract_general_trend_pars(trend_pars = trend_pars,
+      general_trend_pars <- extract_general_trend_pars(trend_pars = trend_pars,
                                                        samp_index = samp_index)
 
       if(use_lv){
         # Propagate the lvs forward using the sampled trend parameters
-        trends <- mvgam:::forecast_trend(trend_model = trend_model,
+        trends <- forecast_trend(trend_model = trend_model,
                                  use_lv = use_lv,
                                  trend_pars = general_trend_pars,
                                  h = fc_horizon)
@@ -359,7 +363,7 @@ eval_mvgam = function(object,
       trend_states <- do.call(cbind, (lapply(seq_len(n_series), function(series){
 
         # Sample series- and trend-specific parameters
-        trend_extracts <- mvgam:::extract_series_trend_pars(series = series,
+        trend_extracts <- extract_series_trend_pars(series = series,
                                                             samp_index = samp_index,
                                                             trend_pars = trend_pars,
                                                             use_lv = use_lv)
@@ -376,7 +380,7 @@ eval_mvgam = function(object,
 
         } else {
           # Propagate the series-specific trends forward
-          out <- mvgam:::forecast_trend(trend_model = trend_model,
+          out <- forecast_trend(trend_model = trend_model,
                                 use_lv = FALSE,
                                 trend_pars = trend_extracts,
                                 h = fc_horizon)
@@ -401,7 +405,7 @@ eval_mvgam = function(object,
         })
         names(family_extracts) <- names(family_pars)
 
-        mvgam:::mvgam_predict(family = family,
+        mvgam_predict(family = family,
                       Xp = Xpmat,
                       type = 'response',
                       betas = c(betas, 1),
@@ -437,7 +441,7 @@ eval_mvgam = function(object,
         dplyr::pull(y)
     }))
 
-    series_score <- mvgam:::variogram_mcmc_object(truths = truths,
+    series_score <- variogram_mcmc_object(truths = truths,
                                                     fcs = series_fcs,
                                                     log = log,
                                                   weights = weights)
@@ -855,7 +859,7 @@ crps_score <- function(truth, fc, method = "edf", w = NULL,
     score <- crps_edf(truth, fc, w)
   } else {
     score <- sapply(seq_along(truth),
-                    function(i) crps_edf(y[i], fc[i, ], w[i, ]))
+                    function(i) crps_edf(truth[i], fc[i, ], w[i, ]))
   }
 
   # Is value within empirical interval?
@@ -1079,10 +1083,10 @@ stan_data <- list(n = eval_timepoint + fc_horizon,
 
 # Compile and run the stan code
 if(backend == 'cmdstanr'){
-  require(cmdstanr)
-  cmd_mod <- cmdstan_model(write_stan_file(var1_forecast_all,
-                                           dir = tools::R_user_dir("mvgam", which = "cache")),
-                           stanc_options = list('canonicalize=deprecations,braces,parentheses'))
+  requireNamespace('cmdstanr', quietly = TRUE)
+  cmd_mod <- cmdstanr::cmdstan_model(cmdstanr::write_stan_file(var1_forecast_all,
+                                                               dir = tools::R_user_dir("mvgam", which = "cache")),
+                                     stanc_options = list('canonicalize=deprecations,braces,parentheses'))
   fit <- cmd_mod$optimize(data = stan_data)
   post_trends <- fit$draws("fc", format = 'draws_matrix')
   draw_names <- dimnames(post_trends)$variable
@@ -1112,7 +1116,7 @@ if(backend == 'cmdstanr'){
   }
 
   } else {
-  require(rstan)
+  requireNamespace('rstan', quietly = TRUE)
   options(mc.cores = parallel::detectCores())
   dir.create(paste0(tools::R_user_dir("mvgam", which = "cache")),
              showWarnings = FALSE)
@@ -1121,9 +1125,9 @@ if(backend == 'cmdstanr'){
   m <- rstan::stan_model(file = paste0(tools::R_user_dir("mvgam", which = "cache"),
                                        '/var1_forecast_all.stan'),
                          auto_write = TRUE)
-  fit <- optimizing(object = m,
-                     data = stan_data,
-                    as_vector = FALSE)
+  fit <- rstan::optimizing(object = m,
+                           data = stan_data,
+                           as_vector = FALSE)
   post_trends <- fit$par$fc
 
   # Store each sample's trend forecasts as an array