diff --git a/.Rbuildignore b/.Rbuildignore
index 114f0403..2275c8c2 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,3 +1,5 @@
 ^mvgam\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
index d754cf2d..00c23e3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
 .RData
 .Ruserdata
 NEON_manuscript/Results/
+doc
+Meta
diff --git a/DESCRIPTION b/DESCRIPTION
index befabfd4..e902f940 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,3 +33,5 @@ Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.1.1
+Suggests: knitr, rmarkdown
+VignetteBuilder: knitr
diff --git a/NEON_manuscript/Manuscript/Clark_etal_2021_Dynamic_GAMs.docx b/NEON_manuscript/Manuscript/Clark_etal_2021_Dynamic_GAMs.docx
index f13095d1..2b637a84 100644
Binary files a/NEON_manuscript/Manuscript/Clark_etal_2021_Dynamic_GAMs.docx and b/NEON_manuscript/Manuscript/Clark_etal_2021_Dynamic_GAMs.docx differ
diff --git a/NEON_manuscript/Manuscript/~$ark_etal_2021_Dynamic_GAMs.docx b/NEON_manuscript/Manuscript/~$ark_etal_2021_Dynamic_GAMs.docx
deleted file mode 100644
index 32ca91cf..00000000
Binary files a/NEON_manuscript/Manuscript/~$ark_etal_2021_Dynamic_GAMs.docx and /dev/null differ
diff --git a/NEON_manuscript/amb_hyp_tests.R b/NEON_manuscript/amb_hyp_tests.R
index 2caff84e..48421d31 100644
--- a/NEON_manuscript/amb_hyp_tests.R
+++ b/NEON_manuscript/amb_hyp_tests.R
@@ -47,9 +47,9 @@ hyp3 = y ~ s(season, by = series, m = 1, k = 8) - 1
 
 # Fit each hypothesis
-n.burnin = 1000
-n.iter = 1000
-thin = 1
+n.burnin = 100000
+n.iter = 5000
+thin = 5
 
 # Use default priors for latent drift terms and for smooth penalties
 fit_null <- fit_mvgam(data_train = all_data$data_train,
diff --git a/NEON_manuscript/ixodes_hyp_tests.R b/NEON_manuscript/ixodes_hyp_tests.R
index d8e9efbf..91900833 100644
--- a/NEON_manuscript/ixodes_hyp_tests.R
+++ b/NEON_manuscript/ixodes_hyp_tests.R
@@ -10,7 +10,7 @@ source('NEON_manuscript/neon_utility_functions.R')
 # Prep data
 all_data <- prep_neon_data(species = 'Ixodes_scapularis', split_prop = 0.8)
 
-#### Set hypothtesis formulae ####
+#### Set hypothesis formulae ####
 # NULL. There is no seasonal pattern to be estimated, and we simply let the latent
 # factors and site-level effects of growing days influence the series dynamics
 null_hyp = y ~ siteID + s(cum_gdd, by = siteID, k = 3) - 1
@@ -47,9 +47,9 @@ hyp3 = y ~ s(season, by = series, m = 1, k = 8) - 1
 
 # Fit each hypothesis
-n.burnin = 2000
-n.iter = 2000
-thin = 1
+n.burnin = 1000
+n.iter = 5000
+thin = 5
 
 fit_null <- fit_mvgam(data_train = all_data$data_train,
                       data_test = all_data$data_test,
@@ -75,13 +75,6 @@ fit_hyp1 <- fit_mvgam(data_train = all_data$data_train,
                       auto_update = F,
                       interval_width = 0.9)
 
-par(mfrow = c(1,1))
-compare_mvgams(model1 = fit_null$out_gam_mod,
-               model2 = fit_hyp1$out_gam_mod,
-               fc_horizon = 6,
-               n_evaluations = 20, n_samples = 1000,
-               n_cores = 2)
-
 fit_hyp2 <- fit_mvgam(data_train = all_data$data_train,
                       data_test = all_data$data_test,
                       formula = hyp2,
diff --git a/NEON_manuscript/simulation_component.R b/NEON_manuscript/simulation_component.R
index baea68e5..e6a4e704 100644
--- a/NEON_manuscript/simulation_component.R
+++ b/NEON_manuscript/simulation_component.R
@@ -6,7 +6,6 @@ source('neon_utility_functions.R')
 # Use hierarchical seasonality for all;
 # trend components: 0.3 and 0.7
 # N_series: 4, 12
-# Overdispersion: 0.5, 100
 # Length: 6 years
 # Missingness: none, 10%, 50%
 set.seed(110)
@@ -22,7 +21,7 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
   sim_data <- sim_mvgam(T = run_parameters$T[x],
                         n_series = run_parameters$n_series[x],
                         prop_missing = run_parameters$prop_missing[x],
-                        train_prop = 0.85,
+                        train_prop = 0.833,
                         trend_rel = run_parameters$trend_rel[x],
                         seasonality = 'hierarchical')
 
@@ -30,10 +29,10 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
   t <- Sys.time()
   nullmod <- mvjagam(data_train = sim_data$data_train,
                      data_test = sim_data$data_test,
-                     formula = y ~ s(series, bs = 're') - 1,
-                     use_nb = TRUE,
+                     formula = y ~ s(series, bs = 're'),
+                     family = 'nb',
                      use_lv = F,
-                     n.burnin = 60000,
+                     n.burnin = 25000,
                      n.iter = 5000,
                      thin = 5,
                      auto_update = T)
@@ -103,12 +102,13 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
   # Fit a multivariate gam with hierarchical seasonality
   t <- Sys.time()
   hier_mod <- mvjagam(data_train = sim_data$data_train,
-                      data_test = sim_data$data_test,
-                      formula = y ~ s(season, k = 4, m = 2, bs = 'cc') +
-                        s(season, by = series, m = 1, k = 8) - 1,
-                      use_nb = TRUE,
-                      use_lv = T,
-                      n.burnin = 90000,
+                      data_test = sim_data$data_test,
+                      formula = y ~ s(season, k = 4, m = 2, bs = 'cc') +
+                        s(season, by = series, m = 1, k = 8),
+                      family = 'nb',
+                      trend_model = 'AR3',
+                      use_lv = T,
+                      n.burnin = 25000,
                       n.iter = 5000,
                       thin = 5,
                       auto_update = T)
@@ -132,10 +132,10 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
                          correlations$mean_correlations))
 
   # Fit a standard mgcv version of the hierarchical model as a final comparison, including
-  # non-wiggly series-specific year smooths
+  # non-wiggly series-specific year smooths with penalty on the first derivative
   mgcv_hier_mod <- gam(y ~ s(season, k = 4, m = 2, bs = 'cc') +
                          s(season, by = series, m = 1, k = 8) +
-                         s(year, by = series, k = 3, bs = 'gp'),
+                         s(year, by = series, k = 3, bs = 'gp', m = 1),
                        family = nb(), data = sim_data$data_train)
 
   # Extract posterior predictions from the mgcv model and calculate out of sample DRPS
diff --git a/R/mvjagam.R b/R/mvjagam.R
index 0fea3070..e9e8eba9 100644
--- a/R/mvjagam.R
+++ b/R/mvjagam.R
@@ -85,7 +85,7 @@ mvjagam = function(formula,
                    prior_simulation = FALSE,
                    family = 'nb',
                    use_lv = FALSE,
-                   n_lv = 5,
+                   n_lv,
                    trend_model = 'RW',
                    n.chains = 2,
                    n.burnin = 5000,
@@ -111,18 +111,20 @@ mvjagam = function(formula,
   # Estimate the GAM model using mgcv so that the linear predictor matrix can be easily calculated
   # when simulating from the JAGS model later on
   if(!missing(knots)){
-    ss_gam <- mgcv::gam(formula(formula),
+    ss_gam <- mgcv::bam(formula(formula),
                         data = data_train,
-                        method = "REML",
+                        method = "fREML",
                         family = poisson(),
                         drop.unused.levels = FALSE,
-                        knots = knots)
+                        knots = knots,
+                        nthreads = parallel::detectCores()-1)
   } else {
-    ss_gam <- mgcv::gam(formula(formula),
+    ss_gam <- mgcv::bam(formula(formula),
                         data = data_train,
-                        method = "REML",
+                        method = "fREML",
                         family = poisson(),
-                        drop.unused.levels = FALSE)
+                        drop.unused.levels = FALSE,
+                        nthreads = parallel::detectCores()-1)
   }
 
   # Make jags file and appropriate data structures
diff --git a/man/summary_mvgam.Rd b/man/summary_mvgam.Rd
index efe2f800..7dfb627d 100644
--- a/man/summary_mvgam.Rd
+++ b/man/summary_mvgam.Rd
@@ -22,9 +22,7 @@ so summaries for the \code{rho} parameters may include more penalty terms than t
 the original model formula. The function also returns an approximate smooth term significance
 table, but note that this signficance comes with the usual caveats associated with calculations
 of p-values for smooth terms in \code{\link[mcgv]{summary.gam}} (i.e. all p-values are computed without
-considering uncertainty in the smoothing parameter estimates), with the added challenge that
-Estimated Degrees of Freedom (edf) for each smooth term are taken from a call to \code{\link[mcgv]{gam}}
-that does not include the latent trend component of the model.
+considering uncertainty in the smoothing parameter estimates).
 }
 \author{
 Nicholas J Clark
diff --git a/pfilter/particles.rda b/pfilter/particles.rda
deleted file mode 100644
index e26782a3..00000000
Binary files a/pfilter/particles.rda and /dev/null differ
diff --git a/test_mvjagam.R b/test_mvjagam.R
index 171211c9..ee24604a 100644
--- a/test_mvjagam.R
+++ b/test_mvjagam.R
@@ -2,7 +2,270 @@ library(mvgam)
 library(dplyr)
 
-# Test that the series_to_mvgam function works to show how ts or xts objects can easily
+# Replicate the lynx analysis from the ESA 2018 GAM course, with some minor adjustments
+library(ggplot2)
+library(tidyr)
+data(lynx)
+lynx_full = data.frame(year = 1821:1934, population = as.numeric(lynx))
+head(lynx_full)
+ggplot(aes(x = year, y = population), data = lynx_full) +
+  geom_point() +
+  geom_line()
+acf(lynx_full$population)
+
+# There is a clear ~19-year cyclic pattern to the data, so I create a 'season'
+# term that can be used to model this effect and give a better representation
+# of the data generating process
+plot(stl(ts(lynx_full$population, frequency = 19), s.window = 'periodic'))
+lynx_full$season <- (lynx_full$year %% 19) + 1
+lynx_full$y <- lynx_full$population
+lynx_full$series <- factor('series1')
+
+# Add lag indicators and fit the nonlinear lag model that gave the best
+# one step ahead point forecasts in the workshop example (order 2)
+mean_pop_l = mean(log(lynx_full$population))
+lynx_full = mutate(lynx_full,
+                   popl = log(population),
+                   lag1 = lag(popl, 1, default = mean_pop_l),
+                   lag2 = lag(popl, 2, default = mean_pop_l),
+                   lag3 = lag(popl, 3, default = mean_pop_l),
+                   lag4 = lag(popl, 4, default = mean_pop_l),
+                   lag5 = lag(popl, 5, default = mean_pop_l),
+                   lag6 = lag(popl, 6, default = mean_pop_l))
+
+# Split the data into training (first 75 years)
+lynx_train = lynx_full[1:75,]
+
+# and testing: we will test on the next 5 years of data
+lynx_test = lynx_full[76:80,]
+
+# The best-forecasting model in the course was with nonlinear smooths of
+# lags 1 and 2; the only difference here is that I also include a cyclic smooth
+# for the 19-year cycles
+lynx_mgcv = gam(population ~ s(season, bs = 'cc') +
+                  s(lag1, k = 5) + s(lag2, k = 5),
+                knots = list(season = c(0.5, 19.5)),
+                data = lynx_train, family = "poisson",
+                method = "REML")
+summary(lynx_mgcv)
+plot(lynx_mgcv, pages = 1, shade = T)
+
+# This model captures most of the deviance in the series and the
+# functions are all confidently estimated to be non-zero and non-flat. There is
+# something strange going on with the seasonal cyclic pattern, but we will
+# look at that more later on. Anyway, so far, so good. Now for some forecasts
+# for the out of sample period
+
+# Use the uncertainties around smooth functions to generate a set of possible
+# coefficients to draw from when simulating forecast paths
+coef_sim <- gam.mh(lynx_mgcv)$bs
+
+# Function to perform forecast simulations from the nonlinear lag model
+# in a recursive fashion
+recurse_nonlin = function(model, lagged_vals, h){
+  # Initiate state vector
+  states <- rep(NA, length = h + 2)
+  # Last two values of the predicted density (back-transformed from the
+  # log scale) begin the state vector
+  states[1] <- as.numeric(exp(lagged_vals[2]))
+  states[2] <- as.numeric(exp(lagged_vals[1]))
+  # Get a random sample of the smooth coefficient uncertainty matrix
+  # to use for the entire forecast horizon of this particular path
+  gam_coef_index <- sample(seq(1, NROW(coef_sim)), 1, T)
+  # For each following timestep, recursively predict based on the
+  # predictions at each previous lag
+  for (t in 3:(h + 2)) {
+    # Build the linear predictor matrix using the two previous lags
+    # of the (log) density
+    newdata <- data.frame(lag1 = log(states[t-1]),
+                          lag2 = log(states[t-2]),
+                          season = lynx_test$season[t-2])
+    colnames(newdata) <- c('lag1', 'lag2', 'season')
+    Xp <- predict(model, newdata = newdata, type = 'lpmatrix')
+    # Posterior predictions
+    mu <- rpois(1, lambda = exp(Xp %*% coef_sim[gam_coef_index,]))
+    states[t] <- mu
+  }
+  states[-c(1:2)]
+}
+
+# Create the mgcv gam's forecast distribution by generating 2000 simulated
+# forecast paths. Each path is fed the true observed values for the last two lags
+# of the first out of sample timepoint, but they can deviate when simulating ahead
+# depending on their particular draw of possible coefficients from the model's
+# estimated covariance matrix. Note, this is a bit slow and could easily be
+# parallelised to speed up computations (a sketch of one option follows below)
+gam_sims <- matrix(NA, nrow = 2000, ncol = 5)
+for(i in 1:2000){
+  gam_sims[i,] <- recurse_nonlin(lynx_mgcv,
+                                 lagged_vals = head(lynx_test,1)[c(7,8)],
+                                 h = 5)
+}
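+
+# A hedged sketch of one way to parallelise those simulations with the base
+# parallel package (illustrative only; the objects exported to the workers
+# are the ones defined above, and the cluster size is an arbitrary choice)
+cl <- parallel::makeCluster(parallel::detectCores() - 1)
+parallel::clusterSetRNGStream(cl, 110)
+parallel::clusterExport(cl, c('recurse_nonlin', 'coef_sim',
+                              'lynx_mgcv', 'lynx_test'))
+parallel::clusterEvalQ(cl, library(mgcv))
+gam_sims_par <- do.call(rbind, parallel::parLapply(cl, 1:2000, function(i){
+  recurse_nonlin(lynx_mgcv,
+                 lagged_vals = head(lynx_test, 1)[c(7, 8)],
+                 h = 5)
+}))
+parallel::stopCluster(cl)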
+
+# Plot the mgcv model's out of sample forecast for the next 5 years ahead
+par(mfrow = c(1,2))
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.95))
+yupper <- max(lynx_full$population) * 1.25
+plot(cred_ints[3,] ~ seq(1:NCOL(cred_ints)), type = 'l',
+     col = rgb(1,0,0, alpha = 0),
+     ylim = c(0, yupper),
+     ylab = 'Predicted lynx trappings',
+     xlab = 'Forecast horizon',
+     main = 'mgcv')
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.68))
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+lines(cred_ints[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+points(lynx_test$population[1:5], pch = 16)
+lines(lynx_test$population[1:5])
+
+# Not a great forecast. The general shape is correct, but basically none of the
+# observations lie within the model's 95% uncertainty intervals.
+
+# Now fit an mvgam model; it essentially fits a similar model to the above,
+# but with linear lag coefficients within a full time series model
+# for the errors, rather than smoothing splines that do not incorporate a concept
+# of the future
+lynx_mvgam <- mvjagam(data_train = lynx_train,
+                      data_test = lynx_test,
+                      formula = y ~ s(season, bs = 'cc', k = 10),
+                      knots = list(season = c(0.5, 19.5)),
+                      family = 'poisson',
+                      use_lv = F,
+                      trend_model = 'AR1',
+                      n.burnin = 4000,
+                      n.iter = 1000,
+                      thin = 1,
+                      auto_update = F)
+plot_mvgam_smooth(lynx_mvgam, smooth = 'season')
+plot_mvgam_trend(lynx_mvgam)
+plot_mvgam_fc(lynx_mvgam)
+plot_mvgam_uncertainty(lynx_mvgam, data_test = lynx_test)
+
+# Calculate the out of sample forecast from the mvgam model
+fits <- MCMCvis::MCMCchains(lynx_mvgam$jags_output, 'ypred')
+fits <- fits[,(NROW(lynx_mvgam$obs_data)+1):(NROW(lynx_mvgam$obs_data)+5)]
+cred_ints <- apply(fits, 2, function(x) hpd(x, 0.95))
+plot(cred_ints[3,] ~ seq(1:NCOL(cred_ints)), type = 'l',
+     col = rgb(1,0,0, alpha = 0),
+     ylim = c(0, yupper),
+     ylab = '',
+     xlab = 'Forecast horizon',
+     main = 'mvgam')
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+cred_ints <- apply(fits, 2, function(x) hpd(x, 0.68))
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+lines(cred_ints[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+points(lynx_test$population[1:5], pch = 16)
+lines(lynx_test$population[1:5])
+
+# The mvgam has much more realistic uncertainty than the mgcv version, with all
+# out of sample observations falling within the model's 95% credible intervals.
+# Have a look at this model's summary to see what is being estimated
+summary_mvgam(lynx_mvgam)
+
+# Now inspect each model's estimated 19-year cyclic pattern
+par(mfrow = c(1,2))
+plot(lynx_mgcv, select = 1, shade = T)
+plot_mvgam_smooth(lynx_mvgam, 1, 'season')
+
+# The mgcv version includes a fairly wiggly and strange function for the
+# cyclic effect. I postulate that it is overfitting the nonlinear lag functions
+# (i.e. the lag effects are allowed to wiggle too much and overfit), making it challenging
+# to also estimate the cyclic pattern. If we drop the nonlinear lag functions and reduce
+# the maximum complexity of the cyclic smooth, can this be remedied and forecasts improved?
+lynx_mgcv2 = gam(population ~ s(season, bs = 'cc', k = 5) +
+                   lag1 + lag2,
+                 knots = list(season = c(0.5, 19.5)),
+                 data = lynx_train, family = "poisson",
+                 method = "REML")
+summary(lynx_mgcv2)
+par(mfrow = c(1,2))
+plot(lynx_mgcv2, select = 1, shade = T)
+plot_mvgam_smooth(lynx_mvgam, 1, 'season')
+
+# The cyclic smooth looks better here. How are the forecasts?
+
+# Re-plot the first mgcv model's out of sample forecast
+par(mfrow = c(1,2))
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.95))
+yupper <- max(lynx_full$population) * 1.25
+plot(cred_ints[3,] ~ seq(1:NCOL(cred_ints)), type = 'l',
+     col = rgb(1,0,0, alpha = 0),
+     ylim = c(0, yupper),
+     ylab = 'Predicted lynx trappings',
+     xlab = 'Forecast horizon',
+     main = 'mgcv (smooth lags)')
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.68))
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+lines(cred_ints[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+points(lynx_test$population[1:5], pch = 16)
+lines(lynx_test$population[1:5])
+
+# Now calculate the linear lag model's forecasts and plot
+coef_sim <- gam.mh(lynx_mgcv2)$bs
+gam_sims <- matrix(NA, nrow = 2000, ncol = 5)
+for(i in 1:2000){
+  gam_sims[i,] <- recurse_nonlin(lynx_mgcv2,
+                                 lagged_vals = head(lynx_test,1)[c(7,8)],
+                                 h = 5)
+}
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.95))
+yupper <- max(lynx_full$population) * 1.25
+plot(cred_ints[3,] ~ seq(1:NCOL(cred_ints)), type = 'l',
+     col = rgb(1,0,0, alpha = 0),
+     ylim = c(0, yupper),
+     ylab = 'Predicted lynx trappings',
+     xlab = 'Forecast horizon',
+     main = 'mgcv (linear lags)')
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.68))
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+lines(cred_ints[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+points(lynx_test$population[1:5], pch = 16)
+lines(lynx_test$population[1:5])
+
+# The forecast is certainly better than previously but still misses the mark on nearly
+# all of the observations, and it has perhaps even more unrealistically narrow confidence
+# intervals. Of course this is just one out of sample test of the model, and to really
+# determine which model is most appropriate for forecasting we would want to run many of these
+# tests using a rolling window approach (a rough sketch of the idea follows below).
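+
+# A minimal sketch of such a rolling window evaluation for the linear lag model
+# (illustrative only: the origins, the 5-year horizon and the plug-in Poisson
+# intervals are simplifying assumptions, and observed lags are used at each
+# horizon rather than recursively simulated ones)
+roll_origins <- seq(60, 70, by = 5)
+roll_coverage <- sapply(roll_origins, function(last_obs){
+  train <- lynx_full[1:last_obs, ]
+  test <- lynx_full[(last_obs + 1):(last_obs + 5), ]
+  mod <- gam(population ~ s(season, bs = 'cc', k = 5) + lag1 + lag2,
+             knots = list(season = c(0.5, 19.5)),
+             data = train, family = "poisson",
+             method = "REML")
+  # Plug-in 95% Poisson intervals at each forecast horizon
+  lambda <- predict(mod, newdata = test, type = 'response')
+  # Proportion of out of sample observations falling inside the intervals
+  mean(test$population >= qpois(0.025, lambda) &
+         test$population <= qpois(0.975, lambda))
+})
+roll_coverage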
+
+# We can also view the mvgam's posterior predictions for the entire series (testing and training)
+par(mfrow = c(1,1))
+plot_mvgam_fc(lynx_mvgam, data_test = lynx_test)
+
+# And the estimated trend
+par(mfrow = c(1,1))
+plot_mvgam_trend(lynx_mvgam, data_test = lynx_test)
+
+# Have a look at the model's residuals, which are posterior medians of Dunn-Smyth
+# randomised quantile residuals so should follow approximate normality. We are primarily
+# looking for a lack of autocorrelation, which would suggest our AR1 model is appropriate for the
+# latent trend
+plot_mvgam_resids(lynx_mvgam)
+
+
+# Test that the series_to_mvgam function works to show how ts or xts objects can easily
 # be converted into the correct format
 library(xts)
 library(forecast)
diff --git a/vignettes/Introduction_to_mvgam.Rmd b/vignettes/Introduction_to_mvgam.Rmd
new file mode 100644
index 00000000..19d1c00e
--- /dev/null
+++ b/vignettes/Introduction_to_mvgam.Rmd
@@ -0,0 +1,248 @@
+---
+title: "Introduction to mvgam"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Introduction_to_mvgam}
+  %\VignetteEngine{knitr::rmarkdown}
+  \usepackage[utf8]{inputenc}
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+In this vignette we will introduce dynamic Generalised Additive Models and some of the key utility functions provided in `mvgam`. First, a univariate example shows how challenging it can be to forecast ahead with conventional GAMs and how `mvgam` overcomes these challenges. We begin by replicating the lynx analysis from the [2018 Ecological Society of America workshop on GAMs](https://noamross.github.io/mgcv-esa-2018/) that was hosted by Eric Pedersen, David L. Miller, Gavin Simpson, and Noam Ross, with some minor adjustments. To start, load the data and plot the series as well as its estimated autocorrelation function
+```{r, fig.show='hold', fig.align='center'}
+library(mvgam)
+data(lynx)
+lynx_full = data.frame(year = 1821:1934,
+                       population = as.numeric(lynx))
+plot(lynx_full$population, type = 'l', ylab = 'Lynx trappings',
+     xlab = 'Time')
+acf(lynx_full$population, main = '')
+```
+
+There is a clear ~19-year cyclic pattern to the data, so I create a `season` term that can be used to model this effect and give a better representation of the data generating process. I also put the year term on a more manageable scale
+```{r, fig.width = 5, fig.height = 4, fig.align='center'}
+plot(stl(ts(lynx_full$population, frequency = 19), s.window = 'periodic'))
+lynx_full$season <- (lynx_full$year %% 19) + 1
+lynx_full$year <- lynx_full$year - 1820
+```
+
+Add lag indicators needed to fit the nonlinear lag models that gave the best one step ahead point forecasts in the ESA workshop example. As in the example, we specify the `default` argument in the `lag` function as the mean log population.
+```{r}
+mean_pop_l = mean(log(lynx_full$population))
+lynx_full = dplyr::mutate(lynx_full,
+                          popl = log(population),
+                          lag1 = dplyr::lag(popl, 1, default = mean_pop_l),
+                          lag2 = dplyr::lag(popl, 2, default = mean_pop_l),
+                          lag3 = dplyr::lag(popl, 3, default = mean_pop_l),
+                          lag4 = dplyr::lag(popl, 4, default = mean_pop_l),
+                          lag5 = dplyr::lag(popl, 5, default = mean_pop_l),
+                          lag6 = dplyr::lag(popl, 6, default = mean_pop_l))
+```
+
+For `mvgam` models, the response needs to be labelled `y` and we also need an indicator of the series name as a `factor` variable
+```{r}
+lynx_full$y <- lynx_full$population
+lynx_full$series <- factor('series1')
+```
+
+Split the data into training (first 75 years) and testing (next 10 years of data) to evaluate multi-step ahead forecasts
+```{r}
+lynx_train = lynx_full[1:75, ]
+lynx_test = lynx_full[76:85, ]
+```
+
+The best-forecasting model in the course was with nonlinear smooths of lags 1 and 2; we use those here. The only differences are that we also include a cyclic smooth for the 19-year cycles, as this seems like an important feature, as well as a yearly smooth for the long-term trend with a penalty on the first derivative to prevent linear extrapolation into the future
+```{r}
+lynx_mgcv = gam(population ~ s(season, bs = 'cc', k = 10) +
+                  s(year, k = 5, m = 1) + s(lag1, k = 5) +
+                  s(lag2, k = 5),
+                knots = list(season = c(0.5, 19.5)),
+                data = lynx_train, family = "poisson",
+                method = "REML")
+```
+
+Inspect the model's summary and estimated smooth functions for the season, year and lag terms
+```{r}
+summary(lynx_mgcv)
+plot(lynx_mgcv, select = 1)
+plot(lynx_mgcv, select = 2)
+plot(lynx_mgcv, select = 3)
+plot(lynx_mgcv, select = 4)
+```
+
+This model captures most of the deviance in the series and the functions are all confidently estimated to be non-zero and non-flat. So far, so good. Now for some forecasts for the out of sample period. First we must take posterior draws of smooth beta coefficients to incorporate the uncertainties around smooth functions when simulating forecast paths
+```{r}
+coef_sim <- gam.mh(lynx_mgcv)$bs
+```
+
+Now we define a function to perform forecast simulations from the nonlinear lag model in a recursive fashion. Using starting values for the last two lags, the function will iteratively project the path ahead with a random sample from the model's coefficient posterior
+```{r}
+recurse_nonlin = function(model, lagged_vals, h){
+  # Initiate state vector
+  states <- rep(NA, length = h + 2)
+  # Last two values of the conditional expectations begin the state vector
+  states[1] <- as.numeric(exp(lagged_vals[2]))
+  states[2] <- as.numeric(exp(lagged_vals[1]))
+  # Get a random sample of the smooth coefficient uncertainty matrix
+  # to use for the entire forecast horizon of this particular path
+  gam_coef_index <- sample(seq(1, NROW(coef_sim)), 1, T)
+  # For each following timestep, recursively predict based on the
+  # predictions at each previous lag
+  for (t in 3:(h + 2)) {
+    # Build the GAM linear predictor matrix using the two previous lags
+    # of the (log) density
+    newdata <- data.frame(lag1 = log(states[t-1] + 0.01),
+                          lag2 = log(states[t-2] + 0.01),
+                          season = lynx_test$season[t-2],
+                          year = lynx_test$year[t-2])
+    colnames(newdata) <- c('lag1', 'lag2', 'season', 'year')
+    Xp <- predict(model, newdata = newdata, type = 'lpmatrix')
+    # Calculate the posterior prediction for this timepoint
+    mu <- rpois(1, lambda = exp(Xp %*% coef_sim[gam_coef_index,]))
+    # Fill in the state vector and iterate to the next timepoint
+    states[t] <- mu
+  }
+  # Return the forecast path
+  states[-c(1:2)]
+}
+```
+
+Create the GAM's forecast distribution by generating `1000` simulated forecast paths. Each path is fed the true observed values for the last two lags of the first out of sample timepoint, but they can deviate when simulating ahead depending on their particular draw of possible coefficients. Note, this is a bit slow and could easily be parallelised to speed up computations
+```{r}
+gam_sims <- matrix(NA, nrow = 1000, ncol = 10)
+for(i in 1:1000){
+  gam_sims[i,] <- recurse_nonlin(lynx_mgcv,
+                                 lagged_vals = c(lynx_test$lag1[1],
+                                                 lynx_test$lag2[1]),
+                                 h = 10)
+}
+```
+
+Plot the mgcv model's out of sample forecast for the next 10 years ahead
+```{r, fig.width=5, fig.height=4, fig.align='center'}
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.95))
+yupper <- max(lynx_full$population) * 1.25
+plot(cred_ints[3,] ~ seq(1:NCOL(cred_ints)), type = 'l',
+     col = rgb(1,0,0, alpha = 0),
+     ylim = c(0, yupper),
+     ylab = 'Predicted lynx trappings',
+     xlab = 'Forecast horizon',
+     main = 'mgcv')
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+cred_ints <- apply(gam_sims, 2, function(x) hpd(x, 0.68))
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+lines(cred_ints[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+points(lynx_test$population[1:10], pch = 16)
+lines(lynx_test$population[1:10])
+```
+
+A decent forecast? The shape is certainly correct, but the 95% uncertainty intervals are far too narrow due to the interpolation of the precisely estimated lag smooth functions and the flat extrapolation of the yearly smooth. Now fit an `mvgam` model; it fits a similar model to the above but with a full time series model for the errors, rather than smoothing splines that do not incorporate a concept of the future. We also exclude the `year` term to reduce any possible extrapolation and because the latent trend model will capture this temporal variation. We estimate the model in `JAGS` using MCMC sampling (note that `JAGS` 4.3.0 is required; installation links are found [here](https://sourceforge.net/projects/mcmc-jags/files/))
+```{r}
+lynx_mvgam <- mvjagam(data_train = lynx_train,
+                      data_test = lynx_test,
+                      formula = y ~ s(season, bs = 'cc', k = 10),
+                      knots = list(season = c(0.5, 19.5)),
+                      family = 'poisson',
+                      trend_model = 'AR1',
+                      n.burnin = 60000,
+                      n.iter = 10000,
+                      thin = 10,
+                      auto_update = F)
+```
+
+Calculate the out of sample forecast from the fitted `mvgam` model
+```{r, fig.width=5, fig.height=4, fig.align='center'}
+fits <- MCMCvis::MCMCchains(lynx_mvgam$jags_output, 'ypred')
+fits <- fits[,(NROW(lynx_mvgam$obs_data)+1):(NROW(lynx_mvgam$obs_data)+10)]
+cred_ints <- apply(fits, 2, function(x) hpd(x, 0.95))
+plot(cred_ints[3,] ~ seq(1:NCOL(cred_ints)), type = 'l',
+     col = rgb(1,0,0, alpha = 0),
+     ylim = c(0, yupper),
+     ylab = '',
+     xlab = 'Forecast horizon',
+     main = 'mvgam')
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+cred_ints <- apply(fits, 2, function(x) hpd(x, 0.68))
+polygon(c(seq(1:(NCOL(cred_ints))), rev(seq(1:NCOL(cred_ints)))),
+        c(cred_ints[1,],rev(cred_ints[3,])),
+        col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+lines(cred_ints[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+points(lynx_test$population[1:10], pch = 16)
+lines(lynx_test$population[1:10])
+```
+
+The `mvgam` has much more realistic uncertainty than the `mgcv` version, with all out of sample observations falling within the model's 95% credible intervals. Of course this is just one out of sample test of the model, and to really determine which model is most appropriate for forecasting we would want to run many of these tests using a [rolling window approach](https://robjhyndman.com/hyndsight/tscv/). Have a look at this model's summary to see what is being estimated (note that longer MCMC runs would probably be needed to increase effective sample sizes)
+```{r}
+summary_mvgam(lynx_mvgam)
+```
+
+Now inspect each model's estimated smooth for the 19-year cyclic pattern. Note that the `mvgam` smooth plot is on a different scale compared to the `mgcv` plot, but interpretation is similar
+```{r, fig.show='hold', fig.align='center'}
+plot(lynx_mgcv, select=1, shade=T)
+plot_mvgam_smooth(lynx_mvgam, 1, 'season')
+```
+
+We can also view the mvgam's posterior predictions for the entire series (testing and training)
+```{r, fig.width=5, fig.height=4, fig.align='center'}
+plot_mvgam_fc(lynx_mvgam, data_test = lynx_test)
+```
+
+And the estimated trend
+```{r, fig.width=5, fig.height=4, fig.align='center'}
+plot_mvgam_trend(lynx_mvgam, data_test = lynx_test)
+```
+
+A key aspect of ecological forecasting is to understand [how different components of a model contribute to forecast uncertainty](https://esajournals.onlinelibrary.wiley.com/doi/full/10.1002/eap.1589). We can estimate contributions to forecast uncertainty for the GAM smooth functions and the latent trend using `mvgam`
+```{r, fig.width=5, fig.height=4, fig.align='center'}
+plot_mvgam_uncertainty(lynx_mvgam, data_test = lynx_test)
+```
+
+Clearly the trend dominates the forecast uncertainty, which is not unexpected as this is the stochastic component of the model. There may be some long-term cyclicity in the trend based on the trend plot above, so perhaps another cyclic smooth term could be useful in the GAM linear predictor. But we will leave the model as-is for this example. Diagnostics of the model can also be performed using `mvgam`. Have a look at the model's residuals, which are posterior medians of Dunn-Smyth randomised quantile residuals so should follow approximate normality. We are primarily looking for a lack of autocorrelation, which would suggest our AR1 model is appropriate for the latent trend
+```{r, fig.width=6, fig.height=6, fig.align='center'}
+plot_mvgam_resids(lynx_mvgam)
+```
+
+Another useful utility of `mvgam` is the ability to use rolling window forecasts to evaluate competing models that may represent different hypotheses about the series dynamics. Here we will fit a poorly specified model to showcase how this evaluation works
+```{r}
+lynx_mvgam_poor <- mvjagam(data_train = lynx_train,
+                           data_test = lynx_test,
+                           formula = y ~ s(season, bs = 'gp', k = 3),
+                           family = 'poisson',
+                           trend_model = 'RW',
+                           n.burnin = 10000,
+                           n.iter = 5000,
+                           thin = 5,
+                           auto_update = F)
+```
+
+We choose a set of timepoints within the training data to forecast from, allowing us to simulate a situation where the model's parameters had already been estimated but we have only observed data up to the evaluation timepoint and would like to generate forecasts from the latent trends. Here we use year 10 as our last observation and forecast ahead for the next 10 years.
+```{r}
+mod1_eval <- eval_mvgam(lynx_mvgam, eval_timepoint = 10, fc_horizon = 10)
+mod2_eval <- eval_mvgam(lynx_mvgam_poor, eval_timepoint = 10, fc_horizon = 10)
+```
+
+Summary statistics of the two models' out of sample Discrete Rank Probability Score (DRPS) indicate that the well-specified model performs markedly better (far lower DRPS)
+```{r}
+summary(mod1_eval$series1$drps)
+summary(mod2_eval$series1$drps)
+```
+
+Nominal coverages for both models' 90% prediction intervals
+```{r}
+mean(mod1_eval$series1$in_interval)
+mean(mod2_eval$series1$in_interval)
+```
+
+The `compare_mvgams` function automates this process by rolling along a set of timepoints for each model, ensuring a more in-depth evaluation of each competing model at the same set of timepoints.
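+
+As a hedged illustration of what such a call might look like (the argument names mirror the `compare_mvgams` usage seen elsewhere in this repository, so treat the exact interface as an assumption rather than a confirmed API), we could compare the two lynx models directly:
+```{r, eval = FALSE}
+# Roll both models along a common set of evaluation timepoints and
+# summarise their out of sample DRPS (sketch only; not run here)
+compare_mvgams(model1 = lynx_mvgam,
+               model2 = lynx_mvgam_poor,
+               fc_horizon = 10,
+               n_evaluations = 20,
+               n_samples = 1000,
+               n_cores = 2)
+```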