diff --git a/.Rbuildignore b/.Rbuildignore
index 206a9a08..114f0403 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,2 +1,3 @@
 ^mvgam\.Rproj$
 ^\.Rproj\.user$
+^LICENSE\.md$
diff --git a/DESCRIPTION b/DESCRIPTION
index 24cf163a..f2213eca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,15 +1,21 @@
 Package: mvgam
-Title: What the Package Does (One Line, Title Case)
+Title: Multivariate Bayesian Generalised Additive Models for discrete time series
 Version: 0.0.0.9000
 Authors@R: 
-    person(given = "First",
-           family = "Last",
+    person(given = "Nicholas",
+           family = "Clark",
            role = c("aut", "cre"),
-           email = "first.last@example.com",
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: What the package does (one paragraph).
-License: `use_mit_license()`, `use_gpl3_license()` or friends to
-    pick a license
+           email = "nicholas.j.clark1214@.com")
+Description: Multivariate Bayesian Generalised Additive Models for discrete time series
+Maintainer: Nicholas J Clark <nicholas.j.clark1214@gmail.com>
+License: MIT + file LICENSE
+Depends: R (>= 4.0.0), mgcv, rjags
+Imports: parallel,
+    coda,
+    MCMCpack,
+    runjags,
+    MCMCvis,
+    dplyr
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..10a9fe51
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2021
+COPYRIGHT HOLDER: Nicholas Clark
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 00000000..5608c0db
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2021 Nicholas Clark
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/NAMESPACE b/NAMESPACE
index 6ae92683..f19d6267 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(hpd)
+export(mvjagam)
+export(plot_mvgam_fc)
+export(plot_mvgam_trend)
diff --git a/R/all_neon_tick_data.R b/R/all_neon_tick_data.R
new file mode 100644
index 00000000..61d020a7
--- /dev/null
+++ b/R/all_neon_tick_data.R
@@ -0,0 +1,15 @@
+#' NEON Amblyomma and Ixodes tick abundance survey data
+#'
+#' A dataset containing timeseries of Amblyomma americanum and Ixodes scapularis nymph abundances at NEON sites
+#'
+#' @format A tibble/dataframe containing covariate information alongside the main fields of:
+#' \describe{
+#' \item{Year}{Year of sampling}
+#' \item{epiWeek}{Epidemiological week of sampling}
+#'   \item{plot_ID}{NEON plot ID for survey location}
+#'   \item{siteID}{NEON site ID for survey location}
+#'   \item{amblyomma_americanum}{Counts of A. americanum nymphs}
+#'   \item{ixodes_scapularis}{Counts of I. scapularis nymphs}
+#' }
+#' @source \url{https://www.neonscience.org/data}
+"all_neon_tick_data"
diff --git a/R/hpd.R b/R/hpd.R
new file mode 100644
index 00000000..526b79c1
--- /dev/null
+++ b/R/hpd.R
@@ -0,0 +1,58 @@
+# Calculate the highest posterior density interval
+#'
+#'This function uses estimated densities to calculate HPD intevals. Code originally supplied by Martyn Plummer
+#'
+#'@param x \code{vector} of values representing the distribution to be summarised
+#'@param coverage \code{numeric} value specifying the width of the HPD interval. Default is 0.95
+#'@return A \code{list} containing the reconciled forecast distributions for each series in \code{y}. Each element in
+#'the \code{vector} with three values: lower estimate, median estimate and upper estimate of the HPD interval
+#'
+#'@export
+#'
+hpd <- function(x, coverage = 0.95)
+{
+  x <- as.matrix(x)
+  out <- matrix(NA, nrow = ncol(x), ncol = 3)
+  rownames(out) <- dimnames(x)[[2]]
+  colnames(out) <- c("mode", "lower", "upper")
+
+  f <- function(p) {
+    if (p == density.range[2]) {
+      set.coverage <- 0
+    }
+    else {
+      p.upper <- min(y.density$y[y.density$y > p])
+      p.lower <- max(y.density$y[y.density$y <= p])
+      cov.upper <- sum(y.counts[y.density$y >= p.upper]) / sum(y.counts)
+      cov.lower <- sum(y.counts[y.density$y >= p.lower]) / sum(y.counts)
+      c <- (p.upper - p)/(p.upper - p.lower)
+      set.coverage <- c * cov.upper + (1 - c) * cov.lower
+    }
+    return(set.coverage - coverage)
+  }
+
+  for (i in 1:ncol(x)) {
+    y <- unclass(x[,i])
+    y.density <- stats::density(y, n=1024)
+    m <- length(y.density$x)
+
+    ## Find the midpoint
+    out[i,1] <- y.density$x[which.max(y.density$y)]
+    dx <- diff(range(y.density$x)) / m
+    breaks <- c(y.density$x[1] - dx / 2, y.density$x + dx /2)
+    y.counts <- hist(y, breaks = breaks, plot = FALSE)$counts
+    density.range <- range(y.density$y)
+    uniroot.out <- stats::uniroot(f, density.range)
+
+    ## Assuming that we have a single interval, find the limits
+    out[i,2:3] <- range(y.density$x[y.density$y > uniroot.out$root])
+
+    ## Check!
+    if (sum(abs(diff(y.density$y > uniroot.out$root))) != 2) {
+      warning("HPD set is not a closed interval")
+    }
+  }
+  out <- c(out[2], out[1], out[3])
+  names(out) <- c('lower', 'median', 'upper')
+  return(out)
+}
diff --git a/R/mvjagam.R b/R/mvjagam.R
new file mode 100644
index 00000000..4be75ff5
--- /dev/null
+++ b/R/mvjagam.R
@@ -0,0 +1,493 @@
+#'Fit a Bayesian multivariate GAM to a set of discrete time series
+#'
+#'This function estimates the posterior distribution for a multivariate GAM that includes
+#'smooth seasonality and possible other smooth functions specified in the GAM formula. State-space latent trends
+#'are estimated for each series, with a number of options for specifying the structures of the trends
+#'
+#'
+#'@param formula A \code{character} string specifying the GAM formula. These are exactly like the formula
+#'for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side
+#'to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these).
+#'@param data_train A\code{dataframe} containing the model response variable and covariates
+#'required by the GAM \code{formula}. Should include columns:
+#''y' (the discrete outcomes; NAs allowed)
+#''series' (character or factor index of the series IDs)
+#''season' (numeric index of the seasonal time point for each observation; should not have any missing)
+#''year' the numeric index for year
+#''in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero
+#'during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during
+#'during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing
+#'it to zero
+#'Any other variables to be included in the linear predictor of \code{formula} must also be present
+#'@param data_test \code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in
+#'addition to any other variables included in the linear predictor of \code{formula}
+#'@param use_nb \code{logical} If \code{TRUR}, use a Negative Binomial likelihood with estimated
+#'overdispersion parameter r;
+#'if \code{FALSE}, set r to \code{10,000} to approximate the Poisson distribution
+#'@param use_mv \code{logical} If \code{TRUE} and the total number of series is \code{<=5},
+#'a multivariate gaussian distribution is used for the state space trend errors. If \code{TRUE} and the total number
+#'of series is \code{>5}, a set of latent dynamic factors is estimated to capture possible dependencies in the state
+#'space trends. If \code{FALSE}, independent gaussian distributions are used for the trends
+#'@param n.chains \code{integer} the number of parallel chains for the model
+#'@param n.adapt \code{integer} the number of iterations for adaptation. See adapt for details.
+#'If \code{n.adapt} = 0 then no adaptation takes place
+#'@param n.iter \code{integer} the number of iterations of the Markov chain to run
+#'@param auto_update \code{logical} If \code{TRUE}, the model is run for up to \code{3} additional sets of
+#'\code{n.iter} iterations, or until the lower 15% of effective sample sizes reaches \code{100}
+#'@param phi_prior \code{character} specifying (in JAGS syntax) the prior distributions for the AR1 coefficients in the
+#'latent trends
+#'@param rho_prior \code{character} specifying (in JAGS syntax) the prior distributions for the smooth penalty parameters
+#'@param r_prior \code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial
+#'overdispersion parameter
+#'@param tau_prior \code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian
+#'precisions used for the latent trends (ignored if \code{use_mv == TRUE})
+#'@details A \code{\link[mcgv]{jagam}} model file is generated from \code{formula} and modified to include the latent
+#'AR1 state space trends. The model parameters are esimated in a Bayesian framework using Gibbs sampling
+#'in \code{\link[rjags]{jags.model}}.
+#'
+#'@return A \code{list} object containing JAGS model output, the text representation of the model file,
+#'the mgcv model output (for easily generating simulations at
+#'unsampled covariate values), the names of the smooth parameters and the matrix of indices for each series
+#'
+#'@export
+
+mvjagam = function(formula,
+                    data_train,
+                    data_test,
+                    use_nb = TRUE,
+                    use_mv = FALSE,
+                    n.adapt = 1000,
+                    n.chains = 2,
+                    n.burnin = 5000,
+                    n.iter = 2000,
+                    thin = 2,
+                    auto_update = TRUE,
+                    phi_prior = 'dbeta(1, 1)',
+                    rho_prior = 'dunif(-12, 12)',
+                    r_prior = 'dexp(0.001)',
+                    tau_prior = 'dunif(1, 100)'){
+
+  # Fill in any NA's with arbitrary values so the observations aren't dropped when jags data is created
+  orig_y <- data_train$y
+  data_train$y[which(is.na(data_train$y))] <- floor(sample(seq(0,50),
+                                                           length(which(is.na(data_train$y))),
+                                                           T))
+
+  # Estimate the GAM model using mgcv so that the linear predictor matrix can be easily calculated
+  # when simulating from the JAGS model later on
+  ss_gam <- mgcv::gam(formula(formula),
+                      data = data_train,
+                      method = "REML",
+                      family = poisson(),
+                      drop.unused.levels = FALSE)
+
+  # Make jags file and appropriate data structures
+  ss_jagam <- mgcv::jagam(formula,
+                          data = data_train,
+                          family = poisson(),
+                           drop.unused.levels = FALSE,
+                          file = 'base_gam.txt',
+                          sp.prior = 'log.uniform')
+
+  # Read in the base (unmodified) jags model file
+  base_model <- suppressWarnings(readLines('base_gam.txt'))
+
+  # Need to remove lines from the linear predictor section and replace with
+  # the modified state space representation
+  lines_remove <- c(1:grep('## response', base_model))
+  base_model <- base_model[-lines_remove]
+  base_model[grep('rho\\[i\\] ~', base_model)] <- paste0('   rho[i] ~ ', rho_prior)
+
+  # Add replacement lines for trends and linear predictor
+  if(!use_mv){
+    modification <- c("model {
+
+                                   ## GAM linear predictor
+                                   eta <- X %*% b
+
+                                   ## Mean expectations
+                                   for (i in 1:n) {
+                                   for (s in 1:n_series) {
+                                   mu[i, s] <- exp(eta[ytimes[i, s]] + trend[i, s] * in_season[i])
+                                   }
+                                   }
+
+                                   ## AR1 state space trends
+                                   for(s in 1:n_series) {
+                                   trend[1, s] ~ dnorm(0, tau[s])
+                                   }
+
+                                   for (i in 2:n){
+                                   for (s in 1:n_series){
+                                   trend[i, s] ~ dnorm(trend[i - 1, s] * phi[s], tau[s])
+                                   }
+                                   }
+
+                                   for (s in 1:n_series){
+                                   phi[s] ~ dunif(-0.9, 0.9)
+                                   tau[s] ~ dexp(0.5)
+                                   }
+
+                                   ## Negative binomial likelihood functions
+                                   for (i in 1:n) {
+                                   for (s in 1:n_series) {
+                                   y[i, s] ~ dnegbin(rate[i, s], r);
+                                   rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps,
+                                                        (r / (r + mu[i, s])))
+                                   }
+                                   }
+
+                                   ## Set overdispersion parameter to 10000 if not using nb;
+                                   ## this approximates the Poisson distribution
+                                   r1 ~ dgamma(0.01, 0.01)
+                                   r2 <- 10000
+                                   r_indicator <- ifelse(use_nb > 0, 1, 0)
+                                   r <- (r1 * r_indicator) + (r2 * (1 - r_indicator))
+
+                                   ## Posterior predictions
+                                   for (i in 1:n) {
+                                   for (s in 1:n_series) {
+                                   ypred[i, s] ~ dnegbin(rate[i, s], r)
+                                   }
+                                   }
+                                   ")
+
+    # Create the joined model file
+    fil <- tempfile(fileext = ".xt")
+    cat(c(readLines(textConnection(modification)), base_model), file = fil,
+        sep = "\n")
+    model_file <- readLines(fil, n = -1)
+
+    # Update and prior distribution choices
+    model_file[grep('phi\\[s\\] ~', model_file)] <- paste0('   phi[s] ~ ', phi_prior)
+    model_file[grep('tau\\[s\\] ~', model_file)] <- paste0('   tau[s] ~ ', tau_prior)
+    model_file[grep('r1 ~', model_file)] <- paste0('   r1 ~ ', r_prior)
+
+    model_file <- textConnection(model_file)
+  }
+
+  if(use_mv){
+    if(length(unique(data_train$series)) <= 5){
+      modification <- c("model {
+
+                        ## GAM linear predictor
+                        eta <- X %*% b
+
+                        ## Mean expectations
+                        for (i in 1:n) {
+                        for (s in 1:n_series) {
+                        mu[i, s] <- exp(eta[ytimes[i, s]] + trend[i, s] * in_season[i])
+                        }
+                        }
+
+                        ## AR1 state space trends with possible covarying errors
+                        trend[1, 1:n_series] ~ dmnorm(zeros, tau[ , ])
+
+                        for (i in 2:n){
+                        for (s in 1:n_series){
+                        trend_mus[i, s] <- trend[i - 1, s] * phi[s]
+                        }
+                        }
+
+                        for (i in 2:n){
+                        trend[i, 1:n_series] ~ dmnorm(trend_mus[i, ], tau[ , ]);
+                        }
+
+                        # AR1 persistence coefficients
+                        for (s in 1:n_series){
+                        phi[s] ~ dunif(-0.9, 0.9)
+                        }
+
+                        # Wishart is appropriate conjugate prior for dmnorm precision
+                        # df = n_series + 1 sets uniform distribution on correlation parameters
+                        tau[1:n_series, 1:n_series] ~ dwish(diag_mat[ , ], n_series + 1)
+
+                        # Scaled residual correlation matrix (between -1 and 1; Gelman & Hill 2007)
+                        Covar.raw[1:n_series, 1:n_series] <- inverse(tau[ , ])
+                        for(i in 1:n_series){
+                        for(j in 1:n_series){
+                        cor[i, j] <- Covar.raw[i, j] / sqrt(Covar.raw[i, i] * Covar.raw[j, j])
+                        }
+                        sigma[i] <- abs(xi[i]) * sqrt(Covar.raw[i, i])
+                        # Redundant scaling factor improves sampling efficiency
+                        xi[i] ~ dunif(0, 100)
+                        }
+
+                        ## Negative binomial likelihood functions
+                        for (i in 1:n) {
+                        for (s in 1:n_series) {
+                        y[i, s] ~ dnegbin(rate[i, s], r);
+                        rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps,
+                        (r / (r + mu[i, s])))
+                        }
+                        }
+
+                        ## Set overdispersion parameter to 10000 if not using nb;
+                        ## this approximates the Poisson distribution
+                        r1 ~ dgamma(0.01, 0.01)
+                        r2 <- 10000
+                        r_indicator <- ifelse(use_nb > 0, 1, 0)
+                        r <- (r1 * r_indicator) + (r2 * (1 - r_indicator))
+
+                        ## Posterior predictions
+                        for (i in 1:n) {
+                        for (s in 1:n_series) {
+                        ypred[i, s] ~ dnegbin(rate[i, s], r)
+                        }
+                        }
+                        ")
+
+      # Create the joined model file
+      fil <- tempfile(fileext = ".xt")
+      cat(c(readLines(textConnection(modification)), base_model), file = fil,
+          sep = "\n")
+      model_file <- readLines(fil, n = -1)
+
+      # Update and prior distribution choices
+      model_file[grep('phi\\[s\\] ~', model_file)] <- paste0('   phi[s] ~ ', phi_prior)
+      model_file[grep('tau\\[s\\] ~', model_file)] <- paste0('   tau[s] ~ ', tau_prior)
+      model_file[grep('r1 ~', model_file)] <- paste0('   r1 ~ ', r_prior)
+
+      model_file <- textConnection(model_file)
+    } else {
+      #### Use the latent variable approach to estimate dependencies when n_series is large
+      modification <- c("model {
+
+                        ## GAM linear predictor
+                        eta <- X %*% b
+
+                        ## Mean expectations
+                        for (i in 1:n) {
+                        for (s in 1:n_series) {
+                        mu[i, s] <- exp(eta[ytimes[i, s]] + trend[i, s] * in_season[i])
+                        }
+                        }
+
+                        ## Latent factors evolve as AR1 time series
+                        tau_fac ~ dnorm(50, 0.05)T(0.01,)
+                        for(j in 1:n_lv){
+                           LV[1, j] ~ dnorm(0, tau_fac)
+                        }
+
+                        for(i in 2:n){
+                         for(j in 1:n_lv){
+                           LV[i, j] ~ dnorm(LV[i - 1, j] * phi[j], tau_fac)
+                         }
+                        }
+
+                        # AR1 persistence coefficients for latent factors
+                        for (s in 1:n_lv){
+                         phi[s] ~ dunif(0, 1)
+                        }
+
+                        ## Latent factor loadings, with identifiability constraints (Hui et al 2015)
+                        ## Upper diagonal 0
+                        for(i in 1:(n_lv - 1)){
+                           for(j in (i + 1):n_lv){
+                             lv_coefs[i, j] <- 0
+                            }
+                         }
+
+                        ## Positive sign constraints on diagonal elements
+                        for(i in 1:n_lv) {
+                          lv_coefs[i, i]  ~ dunif(0, 1)
+                        }
+
+                        ## Lower diagonal free
+                        for(i in 2:n_lv){
+                          for(j in 1:(i - 1)){
+                           lv_coefs[i, j] ~ dunif(-1, 1)
+                           }
+                        }
+
+                        ## Other elements also free
+                        for(i in (n_lv + 1):n_series) {
+                         for(j in 1:n_lv){
+                          lv_coefs[i, j] ~ dunif(-1, 1)
+                          }
+                        }
+
+                        ## Trend evolution for the series depends on latent factors;
+                        ## fix the precision here as it may become unidentifiable when estimating
+                        ## precision for the latent factors
+                        for (i in 1:n){
+                         for (s in 1:n_series){
+                          trend[i, s] ~ dnorm(inprod(lv_coefs[s,], LV[i,]), 100)
+                         }
+                        }
+
+                        ## Negative binomial likelihood functions
+                        for (i in 1:n) {
+                         for (s in 1:n_series) {
+                          y[i, s] ~ dnegbin(rate[i, s], r);
+                          rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps,
+                          (r / (r + mu[i, s])))
+                         }
+                        }
+
+                        ## Set overdispersion parameter to 10000 if not using nb;
+                        ## this approximates the Poisson distribution
+                        r1 ~ dgamma(0.01, 0.01)
+                        r2 <- 10000
+                        r_indicator <- ifelse(use_nb > 0, 1, 0)
+                        r <- (r1 * r_indicator) + (r2 * (1 - r_indicator))
+
+                        ## Posterior predictions
+                        for (i in 1:n) {
+                        for (s in 1:n_series) {
+                        ypred[i, s] ~ dnegbin(rate[i, s], r)
+                        }
+                        }
+                        ")
+
+      # Create the joined model file
+      fil <- tempfile(fileext = ".xt")
+      cat(c(readLines(textConnection(modification)), base_model), file = fil,
+          sep = "\n")
+      model_file <- readLines(fil, n = -1)
+
+      # Update and prior distribution choices
+      model_file[grep('phi\\[s\\] ~', model_file)] <- paste0('   phi[s] ~ ', phi_prior)
+      model_file[grep('tau\\[s\\] ~', model_file)] <- paste0('   tau[s] ~ ', tau_prior)
+      model_file[grep('r1 ~', model_file)] <- paste0('   r1 ~ ', r_prior)
+
+      model_file <- textConnection(model_file)
+    }
+
+  }
+
+  # Covariate datafrme including training and testing observations
+  X <- data.frame(rbind(ss_jagam$jags.data$X,
+                        predict(ss_gam, newdata = data_test, type = 'lpmatrix')))
+
+  # Add a time variable
+  X$time <- rbind(data_train, data_test[,1:ncol(data_train)]) %>%
+    dplyr::left_join(rbind(data_train, data_test[,1:ncol(data_train)]) %>%
+                       dplyr::select(year, season) %>%
+                       dplyr::distinct() %>%
+                       dplyr::arrange(year, season) %>%
+                       dplyr::mutate(time = dplyr::row_number())) %>%
+    dplyr::pull(time)
+
+  # Add an outcome variable
+  X$outcome <- c(orig_y, rep(NA, NROW(data_test)))
+
+  # Add a series identifier variable
+  X$series <- as.numeric(rbind(data_train, data_test)$series)
+
+  # Arrange by time then by series
+  X %>%
+    dplyr::arrange(time, series) -> X
+
+  # Matrix of indices in X that correspond to timepoints for each series
+  ytimes <- matrix(NA, nrow = max(X$time), ncol = length(unique(X$series)))
+  for(i in 1:length(unique(X$series))){
+    ytimes[,i] <- which(X$series == i)
+  }
+  ss_jagam$jags.data$ytimes <- ytimes
+
+  # Matrix of outcomes in X that correspond to each series at each timepoint
+  ys_mat <- matrix(NA, nrow = NROW(ytimes), ncol = NCOL(ytimes))
+  for(i in 1:length(unique(X$series))){
+    ys_mat[,i] <- X$outcome[which(X$series == i)]
+  }
+  ss_jagam$jags.data$y <- ys_mat
+
+  # Other necessary variables for JAGS
+  ss_jagam$jags.data$n <- NROW(ytimes)
+  ss_jagam$jags.data$n_series <- NCOL(ytimes)
+  ss_jagam$jags.data$X = as.matrix(X %>%
+                                     dplyr::select(-time, -series, -outcome))
+
+  if(use_mv){
+    # We use a Wishart prior for the state space multivariate precision matrix
+    ss_jagam$jags.data$diag_mat <- diag(ss_jagam$jags.data$n_series)
+    ss_jagam$jags.data$zeros <- rep(0, ss_jagam$jags.data$n_series)
+  }
+
+
+  # Set use_nb to 0 for approximate Poisson; otherwise to 1 for Negative Binomial
+  if(use_nb){
+    ss_jagam$jags.data$use_nb <- 1
+  } else {
+    ss_jagam$jags.data$use_nb <- 0
+  }
+
+  # Machine epsilon for minimum allowable non-zero rate
+  ss_jagam$jags.data$min_eps <- .Machine$double.eps
+
+  # Ensure inits fall within prior bounds for rho
+  ss_jagam$jags.ini$rho[ss_jagam$jags.ini$rho > 12] <- 11.99
+  ss_jagam$jags.ini$rho[ss_jagam$jags.ini$rho < 12] <- -11.99
+
+  # Number of latent variables to use
+  ss_jagam$jags.data$n_lv <- floor(ss_jagam$jags.data$n_series / 2)
+
+  # Binary indicator of in_season
+  ss_jagam$jags.data$in_season <- rbind(data_train, data_test) %>%
+    dplyr::select(year, season, in_season) %>%
+    dplyr::distinct() %>%
+    dplyr::arrange(year, season) %>%
+    dplyr::pull(in_season)
+
+  # Initiate adaptation of the model
+  load.module("glm")
+  gam_mod <- jags.model(model_file,
+                        data = ss_jagam$jags.data,
+                        inits = ss_jagam$jags.ini,
+                        n.chains = n.chains,
+                        n.adapt = n.adapt)
+  unlink('base_gam.txt')
+
+  # Update the model for the burnin period
+  update(gam_mod, n.burnin)
+
+  # Gather posterior samples for the specified parameters
+  if(!use_mv){
+    param <- c('rho', 'b', 'mu', 'ypred',  'r', 'phi',
+               'tau', 'trend', 'p_resid')
+  } else {
+    if(length(unique(data_train$series)) <= 5){
+      param <- c('rho', 'b', 'mu', 'ypred',  'r', 'phi',
+                 'tau', 'trend', 'p_resid', 'cor')
+    } else {
+      param <- c('rho', 'b', 'mu', 'ypred',  'r', 'phi',
+                 'trend', 'p_resid', 'lv_coefs', 'tau_fac')
+    }
+
+  }
+
+  out_gam_mod <- coda.samples(gam_mod,
+                                     variable.names = param,
+                                     n.iter = n.iter,
+                                     thin = thin)
+  if(auto_update){
+  # Update until reasonable convergence (exclude 'cor' as it will have some 0 neffs)
+  mod_summary <- MCMCvis::MCMCsummary(out_gam_mod, c('rho', 'b', 'phi', 'tau'))
+  for(i in 1:3){
+    if(quantile(mod_summary$Rhat, 0.85, na.rm = T) < 1 &
+       quantile(mod_summary$n.eff, 0.15) > 100){
+      break;
+    }
+    update(gam_mod, 5000)
+    out_gam_mod <- rjags::coda.samples(gam_mod,
+                                       variable.names = param,
+                                       n.iter = n.iter,
+                                       thin = thin)
+    mod_summary <- MCMCvis::MCMCsummary(out_gam_mod, c('rho', 'b', 'phi', 'tau'))
+  }
+  }
+
+  model_file <- readLines(fil, n = -1)
+  unlink(fil)
+
+  return(list(jags_output = out_gam_mod,
+              model_file = model_file,
+              mgcv_model = ss_gam,
+              jags_model = gam_mod,
+              smooth_param_details = base_model[sort(c(grep('## prior for', base_model),
+                                                       grep('## prior for', base_model)+1))],
+              ytimes = ytimes))
+
+
+}
diff --git a/R/plot_mvgam_fc.R b/R/plot_mvgam_fc.R
new file mode 100644
index 00000000..c30e51a2
--- /dev/null
+++ b/R/plot_mvgam_fc.R
@@ -0,0 +1,51 @@
+#'Plot mvjagam posterior predictions for a specified series
+#'@param out_gam_mod \code{list} object returned from \code{mvjagam}
+#'@param series \code{integer} specifying which series in the set is to be plotted
+#'@param data_train A\code{dataframe} containing the model response variable and covariates
+#'required by the GAM \code{formula}. Should include columns:
+#''y' (the discrete outcomes; NAs allowed)
+#''series' (character or factor index of the series IDs)
+#''season' (numeric index of the seasonal time point for each observation; should not have any missing)
+#''year' the numeric index for year
+#''in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero
+#'during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during
+#'during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing
+#'it to zero
+#'Any other variables to be included in the linear predictor of \code{formula} must also be present
+#'@param data_test \code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in
+#'addition to any other variables included in the linear predictor of \code{formula}
+#'@export
+plot_mvgam_fc = function(out_gam_mod, series, data_test, data_train){
+  pred_indices <- seq(0, length(out_gam_mod$ytimes),
+                      length.out = NCOL(out_gam_mod$ytimes) + 1)
+  preds <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'ypred')[,pred_indices[series]:pred_indices[series + 1]]
+  preds_last <- preds[1,]
+  int <- apply(preds,
+               2, hpd, 0.95)
+  plot(preds_last,
+       type = 'l', ylim = c(0, max(int) + 2),
+       col = rgb(1,0,0, alpha = 0),
+       ylab = paste0('Estimated counts for ', levels(data_train$series)[series]),
+       xlab = 'Time')
+  int[int<0] <- 0
+  polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))),
+          c(int[1,],rev(int[3,])),
+          col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+  int <- apply(preds,
+               2, hpd, 0.68)
+  int[int<0] <- 0
+  polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))),
+          c(int[1,],rev(int[3,])),
+          col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+  lines(int[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+
+  s_name <- levels(data_train$series)[series]
+  points(rbind(data_train, data_test) %>%
+           dplyr::filter(series == s_name) %>%
+           dplyr::select(year, season, y) %>%
+           dplyr::distinct() %>%
+           dplyr::arrange(year, season) %>%
+           dplyr::pull(y), pch = 16)
+  abline(v = NROW(data_train) / NCOL(out_gam_mod$ytimes))
+
+}
diff --git a/R/plot_mvgam_trend.R b/R/plot_mvgam_trend.R
new file mode 100644
index 00000000..9ef2cb3f
--- /dev/null
+++ b/R/plot_mvgam_trend.R
@@ -0,0 +1,41 @@
+#'Plot mvjagam latent trend for a specified series
+#'@param out_gam_mod \code{list} object returned from \code{mvjagam}
+#'@param series \code{integer} specifying which series in the set is to be plotted
+#'@param data_train A\code{dataframe} containing the model response variable and covariates
+#'required by the GAM \code{formula}. Should include columns:
+#''y' (the discrete outcomes; NAs allowed)
+#''series' (character or factor index of the series IDs)
+#''season' (numeric index of the seasonal time point for each observation; should not have any missing)
+#''year' the numeric index for year
+#''in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero
+#'during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during
+#'during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing
+#'it to zero
+#'Any other variables to be included in the linear predictor of \code{formula} must also be present
+#'@param data_test \code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in
+#'addition to any other variables included in the linear predictor of \code{formula}
+#'@export
+plot_mvgam_trend = function(out_gam_mod, series, data_test, data_train){
+  pred_indices <- seq(0, length(out_gam_mod$ytimes),
+                      length.out = NCOL(out_gam_mod$ytimes) + 1)
+  preds <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'trend')[,pred_indices[series]:pred_indices[series + 1]]
+  preds_last <- preds[1,]
+  plot(preds_last,
+       type = 'l', ylim = range(preds),
+       col = rgb(1,0,0, alpha = 0),
+       ylab = paste0('Estimated trend for ', levels(data_train$series)[series]),
+       xlab = 'Time')
+  int <- apply(preds,
+               2, hpd, 0.95)
+  polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))),
+          c(int[1,],rev(int[3,])),
+          col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA)
+  int <- apply(preds,
+               2, hpd, 0.68)
+  polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))),
+          c(int[1,],rev(int[3,])),
+          col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA)
+  lines(int[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed')
+  abline(v = NROW(data_train) / NCOL(out_gam_mod$ytimes))
+
+}
diff --git a/data/all_neon_tick_data.rda b/data/all_neon_tick_data.rda
new file mode 100644
index 00000000..736490b1
Binary files /dev/null and b/data/all_neon_tick_data.rda differ
diff --git a/man/all_neon_tick_data.Rd b/man/all_neon_tick_data.Rd
new file mode 100644
index 00000000..be7ed812
--- /dev/null
+++ b/man/all_neon_tick_data.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/all_neon_tick_data.R
+\docType{data}
+\name{all_neon_tick_data}
+\alias{all_neon_tick_data}
+\title{NEON Amblyomma and Ixodes tick abundance survey data}
+\format{
+A tibble/dataframe containing covariate information alongside the main fields of:
+\describe{
+\item{Year}{Year of sampling}
+\item{epiWeek}{Epidemiological week of sampling}
+\item{plot_ID}{NEON plot ID for survey location}
+\item{siteID}{NEON site ID for survey location}
+\item{amblyomma_americanum}{Counts of A. americanum nymphs}
+\item{ixodes_scapularis}{Counts of I. scapularis nymphs}
+}
+}
+\source{
+\url{https://www.neonscience.org/data}
+}
+\usage{
+all_neon_tick_data
+}
+\description{
+A dataset containing timeseries of Amblyomma americanum and Ixodes scapularis nymph abundances at NEON sites
+}
+\keyword{datasets}
diff --git a/man/hpd.Rd b/man/hpd.Rd
new file mode 100644
index 00000000..2d7c6840
--- /dev/null
+++ b/man/hpd.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/hpd.R
+\name{hpd}
+\alias{hpd}
+\title{This function uses estimated densities to calculate HPD intevals. Code originally supplied by Martyn Plummer}
+\usage{
+hpd(x, coverage = 0.95)
+}
+\arguments{
+\item{x}{\code{vector} of values representing the distribution to be summarised}
+
+\item{coverage}{\code{numeric} value specifying the width of the HPD interval. Default is 0.95}
+}
+\value{
+A \code{list} containing the reconciled forecast distributions for each series in \code{y}. Each element in
+the \code{vector} with three values: lower estimate, median estimate and upper estimate of the HPD interval
+}
+\description{
+This function uses estimated densities to calculate HPD intevals. Code originally supplied by Martyn Plummer
+}
diff --git a/man/mvjagam.Rd b/man/mvjagam.Rd
new file mode 100644
index 00000000..dbb6b598
--- /dev/null
+++ b/man/mvjagam.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mvjagam.R
+\name{mvjagam}
+\alias{mvjagam}
+\title{Fit a Bayesian multivariate GAM to a set of discrete time series}
+\usage{
+mvjagam(
+  formula,
+  data_train,
+  data_test,
+  use_nb = TRUE,
+  use_mv = FALSE,
+  n.adapt = 1000,
+  n.chains = 2,
+  n.burnin = 5000,
+  n.iter = 2000,
+  thin = 2,
+  auto_update = TRUE,
+  phi_prior = "dbeta(1, 1)",
+  rho_prior = "dunif(-12, 12)",
+  r_prior = "dexp(0.001)",
+  tau_prior = "dunif(1, 100)"
+)
+}
+\arguments{
+\item{formula}{A \code{character} string specifying the GAM formula. These are exactly like the formula
+for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side
+to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these).}
+
+\item{data_train}{A\code{dataframe} containing the model response variable and covariates
+required by the GAM \code{formula}. Should include columns:
+'y' (the discrete outcomes; NAs allowed)
+'series' (character or factor index of the series IDs)
+'season' (numeric index of the seasonal time point for each observation; should not have any missing)
+'year' the numeric index for year
+'in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero
+during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during
+during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing
+it to zero
+Any other variables to be included in the linear predictor of \code{formula} must also be present}
+
+\item{data_test}{\code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in
+addition to any other variables included in the linear predictor of \code{formula}}
+
+\item{use_nb}{\code{logical} If \code{TRUR}, use a Negative Binomial likelihood with estimated
+overdispersion parameter r;
+if \code{FALSE}, set r to \code{10,000} to approximate the Poisson distribution}
+
+\item{use_mv}{\code{logical} If \code{TRUE} and the total number of series is \code{<=5},
+a multivariate gaussian distribution is used for the state space trend errors. If \code{TRUE} and the total number
+of series is \code{>5}, a set of latent dynamic factors is estimated to capture possible dependencies in the state
+space trends. If \code{FALSE}, independent gaussian distributions are used for the trends}
+
+\item{n.adapt}{\code{integer} the number of iterations for adaptation. See adapt for details.
+If \code{n.adapt} = 0 then no adaptation takes place}
+
+\item{n.chains}{\code{integer} the number of parallel chains for the model}
+
+\item{n.iter}{\code{integer} the number of iterations of the Markov chain to run}
+
+\item{auto_update}{\code{logical} If \code{TRUE}, the model is run for up to \code{3} additional sets of
+\code{n.iter} iterations, or until the lower 15\% of effective sample sizes reaches \code{100}}
+
+\item{phi_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the AR1 coefficients in the
+latent trends}
+
+\item{rho_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the smooth penalty parameters}
+
+\item{r_prior}{\code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial
+overdispersion parameter}
+
+\item{tau_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian
+precisions used for the latent trends (ignored if \code{use_mv == TRUE})}
+}
+\value{
+A \code{list} object containing JAGS model output, the text representation of the model file,
+the mgcv model output (for easily generating simulations at
+unsampled covariate values), the names of the smooth parameters and the matrix of indices for each series
+}
+\description{
+This function estimates the posterior distribution for a multivariate GAM that includes
+smooth seasonality and possible other smooth functions specified in the GAM formula. State-space latent trends
+are estimated for each series, with a number of options for specifying the structures of the trends
+}
+\details{
+A \code{\link[mcgv]{jagam}} model file is generated from \code{formula} and modified to include the latent
+AR1 state space trends. The model parameters are esimated in a Bayesian framework using Gibbs sampling
+in \code{\link[rjags]{jags.model}}.
+}
diff --git a/man/plot_mvgam_fc.Rd b/man/plot_mvgam_fc.Rd
new file mode 100644
index 00000000..abf5c726
--- /dev/null
+++ b/man/plot_mvgam_fc.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_mvgam_fc.R
+\name{plot_mvgam_fc}
+\alias{plot_mvgam_fc}
+\title{Plot mvjagam posterior predictions for a specified series}
+\usage{
+plot_mvgam_fc(out_gam_mod, series, data_test, data_train)
+}
+\arguments{
+\item{out_gam_mod}{\code{list} object returned from \code{mvjagam}}
+
+\item{series}{\code{integer} specifying which series in the set is to be plotted}
+
+\item{data_test}{\code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in
+addition to any other variables included in the linear predictor of \code{formula}}
+
+\item{data_train}{A\code{dataframe} containing the model response variable and covariates
+required by the GAM \code{formula}. Should include columns:
+'y' (the discrete outcomes; NAs allowed)
+'series' (character or factor index of the series IDs)
+'season' (numeric index of the seasonal time point for each observation; should not have any missing)
+'year' the numeric index for year
+'in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero
+during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during
+during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing
+it to zero
+Any other variables to be included in the linear predictor of \code{formula} must also be present}
+}
+\description{
+Plot mvjagam posterior predictions for a specified series
+}
diff --git a/man/plot_mvgam_trend.Rd b/man/plot_mvgam_trend.Rd
new file mode 100644
index 00000000..949da301
--- /dev/null
+++ b/man/plot_mvgam_trend.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_mvgam_trend.R
+\name{plot_mvgam_trend}
+\alias{plot_mvgam_trend}
+\title{Plot mvjagam latent trend for a specified series}
+\usage{
+plot_mvgam_trend(out_gam_mod, series, data_test, data_train)
+}
+\arguments{
+\item{out_gam_mod}{\code{list} object returned from \code{mvjagam}}
+
+\item{series}{\code{integer} specifying which series in the set is to be plotted}
+
+\item{data_test}{\code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in
+addition to any other variables included in the linear predictor of \code{formula}}
+
+\item{data_train}{A\code{dataframe} containing the model response variable and covariates
+required by the GAM \code{formula}. Should include columns:
+'y' (the discrete outcomes; NAs allowed)
+'series' (character or factor index of the series IDs)
+'season' (numeric index of the seasonal time point for each observation; should not have any missing)
+'year' the numeric index for year
+'in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero
+during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during
+during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing
+it to zero
+Any other variables to be included in the linear predictor of \code{formula} must also be present}
+}
+\description{
+Plot mvjagam latent trend for a specified series
+}
diff --git a/test_mvjagam.R b/test_mvjagam.R
new file mode 100644
index 00000000..c8266c01
--- /dev/null
+++ b/test_mvjagam.R
@@ -0,0 +1,285 @@
+#### Testing the mgjagam function ####
+# Utility functions, too specific to put into the package but useful for our study
+plot_mvgam_season = function(out_gam_mod, series, data_test, data_train,
+                             xlab = 'Season'){
+
+  pred_dat <- expand.grid(series = levels(data_train$series)[series],
+                          season = seq(min(data_train$season),
+                                       max(data_train$season), length.out = 100),
+                          year = mean(data_train$year),
+                          cum_gdd = mean(data_train$cum_gdd, na.rm = T),
+                          siteID = as.character(unique(data_train$siteID[which(data_train$series ==
+                                                                                 levels(data_train$series)[series])])))
+  Xp <- predict(out_gam_mod$mgcv_model, newdata = pred_dat, type = 'lpmatrix')
+  betas <- MCMCchains(out_gam_mod$jags_output, 'b')
+  plot(Xp %*% betas[1,] ~ pred_dat$season, ylim = range(Xp %*% betas[1,] + 1.5,
+                                                        Xp %*% betas[1,] - 1.5),
+
+       col = rgb(150, 0, 0, max = 255, alpha = 10), type = 'l',
+       ylab = paste0('F(season) for ', levels(data_train$series)[series]),
+       xlab = xlab)
+  for(i in 2:1000){
+    lines(Xp %*% betas[i,] ~ pred_dat$season,
+
+          col = rgb(150, 0, 0, max = 255, alpha = 10))
+  }
+}
+
+plot_mvgam_gdd = function(out_gam_mod, series, data_test, data_train,
+                          xlab = 'Season'){
+
+  pred_dat <- expand.grid(series = levels(data_train$series)[series],
+                          season = 20,
+                          in_season = 1,
+                          year = mean(data_train$year),
+                          cum_gdd = seq(min(data_train$cum_gdd[which(data_train$series ==
+                                                                       levels(data_train$series)[series])],
+                                            na.rm = T),
+                                        max(data_train$cum_gdd[which(data_train$series ==
+                                                                       levels(data_train$series)[series])],
+                                            na.rm = T), length.out = 100),
+                          siteID = as.character(unique(data_train$siteID[which(data_train$series ==
+                                                                                 levels(data_train$series)[series])])))
+  Xp <- predict(out_gam_mod$mgcv_model, newdata = pred_dat, type = 'lpmatrix')
+  betas <- MCMCchains(out_gam_mod$jags_output, 'b')
+  plot(Xp %*% betas[1,] ~ pred_dat$cum_gdd,
+       ylim = range(Xp %*% betas[1,] + 1.5,
+                    Xp %*% betas[1,] - 1.5),
+
+       col = rgb(150, 0, 0, max = 255, alpha = 10), type = 'l',
+       ylab = paste0('Predicted peak count for ', levels(data_train$series)[series]),
+       xlab = 'Cumulative growing degree days')
+  for(i in 2:1000){
+    lines(Xp %*% betas[i,] ~ pred_dat$cum_gdd,
+
+          col = rgb(150, 0, 0, max = 255, alpha = 10))
+  }
+  rug(data_train$cum_gdd[which(data_train$series ==
+                                 levels(data_train$series)[series])])
+}
+
+#### NEON mv_gam ####
+library(mvgam)
+data("all_neon_tick_data")
+
+# Prep data for modelling
+species = 'Ambloyomma_americanum'
+
+if(species == 'Ambloyomma_americanum'){
+
+  plotIDs <- c('SCBI_013', 'SERC_001', 'SERC_005', 'SERC_006',
+               'SERC_002', 'SERC_012', 'KONZ_025', 'UKFS_001',
+               'UKFS_004', 'UKFS_003', 'ORNL_002', 'ORNL_040',
+               'ORNL_008', 'ORNL_007', 'ORNL_009', 'ORNL_003',
+               'TALL_001', 'TALL_008', 'TALL_002')
+  model_dat <- all_neon_tick_data %>%
+    dplyr::mutate(target = amblyomma_americanum) %>%
+    dplyr::select(Year, epiWeek, plotID, target) %>%
+    dplyr::mutate(epiWeek = as.numeric(epiWeek)) %>%
+    dplyr::filter(Year > 2014 & Year < 2021) %>%
+    dplyr::mutate(Year_orig = Year)
+
+  model_dat %>%
+    dplyr::full_join(expand.grid(plotID = unique(model_dat$plotID),
+                                 Year_orig = unique(model_dat$Year_orig),
+                                 epiWeek = seq(1, 52))) %>%
+    dplyr::left_join(all_neon_tick_data %>%
+                       dplyr::select(siteID, plotID) %>%
+                       dplyr::distinct()) %>%
+    # Remove winter tick abundances as we are not interested in modelling them
+    dplyr::filter(epiWeek > 14) %>%
+    dplyr::filter(epiWeek < 41) %>%
+    dplyr::mutate(series = plotID,
+                  season = epiWeek - 14,
+                  year = as.vector(scale(Year_orig)),
+                  y = target) %>%
+    dplyr::select(-Year, -epiWeek, -target) %>%
+    dplyr::ungroup() %>%
+    dplyr::arrange(Year_orig, season, series) -> model_dat
+
+  model_dat %>%
+    dplyr::left_join(all_neon_tick_data %>%
+                       dplyr::ungroup() %>%
+                       dplyr::select(Year, siteID, cum_sdd, cum_gdd) %>%
+                       dplyr::mutate(Year_orig = Year) %>%
+                       dplyr::select(-Year) %>%
+                       dplyr::distinct()) -> model_dat
+}
+
+# Set indicator for whether time point is 'in-season' or not (trends won't contribute during
+# off season, as counts generally go to zero during this time)
+model_dat = model_dat %>%
+  dplyr::mutate(in_season = ifelse(season >=3 & season <= 22, 1, 0)) %>%
+  dplyr::filter(siteID %in% c('SERC', 'TALL', 'UKFS'),
+                Year_orig <= 2019) %>%
+  dplyr::mutate(plotID = factor(plotID),
+                siteID = factor(siteID),
+                series = factor(series))
+
+data_train = model_dat[1:(floor(nrow(model_dat) * 0.9)),]
+data_test = model_dat[((floor(nrow(model_dat) * 0.9)) + 1):nrow(model_dat),]
+(nrow(data_train) + nrow(data_test)) == nrow(model_dat)
+
+# Hypothesis testing
+# NULL. There is no seasonal pattern to be estimated, and we simply let the latent
+# factors and site-level effects of growing days influence the series dynamics
+null_hyp = y ~ siteID + s(cum_gdd, by = siteID, k = 3) - 1
+
+# 1. Do all series share same seasonal pattern, with any remaining variation due to
+# non-seasonal local variation?
+hyp1 = y ~
+  siteID +
+  s(cum_gdd, by = siteID, k = 3) +
+  # Global cyclic seasonality term (smooth)
+  s(season, k = 12, m = 2, bs = 'cc') - 1
+
+# 2. Do all series share same seasonal pattern but with different magnitudes
+# (i.e. random intercepts per series)?
+hyp2 = y ~
+  siteID +
+  s(cum_gdd, by = siteID, k = 3) +
+  s(season, k = 12, m = 2, bs = 'cc') +
+  # Hierarchical variable intercepts
+  s(series, bs = 're') - 1
+
+# 3. Is there evidence for global seasonality but each site's seasonal pattern deviates
+# based on more local conditions?
+hyp3 = y ~
+  siteID +
+  s(cum_gdd, by = siteID, k = 3) +
+  s(season, k = 4, m = 2, bs = 'cc') +
+  s(series, bs = 're') +
+  # Site-level deviations from global pattern, which can be wiggly (m=1 to reduce concurvity);
+  # If these dominate, they will have smaller smoothing parameters and the global seasonality
+  # will become less important (larger smoothing parameter). Sites with the smallest smooth
+  # parameters are those that deviate the most from the global seasonality
+  s(season, by = siteID, m = 1, k = 8) - 1
+
+# 4. Is there evidence for global seasonality but each plot's seasonal pattern deviates
+# based on even more local conditions than above (i.e. plot-level is not as informative)?
+hyp4 = y ~
+  siteID +
+  s(cum_gdd, by = siteID, k = 3) +
+  s(season, k = 4, m = 2, bs = 'cc') +
+  s(series, bs = 're') +
+  # Series-level deviations from global pattern
+  s(season, by = series, m = 1, k = 8) - 1
+
+# If evidence of gdd effects, can also let use a global smoother and then site-level
+# deviations for a 'hierarchical' setup
+use_mv <- T
+out_gam_mod <- mvjagam(formula = hyp1,
+                        data_train = data_train,
+                        data_test = data_test,
+                        n.adapt = 1000,
+                        n.burnin = 1000,
+                        n.iter = 1000,
+                        thin = 1,
+                        auto_update = FALSE,
+                        use_mv = use_mv,
+                        use_nb = FALSE,
+                        # Laplace distribution emphasizes our prior that smooths should not be overly wiggly
+                        # unless the data supports this
+                        rho_prior = 'ddexp(5, 0.25)T(-12, 12)',
+                        phi_prior = 'dbeta(2,2)',
+                        tau_prior = 'dunif(0.1, 100)')
+
+# View the modified JAGS model file
+out_gam_mod$model_file
+
+# Traces of key parameters
+library(MCMCvis)
+
+# Negative binomial size parameter (set to 10,000 if use_nb = FALSE)
+MCMCtrace(out_gam_mod$jags_output, c('r'), pdf = F, n.eff = T)
+
+# Penalties of smooth components (smaller means an effect is more nonlinear)
+out_gam_mod$smooth_param_details # rhos are the logged versions of the lambdas
+MCMCtrace(out_gam_mod$jags_output, c('rho'), pdf = F, n.eff = T)
+
+# AR1 persistence coefficients for latent dynamic factors (or for each
+# series if use_mv = FALSE)
+MCMCtrace(out_gam_mod$jags_output, c('phi'), pdf = F, n.eff = T)
+
+# Precision for latent dynamic factors (if using latent factors)
+MCMCtrace(out_gam_mod$jags_output, c('tau_fac'), pdf = F, n.eff = T)
+
+# Precision for latent trends (if using independent Gaussian trends)
+MCMCtrace(out_gam_mod$jags_output, c('tau'), pdf = F, n.eff = T)
+dev.off()
+
+# Plot useful posterior quantities for a specific series
+opar <- par()
+par(mfrow = c(3, 1))
+
+# Total number of series in the set
+length(unique(data_train$series))
+series = 1
+plot_mvgam_season(out_gam_mod, series = series, data_test = data_test,
+                  data_train = data_train, xlab = 'Epidemiological week')
+plot_mvgam_fc(out_gam_mod, series = series, data_test = data_test,
+              data_train = data_train)
+plot_mvgam_trend(out_gam_mod, series = series, data_test = data_test,
+                 data_train = data_train)
+par(opar)
+
+plot_mvgam_gdd(out_gam_mod, series = series, data_test = data_test,
+               data_train = data_train, xlab = 'Epidemiological week')
+
+if(use_mv){
+  if(length(unique(data_train$series)) > 5){
+    # Calculate the correlation matrix from the latent variables
+    samps <- jags.samples(out_gam_mod$jags_model,
+                          variable.names = 'lv_coefs',
+                          n.iter = 1000, thin = 1)
+    lv_coefs <- samps$lv_coefs
+    n_series <- dim(lv_coefs)[1]
+    n_lv <- dim(lv_coefs)[2]
+    n_samples <- prod(dim(lv_coefs)[3:4])
+
+    # Get arrat of latend variable loadings
+    coef_array <- array(NA, dim = c(n_series, n_lv, n_samples))
+    for(i in 1:n_series){
+      for(j in 1:n_lv){
+        coef_array[i, j, ] <- c(lv_coefs[i, j, , 1],
+                                lv_coefs[i, j, , 2])
+      }
+    }
+
+    # Variances of each series' latent trend are fixed at (1/100)^2
+    eps_res <- rep((1/100)^2, n_series)
+
+    # Posterior correlations based on latent variable loadings
+    correlations <- array(NA, dim = c(n_series, n_series, n_samples))
+    for(i in 1:n_samples){
+      correlations[,,i] <- cov2cor(tcrossprod(coef_array[,,i]) + diag(eps_res))
+    }
+    mean_correlations <- apply(correlations, c(1,2), function(x) quantile(x, 0.5))
+
+    # Plot the mean posterior correlations
+    mean_correlations[upper.tri(mean_correlations)] <- NA
+    mean_correlations <- data.frame(mean_correlations)
+    rownames(mean_correlations) <- colnames(mean_correlations) <- levels(data_train$series)
+
+    library(ggplot2)
+    ggplot(mean_correlations %>%
+             tibble::rownames_to_column("series1") %>%
+             tidyr::pivot_longer(-c(series1), names_to = "series2", values_to = "Correlation"),
+           aes(x = series1, y = series2)) + geom_tile(aes(fill = Correlation)) +
+      scale_fill_gradient2(low="darkred", mid="white", high="darkblue",
+                           midpoint = 0,
+                           breaks = seq(-1,1,length.out = 5),
+                           limits = c(-1, 1),
+                           name = 'Residual\ncorrelation') + labs(x = '', y = '') + theme_dark() +
+      theme(axis.text.x = element_text(angle = 45, hjust=1))
+
+  }
+}
+
+#### If GAM component is LESS supported, we should see evidence in the form of: ####
+# 1. Stronger residual correlations, suggesting we are missing some site-level structure
+summary(as.vector(as.matrix(mean_correlations)))
+
+# 2. Smaller precisions for tau_fac (i.e. larger variance for latent dynamic factors)
+MCMCsummary(out_gam_mod$jags_output, 'tau_fac')
+