From 1c2671a4e30e9b55e7d6798f02fd28b88c2428a1 Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Tue, 19 Oct 2021 10:36:40 +1000 Subject: [PATCH] first addition of code --- .Rbuildignore | 1 + DESCRIPTION | 22 +- LICENSE | 2 + LICENSE.md | 21 ++ NAMESPACE | 4 + R/all_neon_tick_data.R | 15 ++ R/hpd.R | 58 +++++ R/mvjagam.R | 493 ++++++++++++++++++++++++++++++++++++ R/plot_mvgam_fc.R | 51 ++++ R/plot_mvgam_trend.R | 41 +++ data/all_neon_tick_data.rda | Bin 0 -> 40689 bytes man/all_neon_tick_data.Rd | 27 ++ man/hpd.Rd | 20 ++ man/mvjagam.Rd | 89 +++++++ man/plot_mvgam_fc.Rd | 31 +++ man/plot_mvgam_trend.Rd | 31 +++ test_mvjagam.R | 285 +++++++++++++++++++++ 17 files changed, 1183 insertions(+), 8 deletions(-) create mode 100644 LICENSE create mode 100644 LICENSE.md create mode 100644 R/all_neon_tick_data.R create mode 100644 R/hpd.R create mode 100644 R/mvjagam.R create mode 100644 R/plot_mvgam_fc.R create mode 100644 R/plot_mvgam_trend.R create mode 100644 data/all_neon_tick_data.rda create mode 100644 man/all_neon_tick_data.Rd create mode 100644 man/hpd.Rd create mode 100644 man/mvjagam.Rd create mode 100644 man/plot_mvgam_fc.Rd create mode 100644 man/plot_mvgam_trend.Rd create mode 100644 test_mvjagam.R diff --git a/.Rbuildignore b/.Rbuildignore index 206a9a08..114f0403 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,3 @@ ^mvgam\.Rproj$ ^\.Rproj\.user$ +^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 24cf163a..f2213eca 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,21 @@ Package: mvgam -Title: What the Package Does (One Line, Title Case) +Title: Multivariate Bayesian Generalised Additive Models for discrete time series Version: 0.0.0.9000 Authors@R: - person(given = "First", - family = "Last", + person(given = "Nicholas", + family = "Clark", role = c("aut", "cre"), - email = "first.last@example.com", - comment = c(ORCID = "YOUR-ORCID-ID")) -Description: What the package does (one paragraph). -License: `use_mit_license()`, `use_gpl3_license()` or friends to - pick a license + email = "nicholas.j.clark1214@.com") +Description: Multivariate Bayesian Generalised Additive Models for discrete time series +Maintainer: Nicholas J Clark +License: MIT + file LICENSE +Depends: R (>= 4.0.0), mgcv, rjags +Imports: parallel, + coda, + MCMCpack, + runjags, + MCMCvis, + dplyr Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..10a9fe51 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2021 +COPYRIGHT HOLDER: Nicholas Clark diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 00000000..5608c0db --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2021 Nicholas Clark + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE index 6ae92683..f19d6267 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(hpd) +export(mvjagam) +export(plot_mvgam_fc) +export(plot_mvgam_trend) diff --git a/R/all_neon_tick_data.R b/R/all_neon_tick_data.R new file mode 100644 index 00000000..61d020a7 --- /dev/null +++ b/R/all_neon_tick_data.R @@ -0,0 +1,15 @@ +#' NEON Amblyomma and Ixodes tick abundance survey data +#' +#' A dataset containing timeseries of Amblyomma americanum and Ixodes scapularis nymph abundances at NEON sites +#' +#' @format A tibble/dataframe containing covariate information alongside the main fields of: +#' \describe{ +#' \item{Year}{Year of sampling} +#' \item{epiWeek}{Epidemiological week of sampling} +#' \item{plot_ID}{NEON plot ID for survey location} +#' \item{siteID}{NEON site ID for survey location} +#' \item{amblyomma_americanum}{Counts of A. americanum nymphs} +#' \item{ixodes_scapularis}{Counts of I. scapularis nymphs} +#' } +#' @source \url{https://www.neonscience.org/data} +"all_neon_tick_data" diff --git a/R/hpd.R b/R/hpd.R new file mode 100644 index 00000000..526b79c1 --- /dev/null +++ b/R/hpd.R @@ -0,0 +1,58 @@ +# Calculate the highest posterior density interval +#' +#'This function uses estimated densities to calculate HPD intevals. Code originally supplied by Martyn Plummer +#' +#'@param x \code{vector} of values representing the distribution to be summarised +#'@param coverage \code{numeric} value specifying the width of the HPD interval. Default is 0.95 +#'@return A \code{list} containing the reconciled forecast distributions for each series in \code{y}. Each element in +#'the \code{vector} with three values: lower estimate, median estimate and upper estimate of the HPD interval +#' +#'@export +#' +hpd <- function(x, coverage = 0.95) +{ + x <- as.matrix(x) + out <- matrix(NA, nrow = ncol(x), ncol = 3) + rownames(out) <- dimnames(x)[[2]] + colnames(out) <- c("mode", "lower", "upper") + + f <- function(p) { + if (p == density.range[2]) { + set.coverage <- 0 + } + else { + p.upper <- min(y.density$y[y.density$y > p]) + p.lower <- max(y.density$y[y.density$y <= p]) + cov.upper <- sum(y.counts[y.density$y >= p.upper]) / sum(y.counts) + cov.lower <- sum(y.counts[y.density$y >= p.lower]) / sum(y.counts) + c <- (p.upper - p)/(p.upper - p.lower) + set.coverage <- c * cov.upper + (1 - c) * cov.lower + } + return(set.coverage - coverage) + } + + for (i in 1:ncol(x)) { + y <- unclass(x[,i]) + y.density <- stats::density(y, n=1024) + m <- length(y.density$x) + + ## Find the midpoint + out[i,1] <- y.density$x[which.max(y.density$y)] + dx <- diff(range(y.density$x)) / m + breaks <- c(y.density$x[1] - dx / 2, y.density$x + dx /2) + y.counts <- hist(y, breaks = breaks, plot = FALSE)$counts + density.range <- range(y.density$y) + uniroot.out <- stats::uniroot(f, density.range) + + ## Assuming that we have a single interval, find the limits + out[i,2:3] <- range(y.density$x[y.density$y > uniroot.out$root]) + + ## Check! + if (sum(abs(diff(y.density$y > uniroot.out$root))) != 2) { + warning("HPD set is not a closed interval") + } + } + out <- c(out[2], out[1], out[3]) + names(out) <- c('lower', 'median', 'upper') + return(out) +} diff --git a/R/mvjagam.R b/R/mvjagam.R new file mode 100644 index 00000000..4be75ff5 --- /dev/null +++ b/R/mvjagam.R @@ -0,0 +1,493 @@ +#'Fit a Bayesian multivariate GAM to a set of discrete time series +#' +#'This function estimates the posterior distribution for a multivariate GAM that includes +#'smooth seasonality and possible other smooth functions specified in the GAM formula. State-space latent trends +#'are estimated for each series, with a number of options for specifying the structures of the trends +#' +#' +#'@param formula A \code{character} string specifying the GAM formula. These are exactly like the formula +#'for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side +#'to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these). +#'@param data_train A\code{dataframe} containing the model response variable and covariates +#'required by the GAM \code{formula}. Should include columns: +#''y' (the discrete outcomes; NAs allowed) +#''series' (character or factor index of the series IDs) +#''season' (numeric index of the seasonal time point for each observation; should not have any missing) +#''year' the numeric index for year +#''in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero +#'during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during +#'during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing +#'it to zero +#'Any other variables to be included in the linear predictor of \code{formula} must also be present +#'@param data_test \code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in +#'addition to any other variables included in the linear predictor of \code{formula} +#'@param use_nb \code{logical} If \code{TRUR}, use a Negative Binomial likelihood with estimated +#'overdispersion parameter r; +#'if \code{FALSE}, set r to \code{10,000} to approximate the Poisson distribution +#'@param use_mv \code{logical} If \code{TRUE} and the total number of series is \code{<=5}, +#'a multivariate gaussian distribution is used for the state space trend errors. If \code{TRUE} and the total number +#'of series is \code{>5}, a set of latent dynamic factors is estimated to capture possible dependencies in the state +#'space trends. If \code{FALSE}, independent gaussian distributions are used for the trends +#'@param n.chains \code{integer} the number of parallel chains for the model +#'@param n.adapt \code{integer} the number of iterations for adaptation. See adapt for details. +#'If \code{n.adapt} = 0 then no adaptation takes place +#'@param n.iter \code{integer} the number of iterations of the Markov chain to run +#'@param auto_update \code{logical} If \code{TRUE}, the model is run for up to \code{3} additional sets of +#'\code{n.iter} iterations, or until the lower 15% of effective sample sizes reaches \code{100} +#'@param phi_prior \code{character} specifying (in JAGS syntax) the prior distributions for the AR1 coefficients in the +#'latent trends +#'@param rho_prior \code{character} specifying (in JAGS syntax) the prior distributions for the smooth penalty parameters +#'@param r_prior \code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial +#'overdispersion parameter +#'@param tau_prior \code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian +#'precisions used for the latent trends (ignored if \code{use_mv == TRUE}) +#'@details A \code{\link[mcgv]{jagam}} model file is generated from \code{formula} and modified to include the latent +#'AR1 state space trends. The model parameters are esimated in a Bayesian framework using Gibbs sampling +#'in \code{\link[rjags]{jags.model}}. +#' +#'@return A \code{list} object containing JAGS model output, the text representation of the model file, +#'the mgcv model output (for easily generating simulations at +#'unsampled covariate values), the names of the smooth parameters and the matrix of indices for each series +#' +#'@export + +mvjagam = function(formula, + data_train, + data_test, + use_nb = TRUE, + use_mv = FALSE, + n.adapt = 1000, + n.chains = 2, + n.burnin = 5000, + n.iter = 2000, + thin = 2, + auto_update = TRUE, + phi_prior = 'dbeta(1, 1)', + rho_prior = 'dunif(-12, 12)', + r_prior = 'dexp(0.001)', + tau_prior = 'dunif(1, 100)'){ + + # Fill in any NA's with arbitrary values so the observations aren't dropped when jags data is created + orig_y <- data_train$y + data_train$y[which(is.na(data_train$y))] <- floor(sample(seq(0,50), + length(which(is.na(data_train$y))), + T)) + + # Estimate the GAM model using mgcv so that the linear predictor matrix can be easily calculated + # when simulating from the JAGS model later on + ss_gam <- mgcv::gam(formula(formula), + data = data_train, + method = "REML", + family = poisson(), + drop.unused.levels = FALSE) + + # Make jags file and appropriate data structures + ss_jagam <- mgcv::jagam(formula, + data = data_train, + family = poisson(), + drop.unused.levels = FALSE, + file = 'base_gam.txt', + sp.prior = 'log.uniform') + + # Read in the base (unmodified) jags model file + base_model <- suppressWarnings(readLines('base_gam.txt')) + + # Need to remove lines from the linear predictor section and replace with + # the modified state space representation + lines_remove <- c(1:grep('## response', base_model)) + base_model <- base_model[-lines_remove] + base_model[grep('rho\\[i\\] ~', base_model)] <- paste0(' rho[i] ~ ', rho_prior) + + # Add replacement lines for trends and linear predictor + if(!use_mv){ + modification <- c("model { + + ## GAM linear predictor + eta <- X %*% b + + ## Mean expectations + for (i in 1:n) { + for (s in 1:n_series) { + mu[i, s] <- exp(eta[ytimes[i, s]] + trend[i, s] * in_season[i]) + } + } + + ## AR1 state space trends + for(s in 1:n_series) { + trend[1, s] ~ dnorm(0, tau[s]) + } + + for (i in 2:n){ + for (s in 1:n_series){ + trend[i, s] ~ dnorm(trend[i - 1, s] * phi[s], tau[s]) + } + } + + for (s in 1:n_series){ + phi[s] ~ dunif(-0.9, 0.9) + tau[s] ~ dexp(0.5) + } + + ## Negative binomial likelihood functions + for (i in 1:n) { + for (s in 1:n_series) { + y[i, s] ~ dnegbin(rate[i, s], r); + rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps, + (r / (r + mu[i, s]))) + } + } + + ## Set overdispersion parameter to 10000 if not using nb; + ## this approximates the Poisson distribution + r1 ~ dgamma(0.01, 0.01) + r2 <- 10000 + r_indicator <- ifelse(use_nb > 0, 1, 0) + r <- (r1 * r_indicator) + (r2 * (1 - r_indicator)) + + ## Posterior predictions + for (i in 1:n) { + for (s in 1:n_series) { + ypred[i, s] ~ dnegbin(rate[i, s], r) + } + } + ") + + # Create the joined model file + fil <- tempfile(fileext = ".xt") + cat(c(readLines(textConnection(modification)), base_model), file = fil, + sep = "\n") + model_file <- readLines(fil, n = -1) + + # Update and prior distribution choices + model_file[grep('phi\\[s\\] ~', model_file)] <- paste0(' phi[s] ~ ', phi_prior) + model_file[grep('tau\\[s\\] ~', model_file)] <- paste0(' tau[s] ~ ', tau_prior) + model_file[grep('r1 ~', model_file)] <- paste0(' r1 ~ ', r_prior) + + model_file <- textConnection(model_file) + } + + if(use_mv){ + if(length(unique(data_train$series)) <= 5){ + modification <- c("model { + + ## GAM linear predictor + eta <- X %*% b + + ## Mean expectations + for (i in 1:n) { + for (s in 1:n_series) { + mu[i, s] <- exp(eta[ytimes[i, s]] + trend[i, s] * in_season[i]) + } + } + + ## AR1 state space trends with possible covarying errors + trend[1, 1:n_series] ~ dmnorm(zeros, tau[ , ]) + + for (i in 2:n){ + for (s in 1:n_series){ + trend_mus[i, s] <- trend[i - 1, s] * phi[s] + } + } + + for (i in 2:n){ + trend[i, 1:n_series] ~ dmnorm(trend_mus[i, ], tau[ , ]); + } + + # AR1 persistence coefficients + for (s in 1:n_series){ + phi[s] ~ dunif(-0.9, 0.9) + } + + # Wishart is appropriate conjugate prior for dmnorm precision + # df = n_series + 1 sets uniform distribution on correlation parameters + tau[1:n_series, 1:n_series] ~ dwish(diag_mat[ , ], n_series + 1) + + # Scaled residual correlation matrix (between -1 and 1; Gelman & Hill 2007) + Covar.raw[1:n_series, 1:n_series] <- inverse(tau[ , ]) + for(i in 1:n_series){ + for(j in 1:n_series){ + cor[i, j] <- Covar.raw[i, j] / sqrt(Covar.raw[i, i] * Covar.raw[j, j]) + } + sigma[i] <- abs(xi[i]) * sqrt(Covar.raw[i, i]) + # Redundant scaling factor improves sampling efficiency + xi[i] ~ dunif(0, 100) + } + + ## Negative binomial likelihood functions + for (i in 1:n) { + for (s in 1:n_series) { + y[i, s] ~ dnegbin(rate[i, s], r); + rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps, + (r / (r + mu[i, s]))) + } + } + + ## Set overdispersion parameter to 10000 if not using nb; + ## this approximates the Poisson distribution + r1 ~ dgamma(0.01, 0.01) + r2 <- 10000 + r_indicator <- ifelse(use_nb > 0, 1, 0) + r <- (r1 * r_indicator) + (r2 * (1 - r_indicator)) + + ## Posterior predictions + for (i in 1:n) { + for (s in 1:n_series) { + ypred[i, s] ~ dnegbin(rate[i, s], r) + } + } + ") + + # Create the joined model file + fil <- tempfile(fileext = ".xt") + cat(c(readLines(textConnection(modification)), base_model), file = fil, + sep = "\n") + model_file <- readLines(fil, n = -1) + + # Update and prior distribution choices + model_file[grep('phi\\[s\\] ~', model_file)] <- paste0(' phi[s] ~ ', phi_prior) + model_file[grep('tau\\[s\\] ~', model_file)] <- paste0(' tau[s] ~ ', tau_prior) + model_file[grep('r1 ~', model_file)] <- paste0(' r1 ~ ', r_prior) + + model_file <- textConnection(model_file) + } else { + #### Use the latent variable approach to estimate dependencies when n_series is large + modification <- c("model { + + ## GAM linear predictor + eta <- X %*% b + + ## Mean expectations + for (i in 1:n) { + for (s in 1:n_series) { + mu[i, s] <- exp(eta[ytimes[i, s]] + trend[i, s] * in_season[i]) + } + } + + ## Latent factors evolve as AR1 time series + tau_fac ~ dnorm(50, 0.05)T(0.01,) + for(j in 1:n_lv){ + LV[1, j] ~ dnorm(0, tau_fac) + } + + for(i in 2:n){ + for(j in 1:n_lv){ + LV[i, j] ~ dnorm(LV[i - 1, j] * phi[j], tau_fac) + } + } + + # AR1 persistence coefficients for latent factors + for (s in 1:n_lv){ + phi[s] ~ dunif(0, 1) + } + + ## Latent factor loadings, with identifiability constraints (Hui et al 2015) + ## Upper diagonal 0 + for(i in 1:(n_lv - 1)){ + for(j in (i + 1):n_lv){ + lv_coefs[i, j] <- 0 + } + } + + ## Positive sign constraints on diagonal elements + for(i in 1:n_lv) { + lv_coefs[i, i] ~ dunif(0, 1) + } + + ## Lower diagonal free + for(i in 2:n_lv){ + for(j in 1:(i - 1)){ + lv_coefs[i, j] ~ dunif(-1, 1) + } + } + + ## Other elements also free + for(i in (n_lv + 1):n_series) { + for(j in 1:n_lv){ + lv_coefs[i, j] ~ dunif(-1, 1) + } + } + + ## Trend evolution for the series depends on latent factors; + ## fix the precision here as it may become unidentifiable when estimating + ## precision for the latent factors + for (i in 1:n){ + for (s in 1:n_series){ + trend[i, s] ~ dnorm(inprod(lv_coefs[s,], LV[i,]), 100) + } + } + + ## Negative binomial likelihood functions + for (i in 1:n) { + for (s in 1:n_series) { + y[i, s] ~ dnegbin(rate[i, s], r); + rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps, + (r / (r + mu[i, s]))) + } + } + + ## Set overdispersion parameter to 10000 if not using nb; + ## this approximates the Poisson distribution + r1 ~ dgamma(0.01, 0.01) + r2 <- 10000 + r_indicator <- ifelse(use_nb > 0, 1, 0) + r <- (r1 * r_indicator) + (r2 * (1 - r_indicator)) + + ## Posterior predictions + for (i in 1:n) { + for (s in 1:n_series) { + ypred[i, s] ~ dnegbin(rate[i, s], r) + } + } + ") + + # Create the joined model file + fil <- tempfile(fileext = ".xt") + cat(c(readLines(textConnection(modification)), base_model), file = fil, + sep = "\n") + model_file <- readLines(fil, n = -1) + + # Update and prior distribution choices + model_file[grep('phi\\[s\\] ~', model_file)] <- paste0(' phi[s] ~ ', phi_prior) + model_file[grep('tau\\[s\\] ~', model_file)] <- paste0(' tau[s] ~ ', tau_prior) + model_file[grep('r1 ~', model_file)] <- paste0(' r1 ~ ', r_prior) + + model_file <- textConnection(model_file) + } + + } + + # Covariate datafrme including training and testing observations + X <- data.frame(rbind(ss_jagam$jags.data$X, + predict(ss_gam, newdata = data_test, type = 'lpmatrix'))) + + # Add a time variable + X$time <- rbind(data_train, data_test[,1:ncol(data_train)]) %>% + dplyr::left_join(rbind(data_train, data_test[,1:ncol(data_train)]) %>% + dplyr::select(year, season) %>% + dplyr::distinct() %>% + dplyr::arrange(year, season) %>% + dplyr::mutate(time = dplyr::row_number())) %>% + dplyr::pull(time) + + # Add an outcome variable + X$outcome <- c(orig_y, rep(NA, NROW(data_test))) + + # Add a series identifier variable + X$series <- as.numeric(rbind(data_train, data_test)$series) + + # Arrange by time then by series + X %>% + dplyr::arrange(time, series) -> X + + # Matrix of indices in X that correspond to timepoints for each series + ytimes <- matrix(NA, nrow = max(X$time), ncol = length(unique(X$series))) + for(i in 1:length(unique(X$series))){ + ytimes[,i] <- which(X$series == i) + } + ss_jagam$jags.data$ytimes <- ytimes + + # Matrix of outcomes in X that correspond to each series at each timepoint + ys_mat <- matrix(NA, nrow = NROW(ytimes), ncol = NCOL(ytimes)) + for(i in 1:length(unique(X$series))){ + ys_mat[,i] <- X$outcome[which(X$series == i)] + } + ss_jagam$jags.data$y <- ys_mat + + # Other necessary variables for JAGS + ss_jagam$jags.data$n <- NROW(ytimes) + ss_jagam$jags.data$n_series <- NCOL(ytimes) + ss_jagam$jags.data$X = as.matrix(X %>% + dplyr::select(-time, -series, -outcome)) + + if(use_mv){ + # We use a Wishart prior for the state space multivariate precision matrix + ss_jagam$jags.data$diag_mat <- diag(ss_jagam$jags.data$n_series) + ss_jagam$jags.data$zeros <- rep(0, ss_jagam$jags.data$n_series) + } + + + # Set use_nb to 0 for approximate Poisson; otherwise to 1 for Negative Binomial + if(use_nb){ + ss_jagam$jags.data$use_nb <- 1 + } else { + ss_jagam$jags.data$use_nb <- 0 + } + + # Machine epsilon for minimum allowable non-zero rate + ss_jagam$jags.data$min_eps <- .Machine$double.eps + + # Ensure inits fall within prior bounds for rho + ss_jagam$jags.ini$rho[ss_jagam$jags.ini$rho > 12] <- 11.99 + ss_jagam$jags.ini$rho[ss_jagam$jags.ini$rho < 12] <- -11.99 + + # Number of latent variables to use + ss_jagam$jags.data$n_lv <- floor(ss_jagam$jags.data$n_series / 2) + + # Binary indicator of in_season + ss_jagam$jags.data$in_season <- rbind(data_train, data_test) %>% + dplyr::select(year, season, in_season) %>% + dplyr::distinct() %>% + dplyr::arrange(year, season) %>% + dplyr::pull(in_season) + + # Initiate adaptation of the model + load.module("glm") + gam_mod <- jags.model(model_file, + data = ss_jagam$jags.data, + inits = ss_jagam$jags.ini, + n.chains = n.chains, + n.adapt = n.adapt) + unlink('base_gam.txt') + + # Update the model for the burnin period + update(gam_mod, n.burnin) + + # Gather posterior samples for the specified parameters + if(!use_mv){ + param <- c('rho', 'b', 'mu', 'ypred', 'r', 'phi', + 'tau', 'trend', 'p_resid') + } else { + if(length(unique(data_train$series)) <= 5){ + param <- c('rho', 'b', 'mu', 'ypred', 'r', 'phi', + 'tau', 'trend', 'p_resid', 'cor') + } else { + param <- c('rho', 'b', 'mu', 'ypred', 'r', 'phi', + 'trend', 'p_resid', 'lv_coefs', 'tau_fac') + } + + } + + out_gam_mod <- coda.samples(gam_mod, + variable.names = param, + n.iter = n.iter, + thin = thin) + if(auto_update){ + # Update until reasonable convergence (exclude 'cor' as it will have some 0 neffs) + mod_summary <- MCMCvis::MCMCsummary(out_gam_mod, c('rho', 'b', 'phi', 'tau')) + for(i in 1:3){ + if(quantile(mod_summary$Rhat, 0.85, na.rm = T) < 1 & + quantile(mod_summary$n.eff, 0.15) > 100){ + break; + } + update(gam_mod, 5000) + out_gam_mod <- rjags::coda.samples(gam_mod, + variable.names = param, + n.iter = n.iter, + thin = thin) + mod_summary <- MCMCvis::MCMCsummary(out_gam_mod, c('rho', 'b', 'phi', 'tau')) + } + } + + model_file <- readLines(fil, n = -1) + unlink(fil) + + return(list(jags_output = out_gam_mod, + model_file = model_file, + mgcv_model = ss_gam, + jags_model = gam_mod, + smooth_param_details = base_model[sort(c(grep('## prior for', base_model), + grep('## prior for', base_model)+1))], + ytimes = ytimes)) + + +} diff --git a/R/plot_mvgam_fc.R b/R/plot_mvgam_fc.R new file mode 100644 index 00000000..c30e51a2 --- /dev/null +++ b/R/plot_mvgam_fc.R @@ -0,0 +1,51 @@ +#'Plot mvjagam posterior predictions for a specified series +#'@param out_gam_mod \code{list} object returned from \code{mvjagam} +#'@param series \code{integer} specifying which series in the set is to be plotted +#'@param data_train A\code{dataframe} containing the model response variable and covariates +#'required by the GAM \code{formula}. Should include columns: +#''y' (the discrete outcomes; NAs allowed) +#''series' (character or factor index of the series IDs) +#''season' (numeric index of the seasonal time point for each observation; should not have any missing) +#''year' the numeric index for year +#''in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero +#'during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during +#'during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing +#'it to zero +#'Any other variables to be included in the linear predictor of \code{formula} must also be present +#'@param data_test \code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in +#'addition to any other variables included in the linear predictor of \code{formula} +#'@export +plot_mvgam_fc = function(out_gam_mod, series, data_test, data_train){ + pred_indices <- seq(0, length(out_gam_mod$ytimes), + length.out = NCOL(out_gam_mod$ytimes) + 1) + preds <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'ypred')[,pred_indices[series]:pred_indices[series + 1]] + preds_last <- preds[1,] + int <- apply(preds, + 2, hpd, 0.95) + plot(preds_last, + type = 'l', ylim = c(0, max(int) + 2), + col = rgb(1,0,0, alpha = 0), + ylab = paste0('Estimated counts for ', levels(data_train$series)[series]), + xlab = 'Time') + int[int<0] <- 0 + polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))), + c(int[1,],rev(int[3,])), + col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA) + int <- apply(preds, + 2, hpd, 0.68) + int[int<0] <- 0 + polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))), + c(int[1,],rev(int[3,])), + col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA) + lines(int[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed') + + s_name <- levels(data_train$series)[series] + points(rbind(data_train, data_test) %>% + dplyr::filter(series == s_name) %>% + dplyr::select(year, season, y) %>% + dplyr::distinct() %>% + dplyr::arrange(year, season) %>% + dplyr::pull(y), pch = 16) + abline(v = NROW(data_train) / NCOL(out_gam_mod$ytimes)) + +} diff --git a/R/plot_mvgam_trend.R b/R/plot_mvgam_trend.R new file mode 100644 index 00000000..9ef2cb3f --- /dev/null +++ b/R/plot_mvgam_trend.R @@ -0,0 +1,41 @@ +#'Plot mvjagam latent trend for a specified series +#'@param out_gam_mod \code{list} object returned from \code{mvjagam} +#'@param series \code{integer} specifying which series in the set is to be plotted +#'@param data_train A\code{dataframe} containing the model response variable and covariates +#'required by the GAM \code{formula}. Should include columns: +#''y' (the discrete outcomes; NAs allowed) +#''series' (character or factor index of the series IDs) +#''season' (numeric index of the seasonal time point for each observation; should not have any missing) +#''year' the numeric index for year +#''in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero +#'during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during +#'during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing +#'it to zero +#'Any other variables to be included in the linear predictor of \code{formula} must also be present +#'@param data_test \code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in +#'addition to any other variables included in the linear predictor of \code{formula} +#'@export +plot_mvgam_trend = function(out_gam_mod, series, data_test, data_train){ + pred_indices <- seq(0, length(out_gam_mod$ytimes), + length.out = NCOL(out_gam_mod$ytimes) + 1) + preds <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'trend')[,pred_indices[series]:pred_indices[series + 1]] + preds_last <- preds[1,] + plot(preds_last, + type = 'l', ylim = range(preds), + col = rgb(1,0,0, alpha = 0), + ylab = paste0('Estimated trend for ', levels(data_train$series)[series]), + xlab = 'Time') + int <- apply(preds, + 2, hpd, 0.95) + polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))), + c(int[1,],rev(int[3,])), + col = rgb(150, 0, 0, max = 255, alpha = 100), border = NA) + int <- apply(preds, + 2, hpd, 0.68) + polygon(c(seq(1:(NCOL(int))), rev(seq(1:NCOL(int)))), + c(int[1,],rev(int[3,])), + col = rgb(150, 0, 0, max = 255, alpha = 180), border = NA) + lines(int[2,], col = rgb(150, 0, 0, max = 255), lwd = 2, lty = 'dashed') + abline(v = NROW(data_train) / NCOL(out_gam_mod$ytimes)) + +} diff --git a/data/all_neon_tick_data.rda b/data/all_neon_tick_data.rda new file mode 100644 index 0000000000000000000000000000000000000000..736490b1043e96d25e15767cfc5dc2b1a8afa49b GIT binary patch literal 40689 zcmd432~?BGwl;kGcs#b+!f`-kZbbw^Kt<*utum+xk$DVhMMXeBW&$BO-HOPVMg){8 z$~-0_AdrNl1tBseGDgN2LWm(ifCx#*^acB#d(XY+-1UF!|NeiiuU0mds$I3~*;ViR z?!D`+ppo(YzkN%8St;55vx>t6v1er8)P<{;om{o$%9Z}K^@(P3;9+;%p;YCns^#-W z(WvWQxPW!#g7yA{6vI2cdj99A@Xa}ky!o^<4%znw9VMdpxrV!Y;FcqUBxmJ-{1rp) z^nH`+q20_F`o~2jljBBurUQ4Q%|AD5zW>|X25&rcuD{UvI+eb713N zN94lgi}Yi#nEA6@?%T%{-C+w>(`l4;ReNdSeBxsGL^OSDw#SGD?!Na%a8fjF>|}y0 z+%J7}xo`M3_p;fM+P=k$Zj5U!;nxm2ZJ7bhRFJ2N9(GNFMsG(q*K)l~@s1`AhkIsa zVuRn9YpKr2`V$KE_T*ZVawC7fbzu^ENU~CRs>si&1m34kF#c+Gqum?eWYYF116$R% z_~JoRLcV%(*QSS&>KO%j=E>s z>eEw2Ze8zW7`S@l=E1>l1Z~(}M3z&%ZT!{TAuvsd3Vzcu?axOEtuH3zt2&u@Jp#wd z0Bd?c#y%<=a_Vf^6@80o59(os_vgMRE>&A(8;|jwdXo;6??7snRpa-Xva8QaLXSC_ zfFA{J%0{v`^91BY`qM{lM_=W8$yBI*1m7I>ro!2EMN73^jw83ZR}usUkW0Xj%aCzY2r9=dI%}}-xl!mXQbU?~^Qp!EUT6++MS4VM zH57*49{1O*UsU!nrhKUDu=gA?ek1EHV=c>A6n{f=DLUBAenat?9!409}CzF2nFb;D^06hpflUY@8wm(rx@KoqR*8!bA(4k?M@2KhIp?S@*>~t#^ z`k}KdGHZ9mGQ;^0(D7MR+W2LaDde5G#F` z{>oaaSNt43-eD=l99zV5#WY=O>u9xQJ%op8xhiie?e6(G`&sP`d`=#!#gG=flm5bJX3nr3fj^^5Td)QsD0wHLp=f`v5-@^5g5Aeq*$;!8&!E$Cbf zYkM)CIdgnLu1Ppu`0Tg4G5QQwp}2f+6(lLNK)Xxjvi=8g<7(yf%!Xe4*^Y|3o@SBH!!)%kZUQwU;12iqw3Pn#~)Z(qlc>_swsKC3Z*7zO&?Wv zjV2_FdyfV+x4xdd)YjaSo0s##kE^tyV9VbpU8?WX5S-V9`W(bBe-CzkWN6w|ph(NO z;^=Ja8zm{s#|;jC5B7XyXxUY8m^P5L#<1gWXhL@%#6PJIx?{^fG4fO)C%@rI%Wb;~ zExi-^J_m#Q(yusr+WNkcvD9+%mt`!63N6bM`jawN_7%rCTi+ADR|0JJ_hsBW4mqfQ z(y%OL!LH>u+>@hE6^Acv{3laz(6~gyW%Tdai=3t-wtPupx#&qD2~orPKhF%Fe978< zu(RpMvg~~)mTLb0$j+9j@6%j*_0O4+CYndVDcX-+zG-WH?+Wp zWr+M~;AHsFF6fJc_=fMnagPkOyA=Op2~NnrfQMPH-ErVYoq%>6#N)mPzgM#^>y6S3 zIb-@iWXhvg9C{6nZIlRO{M({ig8xo&Ca}KDwO|J$?9G^*f`fu-yZ0b_`#&p^(RI;Q|hsdfqnayXw zT(uyE!&pmZ!&_k-$I#&X($s3`7plv?6OJ}zh(3E<)OH~H_?5ldbj5*xLH)=SL_ zLU2xrb)VOFg}Cw0l)9mN9&Io0Ka9ZGOa@SZWEzrSb05_4jeIH35Fmxrs%; za@d7p#lUQx1`=*z4G+@yPIf}D5vH7jN|6zRPuflI+BQt}JxPeZZ2!JU<@|Zg z)zKF@UQgPAL3T6M%_oS*4+dXtHOH4^gO~6Db*qHX58vOC>GD^ZE`Jc3h4?W}QuVzO z$lFFn_(fw(4U=>Hw2k~p&59KprQ<&Titz5oj>{f$E%ZvWUhDIiPpi4ZDU5$bp&RA< z1+fc;vKvJv{~Nsq{&C~{z3h!Ybq;={BWm}~jR$*#{;^AOz1ZEhh@5thn+CBzQ+h5Z zGwcjFP;Gj_!Ky%u{P{j_Wv&9!Pucu1o`$@>#TOU!GLx->b0SxJ!m?)4^%z+F=@Wi%&?^n7g!#Hu!Hd8$7x@@D?F9N_Q|%y(iFzx}d^$8s7} z8R*#C*Q7e|Pakc1f^?mJxBu@94vM~>+nbdaXrBLn{0VO$=BDOl=ll7Ys$y4XjuFAf;X`VMOHT>cKqq)b;-fv#H*_;yY)$M%l==IRZ zjGCI7d!v+GG9dU=Z0bmib01n|2b>+dcIh>k)E+{gER!WCFH>AIq&N2A4$IBUv}#t+ zuiyPm4V5dYYQlSSoFwGw9T5)_MM=n(W0KAt2=klmhyQXHP@*N{D5HHjf%Tpl}V}$WZYH1Z>D6zmSA_<_EofU^)!!5i3{J#i zRZVAiCUol0U$N*Zo2{&!VeeVTF8P`AQ4Jzz8Q7M+?Cl9JLQM4zXCFJalHhE%qmp~e zFEQKA(qc&D__3W;csv>eQ~DG?4A_~{!)P+z=ykV({LXs!)=vmOxmQLWlo%Y9K?>XO zm25Thg>5?m`y%__n`caL^lr#M{`HlZLitS zM`U6Y!hXP1X$~o-?F=!2Vv}25{L%ef487yM)3-TWd}W}r4whlPFm4=uCK9-Vx;C^v z<6cSCj-p>k<3##TJLB}CCap+2^QF*;B;kE_+69MB4z$Sm?{xNvoh$i<>_9Es((t@= zXZlo}lXfmQ3{3kmfn!+Z!nqkgA|jfadkz%TmCLadp0AH@5^3v8%B))v#gW zlWFL3Yqdit_+?s9-71EmjMXQxCj!*BH=6se#;au|znd@{tv`*}T?qZMWULD_CTzvr zM-U1(ylNfMz4#wV8#RH6fo+5qGy3UjQSd;Ue;}n;BPf|Y1)5p8ngr67Pfj!zj6QP! zjlQY}h6%`Su@(drYuclA1RbWB*UAKv2d22yi(Zs)%9e(c)s&>%5}fPol!Wkc<{DEF z6x2W)O1!jPoxRJ2NnF$@K{K;XX`D+qTrVYNR=X+^PEonB%xN=tVN5XhRfjWuF4r z^&&os?gJfS8!2g0n|5<&tbt^T9FZWmVpWosf%Xjqj(!@5_f@xy@*A(WnwNB*uopHa ziO1CmHgHR_gGi8JiXYmWUX6C`NqQxRSxfV5h?4zET3z50HoXC>6cgDUKqB zTBIAx@JPbUGaC#_H~90z6-IMWji!5I8FgYCMWi*%`57ggNv;1XK#zP11%QiCyT@CJ z%~yI}06;b|EN$$T>~W)t?kwG`9*T!7#GY}s1B0E$E33DnMk2}E;S(t$@Wjv*NJS5@)sYl#tXm7Km1je!IuR7dWVN@}@C#F^`RzoQ9 z_co9<)cAQx@{jiBTFX2KCv)Qj>DBt#0B$lgbcHsx<&p#m9<3DP{gXtIyl(HAkB#+L zKJwVMt$rYEod9%Gk<*Nc^QLevW4A3M14lvP*&kP9%_E}#uvPaS2;M-FFzSXM<(z8pDc1VMs)qWQk1pAR_8&B-_N7!o{0y6Q|*6p{9O^4+E{?K~C;P4kfGNGn-b>5vBDo?w;Tq;*C*M7^$DDTXydJ8?3qofX7 zR<~`jPliJMDX|-t8@2wq0VSPVP$CtrGKIPdFI(N?_Mw!&9_(wIu+dNM2SZk9`|HX_ zl=Q%|Yijg&Her*Ml|y*G9@aR@ytRIhC)LJ5aP>>kuoB{TMdcz( zLnJ2pZgeKJH@<|AJX|Xld5aqL+71ov2@{SF5%Rr)=SLZr4-?*YR*r`tlGRE&i#L3l zZGyNYIwhMcn>y=G4D;}kFEW;ydy2YlLwxpf{ThzCtFWJwvL3CYOHddujG9#SFbFQ> z_S1E%`4D_ZTv7w|^idBxM)Ov}*Oixb6Wl-;wHSRtFbWNK&V<@Z!!LLoCaenjH>Osm z=@h85B6x_G(O6Lk?U}!eBe)T%Bx4o=CP_#%W(Gz>87~J4R+Gx!T7|Z0X9k{I)pPO) zlp4G?womBJcO2rjSfGZ`v#6|y>^kexsH|kziePm8r@YYqtEMPjsxQ{sndB2^49rFO zC}S5zg=^^_V(KRg6Lc>KbbuKL;ShIGYr*Ii{|mH!j-D*>kDqZ%CS zbGX^M=NM3XYR(@|w1<;JU$ajx$A9mUWVODUafk`M4dQmxuNcB4!Tp*y#kZIY7b@g4 zW()nelt>9^%jUwTxh_fOT1hYmz87{P9wxgmkR=6fIEI=jaK4y0sg5_-l?d|PwIE% zb-ZsIsQbV-CZpfadzEbi%gjo3i$isV<15dBdC%G7LW!CZO04F|>-34o=Mj!^17cU1 zg;cFTn86Y+a)tBUWu3T0nC7usRxU`>=J}|N*LVt=!>fN!MQG9;KbaW~i^6Vi*|z0A zmZ*156LZtvP@*-`Y%8^I(g)3$wJn0tAx%3n_kDZ7kjOI`Ss!s*e9nblvHiCGad>Lq z8zL24AJS$x@=2w=G;lnq6W!b`y4k?>C?O%HZ7RbVISuOOS zDqJbngC_&Dy*WpzOE$SAx2Q=>SVQvqlQ4LS=H-xg?Mhl*nDG- zpvXH3QnOwdvz3uUNZ@!`7zBkbV>U)3J)=SOv-Z)>l<*kAc(QX6HS-!g28W6KeRcj` zG8w@@8ydE3Hinv7IDeVH`jKCUw;E4j_g|eRjmE)BF!@LWf4{kNc*OgDIEX%4Y@UQXWfNw;L_*{M!tmw2%=FQYhafNYrX_7%^)PdO9IW)M23PD5 zn_(zzY7fclMn#S6qx4j_vhk+DBaI9ne6J0z&RLM*lO4G+a%>g3B-nDq6Gx!9SLu!yJ^9u~mS39k1$5of2bM4QT7tjgR8+ zIw9ys5;vBC(j#~aG8EKJ6^#i1&VztV>Mvz*`7szGBPT-UL~Pd46LIw9FzooJSb(Wn~_O$0U6$(a7lj<}&Am&2xvrMgHovKkY^D9VfA^sj=Rd^2h6I0@$;n$ssifs`5(e}vkWKRxiA|$VLF0;PB*{_73DXdjl}E6scJu;b1Ozjp z9%duMxmtfI!b_MO=mvtZlLBz;x8o0-tT4 z%mNOpTa#5dR*(iN{esxQ(-G2%hyk0nEwVmKd=k87-46__E>U zg=HXkg61!h+6>dX*2E4QZ$)s!=F?%;@Va+=DM~z5CRv(UjpcneWOi;f;R0(og*abl z0B=IMdD-0Ss8%s;`vgg3Pmz|UkS(l_ld*!dSU@^#4qM6&i(SzQCyhmSzFajgdA@QZ zy3}Cis93P8E`&hR$tq#0&4VJsK=G;2UM#`2m|RQY@kE#^Vt;|`#KMCWOqsYYq^%%? zL}~O>u5Aik^_!+=;P6{X+GNFh=Jo7UOyYPlx;c1x-KJpdQXrbX9SX7u!+d4+(l{zb#}WdM{;UKW@q&G#4zfb z+5ML(@^n=PB9v4Fs1`%_XIWnkq*w>3GPgV1M-K-=i*!(5Mm#86N1DaRp_+i3N=%fQ8V$M|D#x z!eA0*77#-nHQrLjzZYOpC_vAQp?)uCa+nG(>t+ujE@Xk4Z9po(OmRv%XMfX_!`Ijy zHLX^YFU+jaJlPJCASi=df%`5TjotoacOM#SR&VF9Cv??3A)olk_dOGrrl|AQ`K&yqz6~$Ni%>`J$gMg@3Cg zqIp~&@6DW)-Fe%P%4Xto#2igz#?+hlkVS8cl%tzoOi@n~EQ3E%nGw;(FeLF@LdY4d zu+zGr^+obFv|$Z1;eg{>tiQc1GfLF)?THkM=^r~R-)zRWKUrR_Ck zUoiAcWET=FxY~&Xf;TrpKLjZAv&$7qYb4F}Tle5N;T5gIpsB>PcEy| zUvcDNe+w5OUs7}CSD2fO6a#$1s7ihEsXC->D$xQUvz;SoC$GX@b*3HCa4BnLy~xj4uks@v(m<4^I>qzT02kM@gw($1vVdk zt~9g>rJ+YS$G+-?j-4?y&!1>5zQxcCz^0F)G937A(K4fqnR07|e%ry^*%$ohv>DQD z&Pu*-nAvEezmyW`QE~VP>!b?nwj8pbHfj$2ODcI`ZBo^ypnJd^_iMNOP2tV^fZ`R0 z4QBTITA-Q8TMkg#4P02uH5VO`17vlRKd&{LiSEk*3cAV6wf*Lz6LN${-G=lvJu|3L zD!7m470qu|g{h@df2Wy7@~P&cqjH3-ZqDa5_yayj6=s@B9iT)@*+1{AcD#nF05TtwRzN?Vdfv1us2nnzIIU6CcWDsnh#U8 z$?qmc@)P8cA85JeP~B7@eeJv)vX9nm4wXwK)7RY0L>J@$CEc<)X*L)6%K;GGhM(6m z%tVLe2+z6=nQKF;Fs)Q@4~_DGAFB%6l}i0UGmYl!tHM-LsU%u_)W56PP2)xKkqhuX zdr5(6jKtBvLaJq&nq)rndF|g)_n1TF-Nn4Zgc3LQxWrP=Tz~4RPDlyE`TddqjnA9&UdJzQ_sG2hp&RRxQLX^D9HgY?&u@vAJ@J13f9f({s) z>|ZU!>p``$dMK31QeFQnb>5xk&f4e+Wva&m{-5N4dF!(g_o=h06Hs(m)J(~s@{kkl z$Jz(Kxtm?lRXrKjL$i?UJyw7}`u$TjUk!-(>Y60S`HAL~ zU67SLceTwR_`6iEsP?rIcfPYOR9gX`d+G@X!%R3Nv8-9kxNwx)D$m3AuFM3_=k(KYmXH7-2=wf(x=fCKGV z?So8NzpitxG;i;CQ{rt^BJJyRJ zMJn>(J4tnLPmG8UZ57?Xv*j9%8{|s% zT>?b*hR0J7k!n;C#ZNR>d;km39x8NXOACCfd=gC}+`nLRi#cwdmWbV+c`#d*^mdVbPTV#g>ZZh}Q;#vNF(?y&N3`o^*|OBHsw zbyw@-5#uN9poROol$+u&T-_{GAOp_tf3WL@Zmx|NMgv56nNGIMSk@{gHwBujwdE#z z{|#k%DzzWNS3d$~jUEA>ZsQyxF@KAlS?%z@w!H>PSzy9`TZR?Bme}Ybr#NQ~m?WzuE_DO3aRe`2}Jg^*(`))=q9bL(N90^eB z+MIYO?f`#;bPT>)F0a+N6JWPu@^Cxy%>cT#Mz^7A!*DSU@YvjK!;I2>b4=bLN@{qs zR8E{D5*m4Bkbi{vitoKQatCVpb2ML|HND)Dbne3B(F^R^9&sUEV@r-WFf&Nw8kr>v zX2Q24p_9NHNva>L3_izsV$xB5{ZQqhbvb67`-hd5ujk*wuZfi2@p4?{G*(~6nG|=N zD?3~QsL}?-pJaB=_pVV-t95s%+8b5dpRXpWUKFXv9*;bqOD9uvM~aAIJtWub1MOiz zMZ)6;@MFCKy~QX!^&at?6Z0@1H|mmYKUGJAbr#RbUJ$O{$iU7>9gK*1u>_HPZ6T0# znl&I=+%g(dmVc035W|R~vyYy0XcWd9Q2h25aKB&t>2v{lUg&=g9zJ$2z546*MWX^> z1fHRk9}~%>{F(BjeS^U+=ayR$>As4gsukCL#rCy5a?Sh zF|~{E7Y`b8%}{|~unc?lTH@`6->0bZR{c>%mmTd^xr_1B0qkwKZt6{uaC23iurFTa zZD|U&Yxc=ag`XO{Dr%a6Qn!200Vwfjvsq*C6+ znt6`8QC~C5T*n}mja<{?L(I?xI`f;_J^nUI{?gzH%+ala4Z%ajM+4fCJiS$U!Hjxp zRN`akPn%~SnY(lYohdg^)lc`-&dM>?WZ;($hj4l(wiQ;H-4JnFVVD2H=2^R<8tDPn z74dll8<0WVJBfp7<(lVoQ&{~_om=5Y2m`E}0SMjlgjC2AGR`K1xNY`$!)*|EMy-#x z(lZkQNq$|zMx<4`z`G0T1|_)6z#d!oS)D11(pCG3qDM=7yNa-zF?j66 z+yqbX2yyPEmBrb&RK|04UnT0?Rrh7LrW=$&e_(7Ih@WpDyEp6j7IqWcwG4MTEhtr| z$!uGf`H<*TiL<%3o?NR_M5^yVnm7g6M!V+CbTfwN8BZf|qyY@VkI^HJ;4|If#^Oxj zunwN!w8c$^8S#Sfz@R2M+V&Zes? zvW0y%uOZe)@1aB9=2($~8i6Wt-zA6h!(>muQtbh{=qtKRw74NT`%n=5utRkNUlnt7 z4R1UFrG4+rgxznO(8%OJrO3-I#OB~&QJE!+Li@KbV=@;7_*Yh7vrMMoG1qGTfiwC0 z0H9eqFGs0|&9V2Th*f$lEwV;J^V|p|9Qv_1f(tG?@_n8=th>s!^ZGK0?FUhh{qr>O z0j`p|Ze`LYJcu)h1O#YU7EjNf4bV7SJS~tQegpe#4y^p#VhkX22LmAz??|D>WyZ7n zaut|;%tz5#>Cx$MRyi3Z>qb8H?6i0L0?%eD!ORl%S7RLw_m-d&7?nDW1`= z`h6JZDn=DQg2IPfX_-7yTwA3ujNu3*E<_!GrfDAqG}Tz?0U9cwP;yP*vC z8HaU!l}qc5hSl6tT!~x;sS5I}hcgPvAXjd9uj>wt4!&?^HVQN*DIPM2h0!Orv|Liv zuL>slyGq(HfRZ+rw}IE(p4-RCfod^7^kA2b`pH(MUQ=r)1g2{?az+W%7keFklm1j% z&@fZ56ezAeB7NkhL*;gqRtZ1gTh#K%BM_(!huE}ws;=!CiGz7~x>K(mYC31fjbW(Q zk#}7Swh4{209A|roX&ZDSqE=7Q0q@obyz<`=jOMTZ@t+LuR}mIZkT)E90e~3iXil@ z3vEa?OLQ%~z|pyys~b_VQ6xOt#xBqSaS3IKzDt!~jXrmqRWAB2T-n0?MZHqAI+BvUfht1|! zyOYBws@*v%>P%R?c}e`*?3mo*hK*H`r%RHbPo806 z8JOo9CT$n>b6lS=)!;Kvn-pYVSa?O^qbe;dY6nDH`mxHk0^Y48ueLp-I*mSe%=i+m zM=pb+SM8aGG{}}#^Akr$FjM#)dcemlT{vV^QO~O)dzCr~tY0cJn0)$G#Rpc7351#s z%5S8_SQTv%$qIE+_^V-!ji*!t6hDsf<*{5s;jg$}%T5B&Z{56tz#3JPnM;h8uL%YO ztC5u1TCf4@x+JUG)%r}ci=e@>aN}Ga1vp3-4=hGo`cD2Tl%A~P#MYlih6YrL&M=|+ zYEI0?V?dVAJGR3_UZj|MkyX#n9f|yF^c`ozkw~a`vU&mo^a%9QH%)nXz;CtdBJF3R z#g}!1Wna%s+p-$kQ+m5cJ)auTW6l&hQK+-@%QW!Ax-}rbV4uh_4sgvI-k=9g$vEIA zFEh|(51NK``ZG2FNJ0DE(|N-GZ3}%(0GWMZ_Y9%*;vm~Igk-sPPM3Ee!p&Sg#-g=* z9JgBj^~(duDNDd!y#gzVb^Ja6&xWBHg3b^K7-@$;1Z?dBfJyA={^}Ht=3Yl)m$ZD# zEK`WwRDT|cE4_Ee7=8KJC2>$!&zOV>uw99LrY{PbC62qV?y0o?!W~VpcD7;vKqA6A zVmkK)kd4@8byAD9|u(h1CNcar1i7Yt;O<0I+}*-TmhbIp4!Ulo%4-mx31)`h`~k@$&d^XqEP zD|c@11;y`cD-RxQ6=bkPR{?I4Q?s%o0r!lm4T}w;;Yqd zc9oD6^3-t917N}A_IMUW^}3( zh_`QS_d3@uq~e)ejW{Oo`9p6f*yg! z2yU8x)C!18S!_D0=C~!m!CseX4VfCmj)-QLCU6o%*`4w=&kE3%Nl~bJq11Ak@d`4< zz)4nq|0_p3%8Yy&MqCo z@2JRX{N0m!UT%r`fa|R0`H~L;|86c}Ai=u3EAXDp1KD(WZ>f+WJQxCVdJx$3LaqR1euhn4)x{AqF-JACTW zR&ZP)6_ogWCH%YUpo7dL@u6$+N@StBSvu0q;1#)XA@=qDYS--oGBdU|<%mtZ45z4r z7IUg-S1qc`oaSFXKcfT7Zupgg&YX`mK20Lrj{dnVn)$nlyPU#c%Bw)PB?$5^5lm}% z;Ay6}bHwo9)6|!-Q=GtWrvi6Q20C5zxPwD`PVOsA)uBUPvqFOdZf9KTCkz$kj2O&IJKT0FD9$SU4Az+{FO2A#$t7s>r~=4u0{@ox^ON_WT1eN8z+ z9z%R0ac_)LKZ`Z8X))+3=ri#{v`~47a(z)iJP$(`8Ed%5MVpltiktui0HW~8vvr%M zJ5~`sTe@H=K$U#0u=+IiH=YU&LMY^ilFA{aZxj;(mf$jDAMLeuVn}$MA*ej>2!YjQ z@~}Ge&Et21h6SEB)m#Z|6r%Ch`qF;6`K`!ZL;LgBttMjT_LOJx3UsigVw}|O#kgEU z%*^2Ctvt4|yWA+Y39e4${>aQ_6Z<2S$UtXlKARX)4UXzulN`IliMC`waD@p06@vF~ zi0!KC^CFUSEhOH!JiWgf?;E#=qv1;AR!0e(=Zcn^PpMnx#SbQU<~mnEzW!mWw-mT9 zcn1m(RwO>BmBY?H`_9j$zFjsT@nS_Cm4lW62-htj8{_XhXavHRz89Vw(kbJ& zuE1RvlJi!!VMFX6%IGg_O4uH?b=7^k0&AvS;4-c+iFp^suO*+G#CH!jY%5r^i25OXIw~b?W_~z(FP!W!snzVAdP;I zK)5^QG$Usp1*h>)&lI99=b0Zuw|RPdX9<6?qQk_yxIMmJ-0#{fVfg6~;+=?QxS?d` za>LPx29Y8-0l28<+3D{0AX5!brfrz@5D zWV$>Ps5M*C{0m2ORR`tAt~e7=@;fUro8yKn4Vm&rMbz6gZbsBM#9l9!3otdF#Q+~z zjnZ}o)@Zm+>y?2`baW}j7`Ct zp}UXXt_TNbh}?|{nC^r`k7rBD%`55Q)=yY9C=gol*;I;4GmJb{5OjoG02<=nG~L*2 zfc!KdIYtZ|BKp8!b1|6KxlHc4EGqZLVWhEq$jYwwtoj#w@I?cuNPrHV2k>T`oz<1s zD^0J7!WIv_yN1rAet`t*mFgM<)mw$r#i92bJS`fcV9%NIjoikK)(J0f#;tKhPMeX~ zl!VMPkexTjH7Jmw*;F#D-e2+#X^9vp8Z{Dozx7+mWWO3LBxL)dy^Z9zwF^e6?IOOl z+8ymkSoN^gmzHzOZHqpT0PJ17NFEjBO!>idXg_7Jo@*7Cg&b(jqy+i01AVvnnt z&x+=9lpeOLHgX#_btXhf#>|02PV(_(pvfLrP)c-HhkZ{+t;P+3lQ?%OW3#m42owCq z{I#&7o2)qanRhjKVow#ON)EmgRni*%h#~(7Bm>=wa0n;anecDv!fp?>jRQlu*L7b( zP#sCbqMkzTz0zCeXn=XlfuYN)F@{4x)t2H#g$Xq~i6(Iv`WV4e{P0=OP^pnCe(4yD zze$?AfR6kcdjt$wz&-R8A0HCBZ@z@2*Aefj-`#{A0ghk4MxF`%!i}a3LqCkH#&q8C z8W!vcKd;$RJKo7kw7Yy-^kB|`UQ@3_Ei!mXMoaQWtdY!IUCb~f8A45caUZb0SGV9$ zsiA+8A}8V<7g$et$v3$>vYGu-tRAw_c6)bEn?=b;2Mw4WPtz?V_!$*1Mjs{EuWL-@ zbrrzuDdzWW^14SD&+vI7^_FYd@iydC(%iLm)vtfb6W;W22d4%&uZOGt#K4AwbyMvM zToNe~L_d59eFVRt@wG!?g=LX}dc{5iE9_DPmVF(fLF(*qfX_3>zjip$PfI3uKg-?? z)2-fDiYy^=gQFoDzuN&_F+m$5*2$>?TQ#ev1HxF{wmViDUXWv;hjv}IiRjHHk^1JnKpVPjowbQgzXAFsiRLh;pHFX15!`uZCWeF$>HlK0o}c%weT zQ>MIWW>u8+uR^E2GLQ8mXX4M||JNI`zh$O~HKpC~s z3~>&sE9gPwqSP;~(7(XkB@O^b-|?bprt4&zQE4LT*ky4rY32Q9x?_hFU7l$18&Au3 zkUK+cH38gnYp!zRy9dPIh*U3W%bxE31t%SQoOD`pz#pzab+a^oPC0&i9dD8v1)zT5 z4(kP!a|@!Ae4by#nb&EzR(B^z%V;ugB(iHdICVd{r$Wh@@X|#vZr9 z*M$+an>gUdrOwkOp`41ir1zj_C_17V+HmX&E1-+1LCBj2iFHfjyEyLZp&fA5ULi<- zQH1vOV%u1UEdQ`Ax*q^Omot#GdCr}r5>A7_=Qu$IX!#fYLKlw>{7V&JWt3lY+OmqT zQ4p^(AwS?fr*>i6XYyB+S0DR_{B<#;$J`ykXm0OpDI*)UXU9|`N1D?)D)pQnr6UG7 zn_qdWf1Gn-+b5|7F(r`z9l{?kasTjWFmwu}6@QIS@?X0)6CVi|TMo}kjZ5<36aaDc`H z#^5Kl59C-wo2OHw2XS(DH*?$rDjllFqTh!6Cfh)~yY(v3hOgVtyhuHl$~R|5sw-3F z;g6=sv~e0)>x>#5RhEEIa@=gWeSUM)M}*^EEU7`Nt?wEKAEy%T8pT(wCFZ6$xB$HQ zW|i^V_tPWJGP%D`d=+HL1t3RDNXR&C1{V5fbm)@p%FnYV*NE2wkS)P-n-4D!oSqBZ zUxRFEI!Hr>#5r$-u*g#~bfCF^oI3E33acrbmqExg(66wWQBV zMWwGCA)vpKO*4flBSz^1k#ix}f`$*rz{bz_Qx=?_;rpN(x;157-D3CBJzKJzbQ0ve z;_bxTy5KRqDQBIIGDT2~^_QI(um84G&QH~-EqV5UuRDkM`H~U{fE)-DTs+7o7N*-r zyE@Uw$OOX)SZvP8;Z=T*9mF9=1aub4%llAl&$w%Hqv7+e^)xZ^%rma(T zz=s41uyVtVUFC{vY_|sbJ5%(SLI#ZCj%Wr{OU~a<{}x>;;@uK@c# z&@QB7oK$fqjQCf{DZ6fR;GTNSg!TnQ%M$g1G=@LDxYwk0^+tf9$@9hk)!laoGptgP=M2ADs($aX-~x07 z;+vm6nw~sjzo@Un6GXJX(nS%tZO3IyuOfEC5er#TGLeY zA~w$Q={0h$^*TOXHDQpPlNu;HgLYLwNT_BlhXBIENPB$EaJ?w9IRp zvTts~J7ql@*i|TZ7L~HNlTPZrM@dl-$YY&K%qj|6a#mg~erGg&5TYw=p|hU8W~NW6 zng?RXeRCq2r+XcA3Dsqx`j$&_ghSBtoTD7|CGEH~v%3kIkuCbZUuC;BwY-OVe}NtL zG#r2y{WwudSsc*+p4sEwGgAo|%B@z=Kg-g4IGr}okwZ0Rc zxm6H-mvp0z$C^K!>HDy;OTLZSG7p-s&e9!)B(`UHX(=$$=aB8@P!Q8PMHy;x41a69 zQJ6=oB==W=hM9@V_z)KLCCCY7JL)P=YKoN@g`|YXh|%>}TNCu{QNoMv!qhn=fxhb; zj$^@6(mYg#2IA9hGn2__7~vyx(502DfI`5G z2`OCV76Vv_2{IKtM1W{30fUW8zdTW4I2;KC7tAY6(S@rl4AV6kMD>wu7gnBQz3YO1 zb>p&_35w$C-?@dTSCg5r-#?~xHF)-rDV(1_2cMFbi)2-ubC6%5c6cGKJ}+7^IFZK6 zY@I#B1dHW4Lj2!yzj1)4S%|Q&#;LS9+|4F~{sFW6XWtj^=XfLQI$h>lO~V4h9qV3V`7R(V_3SEq zLuTHIf!hW(xhI1|UW7Emt6XOpO~iAIAM8dqrbHo~PY6R3YpY(Z#>FtlR=LKOU+Iiy zK38PsL8sL?yjI`Fu(cXo$Fm7(XxUr2Kx`@OjQPj9zqn*Zv zU+$QRMEVSBTzk-(d`7l4q}ZG)1q`raRm84AaTUA0P@R3IYC(4tgKaH#nSMRO-yRM6 zdeQ&gfOVPON_?ax^@mwxh+wxF06*@&10oGg1P!FMFCBp?h_ghfGZ1V<)*+@CnTXJDF}R*7>hWvm}D_N{jHAJt48 z?M6ze!yfO_Ox}wb7(`~)AiKP9gqoUWCdJ3*a; zRpic7<3cF&XW-yYQ=F{62S-ZLNqJybBD6p9h$s6TzqV?bo*jpo7nIZugFvnhpA=x!pPAfMmUM+uTgJWYzeCa4;}u)SI*`7@fZv1Y$d;>;kW-ZzjF3UeQb- zvZ`bdT>FSQkVg;x-us2pDXEbX3#m2ZQj`I^MRK2cv7#jP={zF-sOJ$Kxy&+ewXyTQud22DXm&=t>&WD-nwSPvRfWYQ3mO~oc{qjW`xyf0EG|Z zs-0A%DknpTiy@X(-u<9w8$)-0^Qwk9X| zzfE_td$uNVxYmkaMHmay3m!vd1n~+c8c|9@uozZoc-<#*lw2Ol(Omt7%gDtD0+8SX4xUBh6(C3pxhZ`?tH?t z+bNxj-2C;k<3VYyR)*A~+d5%uy*7%}Fg+j-pO?@oyWEl&ovsSu2kKqv*P|?GJl|6N zeQGC}Jy-^hrvW@32cAipdq+q;qJ70T@Ob;QJx*w!*E+B5V#J9_z;MeT&Rz1`NM}W= z3pRMXYAF`q%{zB_w3RF9o}(!er|2tZoS;)Ce)yfA+6sNZ3XCDN4Yp+xU-FY$YaaBs zQS6Ys2Yke;iGv)=xxcp;YiVV53OPtPOPvlf4GLQGZ3g8RnFewmMp+o`%hRJYoJ?5Z z=YB3)&zNh0&M0*m6IBG(T&s2h)gP&F4YKBL2 z%v=55p&Dq1eRg~7YfWd4-4xKnq{`${MUpn<&bb6miuS^=@l@F{4b6Tt?F&cK0!c#V zlg6BaOBFX=;Pz$gl0~6rKt}5js#TBg;+;Hr$p!ALUp}hPX1dRB%nZYCnT;SqLkjMw zCdnMv%dC_$WTHlanZD>WK>+hDM_7Hvy6Z=JpG_8zv!nBMpp@(pK$RWpBV&3Er}a%%em8$8b3n zy05UU{6`aQL*H-JJEG>kx47Z7(qGwY8^9sorKU z_N(!7A!UcXl!iehVdmsZ0mj}~bngSI^OxLtcGzG=LX9sEXr%m{KXI}XUcci~I5FBE zryPu@1XV~3kcNLl?ATs9W|zfY{O-QhWrBcS&xKwO5Ep|=DQ_^sudKm1juSs(&y@A$7JdK zKN@l?SvxKV4Kph<=QHbtoi()=9ojPki{~?kQ9bt-gN9- zAbIn&wp{&Skl_NF`gsZRCXU-NAl=Gc*nb~WsHuD1vn02N4)4_QB3^JM50A5@Fl7`U z_)0b=x+6r{A#AQvVzouLnBvV{3YG%A$QmYKENt1l+e3K-((|5Lb*S(uv^s|CF&*E) zH{{IJ+`~W<89b!1Zs63g%1jv|s&${!jqZ$|q4|W#wGgCwS$0JSBycf@qM;2+%`Hmh zrau5;w%l#7L32XZlBP+>@m=z{a6xS(r{VjAGKB*18ID+io3rrvj2gk3$CVz3$3Onu z4mfVcb^Jngt=3UR{!0Fk+?SsCCQ`DOmklg)?y|k7!qpe2J2i35EoQTJU9Pk#%w%X> zhbd+`x_y?{2#KU0O<#WaA~4idXT^%gw`SR0yK%&tHCwZq%0(RGRJ%w;82y_p-tSGyLVeF^$_13f=b zd+HlhK@TMv7Y2t6#gvfZOM!>@yNr;gQp5`c9p|*~aHh_rE>B!HA~gxQV;L^;|nK1B_lQ=IpIHU_HKWejE zB`S($w{BdKQS(m`9hmN>WHgwLJKdx>6>$v>To%jliTX=@0|bm+@Y6P>6^Pi89`Cmm zhUOJ4#B06vmztk8IDt~n*qUXMs^ZCDp6#9j^An!W*`ODk_JLyHw%X*;koSJEQ(fL1 zJ(Y6WYGkl%c2IlmV94ju`>%TX4Z|NZWxWq}gkeT&mj$iDjh(@|nz!?8*Nmhr@8N`e~R?xpG)>Fo7F`j4Hl;{#s-pRMqJSdnlIlwq~4X!dwh{SZQUM+NZ(uJns@Qo(vBGRY~a0}=Ghb<+!mVauxQ z;{D@N_GxSudy1tNxsZqb&o(d9DGj@RXfkC~-&lL0FYTaP9=RW;vQXT^dCIu^e6H-t z80sZKhmb7Onrk;*<^#HhHm5_RwQGKn&N^)GMy?I!ln1pDxjs-xB4HI;(X1LY=aJ5 zM`A{dQFa5flcJgFJjSPEyY#<{2u^NWdx36e&*<$RE!0%iF_QT5Tl(zG3UI-xFQZY3 zf6qpcFt<22YYUs2@DS-L4A-!bSuvN<+c?xLTzVgTwq~!3fAa8`4(KPAwbxXZ!3!Im zq~`dq4`SFYB}miey{Ay4S(C5)Yxn`L=Eg`N@bi{LX1*HGTdvfIrA-QnX+~g7(<`cr zA{6&SZ`3+}%i?wE)|9qZ(%|0p*{wBsX;zD_e%j%XZX|sF zq(8D+^J>JpZU6~IH{8~`A z&4K4Vh=YaMS|Y12;k&c`_y3eO=^jGUEvVZ#&`25dzD9Io zI%Nm-gFRF{iSxH!u5Jxxp-l=vtBHyu$c}rxRjJr}x@Wi=yWz-2?XUu2PLxzO)2Lro zqA}P=&c1eU!AnX|_Tds$>y~TYGR=#w?|P(=g}lk#khP%}w=vr_f5_UQjzegvoSHRW z4B^<+Nt}Vs_X)BW>B>qT7w89-X6-#k7@FBH#+GhTgwKZCCFaw?XQ6VB1K-^Agt$K) zKKD59PNXv;&-v_!$X746i2mh`cgSh3^-@p6GjGSLY9ZbrK$tsKZ|t-P%wk2qUVZt^ zDWW~H=snlUtgwK(|=k}0pdgd;sDb9KwL831=_XbZJJ?XC9Z%`y~FtJf&8LjetTRd}OAjDA3 z$lAd7yef!*m87RXyo_m0viC#TB&4`_E*BX&06-_4-ay`*9AY2l4lsEBMZL;Kn0qv1 z`YqYp@AN4f#nj&eCdfb9Ts6J499H4+KFoDSZ#-}vD2>e3_u9+q$arw#!=y z>xvE`AQ95xcQUZxPJ9i;M8lOaendqRb0+xd@S6L(bb)dSi1s6auDDf*yDd>Oj7yjG-r0?p=<6LuA8Lo;L!2U)R#@4_rNe2(_L#( z^`Hon6gnz4gMDo;$!I`1Xm(wrHSP~7+4?G9=c>f9ijLGeOM_qm!fP!d5mSw7+qVI2|0y@E~`@5h9#dn`snRd z8Umnaj$Z|t@w}nTb1$OUS*#-l)?I=;1A-_9V^=+PdT z(*5e#9$kF=7}W}FOx<};@GbZCUiq`^(Tp^&RvmjnabMTTA(HnDmozH+KUV@j3`}H7 zK{%tc{FHFPvo+OK#cIz(gZv|ABNzQne|J72eBHyc@s^3E{zbnvQCZ|6@}^z%Vk6Z} zL4RJ(hSVO!c94u3D1H>QKc6%O--p-`r8vLxl0z{_@IJH%7M0F*FwP&qQU8aKm_Y-*yoS#3ezKzL34sM_x7^TYn4_kE_RxUsBz z7vEYfzH?jUy##v5uMMcX!txlk8+Dd$oL*4-db#Z9m*Bu;E&bKtYrz^`6c;)5+l81b1f`Y@k$Noa-Y;y*OGIra-14Up_VoInIl$kUe%mBI!bEhfZPL zrj*uWLGDfSP$95bmI(^UAWFbb_B|84Za#?X~uNU4oKNPq%JLYDhKPVe7Nd(>QtSBT$GyGq*%3tJy-glqg2MMU= zgbMbwcnVuA)MvIbOY>#G^_wXg+p~VzmQ0yuEc&llmmta+$yf1GA3`gB>exb*#ukXk zP}25>hj}|du_m~Po>v32ddT^p1vq~zFdXU^o9WES)UwhKU)#+pN*-t61*vE7lCUsAo zlJDMvL*NYYbzPr%_U#;H;dPPw9MC0t|SYa7y%kbl*gN zGTxODLCL3VgapW__%Fv`u4M!!iS<1@K`H7PcZHP>UUPSS>o2U_qMm=xT@43Fei1cR z{w_r92gUU7=rR&aM?Ti6xc^S4g4m2V{c@CIogng$-y{1C_s1z)E-Vhy z_U5SXkp;~sq&dQ%Q;n911nL}D;c@;kw{?RCq>}m>_gOLavdaM7%Ovzz=}YPtlL<_4 z=Z<9DEb?3cK028?B`j+hDL3}zAE+kXYgOibuncA08}qx^mLQ$_VoSQPV7erjW@)Rb z`jbjR;4$_=ZKKGoM73)+pdWk&*Pj*}0v+;zL@EJCRv)+4M31%g&n;++o8;@7`tk=y z^gSNW5m^s&Y7rF8gqu0=o;8$xvSDt%o6ur$oDdBr0oq;4ja}=S@N%~(W#V9xm;4Sm zky#GyJNw$L2r>FR5qYPfU0vfneW?$ZP=wprUk!P>%S zoYL5EPf?1J7eTw9J%z5>u?WxEt-iq>L|Q6KDz`eltz(-xaYZ_aZj+ehOnOkl@I%%> z2FxgT9mNy+59yLD%>{6a9{pot0rC~?^{1qsj1Hxm=zJsbwG%;>jYV3F1Fl zr3$!R+{LT7>_EDn-~zR8BTvvEDV?#`31Q32u2^W@3_{lgXZV*;Q|5i5A7M&Pq_y7< zB39m6K`lam7%QFse(FRD--DC~q!dMXsaV?LB1pkkm=&EKXnCoN!p_uCOOL(gNUFxu z74EFxF=y>n5++i*;Mx0NqUZ%6y0D}$<4F1wcK6!-l5`)tHOrmOT?cQ(Fuqq zVw1cc@w_imu^pRCpa-8S5Ss}Kt4QlicSZI1cf@=Pfl|I5x)0J1c{4wTX|pYna^W)k zPB(&wN=L``y}MrQTIJNjR)#`zDtY1kmh)Vd1+}4~-Ig6z=oNTO0O<&6q%#*g2UD0j zw4gZQKLFn%GWQXs!E0~Zg&z9ep+&)tPsRTMUSl3PvVgg{VWDx*P2$!m_@)A`GpB9} z>RADQ(#jLX371`V&D1<~iF=BR+=ijTO-lk^u&d?fUJbC; zpNh+u69iV}2AEjW-qT?k+;#F7OJXzHb_59>@)BK0Id-PqK~W~e61+K~*k#m+pgCKu z-@GwCUYOHU)>Dfb^?J^dw>8y(TDs0>`j}xzpWWJ?9F_}gczSaQ)P3b*rPOjsH~)m%ZBqXW z%wEm-G)c3|B7>3Wh$bwxH%2NxO?2r!T~_YiNZc9`bn2y;ljzW1|#n)b#M6 z+{g-DXAsfhNyYq&6jyJF8iE5OV_6rGW7y&R1YJv7e*vu@K*Moe-m_=85GBBfAa_pv zP<5w6q>o=^XnL?0?p}H{TmH*2=(AN^?~L99m%K)*XB0AneGXlxzZ_DF|9W)(X)9s| zv93DDeEq_()|MxSt-9rr%6>!gs?1z9UA{506`Y4_A2;z&c%pWD)C`L+*a9-kVcVlE zP_kYs`IpmUelonyz1p}uhsc{_eVs4~q})c)J;4W=r5a+iOKDEJ(`WDJ)if6Xmxn)h zLR%;`(tD2!)T>9o8S=~%?!`aFqITq3C^;kV`T;Ig4*LdVlYVT&U#_=WMjRF@j_g&` zzpU37Hx&mFP-hVXvxyl8=%Y=@WZZp};8Gc_#54>=k{)u|wc!8KD?lbR#Gxvba^W;+ zCqighG>j~Ql&)0`x@%%e29$(a91!~#gU}q~+G=#nQitstyT>ryA`GCZ&xva&vS*9` z^Jw1hzPpFO3(VWss8y#uVd$J+fpF-RSezOEgL?vj3t8d2wcigQ!qsY!8N+7Fh$tF6 zrET*C$Z(qrAKL7>4169t(8K*l58`=Xb)hQga<@^8Wx;Tl;G|1)FIWC_M2^>h5)uLF zjB2(sA2o7DB^45qkwS}ws@_Vx$=RCr``-{f;{ZbX+(aBCvGJI^3#eP5=fVTVETew* z3RpRp(};UeczI!{m#a2{?+Kcym80=*y-8Eeg-h}6Ima41u~tZJ7&2KC$WY79nm$OO z?x&!Ex?ZDs3N@x#ZHN7NoOe;$m<3Kr?FpY@c4R~5nlEZ3Yr#MYQ_|e2E=@ygBs|vx z++!D&f;N!Q>qo8p=x;lBM}5M6p6FUd&jXAYr^=`pVG10u(;ik6X1r|0`& z(!>k+z>?TvB|IssaTBv)7>o$G8kp>AewO;YDgvrQG?ciA6h$Lo5iw3G1*D5QsFg!= za9ZHPn$2T$C2$exco#FQ1CF&y#d|t+hm>adyx?St6PF&3d8?9|FD*FsR`yAr1w(CP zq4olFzjQb6_Aqv)<{!cO-_c3V0|~7KDNgIlo|QEhHKetaR16t-SVHd|%X6;5Oarch zh7ij+1HNmkUl3PrU{Fu=&~_(j*=y7vkdgSV(}Vnrrz%9zEj?4Ycl&YvZV4U&bcuoU zXmHf}Bj%zg8n-B@!=BFVPQMNwBa=2HipfN@y3oMY2Rk7+!`aQKM9Vbfyurd)TA&l_ zNA1H{AH~4zO2z`iJBTQ!dXY4p(-$4e|G=T3wLx_8eK7PzR)bSAQhL4d37uOj zJRrc9MaTnEa#x=DVBlVQ93(EC7d`hijSwH($+KhuLHF?3y&w*)T}N{5dg|P@bbiw< zOxq&{01T_IZ%bdpODAZiqOdq#&%*qPRVa(cTGtQhgIVv8$@}8x$|q1>)tL`b`E_FT^zWqQeX3{scc$t%Jl_+^{B4F!FLDCH(}*-@q4_b&)P0u0)VRg02TO zE5}9nqUM0nRph8M#EF-)apTCTmJqVaCJ64NH<2Aa0c}`(klV==651aw&HD}+YO?Y% zw3X{x>2m_`7-pA|^x?v*wH)tvk31c{-P`8U{UaBMe4tQ3^e(fpN&`x!nJ;KWPa|=A zg#PksVG7d1cEU|Z-eV0iCm01TXlyFRx@x8l85l-cu&y{9;qD_Q^!g*qb?qQ&*yA8y zh;Sd~f;h&IE_JixF($n^d2 z9qcX$A>6lLejVb!)xS6| zTIl9CE#rwbQMfJM$CmR>$ zB})O>hQ4z9JiA<>bEej{r_gHTrU^nL5$5Mh^*!>nu42p_dbO9~VvYgaqoRX-hairP zV-{&T&BqA_m`FSC_`n;!UKsjZb*$rEvAkY%2qqY&?I`c$_qyn=x0%Q%c$Z+CaN<4G zSG>5??v;LWIE!-S{M+MOvsUw9$Y5|e*idB{-NIzvSmIsN zT$;&3R;BzvGj@E92WyibzfGzd+FOW=ntRd*&Et-FTJW5Rvo>Qcee!u#vG4udS-6}I z@oMu?{*Kewvsy9$caBC#@O<`F`^&m;n%Zy+i%zq*a{+`7whI z5!qFN!AMPP%-q6><|Nfq+IAXCfeNFQ&*vmx-y){9o)I11T#wrzllmb_5nmLgD9?mv zq-|nF7+BU-;x)$2HBOE8aS||u?&oQ@6|PS7&G|5wf_Vj}3zTBmI~3^?FjsDEUaDp{ z6W%8ZF1+;ZQ#B-}>^jf=@)=Km0i!wcs|7A%x6lWVa&2~MP#(8XyrmR0a#+^wk!-IX z@=9Z=JrcjibGv3lN<+uFHyz5z)N`EJiJT{K4JXE*!Y*p{ztuW2sU_l}$VTY?AyAQ{ z{7K>y5~!;*aj-5H!ryK-$4tWv=$`^z9^iP&siy3ciiS#%gVC|8VLCmOm$f6s|;V7>2kV9cN+CkPHy#OQTBy|q00B*-dMg?^(}?k5Ts2g9T@)ncfyTWu76Cv;j0msq=Y=hHqMcGF<3< zJlQ?=NK{n|<$`&)y|kk=7cL@d`y~W$dk^IqiHV8FM53yhzPatRu}1UR!SkYQo~mZm?ykx1`Wn8*8pw6erb^M^g}p3m z7dfv?pHYufH0jX_px@>MB!*hA(duPR3|GnaOLMKn+!Xo1^^4dO_>U2cC>N?`!>fDU z-^G#as|UVLc28eV8`?rifvLSxlwHrI`UlBFTZp?5e&mqkW}{D4KbP@ve?nSCZXnTSW_{BgwlX0m+)OF>r%xD~5R3@r*Tu zcSVb&a;84+vfi6K!}McombU(Ot6Lu44w^7(g<|I3&}VDo=Ym$f`bhEF4M`$S2Oda8 zqmEPwH7#qus={k%82f>w5q+ldqj>XFc?~Mu-O1LN*(6a|JhPUmGr)Xe#x;fLIUOyH zd`!&l!<$w z9@X@Uj`pug$31O!>K^(&pfYbn#L?uapR&L9oojeSc~fs6$F!F{Dtgc#Mp+xDBV+7k zaaFZQg%nNWv+*6u;}a^bhsKj(uUueeZ?a+x;yccc)7Bx!_$kjQo=TE4P$A{QdR5Kw z>2(}xJ~eGcxCC-m4C-_!rB>WL>wlI2iXOU-nF~xaw^p4Q(_OAMH162T>_`}UfC!cjf8QFrA`gxn8Mbm6qb_zLMe77u_%Jx@+& zi%+YaWsU4w<=j2G$8#GlSqyjgCOYGEY}3RbO_WMDSG4Q{kBsDtR+v{q#~S*9@a|2~ z^XQ7X$WvQg#aJV8J^8HY1`=hWjbZU|g+ZK+hOl<=L1ZhwVg_>f zbp)7*XUWS@f?KKEF@z&n_Q*bp0cK8=G=%T{nv_0xW>@E{D?{I}9!lNSseYkT-CRIS z^p+@KQF0ljCZc;}J=DtQBKE(#6hzzWid~!wG`%l?sJf@fsSv=uOi3dTz3FND7W9aYgBbOs6Bf# z1OZ4~v3iwxTTb_&mf;@3py)v3qM1TvLHyOa(wkXs=Ny`@>aSQ7+MTW`TKZ;XP4Zk9 zl$TCyvn-HT8(9+RIBSdEIOW^aUK@WowTIgO>N+7JLHtQ!?iKyA8~h%Y%wCJ4iUzF&tub1@L?h~JR$jm5ag@!g8?2?)h{JA5O7F%hZ#40y0NAUqGH(EH7yHfL ziAs>MfW5Q3pjmMBN`>cG<>ozkn#ZwMs``u`xbb`T^e229e@_0T&r~(hO-UMDXhV3T z_qP0V6#7a98JlC_j+p{~nPoCsd}DU#Sb~y8%B$n~Ho6Zw3IsR$;Ts{1c$43Yk7$*C=(ExavK zDOBmM%qGn*J-4QE?&SQV{D#(M%Z--XHFtPwjcnCoW*^TYpKFW>8SDsh z8r*l0k2DzHN!g*f*;8@kRQW8-0j5TFZ0NX?BB2TJR2k7Nzg@n)oC6alcU^E#fJKw{ zHY$f%Z^!MReN#RKQzBb56x^|vz-{qVPCZXx!DPr94RV`s+i2Iy4akxaIy-2v@M4kDy^W^=Xp+TSk&#PW5RL{*ogVu<80V zDgO;45ise&+_a$va;5RYJ8Do3e~FP&*g(Cll)u!tE+lt# z!QwQJa)0tz1Q%3Z0HfE}NU=AJM8f3j;XBydMsi@NdblKe^GG5LS`U|CON`)Pq-ihmN+Vt{d?SAYTVX`w+fX^-yZQ|}TGs%8d2*w*Y=={R{l7E`J7Bc4&Px5;;^4H4GejDX04~9N@>OlLWr2 z7t;y229TO3huPML{SNzNMfgV9pPPL0@~^HG5-nH?w9=3l>T}7SM!9x z$YGy=$zh|%FMLFhQ^OyS^XJ(2v6%jd(VwUZdcDx}K6bpKtFEzx8m>bMgA(L_6x20 z4GLQ1yUp7qfBLlvkV+2SB@4SFW&*1df!+PTDi1!Y_1CfH@)T$R#2q15g zN+pNw(uLgt{99l$DZK8#u)BYAHlJ)>sE$WBhW*T#4}AMAxBiR4;+W9CJet>ZnG*u| zZ;RuT4EP@ar#03B>l{Q8e{9HM;#;Hx$Qz~Bf9b-)04j{S&A{?HG3nNkpT0l32&>yH zSzfn6T8q39U_%ZSmxYCisr(JVc9d@Yzh>puy7KD9KZ$`j3RbrPK2Y=#B|npy$f554 z6#iSDly>qYzL5NgL4T&%Cs_DT{(h=?A}9Fdr*P@D;(yZfhdrKbll!|!m}UTZi`2+g|KB3yFd5z7Rr`B_vaoQm zU(vV)FhUNM`O{!D1WJDiigy%r_VMX(dhX4BYFKj1@V8kVAvYT zDvVG7kWnWF45$;63Lu{e1;{c&#elFnG0A|s4N_XY?)E%IhSH91sTmhq3@ARc2MYIup$#Bxnl z1IU}Ds>xx}x{NyjJ4T&2FrZ%SZwmfQ&mA#4Slw@l{0L0*>clrHpOU^*7xtgl{U?_` z>frw+n7pt>mhq=X!@YB*G}Zk7A)CLlhK8Lvp;EexFn~Uz4gg%O6O#`hi%ErO)&Ht+ z6!{ZK4wI5)go)|H>Hw0fbsOZh$YOvHa_D~>h<$BSEBep<)zDtcD<{yR=`T2`c!Y_j zO%C%)t=PR;a-E>s^OVbRBhvb*!&<+3I zhj<4u2KKvNUNzX9*#=ne`PCbn{-1vRxq+H*bH632{hpPdV($lRq3<)P9gMm^`|)+} zrgee*T_o_I&gD+2YTG z?CG*7zbSwej~phi%LoS`7)p{{d0Qr9#_zOe-JbO}Dn|~vWKi~HMzGmF8E2V}1 zHt`Ka_r*Mo{&Et&*GS!wzWe+yuBo{${7_);nTnVE+PN1MWEI{ZuX>Np&B&Lq4-p zck3O%8ODLFz)khx2Lg6(lKS5P{zCL8Iq_c!I`?b;2mJo+D5{fXH{WdjU&iUbxOQ-6 zzIW@wcLwa-DD^G*%vRm4VSs~-1Dk-q2IaT^3k=(-wR0ojTk) zPlG|;RcBIm{HwG7WY|y6_T|^y${` zMWicKYY?K|m{BKR(KfhQ%9X^Ls3)@ctd&s5}, a set of latent dynamic factors is estimated to capture possible dependencies in the state +space trends. If \code{FALSE}, independent gaussian distributions are used for the trends} + +\item{n.adapt}{\code{integer} the number of iterations for adaptation. See adapt for details. +If \code{n.adapt} = 0 then no adaptation takes place} + +\item{n.chains}{\code{integer} the number of parallel chains for the model} + +\item{n.iter}{\code{integer} the number of iterations of the Markov chain to run} + +\item{auto_update}{\code{logical} If \code{TRUE}, the model is run for up to \code{3} additional sets of +\code{n.iter} iterations, or until the lower 15\% of effective sample sizes reaches \code{100}} + +\item{phi_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the AR1 coefficients in the +latent trends} + +\item{rho_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the smooth penalty parameters} + +\item{r_prior}{\code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial +overdispersion parameter} + +\item{tau_prior}{\code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian +precisions used for the latent trends (ignored if \code{use_mv == TRUE})} +} +\value{ +A \code{list} object containing JAGS model output, the text representation of the model file, +the mgcv model output (for easily generating simulations at +unsampled covariate values), the names of the smooth parameters and the matrix of indices for each series +} +\description{ +This function estimates the posterior distribution for a multivariate GAM that includes +smooth seasonality and possible other smooth functions specified in the GAM formula. State-space latent trends +are estimated for each series, with a number of options for specifying the structures of the trends +} +\details{ +A \code{\link[mcgv]{jagam}} model file is generated from \code{formula} and modified to include the latent +AR1 state space trends. The model parameters are esimated in a Bayesian framework using Gibbs sampling +in \code{\link[rjags]{jags.model}}. +} diff --git a/man/plot_mvgam_fc.Rd b/man/plot_mvgam_fc.Rd new file mode 100644 index 00000000..abf5c726 --- /dev/null +++ b/man/plot_mvgam_fc.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_mvgam_fc.R +\name{plot_mvgam_fc} +\alias{plot_mvgam_fc} +\title{Plot mvjagam posterior predictions for a specified series} +\usage{ +plot_mvgam_fc(out_gam_mod, series, data_test, data_train) +} +\arguments{ +\item{out_gam_mod}{\code{list} object returned from \code{mvjagam}} + +\item{series}{\code{integer} specifying which series in the set is to be plotted} + +\item{data_test}{\code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in +addition to any other variables included in the linear predictor of \code{formula}} + +\item{data_train}{A\code{dataframe} containing the model response variable and covariates +required by the GAM \code{formula}. Should include columns: +'y' (the discrete outcomes; NAs allowed) +'series' (character or factor index of the series IDs) +'season' (numeric index of the seasonal time point for each observation; should not have any missing) +'year' the numeric index for year +'in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero +during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during +during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing +it to zero +Any other variables to be included in the linear predictor of \code{formula} must also be present} +} +\description{ +Plot mvjagam posterior predictions for a specified series +} diff --git a/man/plot_mvgam_trend.Rd b/man/plot_mvgam_trend.Rd new file mode 100644 index 00000000..949da301 --- /dev/null +++ b/man/plot_mvgam_trend.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_mvgam_trend.R +\name{plot_mvgam_trend} +\alias{plot_mvgam_trend} +\title{Plot mvjagam latent trend for a specified series} +\usage{ +plot_mvgam_trend(out_gam_mod, series, data_test, data_train) +} +\arguments{ +\item{out_gam_mod}{\code{list} object returned from \code{mvjagam}} + +\item{series}{\code{integer} specifying which series in the set is to be plotted} + +\item{data_test}{\code{dataframe} containing at least 'series', 'season', 'year' and 'in_season' for the forecast horizon, in +addition to any other variables included in the linear predictor of \code{formula}} + +\item{data_train}{A\code{dataframe} containing the model response variable and covariates +required by the GAM \code{formula}. Should include columns: +'y' (the discrete outcomes; NAs allowed) +'series' (character or factor index of the series IDs) +'season' (numeric index of the seasonal time point for each observation; should not have any missing) +'year' the numeric index for year +'in_season' indicator for whether the observation is in season or not. If the counts tend to go to zero +during the off season (as in tick counts for example), setting this to zero can be useful as trends won't contribute during +during this time but they continue to evolve, allowing the trend from the past season to continue evolving rather than forcing +it to zero +Any other variables to be included in the linear predictor of \code{formula} must also be present} +} +\description{ +Plot mvjagam latent trend for a specified series +} diff --git a/test_mvjagam.R b/test_mvjagam.R new file mode 100644 index 00000000..c8266c01 --- /dev/null +++ b/test_mvjagam.R @@ -0,0 +1,285 @@ +#### Testing the mgjagam function #### +# Utility functions, too specific to put into the package but useful for our study +plot_mvgam_season = function(out_gam_mod, series, data_test, data_train, + xlab = 'Season'){ + + pred_dat <- expand.grid(series = levels(data_train$series)[series], + season = seq(min(data_train$season), + max(data_train$season), length.out = 100), + year = mean(data_train$year), + cum_gdd = mean(data_train$cum_gdd, na.rm = T), + siteID = as.character(unique(data_train$siteID[which(data_train$series == + levels(data_train$series)[series])]))) + Xp <- predict(out_gam_mod$mgcv_model, newdata = pred_dat, type = 'lpmatrix') + betas <- MCMCchains(out_gam_mod$jags_output, 'b') + plot(Xp %*% betas[1,] ~ pred_dat$season, ylim = range(Xp %*% betas[1,] + 1.5, + Xp %*% betas[1,] - 1.5), + + col = rgb(150, 0, 0, max = 255, alpha = 10), type = 'l', + ylab = paste0('F(season) for ', levels(data_train$series)[series]), + xlab = xlab) + for(i in 2:1000){ + lines(Xp %*% betas[i,] ~ pred_dat$season, + + col = rgb(150, 0, 0, max = 255, alpha = 10)) + } +} + +plot_mvgam_gdd = function(out_gam_mod, series, data_test, data_train, + xlab = 'Season'){ + + pred_dat <- expand.grid(series = levels(data_train$series)[series], + season = 20, + in_season = 1, + year = mean(data_train$year), + cum_gdd = seq(min(data_train$cum_gdd[which(data_train$series == + levels(data_train$series)[series])], + na.rm = T), + max(data_train$cum_gdd[which(data_train$series == + levels(data_train$series)[series])], + na.rm = T), length.out = 100), + siteID = as.character(unique(data_train$siteID[which(data_train$series == + levels(data_train$series)[series])]))) + Xp <- predict(out_gam_mod$mgcv_model, newdata = pred_dat, type = 'lpmatrix') + betas <- MCMCchains(out_gam_mod$jags_output, 'b') + plot(Xp %*% betas[1,] ~ pred_dat$cum_gdd, + ylim = range(Xp %*% betas[1,] + 1.5, + Xp %*% betas[1,] - 1.5), + + col = rgb(150, 0, 0, max = 255, alpha = 10), type = 'l', + ylab = paste0('Predicted peak count for ', levels(data_train$series)[series]), + xlab = 'Cumulative growing degree days') + for(i in 2:1000){ + lines(Xp %*% betas[i,] ~ pred_dat$cum_gdd, + + col = rgb(150, 0, 0, max = 255, alpha = 10)) + } + rug(data_train$cum_gdd[which(data_train$series == + levels(data_train$series)[series])]) +} + +#### NEON mv_gam #### +library(mvgam) +data("all_neon_tick_data") + +# Prep data for modelling +species = 'Ambloyomma_americanum' + +if(species == 'Ambloyomma_americanum'){ + + plotIDs <- c('SCBI_013', 'SERC_001', 'SERC_005', 'SERC_006', + 'SERC_002', 'SERC_012', 'KONZ_025', 'UKFS_001', + 'UKFS_004', 'UKFS_003', 'ORNL_002', 'ORNL_040', + 'ORNL_008', 'ORNL_007', 'ORNL_009', 'ORNL_003', + 'TALL_001', 'TALL_008', 'TALL_002') + model_dat <- all_neon_tick_data %>% + dplyr::mutate(target = amblyomma_americanum) %>% + dplyr::select(Year, epiWeek, plotID, target) %>% + dplyr::mutate(epiWeek = as.numeric(epiWeek)) %>% + dplyr::filter(Year > 2014 & Year < 2021) %>% + dplyr::mutate(Year_orig = Year) + + model_dat %>% + dplyr::full_join(expand.grid(plotID = unique(model_dat$plotID), + Year_orig = unique(model_dat$Year_orig), + epiWeek = seq(1, 52))) %>% + dplyr::left_join(all_neon_tick_data %>% + dplyr::select(siteID, plotID) %>% + dplyr::distinct()) %>% + # Remove winter tick abundances as we are not interested in modelling them + dplyr::filter(epiWeek > 14) %>% + dplyr::filter(epiWeek < 41) %>% + dplyr::mutate(series = plotID, + season = epiWeek - 14, + year = as.vector(scale(Year_orig)), + y = target) %>% + dplyr::select(-Year, -epiWeek, -target) %>% + dplyr::ungroup() %>% + dplyr::arrange(Year_orig, season, series) -> model_dat + + model_dat %>% + dplyr::left_join(all_neon_tick_data %>% + dplyr::ungroup() %>% + dplyr::select(Year, siteID, cum_sdd, cum_gdd) %>% + dplyr::mutate(Year_orig = Year) %>% + dplyr::select(-Year) %>% + dplyr::distinct()) -> model_dat +} + +# Set indicator for whether time point is 'in-season' or not (trends won't contribute during +# off season, as counts generally go to zero during this time) +model_dat = model_dat %>% + dplyr::mutate(in_season = ifelse(season >=3 & season <= 22, 1, 0)) %>% + dplyr::filter(siteID %in% c('SERC', 'TALL', 'UKFS'), + Year_orig <= 2019) %>% + dplyr::mutate(plotID = factor(plotID), + siteID = factor(siteID), + series = factor(series)) + +data_train = model_dat[1:(floor(nrow(model_dat) * 0.9)),] +data_test = model_dat[((floor(nrow(model_dat) * 0.9)) + 1):nrow(model_dat),] +(nrow(data_train) + nrow(data_test)) == nrow(model_dat) + +# Hypothesis testing +# NULL. There is no seasonal pattern to be estimated, and we simply let the latent +# factors and site-level effects of growing days influence the series dynamics +null_hyp = y ~ siteID + s(cum_gdd, by = siteID, k = 3) - 1 + +# 1. Do all series share same seasonal pattern, with any remaining variation due to +# non-seasonal local variation? +hyp1 = y ~ + siteID + + s(cum_gdd, by = siteID, k = 3) + + # Global cyclic seasonality term (smooth) + s(season, k = 12, m = 2, bs = 'cc') - 1 + +# 2. Do all series share same seasonal pattern but with different magnitudes +# (i.e. random intercepts per series)? +hyp2 = y ~ + siteID + + s(cum_gdd, by = siteID, k = 3) + + s(season, k = 12, m = 2, bs = 'cc') + + # Hierarchical variable intercepts + s(series, bs = 're') - 1 + +# 3. Is there evidence for global seasonality but each site's seasonal pattern deviates +# based on more local conditions? +hyp3 = y ~ + siteID + + s(cum_gdd, by = siteID, k = 3) + + s(season, k = 4, m = 2, bs = 'cc') + + s(series, bs = 're') + + # Site-level deviations from global pattern, which can be wiggly (m=1 to reduce concurvity); + # If these dominate, they will have smaller smoothing parameters and the global seasonality + # will become less important (larger smoothing parameter). Sites with the smallest smooth + # parameters are those that deviate the most from the global seasonality + s(season, by = siteID, m = 1, k = 8) - 1 + +# 4. Is there evidence for global seasonality but each plot's seasonal pattern deviates +# based on even more local conditions than above (i.e. plot-level is not as informative)? +hyp4 = y ~ + siteID + + s(cum_gdd, by = siteID, k = 3) + + s(season, k = 4, m = 2, bs = 'cc') + + s(series, bs = 're') + + # Series-level deviations from global pattern + s(season, by = series, m = 1, k = 8) - 1 + +# If evidence of gdd effects, can also let use a global smoother and then site-level +# deviations for a 'hierarchical' setup +use_mv <- T +out_gam_mod <- mvjagam(formula = hyp1, + data_train = data_train, + data_test = data_test, + n.adapt = 1000, + n.burnin = 1000, + n.iter = 1000, + thin = 1, + auto_update = FALSE, + use_mv = use_mv, + use_nb = FALSE, + # Laplace distribution emphasizes our prior that smooths should not be overly wiggly + # unless the data supports this + rho_prior = 'ddexp(5, 0.25)T(-12, 12)', + phi_prior = 'dbeta(2,2)', + tau_prior = 'dunif(0.1, 100)') + +# View the modified JAGS model file +out_gam_mod$model_file + +# Traces of key parameters +library(MCMCvis) + +# Negative binomial size parameter (set to 10,000 if use_nb = FALSE) +MCMCtrace(out_gam_mod$jags_output, c('r'), pdf = F, n.eff = T) + +# Penalties of smooth components (smaller means an effect is more nonlinear) +out_gam_mod$smooth_param_details # rhos are the logged versions of the lambdas +MCMCtrace(out_gam_mod$jags_output, c('rho'), pdf = F, n.eff = T) + +# AR1 persistence coefficients for latent dynamic factors (or for each +# series if use_mv = FALSE) +MCMCtrace(out_gam_mod$jags_output, c('phi'), pdf = F, n.eff = T) + +# Precision for latent dynamic factors (if using latent factors) +MCMCtrace(out_gam_mod$jags_output, c('tau_fac'), pdf = F, n.eff = T) + +# Precision for latent trends (if using independent Gaussian trends) +MCMCtrace(out_gam_mod$jags_output, c('tau'), pdf = F, n.eff = T) +dev.off() + +# Plot useful posterior quantities for a specific series +opar <- par() +par(mfrow = c(3, 1)) + +# Total number of series in the set +length(unique(data_train$series)) +series = 1 +plot_mvgam_season(out_gam_mod, series = series, data_test = data_test, + data_train = data_train, xlab = 'Epidemiological week') +plot_mvgam_fc(out_gam_mod, series = series, data_test = data_test, + data_train = data_train) +plot_mvgam_trend(out_gam_mod, series = series, data_test = data_test, + data_train = data_train) +par(opar) + +plot_mvgam_gdd(out_gam_mod, series = series, data_test = data_test, + data_train = data_train, xlab = 'Epidemiological week') + +if(use_mv){ + if(length(unique(data_train$series)) > 5){ + # Calculate the correlation matrix from the latent variables + samps <- jags.samples(out_gam_mod$jags_model, + variable.names = 'lv_coefs', + n.iter = 1000, thin = 1) + lv_coefs <- samps$lv_coefs + n_series <- dim(lv_coefs)[1] + n_lv <- dim(lv_coefs)[2] + n_samples <- prod(dim(lv_coefs)[3:4]) + + # Get arrat of latend variable loadings + coef_array <- array(NA, dim = c(n_series, n_lv, n_samples)) + for(i in 1:n_series){ + for(j in 1:n_lv){ + coef_array[i, j, ] <- c(lv_coefs[i, j, , 1], + lv_coefs[i, j, , 2]) + } + } + + # Variances of each series' latent trend are fixed at (1/100)^2 + eps_res <- rep((1/100)^2, n_series) + + # Posterior correlations based on latent variable loadings + correlations <- array(NA, dim = c(n_series, n_series, n_samples)) + for(i in 1:n_samples){ + correlations[,,i] <- cov2cor(tcrossprod(coef_array[,,i]) + diag(eps_res)) + } + mean_correlations <- apply(correlations, c(1,2), function(x) quantile(x, 0.5)) + + # Plot the mean posterior correlations + mean_correlations[upper.tri(mean_correlations)] <- NA + mean_correlations <- data.frame(mean_correlations) + rownames(mean_correlations) <- colnames(mean_correlations) <- levels(data_train$series) + + library(ggplot2) + ggplot(mean_correlations %>% + tibble::rownames_to_column("series1") %>% + tidyr::pivot_longer(-c(series1), names_to = "series2", values_to = "Correlation"), + aes(x = series1, y = series2)) + geom_tile(aes(fill = Correlation)) + + scale_fill_gradient2(low="darkred", mid="white", high="darkblue", + midpoint = 0, + breaks = seq(-1,1,length.out = 5), + limits = c(-1, 1), + name = 'Residual\ncorrelation') + labs(x = '', y = '') + theme_dark() + + theme(axis.text.x = element_text(angle = 45, hjust=1)) + + } +} + +#### If GAM component is LESS supported, we should see evidence in the form of: #### +# 1. Stronger residual correlations, suggesting we are missing some site-level structure +summary(as.vector(as.matrix(mean_correlations))) + +# 2. Smaller precisions for tau_fac (i.e. larger variance for latent dynamic factors) +MCMCsummary(out_gam_mod$jags_output, 'tau_fac') +