
Commit

update case studies
nicholasjclark committed Feb 2, 2022
1 parent f180a3d commit d5f8aa3
Showing 27 changed files with 1,808 additions and 1,816 deletions.
Binary file added .DS_Store
Binary file not shown.
Binary file added NEON_manuscript/.DS_Store
Binary file not shown.
Binary file added NEON_manuscript/Case studies/.DS_Store
Binary file not shown.
2,506 changes: 1,253 additions & 1,253 deletions NEON_manuscript/Case studies/mvgam_case_study1.html

Large diffs are not rendered by default.

27 changes: 13 additions & 14 deletions NEON_manuscript/Case studies/mvgam_case_study2.Rmd
@@ -132,17 +132,17 @@ Plot the series to see how similar their seasonal shapes are over time
plot(series, legend.loc = 'topleft')
```

Now we will fit an `mvgam` model with shared seasonality and random intercepts per series. Our first attempt will ignore any temporal component in the residuals so that we can identify which GAM predictor combination gives us the best fit, prior to investigating how to deal with any remaining autocorrelation. We assume a Negative Binomial distribution with a shared overdispersion parameter for the response. Also note that any smooths using the random effects basis (`s(series, bs = "re")` below) are automatically re-parameterised to use the [non-centred parameterisation that is necessary to help avoid common posterior degeneracies in hierarchical models](https://betanalpha.github.io/assets/case_studies/hierarchical_modeling.html)
Now we will fit an `mvgam` model with shared seasonality and random intercepts per series. Our first attempt will ignore any temporal component in the residuals so that we can identify which GAM predictor combination gives us the best fit, prior to investigating how to deal with any remaining autocorrelation. We assume a Negative Binomial distribution with series-specific overdispersion parameters for the response. We use a complexity-penalising prior for the overdispersion, which allows the model to collapse toward a simpler Poisson observation process unless the data provide adequate evidence of overdispersion. Also note that any smooths using the random effects basis (`s(series, bs = "re")` below) are automatically re-parameterised to use the [non-centred parameterisation that is necessary to help avoid common posterior degeneracies in hierarchical models](https://betanalpha.github.io/assets/case_studies/hierarchical_modeling.html). We suppress the global intercept as it is not needed and would otherwise lead to identifiability issues when estimating the series-specific random intercepts
```{r, message=FALSE, warning=FALSE}
trends_mod1 <- mvjagam(data_train = trends_data$data_train,
data_test = trends_data$data_test,
formula = y ~ s(series, bs = 're') +
s(season, k = 12, m = 2, bs = 'cc'),
s(season, k = 12, m = 2, bs = 'cc') - 1,
knots = list(season = c(0.5, 12.5)),
trend_model = 'None',
family = 'nb',
chains = 4,
burnin = 5000)
burnin = 8000)
```
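
As a quick illustration of why the overdispersion prior described above is complexity-penalising, the sketch below (purely illustrative and not part of the model fit, though it uses the same `Exponential(0.5)` prior on `1 / r` as the underlying `JAGS` code) shows how draws of the inverse overdispersion near zero translate into a Negative Binomial that behaves like a Poisson
```{r}
# Draws from the prior on the inverse overdispersion; r_raw near zero implies a
# very large r, and the Negative Binomial variance (mu + mu^2 / r) collapses
# toward the Poisson variance (mu)
set.seed(1)
r_raw <- rexp(1000, rate = 0.5)
r <- 1 / r_raw
mu <- 5
summary(mu + mu^2 / r)

# Simulated counts under a very large r look Poisson-like (variance close to mu),
# while a small r produces heavy overdispersion
var(rnbinom(1e4, mu = mu, size = max(r)))
var(rnbinom(1e4, mu = mu, size = quantile(r, 0.05)))
```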

Given that these series could potentially be following a hierarchical seasonality, we will also trial a slightly more complex model with an extra smoothing term per series that allows each series' seasonal curve to deviate from the global seasonal smooth. Ignore the warning about repeated smooths, as this is not an issue for estimation.
@@ -155,10 +155,10 @@ trends_mod2 <- mvjagam(data_train = trends_data$data_train,
trend_model = 'None',
family = 'nb',
chains = 4,
burnin = 5000)
burnin = 8000)
```
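
For readers less familiar with the factor smooth basis, the rough sketch below expresses the same hierarchical seasonality structure as a stand-alone `mgcv` model on simulated counts (the data and parameter values here are made up for illustration; the real model above is fitted through `mvjagam`)
```{r}
library(mgcv)

# Simulate four series that share a global seasonal pattern but deviate from it
set.seed(5)
sim <- expand.grid(season = 1:12, year = 1:5,
                   series = factor(paste0('series_', 1:4)))
global_season <- sin(2 * pi * sim$season / 12)
series_dev <- as.numeric(sim$series) * 0.2 * cos(2 * pi * sim$season / 12)
sim$y <- rnbinom(nrow(sim), mu = exp(1 + global_season + series_dev), size = 5)

# Global cyclic smooth plus a factor smooth that lets each series' seasonal
# curve deviate from the shared curve
mod_hier <- gam(y ~ s(season, k = 12, m = 2, bs = 'cc') +
                  s(season, series, k = 5, bs = 'fs', m = 1),
                knots = list(season = c(0.5, 12.5)),
                family = nb(), data = sim)
summary(mod_hier)
```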

How can we compare these models to ensure we choose one that performs well and provides useful inferences? Beyond posterior retrodictive and predictive checks, we can take advantage of the fact that `mvgam` fits an `mgcv` model to provide all the necessary penalty matrices, as well as to identify good initial values for smoothing and beta parameters. Because we did not modify this model by adding a trend component we can still inspect the usual `mgcv` model comparison routines
How can we compare these models to ensure we choose one that performs well and provides useful inferences? Beyond posterior retrodictive and predictive checks, we can take advantage of the fact that `mvgam` fits an `mgcv` model to provide all the necessary penalty matrices, as well as to identify good initial values for smoothing and beta parameters. Because we did not modify this model by adding a trend component (the only modification is that we estimated series-specific overdispersion parameters), we can still inspect the usual `mgcv` model comparison routines
```{r}
anova(trends_mod1$mgcv_model,
trends_mod2$mgcv_model, test = 'LRT')
@@ -186,20 +186,19 @@ runjags::extract(trends_mod1$jags_model, 'dic')
runjags::extract(trends_mod2$jags_model, 'dic')
```

Model 2 seems to fit better so far, suggesting that hierarchical seasonality gives better performance for these series. But a problem with both of the above models is that their forecast uncertainties will not increase into the future, which is not how time series forecasts should behave. Here we fit Model 2 again but specifying a time series model for the latent trends. We assume the dynamic trends can be represented using latent factors that each follow an AR1 process, and we will rely on the exponential penalties to help regularise any un-needed factors by setting `n_lv = 4`
Model 1 seems to fit better so far, suggesting that hierarchical seasonality does not give better performance for these series. But a problem with both of the above models is that their forecast uncertainties will not increase into the future, which is not how time series forecasts should behave. Here we fit Model 1 again but specifying a time series model for the latent trends. We assume the dynamic trends can be represented using latent factors that each follow an AR1 process, and we will rely on the exponential penalties to help regularise any un-needed factors by setting `n_lv = 4`
```{r, message=FALSE, warning=FALSE}
trends_mod3 <- mvjagam(data_train = trends_data$data_train,
data_test = trends_data$data_test,
formula = y ~ s(season, k = 12, m = 2, bs = 'cc') +
s(season, series, k = 5, bs = 'fs', m = 1),
formula = y ~ s(series, bs = 're') +
s(season, k = 12, m = 2, bs = 'cc') - 1,
knots = list(season = c(0.5, 12.5)),
trend_model = 'AR1',
use_lv = T,
n_lv = 4,
family = 'nb',
chains = 4,
drift = T,
burnin = 5000)
burnin = 8000)
```
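
To see how a small number of shared latent factors can generate correlated dynamic trends for all four series, here is a simplified simulation sketch (the AR coefficient and loadings are hypothetical values chosen for illustration, not estimates from the model above)
```{r}
set.seed(2)
n_t <- 60; n_lv <- 2; n_series <- 4

# Two shared AR1 latent factors
factors <- sapply(seq_len(n_lv),
                  function(i) as.numeric(arima.sim(list(ar = 0.7), n = n_t)))

# Hypothetical loading matrix (n_series x n_lv); mvgam estimates these with
# identifiability constraints and penalties that can shrink unneeded factors
loadings <- matrix(runif(n_series * n_lv, -1, 1), nrow = n_series)

# Each series' latent trend is a weighted combination of the shared factors,
# which induces correlations among the series
trends <- factors %*% t(loadings)
round(cor(trends), 2)
```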


@@ -215,7 +214,7 @@ plot_mvgam_factors(trends_mod3)

How do forecasts for this model compare to the previous one that did not include any trend component?
```{r, fig.width = 6, fig.height = 5, fig.align='center'}
compare_mvgams(trends_mod2, trends_mod3, fc_horizon = 6,
compare_mvgams(trends_mod1, trends_mod3, fc_horizon = 6,
n_cores = 3, n_evaluations = 20)
```

@@ -242,7 +241,7 @@ plot_mvgam_resids(trends_mod3, 3)
plot_mvgam_resids(trends_mod3, 4)
```

Perform posterior predictive checks to see if the model is able to simulate data that looks realistic and unbiased by examining simulated kernel densities for posterior predictions (`yhat`) compared to the density of the observations (`y`). This will be particularly useful for examining whether the shared overdispersion parameter is able to produce realistic looking simulations for each individual series.
Perform posterior predictive checks to see whether the model can simulate data that look realistic and unbiased, by comparing simulated kernel densities of posterior predictions (`yhat`) against the density of the observations (`y`). This will be particularly useful for examining whether the Negative Binomial observation model is able to produce realistic-looking simulations for each individual series.
```{r, fig.width = 5, fig.height = 4, fig.align='center'}
plot_mvgam_ppc(trends_mod3, series = 1, type = 'density')
```
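
For intuition about what this plot is comparing, the density overlay can also be rolled by hand; the sketch below uses arbitrary simulated values rather than the posterior draws stored in `trends_mod3`, but shows the same idea of overlaying replicate densities on the observed density
```{r}
# Observed counts (simulated here for illustration) and their kernel density
set.seed(3)
y_obs <- rnbinom(100, mu = 8, size = 2)
plot(density(y_obs), lwd = 2, main = 'Observed vs simulated densities')

# Overlay densities of replicate datasets drawn from the assumed observation model
for(i in 1:20){
  y_rep <- rnbinom(length(y_obs), mu = 8, size = 2)
  lines(density(y_rep), col = grDevices::adjustcolor('darkred', alpha.f = 0.3))
}
```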
@@ -298,7 +297,7 @@ plot_mvgam_trend(object = trends_mod3, series = 3, data_test = trends_data$data_
plot_mvgam_trend(object = trends_mod3, series = 4, data_test = trends_data$data_test)
```

Given that we fit a model with hierarchical seasonality, the seasonal smooths are allowed to differ somewhat
Given that we fit a model with shared seasonality, the seasonal smooths will not differ among series
```{r, fig.width = 4, fig.height = 3, fig.align='center'}
plot_mvgam_smooth(object = trends_mod3, series = 1, smooth = 'season')
```
@@ -345,7 +344,7 @@ plot(stl(ts(as.vector(series$`dog tick`), frequency = 12), 'periodic'))
plot(stl(ts(as.vector(series$`tick bite`), frequency = 12), 'periodic'))
```

A logical next step for this analysis would be to trial varying overdispersion parameters for each series. This may be necessary as the residual diagnostic plots and forecast period posterior predictive checks suggest that the estimated Negative Binomial overdispersion parameter is more suitable for some series than for others:
Forecast period posterior predictive checks suggest that the model still has room for improvement:
```{r, fig.width = 5, fig.height = 4, fig.align='center'}
plot_mvgam_ppc(trends_mod3, series = 1, type = 'density', data_test = trends_data$data_test)
```
347 changes: 167 additions & 180 deletions NEON_manuscript/Case studies/mvgam_case_study2.html

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,10 +1,10 @@
name: Publish Document
title:
username:
account: rpubs
server: rpubs.com
hostUrl: rpubs.com
appId: https://api.rpubs.com/api/v1/document/856585/540d5299dc584aff9095b986b978429b
bundleId: https://api.rpubs.com/api/v1/document/856585/540d5299dc584aff9095b986b978429b
url: http://rpubs.com/NickClark47/856585
when: 1643680227.13467
name: Publish Document
title:
username:
account: rpubs
server: rpubs.com
hostUrl: rpubs.com
appId: https://api.rpubs.com/api/v1/document/856585/540d5299dc584aff9095b986b978429b
bundleId: https://api.rpubs.com/api/v1/document/856585/540d5299dc584aff9095b986b978429b
url: http://rpubs.com/NickClark47/856585
when: 1643680227.13467
Binary file not shown.
Binary file not shown.
@@ -7,4 +7,4 @@ hostUrl: rpubs.com
appId: https://api.rpubs.com/api/v1/document/856733/5be2f57c523545a694d50c31f9a87205
bundleId: https://api.rpubs.com/api/v1/document/856733/5be2f57c523545a694d50c31f9a87205
url: http://rpubs.com/NickClark47/856733
when: 1643695525.11005
when: 1643793559.32827
Binary file added NEON_manuscript/Figures/.DS_Store
Binary file not shown.
11 changes: 5 additions & 6 deletions R/eval_mvgam.R
@@ -148,8 +148,7 @@ eval_mvgam = function(object,
ar3s <- MCMCvis::MCMCchains(object$jags_output, 'ar3')

# Negative binomial size estimate
sizes <- as.matrix(rep(hpd(MCMCvis::MCMCchains(object$jags_output, 'r'))[2],
dim(betas)[1]))
sizes <- MCMCvis::MCMCchains(object$jags_output, 'r')

# Generate sample sequence for n_samples
if(n_samples < dim(phis)[1]){
@@ -249,7 +248,7 @@ eval_mvgam = function(object,
trunc_preds <- rnbinom(fc_horizon,
mu = exp(as.vector((Xp[which(as.numeric(data_assim$series) == series),] %*%
betas)) + (trend_preds)),
size = size)
size = size[series])
trunc_preds
})

@@ -289,7 +288,7 @@ eval_mvgam = function(object,
fc <- rnbinom(fc_horizon,
mu = exp(as.vector((Xp[which(as.numeric(data_assim$series) == series),] %*%
betas)) + (trend_preds)),
size = size)
size = size[series])
fc
})
}
@@ -344,7 +343,7 @@ eval_mvgam = function(object,
trunc_preds <- rnbinom(fc_horizon,
mu = exp(as.vector((Xp[which(as.numeric(data_assim$series) == series),] %*%
betas)) + (trend_preds)),
size = size)
size = size[series])
trunc_preds
})

@@ -384,7 +383,7 @@ eval_mvgam = function(object,
fc <- rnbinom(fc_horizon,
mu = exp(as.vector((Xp[which(as.numeric(data_assim$series) == series),] %*%
betas)) + (trend_preds)),
size = size)
size = size[series])
fc
})
}
63 changes: 36 additions & 27 deletions R/mvjagam.R
@@ -62,7 +62,9 @@
#'@param ar_prior \code{character} specifying (in JAGS syntax) the prior distribution for the AR terms
#'in the latent trends
#'@param r_prior \code{character} specifying (in JAGS syntax) the prior distribution for the Negative Binomial
#'overdispersion parameter. Ignored if \code{family = 'poisson'}
#'overdispersion parameters. Note that this prior acts on the INVERSE of \code{r}, which is convenient
#'for inducing a complexity-penalising prior model whereby the observation process reduces to a Poisson
#'as the sampled parameter approaches \code{0}. Ignored if \code{family = 'poisson'}
#'@param tau_prior \code{character} specifying (in JAGS syntax) the prior distributions for the independent gaussian
#'precisions used for the latent trends (ignored if \code{use_lv == TRUE})
#'@param upper_bounds Optional \code{vector} of \code{integer} values specifying upper limits for each series. If supplied,
@@ -80,9 +82,7 @@
#'factors' variances to zero. This is done to help protect against selecting more latent factors than are needed to
#'capture dependencies in the data, so it can often be advantageous to set n_lv to a slightly larger number. However,
#'larger numbers of factors do come with additional computational costs, so these should be balanced as well.
#'For the time being, all series are assumed to have the same overdispersion
#'parameter when using a negative binomial distribution, though this will be relaxed in future versions. For each
#'series, randomized quantile (i.e. Dunn-Smyth) residuals are calculated for inspecting model diagnostics using
#' For each series, randomized quantile (i.e. Dunn-Smyth) residuals are calculated for inspecting model diagnostics using
#'medians of posterior predictions. If the fitted model is appropriate then Dunn-Smyth residuals will be
#'standard normal in distribution and no autocorrelation will be evident
#'
@@ -327,17 +327,25 @@ for (s in 1:n_series){
## Negative binomial likelihood functions
for (i in 1:n) {
for (s in 1:n_series) {
y[i, s] ~ dnegbin(rate[i, s], r)T(, upper_bound[s]);
rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps,
(r / (r + mu[i, s])))
y[i, s] ~ dnegbin(rate[i, s], r[s])T(, upper_bound[s]);
rate[i, s] <- ifelse((r[s] / (r[s] + mu[i, s])) < min_eps, min_eps,
(r[s] / (r[s] + mu[i, s])))
}
}
r ~ dexp(0.001)
## Complexity penalising prior for the overdispersion parameter;
## where the likelihood reduces to a 'base' model (Poisson) unless
## the data support overdispersion
for(s in 1:n_series){
r[s] <- 1 / r_raw[s]
r_raw[s] ~ dexp(0.5)
}
## Posterior predictions
for (i in 1:n) {
for (s in 1:n_series) {
ypred[i, s] ~ dnegbin(rate[i, s], r)T(, upper_bound[s])
ypred[i, s] ~ dnegbin(rate[i, s], r[s])T(, upper_bound[s])
}
}
")
@@ -379,8 +387,8 @@

} else {
if(missing(upper_bounds)){
model_file[grep('y\\[i, s\\] ~', model_file)] <- ' y[i, s] ~ dnegbin(rate[i, s], r)'
model_file[grep('ypred\\[i, s\\] ~', model_file)] <- ' ypred[i, s] ~ dnegbin(rate[i, s], r)'
model_file[grep('y\\[i, s\\] ~', model_file)] <- ' y[i, s] ~ dnegbin(rate[i, s], r[s])'
model_file[grep('ypred\\[i, s\\] ~', model_file)] <- ' ypred[i, s] ~ dnegbin(rate[i, s], r[s])'
}
}

@@ -478,13 +486,6 @@ for (i in 1:n) {
ar3[s] ~ dnorm(0, 10)
}
## Latent factor loadings are penalized using a regularized horseshoe prior
## to allow loadings for entire factors to be 'dropped', reducing overfitting. Still
## need to impose identifiability constraints
## Global shrinkage penalty (T distribution on the standard deviation)
lv_tau ~ dscaled.gamma(0.5, 5)
## Shrinkage penalties for each factor squeeze the factor to a flat line and squeeze
## the entire factor's set of coefficients toward zero if supported by
## the data. The prior for individual factor penalties allows each factor to possibly
@@ -510,6 +511,7 @@ for (i in 1:n) {
penalty[t] <- theta.prime[t] * l.var[t]
}
## Latent factor loadings: standard normal with identifiability constraints
## Upper triangle of loading matrix set to zero
for(j in 1:(n_lv - 1)){
for(j2 in (j + 1):n_lv){
@@ -519,20 +521,20 @@ for (i in 1:n) {
## Positive constraints on loading diagonals
for(j in 1:n_lv) {
lv_coefs[j, j] ~ dnorm(0, lv_tau * penalty[j])T(0, 1);
lv_coefs[j, j] ~ dnorm(0, 1)T(0, 1);
}
## Lower diagonal free
for(j in 2:n_lv){
for(j2 in 1:(j - 1)){
lv_coefs[j, j2] ~ dnorm(0, lv_tau * penalty[j2])T(-1, 1);
lv_coefs[j, j2] ~ dnorm(0, 1)T(-1, 1);
}
}
## Other elements also free
for(j in (n_lv + 1):n_series) {
for(j2 in 1:n_lv){
lv_coefs[j, j2] ~ dnorm(0, lv_tau * penalty[j2])T(-1, 1);
lv_coefs[j, j2] ~ dnorm(0, 1)T(-1, 1);
}
}
@@ -546,12 +548,19 @@ for (i in 1:n) {
## Negative binomial likelihood functions
for (i in 1:n) {
for (s in 1:n_series) {
y[i, s] ~ dnegbin(rate[i, s], r)T(, upper_bound[s]);
rate[i, s] <- ifelse((r / (r + mu[i, s])) < min_eps, min_eps,
(r / (r + mu[i, s])))
y[i, s] ~ dnegbin(rate[i, s], r[s])T(, upper_bound[s]);
rate[i, s] <- ifelse((r[s] / (r[s] + mu[i, s])) < min_eps, min_eps,
(r[s] / (r[s] + mu[i, s])))
}
}
## Complexity penalising prior for the overdispersion parameter;
## where the likelihood reduces to a 'base' model (Poisson) unless
## the data support overdispersion
for(s in 1:n_series){
r[s] <- 1 / r_raw[s]
r_raw[s] ~ dexp(0.5)
}
r ~ dexp(0.001)
## Posterior predictions
for (i in 1:n) {
@@ -594,8 +603,8 @@ for (i in 1:n) {

} else {
if(missing(upper_bounds)){
model_file[grep('y\\[i, s\\] ~', model_file)] <- ' y[i, s] ~ dnegbin(rate[i, s], r)'
model_file[grep('ypred\\[i, s\\] ~', model_file)] <- ' ypred[i, s] ~ dnegbin(rate[i, s], r)'
model_file[grep('y\\[i, s\\] ~', model_file)] <- ' y[i, s] ~ dnegbin(rate[i, s], r[s])'
model_file[grep('ypred\\[i, s\\] ~', model_file)] <- ' ypred[i, s] ~ dnegbin(rate[i, s], r[s])'
}
}

Expand Down
4 changes: 2 additions & 2 deletions R/pfilter_mvgam_fc.R
@@ -144,7 +144,7 @@ pfilter_mvgam_fc = function(file_path = 'pfilter',
mu = exp(as.vector((Xp[which(as.numeric(series_test$series) == series),] %*%
particles[[x]]$betas)) +
(trend_preds)),
size = particles[[x]]$size)
size = particles[[x]]$size[series])
trunc_preds
})

Expand All @@ -161,7 +161,7 @@ pfilter_mvgam_fc = function(file_path = 'pfilter',
fc <- rnbinom(fc_horizon,
mu = exp(as.vector((Xp[which(as.numeric(series_test$series) == series),] %*%
particles[[x]]$betas)) + (trend_preds)),
size = particles[[x]]$size)
size = particles[[x]]$size[series])
fc
})
}
Expand Down