Skip to content

Commit

Permalink
improve squared exponential trend extension
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasjclark committed Jun 13, 2022
1 parent 68adf05 commit 36899df
Show file tree
Hide file tree
Showing 44 changed files with 524 additions and 274 deletions.
2 changes: 1 addition & 1 deletion NEON_manuscript/ixodes_hyp_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ hyp3 = y ~

# Fit each hypothesis
burnin = 5000
n_samples = 2000
n_samples = 1000

fit_null <- fit_mvgam(data_train = all_data$data_train,
data_test = all_data$data_test,
Expand Down
20 changes: 10 additions & 10 deletions NEON_manuscript/neon_utility_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
calculate_pit = function(out_gam_mod, series, data_test, data_train){

# Pull out predictions and truths for the specific series
ends <- seq(0, dim(MCMCvis::MCMCchains(out_gam_mod$jags_output, 'ypred'))[2],
ends <- seq(0, dim(MCMCvis::MCMCchains(out_gam_mod$model_output, 'ypred'))[2],
length.out = NCOL(out_gam_mod$ytimes) + 1)
starts <- ends + 1
starts <- c(1, starts[-c(1, (NCOL(out_gam_mod$ytimes)+1))])
Expand All @@ -24,7 +24,7 @@ calculate_pit = function(out_gam_mod, series, data_test, data_train){
dplyr::arrange(time) %>%
dplyr::pull(y)

preds <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'ypred')[,starts[series]:ends[series]]
preds <- MCMCvis::MCMCchains(out_gam_mod$model_output, 'ypred')[,starts[series]:ends[series]]
preds <- t(preds[, (last_obs +1):NCOL(preds)])
if(any(!is.na(truth))){

Expand Down Expand Up @@ -83,7 +83,7 @@ calculate_drps = function(out_gam_mod, pred_matrix = NULL, series, data_test, da
scores
}

ends <- seq(0, dim(MCMCvis::MCMCchains(out_gam_mod$jags_output, 'ypred'))[2],
ends <- seq(0, dim(MCMCvis::MCMCchains(out_gam_mod$model_output, 'ypred'))[2],
length.out = NCOL(out_gam_mod$ytimes) + 1)
starts <- ends + 1
starts <- c(1, starts[-c(1, (NCOL(out_gam_mod$ytimes)+1))])
Expand All @@ -105,7 +105,7 @@ calculate_drps = function(out_gam_mod, pred_matrix = NULL, series, data_test, da
dplyr::pull(y)

if(is.null(pred_matrix)){
preds <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'ypred')[,starts[series]:ends[series]]
preds <- MCMCvis::MCMCchains(out_gam_mod$model_output, 'ypred')[,starts[series]:ends[series]]
preds <- t(preds[, (last_obs +1):NCOL(preds)])
} else {
preds <- t(pred_matrix[, (last_obs +1):NCOL(pred_matrix)])
Expand Down Expand Up @@ -136,7 +136,7 @@ plot_mvgam_season = function(out_gam_mod, series, data_test, data_train,
siteID = as.character(unique(data_train$siteID[which(data_train$series ==
levels(data_train$series)[series])])))
Xp <- predict(out_gam_mod$mgcv_model, newdata = pred_dat, type = 'lpmatrix')
betas <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'b')
betas <- MCMCvis::MCMCchains(out_gam_mod$model_output, 'b')
plot((Xp %*% betas[1,]) ~ pred_dat$season, ylim = range((Xp %*% betas[1,]) + 2.5,
(Xp %*% betas[1,]) - 2.5),

Expand Down Expand Up @@ -172,11 +172,11 @@ plot_mvgam_gdd = function(out_gam_mod, series, data_test, data_train,
siteID = as.character(unique(data_train$siteID[which(data_train$series ==
levels(data_train$series)[series])])))
Xp <- predict(out_gam_mod$mgcv_model, newdata = pred_dat, type = 'lpmatrix')
betas <- MCMCvis::MCMCchains(out_gam_mod$jags_output, 'b')
betas <- MCMCvis::MCMCchains(out_gam_mod$model_output, 'b')
preds <- matrix(NA, nrow = 1000, ncol = length(pred_dat$cum_gdd))
for(i in 1:1000){
preds[i,] <- rnbinom(length(pred_dat$cum_gdd), mu = exp((Xp %*% betas[i,])),
size = MCMCvis::MCMCsummary(out_gam_mod$jags_output, 'r')$mean)
size = MCMCvis::MCMCsummary(out_gam_mod$model_output, 'r')$mean)
}
int <- apply(preds,
2, hpd, 0.95)
Expand Down Expand Up @@ -343,7 +343,7 @@ fit_mvgam = function(data_train,

# Condition the model on the observed data
if(missing(knots)){
out_gam_mod <- mvjagam(formula = formula,
out_gam_mod <- mvgam(formula = formula,
data_train = data_train,
data_test = data_test,
burnin = burnin,
Expand All @@ -355,7 +355,7 @@ fit_mvgam = function(data_train,
family = family,
phi_prior = phi_prior)
} else {
out_gam_mod <- mvjagam(formula = formula,
out_gam_mod <- mvgam(formula = formula,
data_train = data_train,
data_test = data_test,
burnin = burnin,
Expand All @@ -373,7 +373,7 @@ fit_mvgam = function(data_train,
#### If GAM component is LESS supported, we should see evidence in the form of: ####
# 1. Poorer convergence of smoothing parameter estimates, suggesting the model
# is more 'mis-specified' and harder to fit
rho_summary <- MCMCvis::MCMCsummary(out_gam_mod$jags_output,
rho_summary <- MCMCvis::MCMCsummary(out_gam_mod$model_output,
c('rho'), HPD = TRUE)

# 2. Stronger residual correlations, suggesting we are missing some site-level structure
Expand Down
6 changes: 3 additions & 3 deletions NEON_manuscript/simulation_component.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
seasonality = 'hierarchical')

# Fit a multivariate gam with no seasonality (null model)
nullmod <- mvjagam(data_train = sim_data$data_train,
nullmod <- mvgam(data_train = sim_data$data_train,
data_test = sim_data$data_test,
formula = y ~ s(series, bs = 're'),
family = 'nb',
Expand Down Expand Up @@ -80,7 +80,7 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
correlations$mean_correlations))

# Fit a multivariate gam with hierarchical seasonality
hier_mod <- mvjagam(data_train = sim_data$data_train,
hier_mod <- mvgam(data_train = sim_data$data_train,
data_test = sim_data$data_test,
formula = y ~ s(series, bs = 're') +
s(season, k = 12, m = 2, bs = 'cc') +
Expand Down Expand Up @@ -117,7 +117,7 @@ sim_results <- lapply(seq_len(nrow(run_parameters)), function(x){
Xp <- rbind(predict(mgcv_hier_mod, type = 'lpmatrix'),
predict(mgcv_hier_mod, newdata = sim_data$data_test, type = 'lpmatrix'))
vc <- vcov(mgcv_hier_mod)
sim <- MASS::mvrnorm(dim(MCMCvis::MCMCchains(hier_mod$jags_output, 'ypred'))[1],
sim <- MASS::mvrnorm(dim(MCMCvis::MCMCchains(hier_mod$model_output, 'ypred'))[1],
mu = coef(mgcv_hier_mod), Sigma = vc)
dims_needed <- dim(exp(Xp %*% t(sim)))
mus <- as.vector(exp(Xp %*% t(sim)))
Expand Down
106 changes: 82 additions & 24 deletions NEON_manuscript/testing_tweedie.R
Original file line number Diff line number Diff line change
@@ -1,42 +1,100 @@
# Generate series with independent latent GP trends
library(mvgam)
sim_data <- sim_mvgam(T = 70, n_series = 2,
sim_data <- sim_mvgam(T = 90, n_series = 2,
use_lv = F,
trend_model = 'GP',
trend_rel = 0.6,
family = 'poisson',
mu_obs = c(7, 10))
mod1stan <- mvgam(data_train = sim_data$data_train,
data_test = sim_data$data_test,
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'GP',
chains = 4,
burnin = 1000,
use_stan = T, run_model = T)
mod1stan$model_file
summary(mod1stan)
plot(mod1stan, 'forecast', series = 2)
mu_obs = c(7, 10),
train_prop = 1)

# To run outside of mvgam (maybe show how other elements could be added?)
train_dat <- sim_data$data_train %>%
dplyr::filter(time <= 75)
test_dat <- sim_data$data_train %>%
dplyr::filter(time > 75)
# mod1stan <- mvgam(data_train = train_dat,
# data_test = test_dat,
# formula = y ~ s(season, k = 12, bs = 'cc'),
# knots = list(season = c(0.5, 12.5)),
# family = 'nb',
# trend_model = 'GP',
# use_stan = T,
# run_model = F)
# test <- rstan::stan(model_code = mod1stan$model_file,
# data = mod1stan$model_data,
# chains = 1, iter = 1000,
# init = mod1stan$inits)

mod1jags <- mvgam(data_train = train_dat,
data_test = test_dat,
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'AR3')

mod1stan <- mvgam(data_train = train_dat,
data_test = test_dat,
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'GP',
use_stan = T,
burnin = 1000)

# Compare the out of sample DRPS for each model
plot(mod1jags, 'forecast', series = 1, data_test = test_dat)
plot(mod1stan, 'forecast', series = 1, data_test = test_dat)

plot(mod1jags, 'forecast', series = 2, data_test = test_dat)
plot(mod1stan, 'forecast', series = 2, data_test = test_dat)

compare_mvgams(mod1jags, mod1stan, fc_horizon = 10,
n_evaluations = 20, n_cores = 4)

plot(mod1stan, 'residuals', series = 1)
plot(mod1stan, 'residuals', series = 2)
plot(mod1stan, 'uncertainty', data_test = test_dat, series = 1)
plot(mod1stan, 'uncertainty', data_test = test_dat, series = 2)
plot(mod1stan, 'forecast', series = 1, data_test = test_dat)
plot(mod1stan, 'forecast', series = 2, data_test = test_dat)
plot(mod1stan, 'smooths', series = 1)
plot(mod1stan, 'smooths', series = 2)
plot(mod1stan, 'trend')
plot(mod1stan, 'trend', series = 2)
plot(sim_data$true_trends[,2])

pfilter_mvgam_init(object = mod1stan, data_assim = sim_data$data_test %>%
dplyr::filter(time == 54),
n_particles = 10000)
# Need to fix the particle filter so it can initiate even if only one series has a next observation
pfilter_mvgam_init(object = mod1stan, data_assim = test_dat,
n_particles = 1000)

pfilter_mvgam_online(data_assim = sim_data$data_test[1:10, ], n_cores = 3, kernel_lambda = 1)
pfilter_mvgam_online(data_assim = test_dat[1:14, ],
n_cores = 3, use_resampling = T,
threshold = 0.6)

fcs <- pfilter_mvgam_fc(data_test = sim_data$data_test, ylim = c(0, 17))
fcs <- pfilter_mvgam_fc(data_test = test_dat, ylim = c(0, 20), n_cores = 3)

layout(matrix(c(1:2), ncol = 2, nrow = 1))
plot_mvgam_fc(mod1stan, series = 1, data_test = sim_data$data_test,
ylim = c(0, 17))
layout(matrix(c(1:2), ncol = 1, nrow = 2))
plot_mvgam_fc(mod1stan, series = 1, data_test = test_dat,
ylim = c(0, 20))
fcs$series_1()
points(c(sim_data$data_train %>%
dplyr::filter(series == 'series_1')%>%
dplyr::pull(y),
sim_data$data_test %>%
dplyr::filter(series == 'series_1') %>%
dplyr::pull(y)))
dplyr::pull(y)),
pch = 16, cex = 0.4)


layout(matrix(c(1:2), ncol = 1, nrow = 2))
plot_mvgam_fc(mod1stan, series = 2, data_test = test_dat,
ylim = c(0, 20))
fcs$series_2()
points(c(sim_data$data_train %>%
dplyr::filter(series == 'series_2')%>%
dplyr::pull(y),
sim_data$data_test %>%
dplyr::filter(series == 'series_2') %>%
dplyr::pull(y)),
pch = 16, cex = 0.4)
unlink('pfilter', recursive = T)
8 changes: 6 additions & 2 deletions R/add_base_dgam_lines.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
tau = sigma ^ -2;
// posterior predictions
int<lower=0> ypred[total_obs];
ypred = poisson_log_rng(eta + to_vector(trend));
matrix[n, n_series] ypred;
for(i in 1:n){
for(s in 1:n_series){
ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] + trend[i, s]);
}
}
}
"

Expand Down
Loading

0 comments on commit 36899df

Please sign in to comment.