Skip to content

Commit

Permalink
first pass at Hillbert space GPs
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasjclark committed May 16, 2022
1 parent 9e0e899 commit e5bc971
Show file tree
Hide file tree
Showing 28 changed files with 463 additions and 63 deletions.
109 changes: 89 additions & 20 deletions NEON_manuscript/testing_tweedie.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,87 @@ plot(y, type = 'l')

# Split data into training and testing
data_train <-
data.frame(y = y[1:125],
season = season[1:125],
time = 1:125)
data.frame(y = y[1:85],
season = season[1:85],
time = 1:85)
data_test <-
data.frame(y = y[126:length(season)],
season = season[126:length(season)],
time = 126:length(season))
data.frame(y = y[86:length(season)],
season = season[86:length(season)],
time = 86:length(season))

library(mvgam)
mod1 <- mvjagam(data_train = data_train,
data_test = data_test,
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'AR1',
chains = 4,
burnin = 1000)
summary(mod1)

model_file <- mod1$model_file

Xp <- predict(mod1$mgcv_model, type = 'lpmatrix')

# Zero out all other columns in Xp
Xp[,!grepl(paste0('(', 'season', ')'), colnames(Xp), fixed = T)] <- 0
betas <- MCMCvis::MCMCchains(mod1$model_output, 'b')
preds <- matrix(NA, nrow = NROW(betas), ncol = NROW(Xp))
for(i in 1:NROW(betas)){
preds[i,] <- (Xp %*% betas[i, ])
}



# Generate series with latent GP trends
sim_data <- sim_mvgam(T = 80, n_series = 2, n_lv = 2,
trend_model = 'GP',
trend_rel = 0.5, family = 'poisson')
mod1stan <- mvjagam(data_train = sim_data$data_train,
data_test = sim_data$data_test,
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'GP',
chains = 2,
burnin = 500,
use_stan = T, run_model = T)
mod1stan$model_file
class(mod1stan$model_output)
plot(mod1stan, type = 'forecast', series = 1, data_test = sim_data$data_test)
plot(mod1stan, type = 'forecast', series = 2, data_test = sim_data$data_test)
plot(mod1stan, type = 'trend', series = 1, data_test = sim_data$data_test)
plot(mod1stan, type = 'trend', series = 2, data_test = sim_data$data_test)
summary(mod1stan)

pairs(mod1stan$model_output, pars = c('rho', 'sigma'))



sim_data <- sim_mvgam(T = 60, n_series = 2, trend_rel = 0.5, prop_missing = 0.2)
mod1stan <- mvjagam(data_train = sim_data$data_train,
data_test = sim_data$data_test,
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'nb',
trend_model = 'GP',
chains = 4,
burnin = 500,
use_stan = T, run_model = T)
mod1stan$model_file
plot(mod1stan, type = 'forecast', series = 2, data_test = data_test)
plot(mod1stan, type = 'trend', series = 1, data_test = data_test)
summary(mod1stan)

# Fit non-dynamic GAMs
# Poisson model
library(mvgam)
mod1 <- mvjagam(data_train = data_train,
data_test = data_test,
formula = y ~ s(season, k = 15, bs = 'cc') +
s(time, k = 4, bs = 'gp'),
knots = list(season = c(0.5, 24.5)),
family = 'nb',
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'AR1',
chains = 4,
burnin = 1000)
Expand All @@ -37,20 +101,25 @@ summary(mod1)

mod1stan <- mvjagam(data_train = data_train,
data_test = data_test,
formula = y ~ s(season, k = 15, bs = 'cc') +
s(time, k = 4, bs = 'gp'),
knots = list(season = c(0.5, 24.5)),
family = 'nb',
trend_model = 'AR1',
formula = y ~ s(season, k = 12, bs = 'cc'),
knots = list(season = c(0.5, 12.5)),
family = 'poisson',
trend_model = 'GP',
chains = 4,
burnin = 1000,
use_stan = T)
mod1stan$model_file
summary(mod1stan)
use_stan = T, run_model = T)

MCMCvis::MCMCtrace(mod1stan$model_output,
c('alpha_gp', 'rho_gp'), pdf = F)

plot(mod1, type = 'forecast', data_test = data_test)
plot(mod1stan, type = 'forecast', data_test = data_test)

plot(mod1, type = 'trend', data_test = data_test)
plot(mod1stan, type = 'trend', data_test = data_test)

compare_mvgams(mod1, mod1stan, fc_horizon = 4,
n_evaluations = 20, n_cores = 5)
plot(mod1, type = 'smooths', data_test = data_test)
plot(mod1stan, type = 'smooths', data_test = data_test)

# Check if overdispersion correctly captured
ppc(mod1stan, type = 'hist', n_bins = 100)
Expand Down
8 changes: 2 additions & 6 deletions R/add_base_dgam_lines.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,8 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){
tau = sigma ^ -2;
// posterior predictions
int<lower=0> ypred[n, n_series];
for (i in 1:n) {
for (s in 1:n_series) {
ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] + trend[i, s]);
}
}
int<lower=0> ypred[total_obs];
ypred = poisson_log_rng(eta + to_vector(trend));
}
"

Expand Down
136 changes: 133 additions & 3 deletions R/add_trend_lines.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ add_trend_lines = function(model_file, stan = FALSE,
'y[i, s] ~ poisson_log(eta[ytimes[i, s]]);'


model_file[grep('ypred[i, s] =', model_file, fixed = T)] <-
"ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] );"
model_file[grep('ypred =', model_file, fixed = T)] <-
"ypred = poisson_log_rng(eta );"

model_file <- model_file[-grep('tau =', model_file)]

Expand All @@ -37,7 +37,137 @@ add_trend_lines = function(model_file, stan = FALSE,
model_file <- readLines(textConnection(model_file), n = -1)
}

if(trend == 'RW'){
if(trend_model == 'GP'){

hilbert_approx = TRUE
if(hilbert_approx){
model_file <- model_file[-c((grep('// raw basis', model_file) + 3):
(grep('// raw basis', model_file) + 5))]

model_file <- model_file[-c((grep('// priors for latent trend', model_file)):
(grep('// priors for latent trend', model_file)+2))]

model_file <- model_file[-c((grep('// trend estimates', model_file)):
(grep('// trend estimates', model_file)+3))]
model_file <- model_file[-c((grep('trend[2:n', model_file, fixed = T)-1):
(grep('trend[2:n', model_file, fixed = T)+1))]
model_file <- model_file[-grep('tau =', model_file)]
model_file <- model_file[-grep('vector[n_series] tau', model_file, fixed = T)]
model_file[grep('##insert data', model_file)+1] <-
paste0('transformed data {\n',
'vector<lower=1>[n] times;\n',
'for (t in 1:n){\n',
'times[t] = t;\n',
'}\n\n',
'real<lower=0> boundary;\n',
'boundary = (5.0/4) * n;\n',
'int<lower=1> num_gp_basis;\n',
'num_gp_basis = max(30, n);\n',
'matrix[n, num_gp_basis] gp_phi;\n',
'for (m in 1:num_gp_basis){\n',
'gp_phi[,m] = phi_SE(boundary, m, times);\n',
'}\n}\n\n',
'parameters {')

model_file[grep('##insert data', model_file)-1] <-
paste0('functions {\n',
'real lambda_gp(real L, int m) {\n',
'real lam;\n',
'lam = ((m*pi())/(2*L))^2;\n',
'return lam;\n',
'}\n\n',
'vector phi_SE(real L, int m, vector x) {\n',
'vector[rows(x)] fi;\n',
'fi = 1/sqrt(L) * sin(m*pi()/(2*L) * (x+L));\n',
'return fi;\n',
'}\n\n',
'real spd_SE(real alpha, real rho, real w) {\n',
'real S;\n',
'S = (alpha^2) * sqrt(2*pi()) * rho * exp(-0.5*(rho^2)*(w^2));\n',
'return S;\n',
'}\n}\n')

model_file[grep('// latent trends', model_file)+2] <-
paste0('// gp parameters\n',
'vector<lower=0>[n_series] alpha_gp;\n',
'vector<lower=0>[n_series] rho_gp;\n\n',
'// gp coefficient weights\n',
'matrix[num_gp_basis, n_series] b_gp;\n')

model_file <- model_file[-c(grep('// latent trends', model_file):
(grep('// latent trends', model_file)+1))]

model_file[grep('// GAM contribution', model_file) + 3] <-
paste0('\n// gp spectral densities\n',
'matrix[n, n_series] trend;\n',
'matrix[num_gp_basis, n_series] diag_SPD;\n',
'matrix[num_gp_basis, n_series] SPD_beta;\n',
'for (m in 1:num_gp_basis){\n',
'for (s in 1:n_series){\n',
'diag_SPD[m, s] = sqrt(spd_SE(alpha_gp[s], rho_gp[s], sqrt(lambda_gp(boundary, m))));\n',
'}\n}\n',
'SPD_beta = diag_SPD .* b_gp;\n',
'trend = gp_phi * SPD_beta;\n}\n')

model_file[grep('// priors for smoothing parameters', model_file)+2] <-
paste0('\n// priors for gp parameters\n',
'for (s in 1:n_series){\n',
'b_gp[1:num_gp_basis, s] ~ normal(0, 1);\n',
'}\n',
'alpha_gp ~ exponential(4);\n',
'rho_gp ~ inv_gamma(4.6, 22.1);\n')

model_file <- readLines(textConnection(model_file), n = -1)
} else {
model_file <- model_file[-c((grep('// raw basis', model_file) + 3):
(grep('// raw basis', model_file) + 5))]

model_file <- model_file[-c((grep('// priors for latent trend', model_file)):
(grep('// priors for latent trend', model_file)+2))]

model_file <- model_file[-c((grep('// trend estimates', model_file)):
(grep('// trend estimates', model_file)+3))]
model_file <- model_file[-c((grep('trend[2:n', model_file, fixed = T)-1):
(grep('trend[2:n', model_file, fixed = T)+1))]
model_file <- model_file[-grep('tau =', model_file)]
model_file <- model_file[-grep('vector[n_series] tau', model_file, fixed = T)]
model_file[grep('##insert data', model_file)+1] <-
paste0('transformed data {\n',
'real times[n];\n',
'for (t in 1:n)\n',
'times[t] = t;\n',
'}\n\n',
'parameters {')

model_file[grep('// latent trends', model_file)+1] <-
paste0('vector<lower=0>[n_series] alpha_gp;\n',
'vector<lower=0>[n_series] rho_gp;\n',
'vector[n] gp_std;\n')

model_file[grep('// basis coefficients', model_file)+2] <-
paste0('\n\n// gp estimates\n',
'matrix[n, n_series] trend;\n',
'for (s in 1:n_series) {\n',
'// gp covariance matrices\n',
'matrix[n, n] cov;\n',
'matrix[n, n] L_cov;\n',
'cov = cov_exp_quad(times, alpha_gp[s], rho_gp[s]) + diag_matrix(rep_vector(1e-10, n));\n',
'L_cov = cholesky_decompose(cov);\n',
'// non-centred parameterisation\n',
'trend[1:n, s] = to_vector(L_cov * gp_std);\n',
'}\n')

model_file[grep('// priors for smoothing parameters', model_file)+2] <-
paste0('\n// priors for gp parameters\n',
'to_vector(gp_std) ~ normal(0, 1);\n',
'alpha_gp ~ exponential(4);\n',
'rho_gp ~ inv_gamma(4.6, 22.1);\n')

model_file <- readLines(textConnection(model_file), n = -1)
}
}

if(trend_model == 'RW'){
if(drift){
model_file[grep('// raw basis', model_file) + 1] <-
paste0('row_vector[num_basis] b_raw;\n\n// latent trend drift terms\nvector<lower=-1, upper=1>[n_series] phi;\n')
Expand Down
11 changes: 11 additions & 0 deletions R/compare_mvgams.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ compare_mvgams = function(model1,
}
}

# Convert stanfit objects to coda samples
if(class(model1$model_output) == 'stanfit'){
model1$model_output <- coda::mcmc.list(lapply(1:NCOL(model1$model_output),
function(x) coda::mcmc(as.array(model1$model_output)[,x,])))
}

if(class(model2$model_output) == 'stanfit'){
model2$model_output <- coda::mcmc.list(lapply(1:NCOL(model2$model_output),
function(x) coda::mcmc(as.array(model2$model_output)[,x,])))
}

# Evaluate the two models
mod1_eval <- roll_eval_mvgam(model1,
n_samples = n_samples,
Expand Down
6 changes: 6 additions & 0 deletions R/eval_mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ eval_mvgam = function(object,
}
}

# Convert stanfit objects to coda samples
if(class(object$model_output) == 'stanfit'){
object$model_output <- coda::mcmc.list(lapply(1:NCOL(object$model_output),
function(x) coda::mcmc(as.array(object$model_output)[,x,])))
}

#### 1. Generate linear predictor matrix for covariates and extract trend estimates at timepoint
data_train <- object$obs_data
n_series <- NCOL(object$ytimes)
Expand Down
4 changes: 4 additions & 0 deletions R/get_monitor_pars.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ get_monitor_pars = function(family, use_lv, trend_model, drift){
param <- c(param, 'trend', 'tau', 'sigma', 'ar1', 'ar2', 'ar3')
}

if(trend_model == 'GP'){
param <- c(param, 'trend', 'alpha_gp', 'rho_gp')
}

}

if(trend_model != 'None'){
Expand Down
6 changes: 6 additions & 0 deletions R/get_mvgam_resids.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
#'@return A \code{list} of residual distributions
get_mvgam_resids = function(object, n_cores = 1){

# Convert stanfit objects to coda samples
if(class(object$model_output) == 'stanfit'){
object$model_output <- coda::mcmc.list(lapply(1:NCOL(object$model_output),
function(x) coda::mcmc(as.array(object$model_output)[,x,])))
}

# Functions for calculating randomised quantile (Dunn-Smyth) residuals
ds_resids_nb = function(truth, fitted, draw, size){
dsres_out <- matrix(NA, length(truth), 1)
Expand Down
7 changes: 7 additions & 0 deletions R/lv_correlations.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
#'@return A \code{list} object containing the mean posterior correlations and the full array of posterior correlations
#'@export
lv_correlations = function(object){

# Convert stanfit objects to coda samples
if(class(object$model_output) == 'stanfit'){
object$model_output <- coda::mcmc.list(lapply(1:NCOL(object$model_output),
function(x) coda::mcmc(as.array(object$model_output)[,x,])))
}

data_train <- object$obs_data

# Series start and end indices
Expand Down
Loading

0 comments on commit e5bc971

Please sign in to comment.