Skip to content

Commit 23933c2

Browse files
author
Nicholas Clark
committed
bug fixes for variance calculations to get edf; update docs
1 parent 5698279 commit 23933c2

110 files changed

Lines changed: 616 additions & 516 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,18 @@ importFrom(bayesplot,neff_ratio)
110110
importFrom(bayesplot,nuts_params)
111111
importFrom(brms,bernoulli)
112112
importFrom(brms,conditional_effects)
113+
importFrom(brms,dbeta_binomial)
113114
importFrom(brms,dstudent_t)
114115
importFrom(brms,get_prior)
115116
importFrom(brms,logm1)
116117
importFrom(brms,lognormal)
117118
importFrom(brms,mcmc_plot)
118119
importFrom(brms,ndraws)
120+
importFrom(brms,pbeta_binomial)
119121
importFrom(brms,prior_string)
120122
importFrom(brms,pstudent_t)
121123
importFrom(brms,qstudent_t)
124+
importFrom(brms,rbeta_binomial)
122125
importFrom(brms,rstudent_t)
123126
importFrom(brms,student)
124127
importFrom(ggplot2,scale_colour_discrete)

R/compute_edf.R

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ compute_edf = function(mgcv_model, object, rho_names, sigma_raw_names){
5252
names(random_sp) <- gsub('\\))', ')', names(random_sp))
5353
}
5454

55-
mgcv_model$sp <- c(rho_sp, random_sp)
55+
mgcv_model$sp <- exp(c(rho_sp, random_sp))
5656

5757
# Compute estimated degrees of freedom based on edf.type = 1 from
5858
# https://github.com/cran/mgcv/blob/master/R/jagam.r
@@ -88,10 +88,9 @@ compute_edf = function(mgcv_model, object, rho_names, sigma_raw_names){
8888
mu[which(mu_variance == 0)]
8989
}
9090

91-
w <- as.numeric(mgcv_model$family$mu.eta(eta) / mu_variance)
91+
w <- as.numeric(mgcv_model$family$mu.eta(eta)^2 / mu_variance)
9292
XWX <- t(X) %*% (w * X)
93-
rho <- mgcv_model$sp
94-
lambda <- exp(rho)
93+
lambda <- mgcv_model$sp
9594
XWXS <- XWX
9695

9796
for(i in 1:length(lambda)){

R/families.R

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,11 @@ mvgam_predict = function(Xp,
401401
xpred <- extraDistr::rtpois(n = length(lambdas),
402402
lambda = lambdas,
403403
b = cap)
404-
# Variance of a Binomial distribution
405-
out <- xpred * p * (1 - p)
404+
405+
# Variance of a Binomial distribution using the
406+
# weights convention from stats::glm()
407+
mu <- p / xpred
408+
out <- mu * (1 - mu)
406409
} else {
407410
# Expectations
408411
xpred <- extraDistr::rtpois(n = length(lambdas),
@@ -478,8 +481,9 @@ mvgam_predict = function(Xp,
478481
}
479482

480483
} else if(type == 'variance') {
481-
out <- as.vector(family_pars$nu) /
482-
(pmax(2.01, as.vector(family_pars$nu)) - 2)
484+
out <- as.vector(family_pars$sigma_obs)^2 *
485+
as.vector(family_pars$nu) /
486+
pmax(1.01, (as.vector(family_pars$nu) - 2))
483487

484488
} else {
485489
out <- rstudent_t(n = NROW(Xp),
@@ -568,8 +572,9 @@ mvgam_predict = function(Xp,
568572
size = as.vector(family_pars$trials))
569573
} else if(type == 'variance'){
570574
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
571-
betas) + attr(Xp, 'model.offset')))
572-
out <- as.vector(family_pars$trials) * mu * (1 - mu)
575+
betas) + attr(Xp, 'model.offset'))) /
576+
as.vector(family_pars$trials)
577+
out <- mu * (1 - mu)
573578
} else {
574579
out <- plogis(((matrix(Xp, ncol = NCOL(Xp)) %*%
575580
betas)) +
@@ -669,7 +674,7 @@ mvgam_predict = function(Xp,
669674
} else if(type == 'variance'){
670675
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
671676
betas) + attr(Xp, 'model.offset')))
672-
out <- mu * (1 - mu) / (1 + exp(as.vector(family_pars$phi)))
677+
out <- mu * (1 - mu) / (1 + as.vector(family_pars$phi))
673678
} else {
674679
out <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
675680
betas) + attr(Xp, 'model.offset')))
@@ -1658,7 +1663,10 @@ dsresids_vec = function(object){
16581663
if(is.matrix(family_pars[[j]])){
16591664
as.vector(family_pars[[j]][, series_obs])
16601665
} else {
1661-
family_pars[[j]][]
1666+
as.vector(matrix(rep(family_pars[[j]],
1667+
NCOL(preds)),
1668+
nrow = NROW(preds),
1669+
byrow = FALSE))
16621670
}
16631671
})
16641672
names(family_extracts) <- names(family_pars)

R/forecast.mvgam.R

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,18 +201,25 @@ forecast.mvgam = function(object, newdata, data_test,
201201
family_pars <- extract_family_pars(object = object)
202202
par_extracts <- lapply(seq_along(family_pars), function(j){
203203
if(is.matrix(family_pars[[j]])){
204-
family_pars[[j]][, series]
204+
as.vector(matrix(rep(as.vector(family_pars[[j]][, series]),
205+
NCOL(preds)),
206+
nrow = NROW(preds),
207+
byrow = FALSE))
208+
205209
} else {
206-
family_pars[[j]]
210+
as.vector(matrix(rep(family_pars[[j]],
211+
NCOL(preds)),
212+
nrow = NROW(preds),
213+
byrow = FALSE))
207214
}
208215
})
209216
names(par_extracts) <- names(family_pars)
210217

211218
# Add trial information if this is a Binomial model
212219
if(object$family %in% c('binomial', 'beta_binomial')){
213220
trials <- as.vector(matrix(rep(as.vector(attr(object$mgcv_model, 'trials')[,series]),
214-
NROW(mus)),
215-
nrow = NROW(mus),
221+
NROW(preds)),
222+
nrow = NROW(preds),
216223
byrow = TRUE))
217224
par_extracts$trials <- trials
218225
}

R/hindcast.mvgam.R

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,16 @@ if(series == 'all'){
130130
family_pars <- extract_family_pars(object = object)
131131
par_extracts <- lapply(seq_along(family_pars), function(j){
132132
if(is.matrix(family_pars[[j]])){
133-
family_pars[[j]][, series]
133+
as.vector(matrix(rep(as.vector(family_pars[[j]][, series]),
134+
NCOL(preds)),
135+
nrow = NROW(preds),
136+
byrow = FALSE))
137+
134138
} else {
135-
family_pars[[j]]
139+
as.vector(matrix(rep(family_pars[[j]],
140+
NCOL(preds)),
141+
nrow = NROW(preds),
142+
byrow = FALSE))
136143
}
137144
})
138145
names(par_extracts) <- names(family_pars)

R/logLik.mvgam.R

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,10 @@ logLik.mvgam = function(object,
137137
if(is.matrix(family_pars[[j]])){
138138
as.vector(family_pars[[j]][, series_obs])
139139
} else {
140-
family_pars[[j]][]
140+
as.vector(matrix(rep(family_pars[[j]],
141+
NCOL(mus)),
142+
nrow = NROW(mus),
143+
byrow = FALSE))
141144
}
142145
})
143146
names(family_extracts) <- names(family_pars)

R/mvgam.R

Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2164,11 +2164,9 @@ mvgam = function(formula,
21642164
}
21652165

21662166
# Add Bayesian coefficients to the mgcv model to help with plotting of
2167-
# smooths that aren't yet supported by mvgam plotting functions
2168-
# Extract median beta params for smooths and their covariances
2169-
# so that uncertainty from mgcv plots is reasonably accurate
2170-
if(all(run_model, !lfo, residuals)){
2171-
# Use the empirical covariance matrix from the fitted coefficients
2167+
# smooths that aren't yet supported by mvgam plotting functions; this is
2168+
# also necessary for computing EDFs and approximate p-values of smooths
2169+
if(!lfo){
21722170
V <- cov(mcmc_chains(out_gam_mod, 'b'))
21732171
ss_gam$Ve <- ss_gam$Vp <- ss_gam$Vc <- V
21742172

@@ -2181,49 +2179,20 @@ mvgam = function(formula,
21812179
names(p) <- names(ss_gam$coefficients)
21822180
ss_gam$coefficients <- p
21832181

2184-
# Compute estimated degrees of freedom for smooths
2185-
object = list(
2186-
model_output = out_gam_mod,
2187-
call = orig_formula,
2188-
model_data = if(use_stan){
2189-
vectorised$model_data
2190-
} else {
2191-
ss_jagam$jags.data
2192-
},
2193-
fit_engine = fit_engine,
2194-
family = family_char,
2195-
share_obs_params = share_obs_params,
2196-
obs_data = data_train,
2197-
test_data = data_test,
2198-
trend_model = trend_model,
2199-
mgcv_model = ss_gam,
2200-
ytimes = ytimes)
2201-
class(object) <- 'mvgam'
2202-
ss_gam <- compute_edf(ss_gam, object, 'rho', 'sigma_raw')
2203-
22042182
# Repeat for any trend-specific mgcv model
22052183
if(!missing(trend_formula)){
22062184
V <- cov(mcmc_chains(out_gam_mod, 'b_trend'))
22072185
trend_mgcv_model$Ve <- trend_mgcv_model$Vp <- trend_mgcv_model$Vc <- V
2208-
22092186
p <- mcmc_summary(out_gam_mod, 'b_trend',
22102187
variational = algorithm %in% c('meanfield',
22112188
'fullrank',
22122189
'pathfinder',
22132190
'laplace'))[,c(4)]
22142191
names(p) <- names(trend_mgcv_model$coefficients)
22152192
trend_mgcv_model$coefficients <- p
2216-
2217-
object$trend_mgcv_model <- trend_mgcv_model
2218-
trend_mgcv_model <- compute_edf(trend_mgcv_model,
2219-
object, 'rho_trend', 'sigma_raw_trend')
22202193
}
22212194
}
22222195

2223-
if(lfo){
2224-
ss_gam <- NULL
2225-
}
2226-
22272196
#### Return the output as class mvgam ####
22282197
trim_data <- list()
22292198
attr(trim_data, 'trend_model') <- trend_model

R/predict.mvgam.R

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -262,12 +262,15 @@ predict.mvgam = function(object, newdata,
262262
# Determine which series each observation belongs to
263263
series_ind <- as.numeric(newdata$series)
264264

265-
# Family parameters spread into a vector
265+
# Family parameters spread into long vectors
266266
family_extracts <- lapply(seq_along(family_pars), function(j){
267267
if(is.matrix(family_pars[[j]])){
268268
as.vector(family_pars[[j]][, series_ind])
269269
} else {
270-
family_pars[[j]]
270+
as.vector(matrix(rep(family_pars[[j]],
271+
NROW(Xp)),
272+
nrow = NROW(betas),
273+
byrow = FALSE))
271274
}
272275
})
273276
names(family_extracts) <- names(family_pars)
@@ -283,16 +286,6 @@ predict.mvgam = function(object, newdata,
283286
}
284287

285288
trials <- newdata[[trial_name]]
286-
# trial_df <- data.frame(series = newdata$series,
287-
# time = newdata$time,
288-
# trial = newdata[[trial_name]])
289-
# trials <- do.call(cbind, lapply(length(levels(newdata$series)), function(i){
290-
# trial_df %>%
291-
# dplyr::filter(series == levels(newdata$series)[i]) %>%
292-
# dplyr::arrange(time) %>%
293-
# dplyr::pull(trial)
294-
# }))
295-
296289
trials <- as.vector(matrix(rep(as.vector(trials),
297290
NROW(betas)),
298291
nrow = NROW(betas),

R/summary.mvgam.R

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
#'@param include_betas Logical. Print a summary that includes posterior summaries
77
#'of all linear predictor beta coefficients (including spline coefficients)?
88
#'Defaults to \code{TRUE} but use \code{FALSE} for a more concise summary
9+
#'@param smooth_test Logical. Compute estimated degrees of freedom and approximate
10+
#'p-values for smooth terms? Defaults to \code{TRUE}, but users may wish to set
11+
#'to \code{FALSE} for complex models with many smooth terms
912
#'@param digits The number of significant digits for printing out the summary;
1013
#' defaults to \code{2}.
1114
#'@param ... Ignored
@@ -28,7 +31,25 @@
2831
#'For `coef.mvgam`, either a \code{matrix} of posterior coefficient distributions
2932
#'(if \code{summarise == FALSE} or \code{data.frame} of coefficient summaries)
3033
#'@export
31-
summary.mvgam = function(object, include_betas = TRUE, digits = 2, ...){
34+
summary.mvgam = function(object,
35+
include_betas = TRUE,
36+
smooth_test = TRUE,
37+
digits = 2, ...){
38+
39+
#### Smooth tests ####
40+
if(smooth_test){
41+
object$mgcv_model <- compute_edf(object$mgcv_model,
42+
object,
43+
'rho',
44+
'sigma_raw')
45+
46+
if(!is.null(object$trend_call)){
47+
object$trend_mgcv_model <- compute_edf(object$trend_mgcv_model,
48+
object,
49+
'rho_trend',
50+
'sigma_raw_trend')
51+
}
52+
}
3253

3354
#### Standard summary of formula and model arguments ####
3455
if(!is.null(object$trend_call)){
@@ -252,8 +273,8 @@ if(!is.null(attr(object$mgcv_model, 'gp_att_table'))){
252273
print(rbind(alpha_summary, rho_summary))
253274
}
254275

255-
if(any(!is.na(object$sp_names))){
256-
gam_sig_table <- summary(object$mgcv_model)$s.table[, c(1,3,4), drop = FALSE]
276+
if(any(!is.na(object$sp_names)) & smooth_test){
277+
gam_sig_table <- summary(object$mgcv_model)$s.table[, c(1,2,3,4), drop = FALSE]
257278
if(!is.null(attr(object$mgcv_model, 'gp_att_table'))){
258279
gp_names <- unlist(purrr::map(attr(object$mgcv_model,
259280
'gp_att_table'), 'name'))
@@ -676,8 +697,8 @@ if(!is.null(object$trend_call)){
676697
print(rbind(alpha_summary, rho_summary))
677698
}
678699

679-
if(any(!is.na(object$trend_sp_names))){
680-
gam_sig_table <- summary(object$trend_mgcv_model)$s.table[, c(1,3,4), drop = FALSE]
700+
if(any(!is.na(object$trend_sp_names)) & smooth_test){
701+
gam_sig_table <- summary(object$trend_mgcv_model)$s.table[, c(1,2,3,4), drop = FALSE]
681702
if(!is.null(attr(object$trend_mgcv_model, 'gp_att_table'))){
682703
gp_names <- unlist(purrr::map(attr(object$trend_mgcv_model, 'gp_att_table'), 'name'))
683704
if(all(rownames(gam_sig_table) %in% gsub('gp(', 's(', gp_names, fixed = TRUE))){

0 commit comments

Comments
 (0)