diff --git a/R/dynamic.R b/R/dynamic.R index 1e89c882..d868dc2f 100644 --- a/R/dynamic.R +++ b/R/dynamic.R @@ -62,14 +62,32 @@ interpret_mvgam = function(formula, N){ newfacs <- facs[!grepl('bs = \"re\"', facs, fixed = TRUE)] refacs <- facs[grepl('bs = \"re\"', facs, fixed = TRUE)] int <- attr(terms.formula(formula), 'intercept') - newformula <- as.formula(paste(terms.formula(formula)[[2]], '~',paste(paste(newfacs, collapse = '+'), - '+', - paste(refacs, collapse = '+'), - collapse = '+'), - ifelse(int == 0, ' - 1', ''))) + + # Preserve offset if included + if(!is.null(attr(terms(formula(formula)), 'offset'))){ + newformula <- as.formula(paste(terms.formula(formula)[[2]], '~', + grep('offset', rownames(attr(terms.formula(formula), 'factors')), + value = TRUE), + '+', + paste(paste(newfacs, collapse = '+'), + '+', + paste(refacs, collapse = '+'), + collapse = '+'), + ifelse(int == 0, ' - 1', ''))) + + } else { + newformula <- as.formula(paste(terms.formula(formula)[[2]], '~', + paste(paste(newfacs, collapse = '+'), + '+', + paste(refacs, collapse = '+'), + collapse = '+'), + ifelse(int == 0, ' - 1', ''))) + } + } else { newformula <- formula } + attr(newformula, '.Environment') <- attr(formula, '.Environment') # Check if any terms use the dynamic wrapper diff --git a/R/get_mvgam_priors.R b/R/get_mvgam_priors.R index 84146612..5f600dc3 100644 --- a/R/get_mvgam_priors.R +++ b/R/get_mvgam_priors.R @@ -148,6 +148,12 @@ get_mvgam_priors = function(formula, trend_map, drift = FALSE){ + # Check formula + if(attr(terms(formula), "response") == 0L){ + stop('response variable is missing from formula', + call. = FALSE) + } + # Validate the family argument family <- evaluate_family(family) family_char <- match.arg(arg = family$family, diff --git a/R/mvgam-class.R b/R/mvgam-class.R index ec15a157..a12d92fb 100644 --- a/R/mvgam-class.R +++ b/R/mvgam-class.R @@ -6,7 +6,9 @@ #' `print`, `score`, `summary` and `update` exist for this class. #' @details A `mvgam` object contains the following elements: #'\itemize{ -#' \item `call` the original model formula +#' \item `call` the original observation model formula +#' \item `trend_call` If a `trend_formula was supplied`, the original trend model formula is +#' returned. Otherwise `NULL` #' \item `family` \code{character} description of the observation distribution #' \item `trend_model` \code{character} description of the latent trend model #' \item `drift` Logical specifying whether a drift term was used in the trend model @@ -32,6 +34,8 @@ #' but these are only used if generating plots of smooth functions that `mvgam` currently cannot handle #' (such as plots for three-dimensional smooths). This model therefore should not be used for inference. #' See \code{\link[mgcv]{gamObject}} for details. +#' \item `trend_mgcv_model` If a `trend_formula was supplied`, an object of class `gam` containing +#' the `mgcv` version of the trend model. Otherwise `NULL` #' \item `ytimes` The `matrix` object used in model fitting for indexing which series and timepoints #' were observed in each row of the supplied data. Used internally by some downstream plotting #' and prediction functions diff --git a/R/mvgam.R b/R/mvgam.R index de68b1e4..70c0b8fa 100644 --- a/R/mvgam.R +++ b/R/mvgam.R @@ -7,9 +7,13 @@ #' #'@importFrom parallel clusterExport stopCluster setDefaultCluster #'@importFrom stats formula terms rnorm update.formula predict -#'@param formula A \code{character} string specifying the GAM formula. These are exactly like the formula +#'@param formula A \code{character} string specifying the GAM observation model formula. These are exactly like the formula #'for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side #'to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these). +#'@param trend_formula An optional \code{character} string specifying the GAM process model formula. If +#'supplied, a linear predictor will be modelled for the latent trends to capture process model evolution +#'separately from the observation model. Should not have a response variable specified on the left-hand side +#'of the formula (i.e. a valid option would be `~ season + s(year)`) #'@param knots An optional \code{list} containing user specified knot values to be used for basis construction. #'For most bases the user simply supplies the knots to be used, which must match up with the k value supplied #'(note that the number of knots is not always just k). Different terms can use different numbers of knots, @@ -337,7 +341,7 @@ #' # Example showing how to incorporate an offset; simulate some count data #' # with different means per series #' set.seed(100) -#' dat <- sim_mvgam(trend_rel = 0, mu_obs = c(4, 8, 8), seasonality = 'hierarchical') +#' dat <- sim_mvgam(trend_rel = 0, mu = c(0, 2, 2), seasonality = 'hierarchical') #' #' # Add offset terms to the training and testing data #' dat$data_train$offset <- 0.5 * as.numeric(dat$data_train$series) @@ -346,7 +350,8 @@ #' # Fit a model that includes the offset in the linear predictor as well as #' # hierarchical seasonal smooths #' mod1 <- mvgam(formula = y ~ offset(offset) + -#' s(season, bs = 'cc') + +#' s(series, bs = 're') + +#' s(season, bs = 'cc') + #' s(season, by = series, m = 1, k = 5), #' data = dat$data_train, #' trend_model = 'None', @@ -359,9 +364,9 @@ #' # Forecasts for the first two series will differ in magnitude #' layout(matrix(1:2, ncol = 2)) #' plot(mod1, type = 'forecast', series = 1, newdata = dat$data_test, -#' ylim = c(0, 70)) +#' ylim = c(0, 75)) #' plot(mod1, type = 'forecast', series = 2, newdata = dat$data_test, -#' ylim = c(0, 70)) +#' ylim = c(0, 75)) #' layout(1) #' #' # Changing the offset for the testing data should lead to changes in @@ -396,6 +401,7 @@ #'@export mvgam = function(formula, + trend_formula, knots, data, data_train, @@ -433,6 +439,11 @@ mvgam = function(formula, data_train <- data } + if(attr(terms(formula), "response") == 0L){ + stop('response variable is missing from formula', + call. = FALSE) + } + if(!as.character(terms(formula(formula))[[2]]) %in% names(data_train)){ stop(paste0('variable ', terms(formula(formula))[[2]], ' not found in data'), call. = FALSE) @@ -467,6 +478,18 @@ mvgam = function(formula, # Validate the trend arguments trend_model <- evaluate_trend_model(trend_model) + if(!missing(trend_formula)){ + if(missing(trend_map)){ + trend_map <- data.frame(series = unique(data_train$series), + trend = 1:length(unique(data_train$series))) + } + + if(trend_model != 'RW'){ + stop('only random walk trends currently supported for trend predictor models', + call. = FALSE) + } + } + # Check trend_map is correctly specified if(!missing(trend_map)){ @@ -676,6 +699,13 @@ mvgam = function(formula, warning('No point in latent variables if trend model is None; changing use_lv to FALSE') } + # Check if there is an offset variable included + if(is.null(attr(terms(formula(formula)), 'offset'))){ + offset <- FALSE + } else { + offset <- TRUE + } + # Ensure outcome is labelled 'y' when feeding data to the model for simplicity orig_formula <- formula formula <- interpret_mvgam(formula, N = max(data_train$time)) @@ -695,13 +725,6 @@ mvgam = function(formula, } } - # Check if there is an offset variable included - if(is.null(attr(terms(formula(formula)), 'offset'))){ - offset <- FALSE - } else { - offset <- TRUE - } - # If there are missing values in y, use predictions from an initial mgcv model to fill # these in so that initial values to maintain the true size of the training dataset orig_y <- data_train$y @@ -709,10 +732,10 @@ mvgam = function(formula, # Initiate the GAM model using mgcv so that the linear predictor matrix can be easily calculated # when simulating from the Bayesian model later on; ss_gam <- mvgam_setup(formula = formula, - family = family_to_mgcvfam(family), - data = data_train, - drop.unused.levels = FALSE, - maxit = 30) + family = family_to_mgcvfam(family), + data = data_train, + drop.unused.levels = FALSE, + maxit = 30) # Fill in missing observations in data_train so the size of the dataset is correct when # building the initial JAGS model. @@ -966,7 +989,19 @@ mvgam = function(formula, model_file[grep('eta <- X %*% b', model_file, fixed = TRUE)] <- "eta <- X %*% b + offset" if(!missing(data_test) & !prior_simulation){ - ss_jagam$jags.data$offset <- c(ss_jagam$jags.data$offset, data_test$offset) + + get_offset <- function(model) { + nm1 <- names(attributes(model$terms)$dataClasses) + if('(offset)' %in% nm1) { + deparse(as.list(model$call)$offset) + } else { + + sub("offset\\((.*)\\)$", "\\1", grep('offset', nm1, value = TRUE)) + } + } + + ss_jagam$jags.data$offset <- c(ss_jagam$jags.data$offset, + data_test[[get_offset(ss_gam)]]) } } @@ -1351,6 +1386,42 @@ mvgam = function(formula, trend_model = trend_model) vectorised$model_file <- trend_map_setup$model_file vectorised$model_data <- trend_map_setup$model_data + + if(trend_model %in% c('RW', 'AR1', 'AR2', 'AR3')){ + param <- c(param, 'sigma') + } + + # If trend formula specified, add the predictors for the trend models + if(!missing(trend_formula)){ + trend_pred_setup <- add_trend_predictors( + trend_formula = trend_formula, + trend_map = trend_map, + trend_model = trend_model, + data_train = data_train, + data_test = if(missing(data_test)){ + NULL + } else { + data_test + }, + model_file = vectorised$model_file, + model_data = vectorised$model_data, + drift = drift) + + vectorised$model_file <- trend_pred_setup$model_file + vectorised$model_data <- trend_pred_setup$model_data + trend_mgcv_model <- trend_pred_setup$trend_mgcv_model + + param <- c(param, 'b_trend') + + if(trend_pred_setup$trend_smooths_included){ + param <- c(param, 'rho_trend') + } + + if(trend_pred_setup$trend_random_included){ + param <- c(param, 'mu_raw_trend', 'sigma_raw_trend') + } + + } } } else { @@ -1388,6 +1459,11 @@ mvgam = function(formula, if(!run_model){ unlink('base_gam.txt') output <- structure(list(call = orig_formula, + trend_call = if(!missing(trend_formula)){ + trend_formula + } else { + NULL + }, family = family_char, trend_model = trend_model, drift = drift, @@ -1405,6 +1481,11 @@ mvgam = function(formula, inits = inits, monitor_pars = param, mgcv_model = ss_gam, + trend_mgcv_model = if(!missing(trend_formula)){ + trend_mgcv_model + } else { + NULL + }, sp_names = rho_names, ytimes = ytimes, use_lv = use_lv, @@ -1666,10 +1747,24 @@ mvgam = function(formula, p <- mcmc_summary(out_gam_mod, 'b')[,c(4)] names(p) <- names(ss_gam$coefficients) ss_gam$coefficients <- p + + if(!missing(trend_formula)){ + V <- cov(mcmc_chains(out_gam_mod, 'b_trend')) + trend_mgcv_model$Ve <- trend_mgcv_model$Vp <- trend_mgcv_model$Vc <- V + + p <- mcmc_summary(out_gam_mod, 'b_trend')[,c(4)] + names(p) <- names(trend_mgcv_model$coefficients) + trend_mgcv_model$coefficients <- p + } } #### Return the output as class mvgam #### output <- structure(list(call = orig_formula, + trend_call = if(!missing(trend_formula)){ + trend_formula + } else { + NULL + }, family = family_char, trend_model = trend_model, drift = drift, @@ -1697,6 +1792,11 @@ mvgam = function(formula, }, sp_names = rho_names, mgcv_model = ss_gam, + trend_mgcv_model = if(!missing(trend_formula)){ + trend_mgcv_model + } else { + NULL + }, ytimes = ytimes, resids = series_resids, use_lv = use_lv, diff --git a/R/plot.mvgam.R b/R/plot.mvgam.R index d3dcaafb..0165bb0e 100644 --- a/R/plot.mvgam.R +++ b/R/plot.mvgam.R @@ -30,6 +30,8 @@ #'This argument is optional when plotting out of sample forecast period observations #'(when \code{type = forecast}) and required when plotting #'uncertainty components (\code{type = uncertainty}). +#'@param trend_effects logical. If `TRUE` and a `trend_formula` was used in model +#'fitting, terms from the trend (i.e. process) model will be plotted #'@param data_test Deprecated. Still works in place of \code{newdata} but users are recommended to use #'\code{newdata} instead for more seamless integration into `R` workflows #'@param ... Additional arguments for each individual plotting function. @@ -44,9 +46,11 @@ #'@export plot.mvgam = function(x, type = 'residuals', series = 1, residuals = FALSE, - newdata, data_test, ...){ + newdata, data_test, trend_effects = FALSE, + ...){ object <- x + # Argument checks type <- match.arg(arg = type, choices = c("residuals", "smooths", "re", "pterms", "forecast", "trend", @@ -66,11 +70,13 @@ plot.mvgam = function(x, type = 'residuals', } if(type == 're'){ - plot_mvgam_randomeffects(object, ...) + plot_mvgam_randomeffects(object, trend_effects = trend_effects, + ...) } if(type == 'pterms'){ - plot_mvgam_pterms(object, ...) + plot_mvgam_pterms(object, trend_effects = trend_effects, + ...) } if(type == 'residuals'){ @@ -111,11 +117,21 @@ plot.mvgam = function(x, type = 'residuals', if(type == 'smooths'){ - # Get labels of all included smooths from the object - smooth_labs <- do.call(rbind, lapply(seq_along(object$mgcv_model$smooth), function(x){ - data.frame(label = object$mgcv_model$smooth[[x]]$label, - class = class(object$mgcv_model$smooth[[x]])[1], - mgcv_plottable = object$mgcv_model$smooth[[x]]$plot.me) + object2 <- object + + if(trend_effects){ + if(is.null(object$trend_call)){ + stop('no trend_formula exists so there no trend-level smooths to plot') + } + + object2$mgcv_model <- object2$trend_mgcv_model + } + + # Get labels of all included smooths from the object2 + smooth_labs <- do.call(rbind, lapply(seq_along(object2$mgcv_model$smooth), function(x){ + data.frame(label = object2$mgcv_model$smooth[[x]]$label, + class = class(object2$mgcv_model$smooth[[x]])[1], + mgcv_plottable = object2$mgcv_model$smooth[[x]]$plot.me) })) n_smooths <- NROW(smooth_labs) smooth_labs$smooth_index <- 1:NROW(smooth_labs) @@ -169,8 +185,9 @@ plot.mvgam = function(x, type = 'residuals', # Plot the smooths for(i in which_to_plot){ - plot_mvgam_smooth(object = object, smooth = i, series = series, - residuals = residuals, ...) + plot_mvgam_smooth(object = object2, smooth = i, series = series, + residuals = residuals, trend_effects = trend_effects, + ...) } invisible() diff --git a/R/plot_mvgam_pterms.R b/R/plot_mvgam_pterms.R index e9ecffaf..28d62606 100644 --- a/R/plot_mvgam_pterms.R +++ b/R/plot_mvgam_pterms.R @@ -5,13 +5,16 @@ #'@importFrom graphics layout title rug bxp #'@importFrom stats coef predict #'@param object \code{list} object returned from \code{mvgam} +#'@param trend_effects logical. If `TRUE` and a `trend_formula` was used in model +#'fitting, terms from the trend (i.e. process) model will be plotted #'@details Posterior empirical quantiles of each parametric term's partial effect estimates #'(on the link scale) are calculated and visualised as ribbon plots. These effects can #'be interpreted as the partial effect that a parametric term contributes when all other #'terms in the model have been set to \code{0} #'@return A base \code{R} graphics plot #'@export -plot_mvgam_pterms = function(object){ +plot_mvgam_pterms = function(object, trend_effects = FALSE){ + # General plotting colours and empirical quantile probabilities c_light <- c("#DCBCBC") c_light_trans <- c("#DCBCBC70") @@ -23,8 +26,23 @@ c_dark <- c("#8F2727") c_dark_highlight <- c("#7C0000") probs = c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95) +# Check arguments +if (!(inherits(object, "mvgam"))) { + stop('argument "object" must be of class "mvgam"') +} + +object2 <- object + +if(trend_effects){ + if(is.null(object$trend_call)){ + stop('no trend_formula exists so there no trend-level smooths to plot') + } + + object2$mgcv_model <- object2$trend_mgcv_model +} + # Look for parametric terms in the model -pterms <- attr(object$mgcv_model$pterms, 'term.labels') +pterms <- attr(object2$mgcv_model$pterms, 'term.labels') if(length(pterms) > 0){ # Graphical parameters @@ -47,25 +65,29 @@ if(length(pterms) > 0){ for(i in 1:length(pterms)){ # Find out which beta corresponds to the associated parametric term betas_keep <- grepl(paste0('^(?=.*',pterms[i], ')(?!.*s\\()'), - colnames(predict(object$mgcv_model, type = 'lpmatrix')), + colnames(predict(object2$mgcv_model, type = 'lpmatrix')), perl = TRUE) - betas <- mcmc_chains(object$model_output, 'b')[ ,betas_keep] + if(trend_effects){ + betas <- mcmc_chains(object2$model_output, 'b_trend')[ ,betas_keep] + } else { + betas <- mcmc_chains(object2$model_output, 'b')[ ,betas_keep] + } # Generate linear predictor matrix from fitted mgcv model - Xp <- predict(object$mgcv_model, newdata = object$obs_data, type = 'lpmatrix') + Xp <- predict(object2$mgcv_model, newdata = object2$obs_data, type = 'lpmatrix') # Zero out all other columns in Xp Xp[,!betas_keep] <- 0 # X-axis values - if(inherits(object$obs_data, 'list')){ - pred_vals_orig <- sort(object$obs_data[[pterms[i]]]) + if(inherits(object2$obs_data, 'list')){ + pred_vals_orig <- sort(object2$obs_data[[pterms[i]]]) } else { - pred_vals_orig <- sort(object$obs_data %>% + pred_vals_orig <- sort(object2$obs_data %>% dplyr::pull(pterms[i])) } - if(inherits(object$obs_data[[pterms[i]]], 'factor')){ + if(inherits(object2$obs_data[[pterms[i]]], 'factor')){ # Use a simple Boxplot for factor terms for now if(is.matrix(betas)){ @@ -76,9 +98,9 @@ if(length(pterms) > 0){ } colnames(beta_creds) <- - substr(names(coef(object$mgcv_model))[grepl(paste0('^(?=.*', + substr(names(coef(object2$mgcv_model))[grepl(paste0('^(?=.*', pterms[i], ')(?!.*s\\()'), - colnames(predict(object$mgcv_model, type = 'lpmatrix')), + colnames(predict(object2$mgcv_model, type = 'lpmatrix')), perl = TRUE)], nchar(pterms[i]) + 1, 1000000L) bp <- boxplot(beta_creds, range=0, plot=FALSE) @@ -151,6 +173,6 @@ if(length(pterms) > 0){ par(.pardefault) layout(1) } else { - stop('No parametric terms (apart from intercept) found in model') + message('No parametric terms found in model') } } diff --git a/R/plot_mvgam_randomeffects.R b/R/plot_mvgam_randomeffects.R index 460b5653..419a4da7 100644 --- a/R/plot_mvgam_randomeffects.R +++ b/R/plot_mvgam_randomeffects.R @@ -4,13 +4,15 @@ #' #'@importFrom graphics layout title #'@param object \code{list} object returned from \code{mvgam} +#'@param trend_effects logical. If `TRUE` and a `trend_formula` was used in model +#'fitting, terms from the trend (i.e. process) model will be plotted #'@details Posterior empirical quantiles of random effect coefficient estimates #'(on the link scale) are calculated and visualised as ribbon plots. #'Labels for coefficients are taken from the levels of the original factor variable #'that was used to specify the smooth in the model's formula #'@return A base \code{R} graphics plot #'@export -plot_mvgam_randomeffects = function(object){ +plot_mvgam_randomeffects = function(object, trend_effects = FALSE){ # General plotting colours and empirical quantile probabilities c_light <- c("#DCBCBC") @@ -23,19 +25,33 @@ c_dark <- c("#8F2727") c_dark_highlight <- c("#7C0000") probs = c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95) +object2 <- object + +if(trend_effects){ + if(is.null(object$trend_call)){ + stop('no trend_formula exists so there no trend-level smooths to plot') + } + + object2$mgcv_model <- object2$trend_mgcv_model +} + # Labels of smooths in formula -smooth_labs <- do.call(rbind, lapply(seq_along(object$mgcv_model$smooth), function(x){ - data.frame(label = object$mgcv_model$smooth[[x]]$label, - class = class(object$mgcv_model$smooth[[x]])[1]) +smooth_labs <- do.call(rbind, lapply(seq_along(object2$mgcv_model$smooth), function(x){ + data.frame(label = object2$mgcv_model$smooth[[x]]$label, + class = class(object2$mgcv_model$smooth[[x]])[1]) })) -# Check if any smooths were bs = "re"; if not, return an error +# Check if any smooths were bs = "re"; if not, return a message if(any(smooth_labs$class == 'random.effect')){ re_smooths <- smooth_labs %>% dplyr::mutate(smooth_num = dplyr::row_number()) %>% dplyr::filter(class == 'random.effect') %>% dplyr::pull(label) + if(trend_effects){ + re_smooths <- gsub('series', 'trend', re_smooths, fixed = TRUE) + } + .pardefault <- par(no.readonly=T) par(.pardefault) @@ -59,9 +75,14 @@ if(any(smooth_labs$class == 'random.effect')){ dplyr::filter(class == 'random.effect') %>% dplyr::pull(smooth_num))[i] -> smooth_number - betas_keep <- object$mgcv_model$smooth[[smooth_number]]$first.para: - object$mgcv_model$smooth[[smooth_number]]$last.para - betas <- mcmc_chains(object$model_output, 'b')[ ,betas_keep] + betas_keep <- object2$mgcv_model$smooth[[smooth_number]]$first.para: + object2$mgcv_model$smooth[[smooth_number]]$last.para + + if(trend_effects){ + betas <- mcmc_chains(object2$model_output, 'b_trend')[ ,betas_keep] + } else { + betas <- mcmc_chains(object2$model_output, 'b')[ ,betas_keep] + } # Plot the random effect estimates beta_creds <- sapply(1:NCOL(betas), @@ -119,13 +140,19 @@ if(any(smooth_labs$class == 'random.effect')){ factor_var_name <- tail(strsplit(gsub('\\)', '', gsub('s\\(', '', re_smooths[i])), ',')[[1]], 1) - if(class(object$obs_data)[1] == 'list'){ + if(trend_effects & factor_var_name == 'trend'){ + # Just use trend labels axis(side = 1, at = 1:N, - labels = levels(object$obs_data[[factor_var_name]])) + labels = paste0('trend_', 1:N)) } else { - axis(side = 1, at = 1:N, - labels = levels(object$obs_data %>% - dplyr::pull(factor_var_name))) + if(inherits(object2$obs_data, 'list')){ + axis(side = 1, at = 1:N, + labels = levels(object2$obs_data[[factor_var_name]])) + } else { + axis(side = 1, at = 1:N, + labels = levels(object2$obs_data %>% + dplyr::pull(factor_var_name))) + } } } diff --git a/R/plot_mvgam_smooth.R b/R/plot_mvgam_smooth.R index ea441872..71a33f20 100644 --- a/R/plot_mvgam_smooth.R +++ b/R/plot_mvgam_smooth.R @@ -5,6 +5,8 @@ #'@importFrom grDevices hcl.colors #'@importFrom stats quantile predict #'@param object \code{list} object returned from \code{mvgam} +#'@param trend_effects logical. If `TRUE` and a `trend_formula` was used in model +#'fitting, terms from the trend (i.e. process) model will be plotted #'@param series \code{integer} specifying which series in the set is to be plotted #'@param smooth either a \code{character} or \code{integer} specifying which smooth term to be plotted #'@param residuals \code{logical}. If \code{TRUE} then posterior quantiles of partial residuals are added @@ -39,6 +41,7 @@ #'@seealso \code{\link[mgcv]{plot.gam}} #'@export plot_mvgam_smooth = function(object, + trend_effects = FALSE, series = 1, smooth, residuals = FALSE, @@ -53,6 +56,17 @@ plot_mvgam_smooth = function(object, stop('argument "object" must be of class "mvgam"') } + object2 <- object + + if(trend_effects){ + if(is.null(object$trend_call)){ + stop('no trend_formula exists so there no trend-level smooths to plot') + } + + residuals <- FALSE + object2$mgcv_model <- object2$trend_mgcv_model + } + if(sign(series) != 1){ stop('argument "series" must be a positive integer', call. = FALSE) @@ -63,9 +77,9 @@ plot_mvgam_smooth = function(object, } } - if(series > NCOL(object$ytimes)){ - stop(paste0('object only contains data / predictions for ', - NCOL(object$ytimes), ' series'), + if(series > NCOL(object2$ytimes)){ + stop(paste0('object2 only contains data / predictions for ', + NCOL(object2$ytimes), ' series'), call. = FALSE) } @@ -79,17 +93,21 @@ plot_mvgam_smooth = function(object, } } + if(missing(smooth)){ + smooth <- 1 + } + # Get smooth term names - s_name <- levels(object$obs_data$series)[series] - data_train <- object$obs_data - smooth_terms <- unlist(purrr::map(object$mgcv_model$smooth, 'label')) + s_name <- levels(object2$obs_data$series)[series] + data_train <- object2$obs_data + smooth_terms <- unlist(purrr::map(object2$mgcv_model$smooth, 'label')) if(is.character(smooth)){ if(!grepl('\\(', smooth)){ smooth <- paste0('s(', smooth, ')') } if(!smooth %in% smooth_terms){ - stop(smooth, ' not found in smooth terms of object\nAppropriate names are: ', + stop(smooth, ' not found in smooth terms of object2\nAppropriate names are: ', paste(smooth_terms, collapse = ', ')) } smooth_int <- which(smooth_terms == smooth) @@ -98,15 +116,15 @@ plot_mvgam_smooth = function(object, } # Check whether this type of smooth is even plottable - if(!object$mgcv_model$smooth[[smooth_int]]$plot.me){ - stop(paste0('unable to plot ', object$mgcv_model$smooth[[smooth_int]]$label, - ' (class = ', attr(object$mgcv_model$smooth[[smooth_int]], 'class')[1]), + if(!object2$mgcv_model$smooth[[smooth_int]]$plot.me){ + stop(paste0('unable to plot ', object2$mgcv_model$smooth[[smooth_int]]$label, + ' (class = ', attr(object2$mgcv_model$smooth[[smooth_int]], 'class')[1]), ')') } if(is.numeric(smooth)){ if(!smooth %in% seq_along(smooth_terms)){ - stop(smooth, ' not found in smooth terms of object') + stop(smooth, ' not found in smooth terms of object2') } smooth_int <- smooth smooth <- smooth_terms[smooth] @@ -117,9 +135,9 @@ plot_mvgam_smooth = function(object, } # Check that this is not a random effect smooth - smooth_labs <- do.call(rbind, lapply(seq_along(object$mgcv_model$smooth), function(x){ - data.frame(label = object$mgcv_model$smooth[[x]]$label, - class = class(object$mgcv_model$smooth[[x]])[1]) + smooth_labs <- do.call(rbind, lapply(seq_along(object2$mgcv_model$smooth), function(x){ + data.frame(label = object2$mgcv_model$smooth[[x]]$label, + class = class(object2$mgcv_model$smooth[[x]])[1]) })) if(smooth_labs$class[smooth_int] == 'random.effect'){ @@ -128,19 +146,19 @@ plot_mvgam_smooth = function(object, # Be sure that parametric and by variables are included in newdata smooth_terms <- unique(trimws(strsplit(gsub('\\+', ',', - as.character(object$mgcv_model$pred.formula)[2]), ',')[[1]])) + as.character(object2$mgcv_model$pred.formula)[2]), ',')[[1]])) # Remove comma separated names as these won't match the column names in data smooth_terms[!grepl(',', smooth_terms)] -> smooth_terms # Change smooth name to the covariate that needs a sequence of prediction values - smooth <- object$mgcv_model$smooth[[smooth_int]]$term + smooth <- object2$mgcv_model$smooth[[smooth_int]]$term # Predictions and plots for multi-dimensional smooths if(length(unlist(strsplit(smooth, ','))) >= 2){ # Use default mgcv plotting for bivariate smooths as it is quicker - suppressWarnings(plot(object$mgcv_model, select = smooth_int, + suppressWarnings(plot(object2$mgcv_model, select = smooth_int, residuals = residuals, scheme = 2, main = '', too.far = 0, @@ -149,8 +167,15 @@ plot_mvgam_smooth = function(object, palette = 'Reds 2'), lwd = 1, seWithMean = TRUE)) - title(object$mgcv_model$smooth[[smooth_int]]$label, - adj = 0) + if(trend_effects){ + title(sub('series', 'trend', object2$mgcv_model$smooth[[smooth_int]]$label, + fixed = TRUE), + adj = 0) + } else { + title(object2$mgcv_model$smooth[[smooth_int]]$label, + adj = 0) + } + } else { @@ -179,7 +204,7 @@ plot_mvgam_smooth = function(object, } colnames(pred_dat) <- gsub('smooth.var', smooth, colnames(pred_dat)) - } else if(missing(newdata) && class(object$obs_data)[1] == 'list'){ + } else if(missing(newdata) && inherits(object2$obs_data, 'list')){ # Make fake data by zeroing all other terms apart from the selected smooth # and the series indicator pred_dat <- vector(mode = 'list') @@ -190,7 +215,7 @@ plot_mvgam_smooth = function(object, pred_dat[[x]] <- rep(0, 500) } } - names(pred_dat) <- names(object$obs_data) + names(pred_dat) <- names(object2$obs_data) pred_dat$series <- rep((levels(data_train$series)[series]), 500) if(!is.matrix(pred_dat[[smooth]])){ @@ -226,7 +251,7 @@ plot_mvgam_smooth = function(object, } # Generate linear predictor matrix from fitted mgcv model - suppressWarnings(Xp <- try(predict(object$mgcv_model, + suppressWarnings(Xp <- try(predict(object2$mgcv_model, newdata = pred_dat, type = 'lpmatrix'), silent = TRUE)) @@ -234,7 +259,7 @@ plot_mvgam_smooth = function(object, if(inherits(Xp, 'try-error')){ testdat <- data.frame(series = pred_dat$series) - terms_include <- names(object$mgcv_model$coefficients)[which(!names(object$mgcv_model$coefficients) + terms_include <- names(object2$mgcv_model$coefficients)[which(!names(object2$mgcv_model$coefficients) %in% '(Intercept)')] if(length(terms_include) > 0){ newnames <- vector() @@ -246,14 +271,14 @@ plot_mvgam_smooth = function(object, colnames(testdat) <- newnames } - suppressWarnings(Xp <- predict(object$mgcv_model, + suppressWarnings(Xp <- predict(object2$mgcv_model, newdata = testdat, type = 'lpmatrix')) } # Zero out all other columns in Xp - keeps <- object$mgcv_model$smooth[[smooth_int]]$first.para: - object$mgcv_model$smooth[[smooth_int]]$last.para + keeps <- object2$mgcv_model$smooth[[smooth_int]]$first.para: + object2$mgcv_model$smooth[[smooth_int]]$last.para Xp[, ! seq_len(length.out = NCOL(Xp)) %in% keeps] <- 0 # Prediction x-axis values @@ -268,20 +293,24 @@ plot_mvgam_smooth = function(object, } # If this term has a by variable, need to use mgcv's plotting utilities - if(object$mgcv_model$smooth[[smooth_int]]$by != "NA"){ + if(object2$mgcv_model$smooth[[smooth_int]]$by != "NA"){ # Deal with by variables by <- rep(1,length(pred_vals)); dat <- data.frame(x = pred_vals, by = by) - names(dat) <- c(object$mgcv_model$smooth[[smooth_int]]$term, - object$mgcv_model$smooth[[smooth_int]]$by) + names(dat) <- c(object2$mgcv_model$smooth[[smooth_int]]$term, + object2$mgcv_model$smooth[[smooth_int]]$by) - Xp_term <- mgcv::PredictMat(object$mgcv_model$smooth[[smooth_int]], dat) - Xp[,object$mgcv_model$smooth[[smooth_int]]$first.para: - object$mgcv_model$smooth[[smooth_int]]$last.para] <- Xp_term + Xp_term <- mgcv::PredictMat(object2$mgcv_model$smooth[[smooth_int]], dat) + Xp[,object2$mgcv_model$smooth[[smooth_int]]$first.para: + object2$mgcv_model$smooth[[smooth_int]]$last.para] <- Xp_term } # Extract GAM coefficients - betas <- mcmc_chains(object$model_output, 'b') + if(trend_effects){ + betas <- mcmc_chains(object2$model_output, 'b_trend') + } else { + betas <- mcmc_chains(object2$model_output, 'b') + } # Calculate posterior marginal predictions preds <- matrix(NA, nrow = NROW(betas), ncol = NROW(Xp)) @@ -292,42 +321,42 @@ plot_mvgam_smooth = function(object, if(residuals){ # Need to predict from a reduced set that zeroes out all terms apart from the # smooth of interest - suppressWarnings(Xp2 <- predict(object$mgcv_model, - newdata = object$obs_data, type = 'lpmatrix')) + suppressWarnings(Xp2 <- predict(object2$mgcv_model, + newdata = object2$obs_data, type = 'lpmatrix')) if(!missing(newdata)){ stop('Partial residual plots not available when using newdata') } - if(object$mgcv_model$smooth[[smooth_int]]$by != "NA"){ - by <- rep(1,length(object$obs_data$series)) - dat <- data.frame(x = object$obs_data[[object$mgcv_model$smooth[[smooth_int]]$term]], + if(object2$mgcv_model$smooth[[smooth_int]]$by != "NA"){ + by <- rep(1,length(object2$obs_data$series)) + dat <- data.frame(x = object2$obs_data[[object2$mgcv_model$smooth[[smooth_int]]$term]], by = by) - names(dat) <- c(object$mgcv_model$smooth[[smooth_int]]$term, - object$mgcv_model$smooth[[smooth_int]]$by) + names(dat) <- c(object2$mgcv_model$smooth[[smooth_int]]$term, + object2$mgcv_model$smooth[[smooth_int]]$by) - Xp_term <- mgcv::PredictMat(object$mgcv_model$smooth[[smooth_int]], dat) - Xp2[,object$mgcv_model$smooth[[smooth_int]]$first.para: - object$mgcv_model$smooth[[smooth_int]]$last.para] <- Xp_term + Xp_term <- mgcv::PredictMat(object2$mgcv_model$smooth[[smooth_int]], dat) + Xp2[,object2$mgcv_model$smooth[[smooth_int]]$first.para: + object2$mgcv_model$smooth[[smooth_int]]$last.para] <- Xp_term } # Find index for the end of training for this series and keep only those training # observations for the particular series if(class(pred_dat)[1] == 'list'){ - end_train <- length(which(object$obs_data[['series']] == (levels(data_train$series)[series]))) + end_train <- length(which(object2$obs_data[['series']] == (levels(data_train$series)[series]))) } else { - end_train <- object$obs_data %>% + end_train <- object2$obs_data %>% dplyr::filter(series == s_name) %>% NROW() } - Xp2 <- Xp2[object$ytimes[,series][1:end_train], ] + Xp2 <- Xp2[object2$ytimes[,series][1:end_train], ] # # Zero out all other columns in Xp2 Xp2[,!grepl(paste0('(', smooth, ')'), colnames(Xp), fixed = T)] <- 0 # Calculate residuals from full prediction set - all_resids <- object$resids[[series]][,1:end_train] + all_resids <- object2$resids[[series]][,1:end_train] partial_resids <- matrix(NA, nrow = nrow(betas), ncol = NCOL(all_resids)) for(i in 1:NROW(betas)){ @@ -362,15 +391,27 @@ plot_mvgam_smooth = function(object, xlab = smooth, ylab = 'Partial effect', xlim = c(min(pred_vals), max(pred_vals)), - ylim = c(min(min(partial_resids, min(cred) - 0.8 * sd(preds), na.rm = T)), - max(max(partial_resids, max(cred) + 0.8 * sd(preds), na.rm = T)))) - - if(object$mgcv_model$smooth[[smooth_int]]$by != "NA"){ - title(object$mgcv_model$smooth[[smooth_int]]$label, - adj = 0) + ylim = c(min(min(partial_resids, min(cred) - 0.9 * sd(preds), na.rm = T)), + max(max(partial_resids, max(cred) + 0.9 * sd(preds), na.rm = T)))) + + if(object2$mgcv_model$smooth[[smooth_int]]$by != "NA"){ + if(trend_effects){ + title(sub('series', 'trend', + object2$mgcv_model$smooth[[smooth_int]]$label, + fixed = TRUE), + adj = 0) + } else { + title(object2$mgcv_model$smooth[[smooth_int]]$label, + adj = 0) + } } else { - title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), - adj = 0) + if(trend_effects){ + title(paste0('s(', smooth, ')'), + adj = 0) + } else { + title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), + adj = 0) + } } } else { @@ -378,25 +419,38 @@ plot_mvgam_smooth = function(object, xlab = smooth, ylab = 'Partial effect', xlim = c(min(pred_vals), max(pred_vals)), - ylim = c(min(cred) - 0.8 * sd(preds), - max(cred) + 0.8 * sd(preds))) - if(object$mgcv_model$smooth[[smooth_int]]$by != "NA"){ - title(object$mgcv_model$smooth[[smooth_int]]$label, - adj = 0) + ylim = c(min(cred) - 0.9 * sd(preds), + max(cred) + 0.9 * sd(preds))) + if(object2$mgcv_model$smooth[[smooth_int]]$by != "NA"){ + if(trend_effects){ + title(sub('series', 'trend', + object2$mgcv_model$smooth[[smooth_int]]$label, + fixed = TRUE), + adj = 0) + } else { + title(object2$mgcv_model$smooth[[smooth_int]]$label, + adj = 0) + } } else { - title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), - adj = 0) + if(trend_effects){ + title(paste0('s(', smooth, ')'), + adj = 0) + } else { + title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), + adj = 0) + } } } if(realisations){ for(i in 1:n_realisations){ + index <- sample(1:NROW(preds), 1, replace = TRUE) lines(x = pred_vals, - y = preds[i,], + y = preds[index,], col = 'white', lwd = 2.5) lines(x = pred_vals, - y = preds[i,], + y = preds[index,], col = sample(c("#DCBCBC", "#C79999", "#B97C7C", @@ -409,11 +463,11 @@ plot_mvgam_smooth = function(object, if(residuals){ # Get x-axis values and bin if necessary to prevent overplotting - sorted_x <- sort(unique(round(object$obs_data[[smooth]], 6))) + sorted_x <- sort(unique(round(object2$obs_data[[smooth]], 6))) - s_name <- levels(object$obs_data$series)[series] - obs_x <- round(data.frame(series = object$obs_data$series, - smooth_vals = object$obs_data[[smooth]]) %>% + s_name <- levels(object2$obs_data$series)[series] + obs_x <- round(data.frame(series = object2$obs_data$series, + smooth_vals = object2$obs_data[[smooth]]) %>% dplyr::filter(series == s_name) %>% dplyr::pull(smooth_vals), 6) @@ -506,7 +560,7 @@ plot_mvgam_smooth = function(object, box(bty = 'L', lwd = 2) # Show observed values of the smooth as a rug - if(class(object$obs_data)[1] == 'list'){ + if(class(object2$obs_data)[1] == 'list'){ rug((as.vector(as.matrix(pred_dat[[smooth]])))[which(pred_dat[['series']] == levels(pred_dat[['series']])[series])], lwd = 1.75, ticksize = 0.025, col = c_mid_highlight) @@ -530,12 +584,13 @@ plot_mvgam_smooth = function(object, if(realisations){ for(i in 1:n_realisations){ + index <- sample(1:NROW(first_derivs), 1, replace = TRUE) lines(x = pred_vals, - y = first_derivs[i,], + y = first_derivs[index,], col = 'white', lwd = 2.5) lines(x = pred_vals, - y = first_derivs[i,], + y = first_derivs[index,], col = sample(c("#DCBCBC", "#C79999", "#B97C7C", @@ -568,22 +623,34 @@ plot_mvgam_smooth = function(object, xlab = smooth, ylab = 'Partial effect', xlim = c(min(pred_vals), max(pred_vals)), - ylim = c(min(min(partial_resids, min(cred) - 0.8 * sd(preds), na.rm = T)), - max(max(partial_resids, max(cred) + 0.8 * sd(preds), na.rm = T)))) - if(object$mgcv_model$smooth[[smooth_int]]$by != "NA"){ - title(object$mgcv_model$smooth[[smooth_int]]$label, - adj = 0) + ylim = c(min(min(partial_resids, min(cred) - 0.9 * sd(preds), na.rm = T)), + max(max(partial_resids, max(cred) + 0.9 * sd(preds), na.rm = T)))) + if(object2$mgcv_model$smooth[[smooth_int]]$by != "NA"){ + if(trend_effects){ + title(sub('series', 'trend', + object2$mgcv_model$smooth[[smooth_int]]$label, + fixed = TRUE), + adj = 0) + } else { + title(object2$mgcv_model$smooth[[smooth_int]]$label, + adj = 0) + } } else { - title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), - adj = 0) + if(trend_effects){ + title(paste0('s(', smooth, ')'), + adj = 0) + } else { + title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), + adj = 0) + } } # Get x-axis values and bin if necessary to prevent overplotting - sorted_x <- sort(unique(round(object$obs_data[[smooth]], 6))) + sorted_x <- sort(unique(round(object2$obs_data[[smooth]], 6))) - s_name <- levels(object$obs_data$series)[series] - obs_x <- round(data.frame(series = object$obs_data$series, - smooth_vals = object$obs_data[[smooth]]) %>% + s_name <- levels(object2$obs_data$series)[series] + obs_x <- round(data.frame(series = object2$obs_data$series, + smooth_vals = object2$obs_data[[smooth]]) %>% dplyr::filter(series == s_name) %>% dplyr::pull(smooth_vals), 6) @@ -664,24 +731,37 @@ plot_mvgam_smooth = function(object, xlab = smooth, ylab = 'Partial effect', xlim = c(min(pred_vals), max(pred_vals)), - ylim = c(min(cred) - 0.8 * sd(preds), - max(cred) + 0.8 * sd(preds))) - if(object$mgcv_model$smooth[[smooth_int]]$by != "NA"){ - title(object$mgcv_model$smooth[[smooth_int]]$label, - adj = 0) + ylim = c(min(cred) - 0.9 * sd(preds), + max(cred) + 0.9 * sd(preds))) + if(object2$mgcv_model$smooth[[smooth_int]]$by != "NA"){ + if(trend_effects){ + title(sub('series', 'trend', + object2$mgcv_model$smooth[[smooth_int]]$label, + fixed = TRUE), + adj = 0) + } else { + title(object2$mgcv_model$smooth[[smooth_int]]$label, + adj = 0) + } } else { - title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), - adj = 0) + if(trend_effects){ + title(paste0('s(', smooth, ')'), + adj = 0) + } else { + title(paste0('s(', smooth, ') for ', unique(pred_dat$series)), + adj = 0) + } } if(realisations){ for(i in 1:n_realisations){ + index <- sample(1:NROW(preds), 1, replace = TRUE) lines(x = pred_vals, - y = preds[i,], + y = preds[index,], col = 'white', lwd = 2.5) lines(x = pred_vals, - y = preds[i,], + y = preds[index,], col = sample(c("#DCBCBC", "#C79999", "#B97C7C", @@ -705,7 +785,7 @@ plot_mvgam_smooth = function(object, } # Show observed values of the smooth as a rug - if(class(object$obs_data)[1] == 'list'){ + if(class(object2$obs_data)[1] == 'list'){ rug((as.vector(as.matrix(data_train[[smooth]])))[which(data_train$series == levels(data_train$series)[series])], lwd = 1.75, ticksize = 0.025, col = c_mid_highlight) diff --git a/R/ppc.mvgam.R b/R/ppc.mvgam.R index 6b42f774..d1cb954d 100644 --- a/R/ppc.mvgam.R +++ b/R/ppc.mvgam.R @@ -489,7 +489,7 @@ ppc.mvgam = function(object, newdata, data_test, series = 1, type = 'hist', if(object$family == 'beta'){ xlimits <- c(0, 1) - } else if(object$family %in% c('poisson', 'negative binomial', 'lognormal')){ + } else if(object$family %in% c('poisson', 'negative binomial', 'lognormal', 'Gamma')){ xlimits <- c(0, max_x) } else { xlimits <- c(min_x, max_x) @@ -550,9 +550,6 @@ ppc.mvgam = function(object, newdata, data_test, series = 1, type = 'hist', } } - bin_lims <- range(c(truths, as.vector(preds)), na.rm = TRUE) - #delta <- diff(range(preds)) / n_bins - breaks <- seq(bin_lims[1], bin_lims[2], length.out = n_bins) xlim <- c(min(min(density(preds[1,], na.rm = TRUE)$x), min(density(truths, na.rm = TRUE)$x)), max(max(density(preds[1,], na.rm = TRUE)$x), @@ -560,14 +557,21 @@ ppc.mvgam = function(object, newdata, data_test, series = 1, type = 'hist', if(object$family == 'beta'){ xlim <- c(0, 1) - } else if(object$family %in% c('poisson', 'negative binomial', 'lognormal')){ + } else if(object$family %in% c('poisson', 'negative binomial', 'lognormal', 'Gamma')){ xlim <- c(0, xlim[2]) } else { xlim <- xlim } + breaks <- seq(xlim[1], xlim[2], length.out = n_bins) + truths <- truths[truths<=xlim[2]] + truths <- truths[truths>=xlim[1]] + preds <- preds[preds<=xlim[2]] + preds <- preds[preds>=xlim[1]] + ylim <- c(0, max(c(max(hist(truths, breaks = breaks, plot = F)$density, na.rm = TRUE), - max(hist(preds, breaks = breaks, plot = F)$density, na.rm = TRUE)))) + max(hist(preds, breaks = breaks, plot = F)$density, na.rm = TRUE)), + na.rm = TRUE)) if(missing(xlab)){ xlab <- paste0('Count') diff --git a/R/stan_utils.R b/R/stan_utils.R index e46b85b7..2a1db3b5 100644 --- a/R/stan_utils.R +++ b/R/stan_utils.R @@ -1426,7 +1426,7 @@ vectorise_stan_lik = function(model_file, model_data, family = 'poisson', 'flat_ys ~ beta(\n', ifelse(offset,'inv_logit(append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind]) .* flat_phis,\n', 'inv_logit(append_col(flat_xs, flat_trends) * append_row(b, 1.0)) .* flat_phis,\n'), - ifelse(offset,'(1 - append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind]) .* flat_phis,\n', + ifelse(offset,'(1 - inv_logit(append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind])) .* flat_phis);\n}\n}\n', '(1 - inv_logit(append_col(flat_xs, flat_trends) * append_row(b, 1.0))) .* flat_phis);\n}\n}\n')) } model_file <- readLines(textConnection(model_file), n = -1) @@ -1501,7 +1501,7 @@ vectorise_stan_lik = function(model_file, model_data, family = 'poisson', 'target += reduce_sum(partial_log_lik, seq,\n', 'grainsize,\n', 'flat_ys,\n', - ifelse(offset,'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0)) + offset[obs_ind],\n', + ifelse(offset,'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind]),\n', 'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0)),\n'), 'flat_phis);\n}\n}\n') } else { @@ -1511,7 +1511,7 @@ vectorise_stan_lik = function(model_file, model_data, family = 'poisson', 'flat_trends = (to_vector(trend))[obs_ind];\n', 'flat_phis = to_array_1d(rep_each(phi_inv, n)[obs_ind]);\n', 'flat_ys ~ neg_binomial_2(\n', - ifelse(offset,'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0)) + offset[obs_ind],\n', + ifelse(offset,'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind]),\n', 'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0)),\n'), 'inv(flat_phis));\n}\n}\n') } @@ -2277,6 +2277,372 @@ trend_map_mods = function(model_file, model_data = model_data)) } +#### Modifications to Stan code for adding predictors to trend models #### +#' @noRd +add_trend_predictors = function(trend_formula, + trend_map, + trend_model, + data_train, + data_test, + model_file, + model_data, + drift = FALSE){ + + #### Creating the trend mvgam model file and data structures #### + # Replace any terms labelled 'trend' with 'series' for creating the necessary + # structures + trend_formula <- formula(paste(gsub('trend', 'series', + as.character(trend_formula), + fixed = TRUE), + collapse = " ")) + + # Drop any intercept from the formula + if(attr(terms(trend_formula), 'intercept') == 1){ + trend_formula <- update(trend_formula, trend_y ~ . -1) + } else { + trend_formula <- update(trend_formula, trend_y ~ .) + } + + trend_train <- data_train + trend_train$trend_y <- rnorm(length(trend_train$time)) + + # Add indicators of trend names as factor levels using the trend_map + trend_indicators <- vector(length = length(trend_train$time)) + for(i in 1:length(trend_train$time)){ + trend_indicators[i] <- trend_map$trend[which(trend_map$series == + trend_train$series[i])] + } + trend_indicators <- as.factor(paste0('trend', trend_indicators)) + trend_train$series <- trend_indicators + trend_train$y <- NULL + + # Only keep one time observation per trend + trend_train %>% + dplyr::group_by(series, time) %>% + dplyr::slice_head(n = 1) %>% + dplyr::ungroup() -> trend_train + + if(!is.null(data_test)){ + # If newdata supplied, also create a fake design matrix + # for the test data + trend_test <- data_test + trend_test$trend_y <- rnorm(length(trend_test$time)) + trend_indicators <- vector(length = length(trend_test$time)) + for(i in 1:length(trend_test$time)){ + trend_indicators[i] <- trend_map$trend[which(trend_map$series == trend_test$series[i])] + } + trend_indicators <- as.factor(paste0('trend', trend_indicators)) + trend_test$series <- trend_indicators + trend_test$y <- NULL + + trend_test %>% + dplyr::group_by(series, time) %>% + dplyr::slice_head(n = 1) %>% + dplyr::ungroup() -> trend_test + + # Construct the model file and data structures for testing and training + trend_mvgam <- mvgam(trend_formula, + data = trend_train, + newdata = trend_test, + family = gaussian(), + trend_model = 'None', + return_model_data = TRUE, + run_model = FALSE) + } else { + # Construct the model file and data structures for training only + trend_mvgam <- mvgam(trend_formula, + data = trend_train, + family = gaussian(), + trend_model = 'None', + return_model_data = TRUE, + run_model = FALSE) + } + + trend_model_file <- trend_mvgam$model_file + + #### Modifying the model_file and model_data #### + # Add lines for the raw trend basis coefficients + model_file[grep("vector[num_basis] b_raw;", model_file, fixed = TRUE)] <- + paste0("vector[num_basis] b_raw;\n", + "vector[num_basis_trend] b_raw_trend;") + + # Add lines to data declarations for trend design matrix + model_file[grep("matrix[total_obs, num_basis] X; // mgcv GAM design matrix", + model_file, fixed = TRUE)] <- + paste0("matrix[total_obs, num_basis] X; // mgcv GAM design matrix\n", + "matrix[n * n_lv, num_basis_trend] X_trend; // trend model design matrix") + + model_file[grep("int num_basis; // total number of basis coefficients", + model_file, fixed = TRUE)] <- + paste0("int num_basis; // total number of basis coefficients\n", + "int num_basis_trend; // number of trend basis coefficients") + + model_file[grep("int ytimes[n, n_series]; // time-ordered matrix (which col in X belongs to each [time, series] observation?)", + model_file, fixed = TRUE)] <- + paste0("int ytimes[n, n_series]; // time-ordered matrix (which col in X belongs to each [time, series] observation?)\n", + "int ytimes_trend[n, n_lv]; // time-ordered matrix for latent trends") + + model_data$ytimes_trend <- trend_mvgam$model_data$ytimes + model_data$num_basis_trend <- trend_mvgam$model_data$num_basis + model_data$X_trend <- trend_mvgam$model_data$X + + # Update names to reflect process models rather than latent factors + model_file[grep("// trends and dynamic factor loading matrix", + model_file, fixed = TRUE)] <- + "// latent states and loading matrix" + + if(trend_model %in% c('RW', 'AR1', 'AR2', 'AR3')){ + model_file[grep("// latent factor SD terms", + model_file, fixed = TRUE)] <- + "// latent state SD terms" + model_file[grep("// priors for factor SD parameters", + model_file, fixed = TRUE)] <- + "// priors for latent state SD parameters" + } + + model_file[grep("// derived latent trends", + model_file, fixed = TRUE)] <- "// derived latent states" + + # Add beta_trend lines + b_trend_lines <- trend_model_file[grep('b[', + trend_model_file, fixed = TRUE)] + b_trend_lines <- gsub('\\bb\\b', 'b_trend', b_trend_lines) + b_trend_lines <- gsub('raw', 'raw_trend', b_trend_lines) + b_trend_lines <- gsub('num_basis', 'num_basis_trend', b_trend_lines) + model_file[grep("// derived latent states", model_file, fixed = TRUE)] <- + paste0(paste(b_trend_lines, collapse = '\n'), + '\n\n// derived latent states') + model_file[grep("vector[num_basis] b;", model_file, fixed = TRUE)] <- + paste0("vector[num_basis] b;\n", + "vector[num_basis_trend] b_trend;") + + model_file <- readLines(textConnection(model_file), n = -1) + + # Add any parametric effect beta lines + if(any(grepl('// parametric effect', trend_model_file))){ + trend_parametrics <- TRUE + pstart <- as.numeric(sub('.*(?=.$)', '', + sub("\\:.*", "", + trend_model_file[grep('// parametric effect', + trend_model_file) + 1]), perl = T)) + pend <- as.numeric(substr(sub(".*\\:", "", + trend_model_file[grep('// parametric effect', + trend_model_file) + 1]), + 1, 1)) + model_file[grep("// dynamic factor estimates", model_file, fixed = TRUE)] <- + paste0('// dynamic process models\n', + paste0('b_raw_trend[', pstart, ':', pend, '] ~ std_normal();')) + model_file <- readLines(textConnection(model_file), n = -1) + } + + trend_smooths_included <- FALSE + + # Add any multinormal smooth lines + if(any(grepl('multi_normal_prec', trend_model_file))){ + trend_smooths_included <- TRUE + + if(any(grepl("int n_sp; // number of smoothing parameters", + model_file, fixed = TRUE))){ + model_file[grep("int n_sp; // number of smoothing parameters", + model_file, fixed = TRUE)] <- + paste0("int n_sp; // number of smoothing parameters\n", + "int n_sp_trend; // number of trend smoothing parameters") + } else { + model_file[grep("int n; // number of timepoints per series", + model_file, fixed = TRUE)] <- + paste0("int n; // number of timepoints per series\n", + "int n_sp_trend; // number of trend smoothing parameters") + } + model_data$n_sp_trend <- trend_mvgam$model_data$n_sp + + spline_coef_lines <- trend_model_file[grepl('multi_normal_prec', + trend_model_file)] + spline_coef_lines <- gsub('_raw', '_raw_trend', spline_coef_lines) + spline_coef_lines <- gsub('lambda', 'lambda_trend', spline_coef_lines) + spline_coef_lines <- gsub('zero', 'zero_trend', spline_coef_lines) + spline_coef_lines <- gsub('S', 'S_trend', spline_coef_lines, fixed = TRUE) + + lambda_prior_line <- sub('lambda', 'lambda_trend', + trend_model_file[grep('lambda ~', trend_model_file, fixed = TRUE)]) + lambda_param_line <- sub('lambda', 'lambda_trend', + trend_model_file[grep('vector[n_sp] lambda;', + trend_model_file, fixed = TRUE)]) + lambda_param_line <- sub('n_sp', 'n_sp_trend', lambda_param_line) + + if(any(grepl('// dynamic process models', model_file, fixed = TRUE))){ + model_file[grep('// dynamic process models', model_file, fixed = TRUE) + 1] <- + paste0(model_file[grep('// dynamic process models', model_file, fixed = TRUE) + 1], + '\n', + paste(spline_coef_lines, collapse = '\n'), + '\n', + lambda_prior_line, + '\n') + + } else { + model_file[grep("// dynamic factor estimates", model_file, fixed = TRUE)] <- + paste0('// dynamic process models\n', + paste(spline_coef_lines, collapse = '\n'), + '\n', + lambda_prior_line) + } + if(any(grepl("vector[n_sp] lambda;", model_file, fixed = TRUE))){ + model_file[grep("// dynamic factors", model_file, fixed = TRUE)] <- + "// latent states" + model_file[grep("vector[n_sp] lambda;", model_file, fixed = TRUE)] <- + paste0("vector[n_sp] lambda;\n", + "vector[n_sp_trend] lambda_trend;") + } else { + model_file <- model_file[-grep("matrix[n, n_lv] LV;", + model_file, fixed = TRUE)] + model_file[grep("// dynamic factors", model_file, fixed = TRUE)] <- + paste0("// latent states\n", + "matrix[n, n_lv] LV;\n\n", + "// smoothing parameters\n", + "vector[n_sp_trend] lambda_trend;") + } + + S_lines <- trend_model_file[grep('mgcv smooth penalty matrix', + trend_model_file, fixed = TRUE)] + S_lines <- gsub('S', 'S_trend', S_lines, fixed = TRUE) + model_file[grep("int n_nonmissing; // number of nonmissing observations", + model_file, fixed = TRUE)] <- + paste0("int n_nonmissing; // number of nonmissing observations\n", + paste(S_lines, collapse = '\n')) + + S_mats <- trend_mvgam$model_data[paste0('S', 1:length(S_lines))] + names(S_mats) <- gsub('S', 'S_trend', names(S_mats)) + model_data <- append(model_data, S_mats) + + model_file[grep("int num_basis_trend; // number of trend basis coefficients", + model_file, fixed = TRUE)] <- + paste0("int num_basis_trend; // number of trend basis coefficients\n", + "vector[num_basis_trend] zero_trend; // prior locations for trend basis coefficients") + model_data$zero_trend <- trend_mvgam$model_data$zero + + if(any(grepl("vector[n_sp] rho;", model_file, fixed = TRUE))){ + model_file[grep("vector[n_sp] rho;", model_file, fixed = TRUE)] <- + paste0("vector[n_sp] rho;\n", + "vector[n_sp_trend] rho_trend;") + + model_file[grep("rho = log(lambda);", model_file, fixed = TRUE)] <- + paste0("rho = log(lambda);\n", + "rho_trend = log(lambda_trend);") + } else { + model_file[grep("matrix[n, n_series] mus;", model_file, fixed = TRUE)] <- + paste0("matrix[n, n_series] mus;\n", + "vector[n_sp_trend] rho_trend;") + + model_file[grep("// posterior predictions", model_file, fixed = TRUE)] <- + paste0("rho_trend = log(lambda_trend);\n\n", + "// posterior predictions") + } + + + model_file <- readLines(textConnection(model_file), n = -1) + + } + + trend_random_included <- FALSE + + # Add any random effect beta lines + if(any(grepl('mu_raw[', trend_model_file, fixed = TRUE))){ + trend_random_included <- TRUE + smooth_labs <- do.call(rbind, + lapply(seq_along(trend_mvgam$mgcv_model$smooth), + function(x){ + data.frame(label = trend_mvgam$mgcv_model$smooth[[x]]$label, + first.para = trend_mvgam$mgcv_model$smooth[[x]]$first.para, + last.para = trend_mvgam$mgcv_model$smooth[[x]]$last.para, + class = class(trend_mvgam$mgcv_model$smooth[[x]])[1]) + })) + random_inds <- vector() + for(i in 1:NROW(smooth_labs)){ + if(smooth_labs$class[i] == 'random.effect'){ + random_inds[i] <- paste0(smooth_labs$first.para[i], + ':', + smooth_labs$last.para[i]) + } + } + random_inds <- random_inds[!is.na(random_inds)] + trend_rand_idxs <- unlist(lapply(seq_along(random_inds), function(x){ + seq(as.numeric(sub("\\:.*", "", random_inds[x])), + sub(".*\\:", "", random_inds[x])) + })) + model_data$trend_rand_idxs <- trend_rand_idxs + + model_file[grep("int obs_ind[n_nonmissing]; // indices of nonmissing observations", + model_file, fixed = TRUE)] <- + paste0("int obs_ind[n_nonmissing]; // indices of nonmissing observations\n", + paste0("int trend_rand_idxs[", length(trend_rand_idxs),']; // trend random effect indices')) + + random_param_lines <- trend_model_file[c(grep("// random effect variances", + trend_model_file, fixed = TRUE) + 1, + grep("// random effect means", + trend_model_file, fixed = TRUE) + 1)] + random_param_lines <- gsub('raw', 'raw_trend', random_param_lines) + model_file[grep("vector[num_basis_trend] b_raw_trend;", + model_file, fixed = TRUE)] <- + paste0("vector[num_basis_trend] b_raw_trend;\n\n", + "// trend random effects\n", + paste(random_param_lines, collapse = '\n')) + + if(trend_model %in% c('RW', 'AR1', 'AR2', 'AR3')){ + model_file[grep("LV[1, 1:n_lv] ~ normal(0, sigma);", model_file, + fixed = TRUE)] <- paste0( + "sigma_raw_trend ~ exponential(0.5);\n", + "mu_raw_trend ~ std_normal();\n", + paste0("b_raw_trend[", 'trend_rand_idxs', + "] ~ std_normal();\n"), + "LV[1, 1:n_lv] ~ normal(0, sigma);") + } + + model_file <- readLines(textConnection(model_file), n = -1) + } + + # Update the trend model statements + model_file[grep("// latent states and loading matrix", + model_file, fixed = TRUE)] <- + paste0("// latent states and loading matrix\n", + "vector[n * n_lv] trend_mus;") + model_file[grep("// derived latent states", + model_file, fixed = TRUE)] <- + paste0("// latent state linear predictors\n", + "trend_mus = X_trend * b_trend;\n\n", + "// derived latent states") + model_file <- readLines(textConnection(model_file), n = -1) + + if(trend_model == 'RW'){ + model_file <- model_file[-c(grep("for(j in 1:n_lv){", model_file, fixed = TRUE): + (grep("for(j in 1:n_lv){", model_file, fixed = TRUE) + 2))] + + if(drift){ + model_file[grep("LV[1, 1:n_lv] ~ normal(0, sigma);", + model_file, fixed = TRUE)] <- + paste0("for(j in 1:n_lv){\n", + "LV[1, j] ~ normal(trend_mus[ytimes_trend[1, j]], sigma[j]);\n", + "for(i in 2:n){\n", + "LV[i, j] ~ normal(drift[j] + trend_mus[ytimes_trend[i, j]] + LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]], sigma[j]);\n", + "}\n}") + } else { + model_file[grep("LV[1, 1:n_lv] ~ normal(0, sigma);", + model_file, fixed = TRUE)] <- + paste0("for(j in 1:n_lv){\n", + "LV[1, j] ~ normal(trend_mus[ytimes_trend[1, j]], sigma[j]);\n", + "for(i in 2:n){\n", + "LV[i, j] ~ normal(trend_mus[ytimes_trend[i, j]] + LV[i - 1, j] - trend_mus[ytimes_trend[i - 1, j]], sigma[j]);\n", + "}\n}") + } + + model_file <- readLines(textConnection(model_file), n = -1) + } + + return(list(model_file = model_file, + model_data = model_data, + trend_mgcv_model = trend_mvgam$mgcv_model, + trend_smooths_included = trend_smooths_included, + trend_random_included = trend_random_included)) +} + #### Helper functions for extracting parameter estimates from cmdstan objects #### #' All functions were directly copied from `brms` and so all credit must #' go to the `brms` development team diff --git a/R/summary.mvgam.R b/R/summary.mvgam.R index cdcda58d..bce10838 100644 --- a/R/summary.mvgam.R +++ b/R/summary.mvgam.R @@ -20,9 +20,19 @@ summary.mvgam = function(object, ...){ #### Standard summary of formula and model arguments #### -message("GAM formula:") -print(object$call) -message() + if(!is.null(object$trend_call)){ + message("GAM observation formula:") + print(object$call) + message() + + message("GAM process formula:") + print(object$trend_call) + message() + } else { + message("GAM formula:") + print(object$call) + message() + } message("Family:") cat(paste0(object$family, '\n')) @@ -37,9 +47,16 @@ cat(paste0(object$trend_model, '\n')) message() if(object$use_lv){ - message("N latent factors:") - cat(object$n_lv, '\n') - message() + if(!is.null(object$trend_call)){ + message("N process models:") + cat(object$n_lv, '\n') + message() + } else { + message("N latent factors:") + cat(object$n_lv, '\n') + message() + } + } message('N series:') @@ -108,12 +125,23 @@ if(object$family == 'lognormal'){ message() } -message("GAM coefficient (beta) estimates:") -coef_names <- names(object$mgcv_model$coefficients) -mvgam_coefs <- mcmc_summary(object$model_output, 'b')[,c(3:7)] -rownames(mvgam_coefs) <- coef_names -print(mvgam_coefs) -message() +if(!is.null(object$trend_call)){ + message("GAM observation coefficient (beta) estimates:") + coef_names <- names(object$mgcv_model$coefficients) + mvgam_coefs <- mcmc_summary(object$model_output, 'b')[,c(3:7)] + rownames(mvgam_coefs) <- coef_names + print(mvgam_coefs) + message() + +} else { + message("GAM coefficient (beta) estimates:") + coef_names <- names(object$mgcv_model$coefficients) + mvgam_coefs <- mcmc_summary(object$model_output, 'b')[,c(3:7)] + rownames(mvgam_coefs) <- coef_names + print(mvgam_coefs) + message() +} + smooth_labs <- do.call(rbind, lapply(seq_along(object$mgcv_model$smooth), function(x){ data.frame(label = object$mgcv_model$smooth[[x]]$label, @@ -143,17 +171,31 @@ if(any(smooth_labs$class == 'random.effect')){ rownames(re_sds) <- rownames(re_mus) <- re_smooths - message("GAM random effect population mean estimates:") - print(re_mus) - message() + if(!is.null(object$trend_call)){ + message("GAM random effect population mean estimates:") + print(re_mus) + message() - message("GAM random effect population SD estimates:") - print(re_sds) - message() + message("GAM random effect population SD estimates:") + print(re_sds) + message() + } else { + message("GAM observation random effect population mean estimates:") + print(re_mus) + message() + + message("GAM observation random effect population SD estimates:") + print(re_sds) + message() + } } if(any(!is.na(object$sp_names)) & !all(smooth_labs$class == 'random.effect')){ - message("GAM smoothing parameter (rho) estimates:") + if(!is.null(object$trend_call)){ + message("GAM smoothing parameter (rho) estimates:") + } else { + message("GAM observation smoothing parameter (rho) estimates:") + } rho_coefs <- mcmc_summary(object$model_output, 'rho')[,c(3:7)] rownames(rho_coefs) <- object$sp_names @@ -175,10 +217,19 @@ if(object$use_lv){ if(object$trend_model != 'None'){ if(object$trend_model == 'RW'){ if(object$drift){ - message("Latent trend drift estimates:") + if(!is.null(object$trend_call)){ + message("Process model drift estimates:") + } else { + message("Latent trend drift estimates:") + } print(mcmc_summary(object$model_output, c('drift'))[,c(3:7)]) message() } else { + if(!is.null(object$trend_call)){ + message("Process error parameter estimates:") + print(mcmc_summary(object$model_output, c('sigma'))[,c(3:7)]) + message() + } } } @@ -297,6 +348,64 @@ if(!object$use_lv){ } } +if(!is.null(object$trend_call)){ + message("GAM process coefficient (beta) estimates:") + coef_names <- names(object$trend_mgcv_model$coefficients) + mvgam_coefs <- mcmc_summary(object$model_output, 'b_trend')[,c(3:7)] + rownames(mvgam_coefs) <- gsub('series', 'trend', + coef_names, fixed = TRUE) + print(mvgam_coefs) + message() + + process_rhos <- try(mcmc_summary(object$model_output, 'rho_trend')[,c(3:7)], + silent = TRUE) + if(inherits(process_rhos, 'try-error')){ + + } else { + message("GAM process smoothing parameter (rho) estimates:") + rho_coefs <- mcmc_summary(object$model_output, 'rho_trend')[,c(3:7)] + rownames(rho_coefs) <- gsub('series', 'trend', + unlist(purrr::map(object$trend_mgcv_model$smooth, + 'label')), + fixed = TRUE) + + # Don't print random effect lambdas as they follow the prior distribution + to_print <- vector(length = length(object$trend_mgcv_model$smooth)) + for(i in 1:length(object$trend_mgcv_model$smooth)){ + if(inherits(object$trend_mgcv_model$smooth[[i]], 'random.effect')){ + to_print[i] <- FALSE + } else { + to_print[i] <- TRUE + } + } + + print(rho_coefs[to_print,]) + message() + + if(any(to_print == FALSE)){ + re_sds <- mcmc_summary(object$model_output, 'sigma_raw_trend', + ISB = TRUE)[,c(3:7)] + + re_mus <- mcmc_summary(object$model_output, 'mu_raw_trend', + ISB = TRUE)[,c(3:7)] + + rownames(re_sds) <- rownames(re_mus) <- + gsub('series', 'trend', rownames(rho_coefs)[to_print == FALSE], + fixed = TRUE) + + + message("GAM process random effect population mean estimates:") + print(re_mus) + message() + + message("GAM process random effect population SD estimates:") + print(re_sds) + message() + } + } + +} + if(object$fit_engine == 'stan'){ message('Stan MCMC diagnostics') check_all_diagnostics(object$model_output, @@ -323,9 +432,20 @@ if(object$fit_engine == 'jags'){ #' @rdname summary.mvgam #' @export summary.mvgam_prefit = function(object, ...){ - message("GAM formula:") - print(object$call) - message() + + if(!is.null(object$trend_call)){ + message("GAM observation formula:") + print(object$call) + message() + + message("GAM process formula:") + print(object$trend_call) + message() + } else { + message("GAM formula:") + print(object$call) + message() + } message("Family:") cat(paste0(object$family, '\n')) @@ -340,9 +460,16 @@ summary.mvgam_prefit = function(object, ...){ message() if(object$use_lv){ - message("N latent factors:") - cat(object$n_lv, '\n') - message() + if(!is.null(object$trend_call)){ + message("N process models:") + cat(object$n_lv, '\n') + message() + } else { + message("N latent factors:") + cat(object$n_lv, '\n') + message() + } + } message('N series:') diff --git a/man/mvgam-class.Rd b/man/mvgam-class.Rd index 764686e7..b3511635 100644 --- a/man/mvgam-class.Rd +++ b/man/mvgam-class.Rd @@ -12,7 +12,9 @@ Method functions \code{as.data.frame}, \code{code}, \code{coef}, \code{forecast} \details{ A \code{mvgam} object contains the following elements: \itemize{ -\item \code{call} the original model formula +\item \code{call} the original observation model formula +\item \code{trend_call} If a \verb{trend_formula was supplied}, the original trend model formula is +returned. Otherwise \code{NULL} \item \code{family} \code{character} description of the observation distribution \item \code{trend_model} \code{character} description of the latent trend model \item \code{drift} Logical specifying whether a drift term was used in the trend model @@ -38,6 +40,8 @@ coefficients in this model object will contain the posterior median coefficients but these are only used if generating plots of smooth functions that \code{mvgam} currently cannot handle (such as plots for three-dimensional smooths). This model therefore should not be used for inference. See \code{\link[mgcv]{gamObject}} for details. +\item \code{trend_mgcv_model} If a \verb{trend_formula was supplied}, an object of class \code{gam} containing +the \code{mgcv} version of the trend model. Otherwise \code{NULL} \item \code{ytimes} The \code{matrix} object used in model fitting for indexing which series and timepoints were observed in each row of the supplied data. Used internally by some downstream plotting and prediction functions diff --git a/man/mvgam.Rd b/man/mvgam.Rd index 091f42b8..8d85d6ef 100644 --- a/man/mvgam.Rd +++ b/man/mvgam.Rd @@ -6,6 +6,7 @@ \usage{ mvgam( formula, + trend_formula, knots, data, data_train, @@ -36,10 +37,15 @@ mvgam( ) } \arguments{ -\item{formula}{A \code{character} string specifying the GAM formula. These are exactly like the formula +\item{formula}{A \code{character} string specifying the GAM observation model formula. These are exactly like the formula for a GLM except that smooth terms, s, te, ti and t2, can be added to the right hand side to specify that the linear predictor depends on smooth functions of predictors (or linear functionals of these).} +\item{trend_formula}{An optional \code{character} string specifying the GAM process model formula. If +supplied, a linear predictor will be modelled for the latent trends to capture process model evolution +separately from the observation model. Should not have a response variable specified on the left-hand side +of the formula (i.e. a valid option would be \code{~ season + s(year)})} + \item{knots}{An optional \code{list} containing user specified knot values to be used for basis construction. For most bases the user simply supplies the knots to be used, which must match up with the k value supplied (note that the number of knots is not always just k). Different terms can use different numbers of knots, @@ -398,7 +404,7 @@ abline(v = 180, lty = 'dashed', lwd = 2) # Example showing how to incorporate an offset; simulate some count data # with different means per series set.seed(100) -dat <- sim_mvgam(trend_rel = 0, mu_obs = c(4, 8, 8), seasonality = 'hierarchical') +dat <- sim_mvgam(trend_rel = 0, mu = c(0, 2, 2), seasonality = 'hierarchical') # Add offset terms to the training and testing data dat$data_train$offset <- 0.5 * as.numeric(dat$data_train$series) @@ -407,7 +413,8 @@ dat$data_test$offset <- 0.5 * as.numeric(dat$data_test$series) # Fit a model that includes the offset in the linear predictor as well as # hierarchical seasonal smooths mod1 <- mvgam(formula = y ~ offset(offset) + - s(season, bs = 'cc') + + s(series, bs = 're') + + s(season, bs = 'cc') + s(season, by = series, m = 1, k = 5), data = dat$data_train, trend_model = 'None', @@ -420,9 +427,9 @@ mod1$model_file # Forecasts for the first two series will differ in magnitude layout(matrix(1:2, ncol = 2)) plot(mod1, type = 'forecast', series = 1, newdata = dat$data_test, - ylim = c(0, 70)) + ylim = c(0, 75)) plot(mod1, type = 'forecast', series = 2, newdata = dat$data_test, - ylim = c(0, 70)) + ylim = c(0, 75)) layout(1) # Changing the offset for the testing data should lead to changes in diff --git a/man/plot.mvgam.Rd b/man/plot.mvgam.Rd index 31fd6bc7..08d01207 100644 --- a/man/plot.mvgam.Rd +++ b/man/plot.mvgam.Rd @@ -11,6 +11,7 @@ residuals = FALSE, newdata, data_test, + trend_effects = FALSE, ... ) } @@ -50,6 +51,9 @@ uncertainty components (\code{type = uncertainty}).} \item{data_test}{Deprecated. Still works in place of \code{newdata} but users are recommended to use \code{newdata} instead for more seamless integration into \code{R} workflows} +\item{trend_effects}{logical. If \code{TRUE} and a \code{trend_formula} was used in model +fitting, terms from the trend (i.e. process) model will be plotted} + \item{...}{Additional arguments for each individual plotting function.} } \value{ diff --git a/man/plot_mvgam_pterms.Rd b/man/plot_mvgam_pterms.Rd index d3fa5173..73ba2f9a 100644 --- a/man/plot_mvgam_pterms.Rd +++ b/man/plot_mvgam_pterms.Rd @@ -4,10 +4,13 @@ \alias{plot_mvgam_pterms} \title{Plot mvgam parametric term partial effects} \usage{ -plot_mvgam_pterms(object) +plot_mvgam_pterms(object, trend_effects = FALSE) } \arguments{ \item{object}{\code{list} object returned from \code{mvgam}} + +\item{trend_effects}{logical. If \code{TRUE} and a \code{trend_formula} was used in model +fitting, terms from the trend (i.e. process) model will be plotted} } \value{ A base \code{R} graphics plot diff --git a/man/plot_mvgam_randomeffects.Rd b/man/plot_mvgam_randomeffects.Rd index 124ac7fb..9fc4dfc1 100644 --- a/man/plot_mvgam_randomeffects.Rd +++ b/man/plot_mvgam_randomeffects.Rd @@ -4,10 +4,13 @@ \alias{plot_mvgam_randomeffects} \title{Plot mvgam random effect terms} \usage{ -plot_mvgam_randomeffects(object) +plot_mvgam_randomeffects(object, trend_effects = FALSE) } \arguments{ \item{object}{\code{list} object returned from \code{mvgam}} + +\item{trend_effects}{logical. If \code{TRUE} and a \code{trend_formula} was used in model +fitting, terms from the trend (i.e. process) model will be plotted} } \value{ A base \code{R} graphics plot diff --git a/man/plot_mvgam_smooth.Rd b/man/plot_mvgam_smooth.Rd index e4ebfbb4..65900446 100644 --- a/man/plot_mvgam_smooth.Rd +++ b/man/plot_mvgam_smooth.Rd @@ -6,6 +6,7 @@ \usage{ plot_mvgam_smooth( object, + trend_effects = FALSE, series = 1, smooth, residuals = FALSE, @@ -19,6 +20,9 @@ plot_mvgam_smooth( \arguments{ \item{object}{\code{list} object returned from \code{mvgam}} +\item{trend_effects}{logical. If \code{TRUE} and a \code{trend_formula} was used in model +fitting, terms from the trend (i.e. process) model will be plotted} + \item{series}{\code{integer} specifying which series in the set is to be plotted} \item{smooth}{either a \code{character} or \code{integer} specifying which smooth term to be plotted} diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index 5625eae5..91f4f7cd 100644 Binary files a/tests/testthat/Rplots.pdf and b/tests/testthat/Rplots.pdf differ diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index f1b07b17..2b9b7700 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -2,6 +2,10 @@ library("testthat") library("mvgam") +expect_match2 <- function(object, regexp) { + any(grepl(regexp, object, fixed = TRUE)) +} + #### Fit two models for each testing combination to ensure # Stan-based forecasts and mvgam-based forecasts are similar; # use 1000 posterior samples for each chain so out of sample forecast diff --git a/tests/testthat/test-mvgam-methods.R b/tests/testthat/test-mvgam-methods.R new file mode 100644 index 00000000..6bbc6309 --- /dev/null +++ b/tests/testthat/test-mvgam-methods.R @@ -0,0 +1,65 @@ +context("tests for mvgam and mvgam_forecast class methods") + +test_that("as.data.frame and friends have resonable outputs", { + out <- as.data.frame(gaus_ar1, variable = 'betas') + expect_s3_class(out, "data.frame") + expect_equal(names(out)[1], "(Intercept)") + + out <- as.data.frame(gaus_ar1, variable = 'trend_params') + expect_s3_class(out, "data.frame") + expect_equal(names(out)[1], "sigma[1]") + + out <- as.data.frame(gaus_ar1, variable = 'obs_params') + expect_s3_class(out, "data.frame") + expect_equal(names(out)[1], "sigma_obs[1]") + + out <- as.matrix(beta_gp, variable = 'obs_params') + expect_true(inherits(out, "matrix")) + expect_equal(dimnames(out)[[2]][1], "phi[1]") +}) + +test_that("coef has resonable outputs", { + out <- coef(gaus_ar1) + expect_equal(rownames(out)[1], "(Intercept)") + expect_equal(dim(out), c(9, 5)) +}) + +test_that("logLik has reasonable ouputs", { + expect_equal(dim(logLik(gaus_ar1, n_cores = 1)), + c(3000, NROW(gaus_ar1$obs_data))) +}) + +test_that("predict has reasonable outputs", { + gaus_preds <- predict(gaus_ar1, n_cores = 1) + expect_equal(dim(gaus_preds), + c(3000, NROW(gaus_data$data_train))) + expect_lt(max(gaus_preds), 2) + expect_gt(max(gaus_preds), -2) + + beta_preds <- predict(beta_gp, n_cores = 1, type = 'response') + expect_equal(dim(beta_preds), + c(3000, NROW(beta_data$data_train))) + expect_lt(max(beta_preds), 1) + expect_gt(max(gaus_preds), 0) +}) + +test_that("forecast and friends have reasonable outputs", { + expect_error(forecast(beta_gp), + 'newdata must be supplied to compute forecasts') + hc <- hindcast(beta_gp) + expect_s3_class(hc, 'mvgam_forecast') + expect_true(is.null(hc$forecasts)) + expect_equal(dim(hc$hindcasts[[1]]), + c(3000, NROW(beta_data$data_train) / + length(unique(beta_data$data_train$series)))) + expect_equal(hc$train_observations[[1]], + beta_data$data_train$y[which(beta_data$data_train$series == 'series_1')]) + + fc <- forecast(beta_gpfc) + expect_s3_class(fc, 'mvgam_forecast') + expect_equal(dim(fc$forecasts[[1]]), + c(3000, NROW(beta_data$data_test) / + length(unique(beta_data$data_test$series)))) + expect_equal(fc$test_observations[[1]], + beta_data$data_test$y[which(beta_data$data_test$series == 'series_1')]) +}) diff --git a/tests/testthat/test-mvgam.R b/tests/testthat/test-mvgam.R index 9c5575ec..35cbc020 100644 --- a/tests/testthat/test-mvgam.R +++ b/tests/testthat/test-mvgam.R @@ -9,6 +9,15 @@ test_that("family must be correctly specified", { 'family not recognized') }) +test_that("response variable must be specified", { +expect_error(mod <- mvgam( ~ s(season), + trend_model = 'AR1', + data = beta_data$data_train, + family = 'besta', + run_model = FALSE), + 'response variable is missing from formula') +}) + test_that("trend_model must be correctly specified", { expect_error(mod <- mvgam(y ~ s(season), trend_model = 'AR11', diff --git a/tests/testthat/test-mvgam_priors.R b/tests/testthat/test-mvgam_priors.R index 1dd80729..acd31576 100644 --- a/tests/testthat/test-mvgam_priors.R +++ b/tests/testthat/test-mvgam_priors.R @@ -1,9 +1,5 @@ context("mvgam_priors") -expect_match2 <- function(object, regexp) { - any(grepl(regexp, object, fixed = TRUE)) -} - test_that("family must be correctly specified", { expect_error(get_mvgam_priors(y ~ s(season), trend_model = 'AR1', diff --git a/tests/testthat/test-offset.R b/tests/testthat/test-offset.R new file mode 100644 index 00000000..f1f9d228 --- /dev/null +++ b/tests/testthat/test-offset.R @@ -0,0 +1,66 @@ +context("offsets") + +test_that("offset incorporated into link-level linpred for beta", { + beta_data$data_train$pop <- as.numeric(beta_data$data_train$series) + 0.5 + beta_data$data_test$pop <- as.numeric(beta_data$data_test$series) + 0.5 + testmod <- mvgam(y ~ s(season, bs = 'cc') + + offset(pop), + trend_model = 'GP', + data = beta_data$data_train, + family = betar(), + run_model = FALSE) + stancode <- testmod$model_file + + # Offset should be recorded in the mgcv model + expect_true(!is.null(attr(testmod$mgcv_model$terms, 'offset'))) + + # Offset should be in the linpred calculation + expect_true(expect_match2(stancode, + 'eta = X * b + offset;')) + + # Offset should be inv_logit in the model declaration + expect_true(expect_match2(stancode, + 'inv_logit(append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind])')) + + + # Offset should be provided in 'data' + expect_true(expect_match2(stancode, + 'int obs_ind[n_nonmissing];')) + + # Data for the offset vector should also be incorporated + # in model_data + expect_true(!is.null(testmod$model_data$offset)) +}) + +test_that("offset incorporated into link-level linpred for NB", { + data = data.frame(out = rpois(100, lambda = 5), + pop = rnorm(100), + time = 1:100) + testmod <- mvgam(out ~ 1 + + offset(pop), + trend_model = 'GP', + data = data, + family = nb(), + run_model = FALSE) + stancode <- testmod$model_file + + # Offset should be recorded in the mgcv model + expect_true(!is.null(attr(testmod$mgcv_model$terms, 'offset'))) + + # Offset should be in the linpred calculation + expect_true(expect_match2(stancode, + 'eta = X * b + offset;')) + + # Offset should be exponentiated in the model declaration + expect_true(expect_match2(stancode, + 'exp(append_col(flat_xs, flat_trends) * append_row(b, 1.0) + offset[obs_ind])')) + + # Offset should be provided in 'data' + expect_true(expect_match2(stancode, + 'int obs_ind[n_nonmissing];')) + + # Data for the offset vector should also be incorporated + # in model_data + expect_true(!is.null(testmod$model_data$offset)) +}) +