diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 62ad0084..00000000 Binary files a/.DS_Store and /dev/null differ diff --git a/NAMESPACE b/NAMESPACE index ba39516c..f62a0dc1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(plot_mvgam_fc) export(plot_mvgam_pterms) export(plot_mvgam_randomeffects) export(plot_mvgam_resids) +export(plot_mvgam_series) export(plot_mvgam_smooth) export(plot_mvgam_trace) export(plot_mvgam_trend) diff --git a/NEON_manuscript/.DS_Store b/NEON_manuscript/.DS_Store deleted file mode 100644 index eaf6690a..00000000 Binary files a/NEON_manuscript/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/.DS_Store b/NEON_manuscript/Case studies/.DS_Store deleted file mode 100644 index aa69cddc..00000000 Binary files a/NEON_manuscript/Case studies/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/mvgam_case_study2_files/.DS_Store b/NEON_manuscript/Case studies/mvgam_case_study2_files/.DS_Store deleted file mode 100644 index e7b32978..00000000 Binary files a/NEON_manuscript/Case studies/mvgam_case_study2_files/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/rsconnect/.DS_Store b/NEON_manuscript/Case studies/rsconnect/.DS_Store deleted file mode 100644 index 6ca3a5fd..00000000 Binary files a/NEON_manuscript/Case studies/rsconnect/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/rsconnect/documents/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/.DS_Store deleted file mode 100644 index 15248328..00000000 Binary files a/NEON_manuscript/Case studies/rsconnect/documents/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/.DS_Store deleted file mode 100644 index b4aee0ce..00000000 Binary files a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/rpubs.com/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/rpubs.com/.DS_Store deleted file mode 100644 index 9f1da32b..00000000 Binary files a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study1.Rmd/rpubs.com/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/.DS_Store deleted file mode 100644 index b4aee0ce..00000000 Binary files a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/rpubs.com/.DS_Store b/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/rpubs.com/.DS_Store deleted file mode 100644 index 9f1da32b..00000000 Binary files a/NEON_manuscript/Case studies/rsconnect/documents/mvgam_case_study2.Rmd/rpubs.com/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Figures/.DS_Store b/NEON_manuscript/Figures/.DS_Store deleted file mode 100644 index f3a3eff1..00000000 Binary files a/NEON_manuscript/Figures/.DS_Store and /dev/null differ diff --git a/NEON_manuscript/Manuscript/.DS_Store b/NEON_manuscript/Manuscript/.DS_Store deleted file mode 100644 index 4854a53d..00000000 Binary files a/NEON_manuscript/Manuscript/.DS_Store and /dev/null differ diff --git 
a/NEON_manuscript/next_todo.R b/NEON_manuscript/next_todo.R index f9e89f1d..6e62af76 100644 --- a/NEON_manuscript/next_todo.R +++ b/NEON_manuscript/next_todo.R @@ -1,19 +1,20 @@ library(mvgam) -dat <- sim_mvgam(T = 100, n_series=4, n_lv = 1) -dat$true_corrs +dat <- sim_mvgam(T = 100, n_series=3, prop_missing = .4) +plot_mvgam_series(data_train = dat$data_train, series = 1) -mod1 <- mvgam(formula = y ~ s(season, bs = 'cc') + - s(series, bs = 're'), +mod1 <- mvgam(formula = y ~ s(series, bs = 're'), data_train = dat$data_train, trend_model = 'AR1', family = 'poisson', use_lv = TRUE, - n_lv = 2, use_stan = TRUE, - run_model = T, + run_model = TRUE, burnin = 10) mod1$model_file +mod1$model_data + + summary(mod1) plot_mvgam_factors(mod1) plot(mod1, type = 'residuals') @@ -23,238 +24,6 @@ plot(mod1, 'trend', data_test = fake) -plot_mvgam_series = function(data_train, data_test, series, n_bins, - log_scale = FALSE){ - - if(series == 'all'){ - n_plots <- length(levels(data_train$series)) - pages <- 1 - .pardefault <- par(no.readonly=T) - par(.pardefault) - - if (n_plots > 4) pages <- 2 - if (n_plots > 8) pages <- 3 - if (n_plots > 12) pages <- 4 - if (pages != 0) { - ppp <- n_plots %/% pages - - if (n_plots %% pages != 0) { - ppp <- ppp + 1 - while (ppp * (pages - 1) >= n_plots) pages <- pages - 1 - } - - # Configure layout matrix - c <- r <- trunc(sqrt(ppp)) - if (c<1) r <- c <- 1 - if (c*r < ppp) c <- c + 1 - if (c*r < ppp) r <- r + 1 - oldpar<-par(mfrow=c(r,c)) - - } else { ppp <- 1; oldpar <- par()} - - all_ys <- lapply(seq_len(n_plots), function(x){ - if(log_scale){ - log( data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == levels(data_train$series)[x]) %>% - dplyr::pull(y) + 1) - } else { - data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == levels(data_train$series)[x]) %>% - dplyr::pull(y) - } - }) - - for(i in 1:n_plots){ - s_name <- levels(data_train$series)[i] - - truth <- data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == s_name) %>% - dplyr::select(time, y) %>% - dplyr::distinct() %>% - dplyr::arrange(time) %>% - dplyr::pull(y) - - if(log_scale){ - truth <- log(truth + 1) - ylab <- 'log(Y + 1)' - } else { - ylab <- 'Y' - } - - plot(1, type = "n", bty = 'L', - xlab = 'Time', - ylab = ylab, - ylim = range(unlist(all_ys)), - xlim = c(0, length(c(truth)))) - title(s_name, line = 0) - - if(n_plots > 1){ - for(x in 1:n_plots){ - lines(all_ys[[x]], lwd = 1.85, col = 'grey85') - } - } - - lines(x = 1:length(truth), y = truth, lwd = 3, col = "white") - lines(x = 1:length(truth), y = truth, lwd = 2.5, col = "#8F2727") - box(bty = 'L', lwd = 2) - - } - - } else { - s_name <- levels(data_train$series)[series] - truth <- data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == s_name) %>% - dplyr::select(time, y) %>% - dplyr::distinct() %>% - dplyr::arrange(time) %>% - dplyr::pull(y) - - layout(matrix(1:4, nrow = 2, byrow = TRUE)) - if(!missing(data_test)){ - test <- data.frame(y = data_test$y, - series = data_test$series, - time = data_test$time) %>% - dplyr::filter(series == s_name) %>% - dplyr::select(time, y) %>% - dplyr::distinct() %>% - dplyr::arrange(time) %>% - dplyr::pull(y) - - plot(1, type = "n", bty = 'L', - xlab = 'Time', - ylab = 'Y', - ylim = range(c(truth, test)), - xlim = c(0, length(c(truth, test)))) - title('Time series', line = 0) - - 
lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727") - lines(x = (length(truth)+1):length(c(truth, test)), y = test, lwd = 2, col = "black") - abline(v = length(truth)+1, col = '#FFFFFF60', lwd = 2.85) - abline(v = length(truth)+1, col = 'black', lwd = 2.5, lty = 'dashed') - box(bty = 'L', lwd = 2) - - if(missing(n_bins)){ - n_bins <- max(c(length(hist(c(truth, test), plot = F)$breaks), - 20)) - } - - hist(c(truth, test), border = "#8F2727", - lwd = 2, - freq = FALSE, - breaks = n_bins, - col = "#C79999", - ylab = 'Density', - xlab = 'Count', main = '') - title('Histogram', line = 0) - - acf(c(truth, test), - na.action = na.pass, bty = 'L', - lwd = 2.5, ci.col = 'black', col = "#8F2727", - main = '', ylab = 'Autocorrelation') - acf1 <- acf(c(truth, test), plot = F, - na.action = na.pass) - clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used) - abline(h = clim, col = '#FFFFFF', lwd = 2.85) - abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed') - abline(h = -clim, col = '#FFFFFF', lwd = 2.85) - abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed') - box(bty = 'L', lwd = 2) - title('ACF', line = 0) - - ecdf_plotdat = function(vals, x){ - func <- ecdf(vals) - func(x) - } - - plot_x <- seq(min(c(truth, test), na.rm = T), - max(c(truth, test), na.rm = T)) - plot(1, type = "n", bty = 'L', - xlab = 'Count', - ylab = 'Empirical CDF', - xlim = c(min(plot_x), max(plot_x)), - ylim = c(0, 1)) - title('CDF', line = 0) - lines(x = plot_x, - y = ecdf_plotdat(c(truth, test), - plot_x), - col = "#8F2727", - lwd = 2.5) - box(bty = 'L', lwd = 2) - - } else { - plot(1, type = "n", bty = 'L', - xlab = 'Time', - ylab = 'Observations', - ylim = range(c(truth)), - xlim = c(0, length(c(truth)))) - title('Time series', line = 0) - - lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727") - box(bty = 'L', lwd = 2) - - if(missing(n_bins)){ - n_bins <- max(c(length(hist(c(truth), plot = F)$breaks), - 20)) - } - - hist(c(truth), border = "#8F2727", - lwd = 2, - freq = FALSE, - breaks = n_bins, - col = "#C79999", - ylab = 'Density', - xlab = 'Count', main = '') - title('Histogram', line = 0) - - - acf(c(truth), - na.action = na.pass, bty = 'L', - lwd = 2.5, ci.col = 'black', col = "#8F2727", - main = '', ylab = 'Autocorrelation') - acf1 <- acf(c(truth), plot = F, - na.action = na.pass) - clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used) - abline(h = clim, col = '#FFFFFF', lwd = 2.85) - abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed') - abline(h = -clim, col = '#FFFFFF', lwd = 2.85) - abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed') - box(bty = 'L', lwd = 2) - title('ACF', line = 0) - - - ecdf_plotdat = function(vals, x){ - func <- ecdf(vals) - func(x) - } - - plot_x <- seq(min(truth, na.rm = T), - max(truth, na.rm = T)) - plot(1, type = "n", bty = 'L', - xlab = 'Count', - ylab = 'Empirical CDF', - xlim = c(min(plot_x), max(plot_x)), - ylim = c(0, 1)) - title('CDF', line = 0) - lines(x = plot_x, - y = ecdf_plotdat(truth, - plot_x), - col = "#8F2727", - lwd = 2.5) - box(bty = 'L', lwd = 2) - } - - layout(1) - } - -} dat <- sim_mvgam(T = 100, n_series = 4, n_lv = 2, mu_obs = c(4, 6, 10, 14), trend_rel = 0.3, diff --git a/R/add_base_dgam_lines.R b/R/add_base_dgam_lines.R index a5d797da..61d4d145 100644 --- a/R/add_base_dgam_lines.R +++ b/R/add_base_dgam_lines.R @@ -33,12 +33,16 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){ } transformed parameters { - // basis coefficients - row_vector[num_basis] b; + // GAM contribution to expectations (log scale) + vector[total_obs] eta; - 
// dynamic factor loading matrix + // trends and dynamic factor loading matrix + matrix[n, n_series] trend; matrix[n_series, n_lv] lv_coefs; + // basis coefficients + row_vector[num_basis] b; + // constraints allow identifiability of loadings for (i in 1:(n_lv - 1)) { for (j in (i + 1):(n_lv)){ @@ -57,15 +61,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){ } // derived latent trends - matrix[n, n_series] trend; for (i in 1:n){; for (s in 1:n_series){ trend[i, s] = dot_product(lv_coefs[s,], LV[i,]); } } - // GAM contribution to expectations (log scale) - vector[total_obs] eta; eta = to_vector(b * X); } @@ -98,12 +99,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){ generated quantities { vector[n_sp] rho; - rho = log(lambda); vector[n_lv] penalty; + matrix[n, n_series] ypred; + rho = log(lambda); penalty = rep_vector(1.0, n_lv); // posterior predictions - matrix[n, n_series] ypred; for(i in 1:n){ for(s in 1:n_series){ ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] + trend[i, s]); @@ -130,11 +131,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){ } transformed parameters { + // GAM contribution to expectations (log scale) + vector[total_obs] eta; + // basis coefficients row_vector[num_basis] b; - // GAM contribution to expectations (log scale) - vector[total_obs] eta; eta = to_vector(b * X); } @@ -167,12 +169,12 @@ add_base_dgam_lines = function(use_lv, stan = FALSE){ generated quantities { vector[n_sp] rho; - rho = log(lambda); vector[n_series] tau; + matrix[n, n_series] ypred; + rho = log(lambda); tau = sigma ^ -2; // posterior predictions - matrix[n, n_series] ypred; for(i in 1:n){ for(s in 1:n_series){ ypred[i, s] = poisson_log_rng(eta[ytimes[i, s]] + trend[i, s]); diff --git a/R/add_stan_data.R b/R/add_stan_data.R index 90725caf..1134a8ce 100644 --- a/R/add_stan_data.R +++ b/R/add_stan_data.R @@ -228,16 +228,39 @@ add_stan_data = function(jags_file, stan_file, use_lv = FALSE, jags_smooth_text <- gsub('##', '//', jags_smooth_text) jags_smooth_text <- gsub('dexp', 'exponential', jags_smooth_text) - K_starts <- grep('K.* <- ', jags_smooth_text) - for(i in 1:length(K_starts)){ - jags_smooth_text[K_starts[i]+1] <- gsub('\\bb\\b', 'b_raw', - gsub('dmnorm', 'multi_normal_prec', - paste0(gsub('K.*', - trimws(gsub('K.* <- ', '', - jags_smooth_text[K_starts[i]])), - jags_smooth_text[K_starts[i]+1]), ')'))) + if(any(grep('K.* <- ', jags_smooth_text))){ + K_starts <- grep('K.* <- ', jags_smooth_text) + for(i in 1:length(K_starts)){ + jags_smooth_text[K_starts[i]+1] <- gsub('\\bb\\b', 'b_raw', + gsub('dmnorm', 'multi_normal_prec', + paste0(gsub('K.*', + trimws(gsub('K.* <- ', '', + jags_smooth_text[K_starts[i]])), + jags_smooth_text[K_starts[i]+1]), ')'))) + } + jags_smooth_text <- jags_smooth_text[-K_starts] + } else { + # If no K terms then there are no smoothing parameters in the model + # (probably the only smooth terms included are random effect bases, which don't need + # smoothing parameters when we use the non-centred parameterisation) + stan_file <- stan_file[-grep('// priors for smoothing parameters', stan_file, + fixed = TRUE)] + stan_file <- stan_file[-grep('lambda ~ exponential', stan_file, + fixed = TRUE)] + stan_file <- stan_file[-grep('vector[n_sp] rho', stan_file, + fixed = TRUE)] + stan_file <- stan_file[-grep('rho = log', stan_file, + fixed = TRUE)] + stan_file <- stan_file[-grep('// smoothing parameters', stan_file, + fixed = TRUE)] + stan_file <- stan_file[-grep('[n_sp] lambda', stan_file, + fixed = TRUE)] + stan_file <- 
stan_file[-grep('vector[num_basis] zero; //', stan_file, + fixed = TRUE)] + stan_file <- stan_file[-grep('int n_sp; //', stan_file, + fixed = TRUE)] } - jags_smooth_text <- jags_smooth_text[-K_starts] + if(any(grep('b\\[i\\] = b_raw', jags_smooth_text))){ jags_smooth_text <- jags_smooth_text[-grep('b\\[i\\] = b_raw', jags_smooth_text)] } @@ -284,6 +307,7 @@ add_stan_data = function(jags_file, stan_file, use_lv = FALSE, '\n// random effect variances\n', paste0('vector[',n_sigma_raw,'] sigma_raw', ';\n', collapse = ''), '\n', + '\n// random effect means\n', paste0('vector[',n_sigma_raw,'] mu_raw', ';\n', collapse = '')) b_raw_text <- vector() @@ -300,7 +324,7 @@ add_stan_data = function(jags_file, stan_file, use_lv = FALSE, sub(".*\\:", "", stan_file[b_raw_indices[i]-1]))))), ') {\nb[i] = mu_raw[', i, '] + b_raw[i] * sigma_raw[',i, - '];\n}') + '];\n}\n') min_beta[i] <- as.numeric(sub("for \\(i in ", "", sub("\\:.*", "", stan_file[b_raw_indices[i] - 1]))) @@ -419,7 +443,7 @@ add_stan_data = function(jags_file, stan_file, use_lv = FALSE, stan_data$total_obs <- NCOL(stan_data$X) stan_data$num_basis <- NROW(stan_data$X) - if(any(grepl('## smoothing parameter priors...', jags_file))){ + if(any(grepl('// priors for smoothing parameters', stan_file, fixed = TRUE))){ stan_data$n_sp <- as.numeric(sub('\\) \\{', '', sub('for \\(i in 1\\:', '', jags_file[grep('lambda\\[i\\] ~ ', diff --git a/R/mvgam.R b/R/mvgam.R index c2a2b09b..2bc5efe3 100644 --- a/R/mvgam.R +++ b/R/mvgam.R @@ -858,6 +858,13 @@ mvgam = function(formula, parametric_tdata <- NULL } + # A second check for any smooth parameters + if(any(grep('K.* <- ', model_file))){ + smooths_included <- TRUE + } else { + smooths_included <- FALSE + } + if(smooths_included){ zeros <- paste0('vector zero; prior basis coefficient locations vector of length ncol(X)\n') } else { diff --git a/R/plot_mvgam_series.R b/R/plot_mvgam_series.R new file mode 100644 index 00000000..e05b34cd --- /dev/null +++ b/R/plot_mvgam_series.R @@ -0,0 +1,286 @@ +#'Plot observed time series used for mvgam modelling +#' +#'This function takes either a fitted \code{mvgam} object or a \code{data_train} object +#'and produces plots of observed time series, ACF, CDF and histograms for exploratory data analysis +#' +#'@param object Optional \code{list} object returned from \code{mvgam}. Either \code{object} or \code{data_train} +#'must be supplied. +#'@param data_train Optional \code{dataframe} or \code{list} of training data containing at least 'series' and 'time'. +#'Use this argument if training data have been gathered in the correct format for \code{mvgam} modelling +#'but no model has yet been fitted. +#'@param data_test Optional \code{dataframe} or \code{list} of test data containing at least 'series' and 'time' +#'for the forecast horizon, in addition to any other variables included in the linear predictor of \code{formula}. If +#'included, the observed values in the test data are also plotted to aid in exploring +#'patterns over the forecast horizon. +#'@param series Either an \code{integer} specifying which series in the set is to be plotted or +#'the string 'all', which plots all series available in the supplied data +#'@param n_bins \code{integer} specifying the number of bins to use for binning observed values when plotting +#'the histogram. Default is to use the number of bins returned by a call to `hist` in base `R` +#'@param log_scale \code{logical}. 
If \code{series == 'all'}, this flag is used to control whether +#'the time series plot is shown on the log scale (using `log(Y + 1)`). This can be useful when +#'visualising many series that may have different observed ranges. Default is \code{FALSE} +#'@author Nicholas J Clark +#'@return A set of base \code{R} graphics plots. If \code{series} is an integer, the plots will +#'show observed time series, autocorrelation and +#'cumulative distribution functions, and a histogram for the series. If \code{series == 'all'}, +#'a set of observed time series plots is returned in which all series are shown on each plot but +#'only a single focal series is highlighted, with all remaining series shown as faint gray lines. +#'@export +plot_mvgam_series = function(object, data_train, data_test, + series, n_bins, + log_scale = FALSE){ + + # Check arguments + if(is.character(series)){ + if(series != 'all'){ + stop('argument "series" must be either a positive integer or "all"', + call. = FALSE) + } + } else { + if(sign(series) != 1){ + stop('argument "series" must be either a positive integer or "all"', + call. = FALSE) + } else { + if(series%%1 != 0){ + stop('argument "series" must be either a positive integer or "all"', + call. = FALSE) + } + } + } + + if(!missing(object)){ + if(!missing(data_train)){ + warning('both "object" and "data_train" were supplied; only using "object"') + } + data_train <- object$obs_data + } + + if(series == 'all'){ + n_plots <- length(levels(data_train$series)) + pages <- 1 + .pardefault <- par(no.readonly=T) + par(.pardefault) + + if (n_plots > 4) pages <- 2 + if (n_plots > 8) pages <- 3 + if (n_plots > 12) pages <- 4 + if (pages != 0) { + ppp <- n_plots %/% pages + + if (n_plots %% pages != 0) { + ppp <- ppp + 1 + while (ppp * (pages - 1) >= n_plots) pages <- pages - 1 + } + + # Configure layout matrix + c <- r <- trunc(sqrt(ppp)) + if (c<1) r <- c <- 1 + if (c*r < ppp) c <- c + 1 + if (c*r < ppp) r <- r + 1 + oldpar<-par(mfrow=c(r,c)) + + } else { ppp <- 1; oldpar <- par()} + + all_ys <- lapply(seq_len(n_plots), function(x){ + if(log_scale){ + log( data.frame(y = data_train$y, + series = data_train$series, + time = data_train$time) %>% + dplyr::filter(series == levels(data_train$series)[x]) %>% + dplyr::pull(y) + 1) + } else { + data.frame(y = data_train$y, + series = data_train$series, + time = data_train$time) %>% + dplyr::filter(series == levels(data_train$series)[x]) %>% + dplyr::pull(y) + } + }) + + for(i in 1:n_plots){ + s_name <- levels(data_train$series)[i] + + truth <- data.frame(y = data_train$y, + series = data_train$series, + time = data_train$time) %>% + dplyr::filter(series == s_name) %>% + dplyr::select(time, y) %>% + dplyr::distinct() %>% + dplyr::arrange(time) %>% + dplyr::pull(y) + + if(log_scale){ + truth <- log(truth + 1) + ylab <- 'log(Y + 1)' + } else { + ylab <- 'Y' + } + + plot(1, type = "n", bty = 'L', + xlab = 'Time', + ylab = ylab, + ylim = range(unlist(all_ys), na.rm = TRUE), + xlim = c(0, length(c(truth)))) + title(s_name, line = 0) + + if(n_plots > 1){ + for(x in 1:n_plots){ + lines(all_ys[[x]], lwd = 1.85, col = 'grey85') + } + } + + lines(x = 1:length(truth), y = truth, lwd = 3, col = "white") + lines(x = 1:length(truth), y = truth, lwd = 2.5, col = "#8F2727") + box(bty = 'L', lwd = 2) + + } + + } else { + s_name <- levels(data_train$series)[series] + truth <- data.frame(y = data_train$y, + series = data_train$series, + time = data_train$time) %>% + dplyr::filter(series == s_name) %>% + dplyr::select(time, y) %>% + dplyr::distinct() %>% + 
dplyr::arrange(time) %>% + dplyr::pull(y) + + layout(matrix(1:4, nrow = 2, byrow = TRUE)) + if(!missing(data_test)){ + test <- data.frame(y = data_test$y, + series = data_test$series, + time = data_test$time) %>% + dplyr::filter(series == s_name) %>% + dplyr::select(time, y) %>% + dplyr::distinct() %>% + dplyr::arrange(time) %>% + dplyr::pull(y) + + plot(1, type = "n", bty = 'L', + xlab = 'Time', + ylab = 'Y', + ylim = range(c(truth, test), na.rm = TRUE), + xlim = c(0, length(c(truth, test)))) + title('Time series', line = 0) + + lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727") + lines(x = (length(truth)+1):length(c(truth, test)), y = test, lwd = 2, col = "black") + abline(v = length(truth)+1, col = '#FFFFFF60', lwd = 2.85) + abline(v = length(truth)+1, col = 'black', lwd = 2.5, lty = 'dashed') + box(bty = 'L', lwd = 2) + + if(missing(n_bins)){ + n_bins <- max(c(length(hist(c(truth, test), plot = F)$breaks), + 20)) + } + + hist(c(truth, test), border = "#8F2727", + lwd = 2, + freq = FALSE, + breaks = n_bins, + col = "#C79999", + ylab = 'Density', + xlab = 'Count', main = '') + title('Histogram', line = 0) + + acf(c(truth, test), + na.action = na.pass, bty = 'L', + lwd = 2.5, ci.col = 'black', col = "#8F2727", + main = '', ylab = 'Autocorrelation') + acf1 <- acf(c(truth, test), plot = F, + na.action = na.pass) + clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used) + abline(h = clim, col = '#FFFFFF', lwd = 2.85) + abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed') + abline(h = -clim, col = '#FFFFFF', lwd = 2.85) + abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed') + box(bty = 'L', lwd = 2) + title('ACF', line = 0) + + ecdf_plotdat = function(vals, x){ + func <- ecdf(vals) + func(x) + } + + plot_x <- seq(min(c(truth, test), na.rm = T), + max(c(truth, test), na.rm = T)) + plot(1, type = "n", bty = 'L', + xlab = 'Count', + ylab = 'Empirical CDF', + xlim = c(min(plot_x), max(plot_x)), + ylim = c(0, 1)) + title('CDF', line = 0) + lines(x = plot_x, + y = ecdf_plotdat(c(truth, test), + plot_x), + col = "#8F2727", + lwd = 2.5) + box(bty = 'L', lwd = 2) + + } else { + plot(1, type = "n", bty = 'L', + xlab = 'Time', + ylab = 'Y', + ylim = range(c(truth), na.rm = TRUE), + xlim = c(0, length(c(truth)))) + title('Time series', line = 0) + + lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727") + box(bty = 'L', lwd = 2) + + if(missing(n_bins)){ + n_bins <- max(c(length(hist(c(truth), plot = F)$breaks), + 20)) + } + + hist(c(truth), border = "#8F2727", + lwd = 2, + freq = FALSE, + breaks = n_bins, + col = "#C79999", + ylab = 'Density', + xlab = 'Count', main = '') + title('Histogram', line = 0) + + + acf(c(truth), + na.action = na.pass, bty = 'L', + lwd = 2.5, ci.col = 'black', col = "#8F2727", + main = '', ylab = 'Autocorrelation') + acf1 <- acf(c(truth), plot = F, + na.action = na.pass) + clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used) + abline(h = clim, col = '#FFFFFF', lwd = 2.85) + abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed') + abline(h = -clim, col = '#FFFFFF', lwd = 2.85) + abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed') + box(bty = 'L', lwd = 2) + title('ACF', line = 0) + + + ecdf_plotdat = function(vals, x){ + func <- ecdf(vals) + func(x) + } + + plot_x <- seq(min(truth, na.rm = T), + max(truth, na.rm = T)) + plot(1, type = "n", bty = 'L', + xlab = 'Count', + ylab = 'Empirical CDF', + xlim = c(min(plot_x), max(plot_x)), + ylim = c(0, 1)) + title('CDF', line = 0) + lines(x = plot_x, + y = ecdf_plotdat(truth, + plot_x), + col = "#8F2727", + 
lwd = 2.5) + box(bty = 'L', lwd = 2) + } + + layout(1) + } + +} diff --git a/R/summary.mvgam.R b/R/summary.mvgam.R index 28747c27..fccebbfa 100644 --- a/R/summary.mvgam.R +++ b/R/summary.mvgam.R @@ -106,7 +106,7 @@ rownames(mvgam_coefs) <- coef_names print(mvgam_coefs) message() -if(length(object$mgcv_model$smooth) > 0){ +if(any(grep('rho', rownames(MCMCvis::MCMCsummary(object$model_output))))){ message("GAM smoothing parameter (rho) estimates:") rho_coefs <- MCMCvis::MCMCsummary(object$model_output, 'rho')[,c(3:7)] diff --git a/man/plot_mvgam_series.Rd b/man/plot_mvgam_series.Rd new file mode 100644 index 00000000..82b0f3d2 --- /dev/null +++ b/man/plot_mvgam_series.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_mvgam_series.R +\name{plot_mvgam_series} +\alias{plot_mvgam_series} +\title{Plot observed time series used for mvgam modelling} +\usage{ +plot_mvgam_series( + object, + data_train, + data_test, + series, + n_bins, + log_scale = FALSE +) +} +\arguments{ +\item{object}{Optional \code{list} object returned from \code{mvgam}. Either \code{object} or \code{data_train} +must be supplied.} + +\item{data_train}{Optional \code{dataframe} or \code{list} of training data containing at least 'series' and 'time'. +Use this argument if training data have been gathered in the correct format for \code{mvgam} modelling +but no model has yet been fitted.} + +\item{data_test}{Optional \code{dataframe} or \code{list} of test data containing at least 'series' and 'time' +for the forecast horizon, in addition to any other variables included in the linear predictor of \code{formula}. If +included, the observed values in the test data are also plotted to aid in exploring patterns over the forecast horizon.} + +\item{series}{Either an \code{integer} specifying which series in the set is to be plotted or +the string 'all', which plots all series available in the supplied data} + +\item{n_bins}{\code{integer} specifying the number of bins to use for binning observed values when plotting +the histogram. Default is to use the number of bins returned by a call to \code{hist} in base \code{R}} + +\item{log_scale}{\code{logical}. If \code{series == 'all'}, this flag is used to control whether +the time series plot is shown on the log scale (using \code{log(Y + 1)}). This can be useful when +visualising many series that may have different observed ranges. Default is \code{FALSE}} +} +\value{ +A set of base \code{R} graphics plots. If \code{series} is an integer, the plots will +show observed time series, autocorrelation and +cumulative distribution functions, and a histogram for the series. If \code{series == 'all'}, +a set of observed time series plots is returned in which all series are shown on each plot but +only a single focal series is highlighted, with all remaining series shown as faint gray lines. +} +\description{ +This function takes either a fitted \code{mvgam} object or a \code{data_train} object +and produces plots of observed time series, ACF, CDF and histograms for exploratory data analysis +} +\author{ +Nicholas J Clark +}
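
A minimal usage sketch of the newly exported plot_mvgam_series() function, following the updated example in NEON_manuscript/next_todo.R (the sim_mvgam() call and the series = 1 plot are taken from that script; the series = 'all' call is an assumed illustration that simply exercises the documented log_scale argument):

library(mvgam)

# Simulate three count series with 40% of observations missing (as in next_todo.R)
dat <- sim_mvgam(T = 100, n_series = 3, prop_missing = 0.4)

# Single focal series: time series, histogram, ACF and empirical CDF panels
plot_mvgam_series(data_train = dat$data_train, series = 1)

# All series on one page; each panel highlights one series against the others
# (series = 'all' and log_scale follow the roxygen documentation above)
plot_mvgam_series(data_train = dat$data_train, series = 'all', log_scale = TRUE)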