diff --git a/DESCRIPTION b/DESCRIPTION index 484c9de0..442b0f92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,9 @@ Date: 2024-09-05 Authors@R: c(person("Nicholas J", "Clark", , "nicholas.j.clark1214@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7131-3301")), person("Scott", "Pease", role = c("ctb"), - comment = c("broom enhancements"))) + comment = c("broom enhancements")), + person("Matthijs", "Hollanders", role = c("ctb"), + comment = c("ggplot visualizations"))) Description: Fit Bayesian Dynamic Generalized Additive Models to multivariate observations. Users can build nonlinear State-Space models that can incorporate semiparametric effects in observation and process components, using a wide range of observation families. Estimation is performed using Markov Chain Monte Carlo with Hamiltonian Monte Carlo in the software 'Stan'. References: Clark & Wells (2022) . URL: https://github.com/nicholasjclark/mvgam, https://nicholasjclark.github.io/mvgam/ BugReports: https://github.com/nicholasjclark/mvgam/issues @@ -31,7 +33,8 @@ Imports: magrittr, rlang, generics, - tibble (>= 3.0.0) + tibble (>= 3.0.0), + patchwork (>= 1.2.0) Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index b4343840..c0f7474e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -306,6 +306,7 @@ importFrom(stats,formula) importFrom(stats,frequency) importFrom(stats,gaussian) importFrom(stats,is.ts) +importFrom(stats,lag) importFrom(stats,lm) importFrom(stats,logLik) importFrom(stats,mad) diff --git a/NEWS.md b/NEWS.md index 7cb72c8c..caf9ee0d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # mvgam 1.1.4 (development version; not yet on CRAN) ## New functionalities +* Improved various plotting functions by returning `ggplot` objects in place of base plots (thanks to @mhollanders #38) * Added `augment()` function to add residuals and fitted values to an mvgam object's observed data (thanks to @swpease #83) * Added support for approximate `gp()` effects with more than one covariate and with different kernel functions (#79) * Added function `jsdgam()` to estimate Joint Species Distribution Models in which both the latent factors and the observation model components can include any of mvgam's complex linear predictor effects. Also added a function `residual_cor()` to compute residual correlation, covariance and precision matrices from `jsdgam` models. See `?mvgam::jsdgam` and `?mvgam::residual_cor` for details diff --git a/R/globals.R b/R/globals.R index cac62eca..b95a90da 100644 --- a/R/globals.R +++ b/R/globals.R @@ -22,4 +22,4 @@ utils::globalVariables(c("y", "year", "smooth_vals", "smooth_num", "total_evd", "smooth_label", "by_variable", "gr", "tot_subgrs", "subgr", "lambda", "level", "sim_hilbert_gp", "trend_model", - "jags_path")) + "jags_path", "x")) diff --git a/R/mvgam_setup.R b/R/mvgam_setup.R index 753290a3..fcbb9aba 100644 --- a/R/mvgam_setup.R +++ b/R/mvgam_setup.R @@ -1142,7 +1142,7 @@ jagam <- function(formula, n.sp <- n.sp + 1 #cat(" for (i in ",b0,":",b1,") { b[i] ~ dnorm(0, lambda[",n.sp,"]) }\n",file=file,append=TRUE,sep="") #b0 <- b1 + 1 - cat(" for (i in ",compress.iseq((b0:b1)[D]),") { b[i] ~ dnorm(0, lambda[",n.sp,"]) }\n",file=file,append=TRUE,sep="") + cat(" for (i in ",compress_iseq((b0:b1)[D]),") { b[i] ~ dnorm(0, lambda[",n.sp,"]) }\n",file=file,append=TRUE,sep="") } } else { ## inseperable - requires the penalty matrices to be supplied to JAGS... b0 <- G$smooth[[i]]$first.para; b1 <- G$smooth[[i]]$last.para @@ -1416,6 +1416,79 @@ gam_setup.list <- function(formula, G } + +#' Takes a set of non-negative integers and returns minimal code for generating it +#' +#' This function is derived from \code{mgcv:::compress.iseq} +#' +#' @author Simon N Wood with modifications by Nicholas Clark +#' @noRd +compress_iseq <- function(x) { + x1 <- sort(x) + br <- diff(x1)!=1 ## TRUE at sequence breaks + txt <- paste(x1[c(TRUE, br)], x1[c(br, TRUE)], sep = ":") ## subsequences + txt1 <- paste(x1[c(TRUE, br)]) ## subseq starts + ii <- x1[c(TRUE, br)] == x1[c(br, TRUE)] ## index start and end equal + txt[ii] <- txt1[ii] ## replace length on sequences with integers + paste("c(", paste(txt, collapse = ","),")", sep = "") +} + +#' Returns a vector dind of columns of X to drop for identifiability +#' +#' This function is derived from \code{mgcv:::olid} +#' +#' @author Simon N Wood with modifications by Nicholas Clark +#' @noRd +olid <- function(X, nsdf, pstart, flpi, lpi) { + nlp <- length(lpi) ## number of linear predictors + n <- nrow(X) + nf <- length(nsdf) ## number of formulae blocks + Xp <- matrix(0 ,n * nlp, sum(nsdf)) + start <- 1 + ii <- 1:n + tind <- rep(0,0) ## complete index of all parametric columns in X + ## create a block matrix, Xp, with the same identifiability properties as + ## unpenalized part of model... + for (i in 1:nf) { + stop <- start - 1 + nsdf[i] + if (stop>=start) { + ind <- pstart[i] + 1:nsdf[i] - 1 + for (k in flpi[[i]]) { + Xp[ii+(k-1)*n, start:stop] <- X[,ind] + } + tind <- c(tind,ind) + start <- start + nsdf[i] + } + } + ## rank deficiency of Xp will reveal number of redundant parametric + ## terms, and a pivoted QR will reveal which to drop to restore + ## full rank... + qrx <- qr(Xp, LAPACK=TRUE,tol=0.0) ## unidentifiable columns get pivoted to final cols + r <- mgcv::Rrank(qr.R(qrx)) ## get rank from R factor of pivoted QR + if (r == ncol(Xp)) { ## full rank, all fine, drop nothing + dind <- rep(0,0) + } else { ## reduced rank, drop some columns + dind <- tind[sort(qrx$pivot[(r+1):ncol(X)],decreasing=TRUE)] ## columns to drop + ## now we need to adjust nsdf, pstart and lpi + for (d in dind) { ## working down through drop indices + ## following commented out code is useful should it ever prove necessary to + ## adjust pstart and nsdf, but at present these are only used in prediction, + ## and it is cleaner to leave them unchanged, and simply drop using dind during prediction. + #k <- if (d>=pstart[nf]) nlp else which(d >= pstart[1:(nf-1)] & d < pstart[2:nf]) + #nsdf[k] <- nsdf[k] - 1 ## one less unpenalized column in this block + #if (k0) lpi[[i]] <- lpi[[i]][-k] ## drop row + k <- which(lpi[[i]]>d) + if (length(k)>0) lpi[[i]][k] <- lpi[[i]][k] - 1 ## close up + } + } ## end of drop index loop + } + list(dind=dind,lpi=lpi) ##,pstart=pstart,nsdf=nsdf) +} + + #' Write linear predictor section of a jagam file #' #' This function is derived from \code{mgcv:::write.jagslp} diff --git a/R/plot.mvgam.R b/R/plot.mvgam.R index b09d5c96..310be2a1 100644 --- a/R/plot.mvgam.R +++ b/R/plot.mvgam.R @@ -112,7 +112,7 @@ plot.mvgam = function(x, type = 'residuals', # Other errors and warnings will propagate from individual functions below if(type == 'series'){ - plot_mvgam_series(object, series = series, data_test = data_test, ...) + print(plot_mvgam_series(object, series = series, newdata = data_test, ...)) } if(type == 're'){ diff --git a/R/plot_mvgam_series.R b/R/plot_mvgam_series.R index a1e2141e..4b450801 100644 --- a/R/plot_mvgam_series.R +++ b/R/plot_mvgam_series.R @@ -1,24 +1,21 @@ #'Plot observed time series used for mvgam modelling #' -#'This function takes either a fitted \code{mvgam} object or a \code{data_train} object +#'This function takes either a fitted \code{mvgam} object or a \code{data.frame} object #'and produces plots of observed time series, ACF, CDF and histograms for exploratory data analysis #' -#'@param object Optional \code{list} object returned from \code{mvgam}. Either \code{object} or \code{data_train} +#'@importFrom stats lag +#'@param object Optional \code{list} object returned from \code{mvgam}. Either \code{object} or \code{data} #'must be supplied. -#'@param data Optional \code{dataframe} or \code{list} of training data containing at least 'series' and 'time'. +#'@param data Optional \code{data.frame} or \code{list} of training data containing at least 'series' and 'time'. #'Use this argument if training data have been gathered in the correct format for \code{mvgam} modelling #'but no model has yet been fitted. -#'@param data_train Deprecated. Still works in place of \code{data} but users are recommended to use -#'\code{data} instead for more seamless integration into `R` workflows -#'@param newdata Optional \code{dataframe} or \code{list} of test data containing at least 'series' and 'time' +#'@param newdata Optional \code{data.frame} or \code{list} of test data containing at least 'series' and 'time' #'for the forecast horizon, in addition to any other variables included in the linear predictor of \code{formula}. If #'included, the observed values in the test data are compared to the model's forecast distribution for exploring #'biases in model predictions. -#'@param data_test Deprecated. Still works in place of \code{newdata} but users are recommended to use -#'\code{newdata} instead for more seamless integration into `R` workflows #'@param y Character. What is the name of the outcome variable in the supplied data? Defaults to #'\code{'y'} -#'@param lines Logical. If \code{TRUE}, line plots are used for visualising time series. If +#'@param lines Logical. If \code{TRUE}, line plots are used for visualizing time series. If #'\code{FALSE}, points are used. #'@param series Either a \code{integer} specifying which series in the set is to be plotted or #'the string 'all', which plots all series available in the supplied data @@ -26,9 +23,9 @@ #'a the histogram. Default is to use the number of bins returned by a call to `hist` in base `R` #'@param log_scale \code{logical}. If \code{series == 'all'}, this flag is used to control whether #'the time series plot is shown on the log scale (using `log(Y + 1)`). This can be useful when -#'visualising many series that may have different observed ranges. Default is \code{FALSE} -#'@author Nicholas J Clark -#'@return A set of base \code{R} graphics plots. If \code{series} is an integer, the plots will +#'visualizing many series that may have different observed ranges. Default is \code{FALSE} +#'@author Nicholas J Clark and Matthijs Hollanders +#'@return A set of ggplot objects. If \code{series} is an integer, the plots will #'show observed time series, autocorrelation and #'cumulative distribution functions, and a histogram for the series. If \code{series == 'all'}, #'a set of observed time series plots is returned in which all series are shown on each plot but @@ -48,9 +45,7 @@ #'@export plot_mvgam_series = function(object, data, - data_train, newdata, - data_test, y = 'y', lines = TRUE, series = 1, @@ -84,11 +79,13 @@ plot_mvgam_series = function(object, # Check variables in data / data_train if(!missing("data")){ data_train <- data + } else { + data_train <- rlang::missing_arg() } -# Choose models over data if both supplied + # Choose models over data if both supplied if(!missing(object)){ - if(!missing(data_train)){ + if(!missing("data")){ warning('both "object" and "data" were supplied; only using "object"') } data_train <- object$obs_data @@ -140,9 +137,11 @@ plot_mvgam_series = function(object, # Drop unused levels in data_train data_train$series <- droplevels(data_train$series) -# Check variables in newdata / data_test + # Check variables in newdata / data_test if(!missing(newdata)){ data_test <- newdata + } else { + data_test <- rlang::missing_arg() } if(!missing(data_test)){ @@ -180,323 +179,149 @@ plot_mvgam_series = function(object, data_test$series <- droplevels(data_test$series) } + # only plot time series if(series == 'all'){ - n_plots <- length(levels(data_train$series)) - pages <- 1 - - if (n_plots > 4) pages <- 2 - if (n_plots > 8) pages <- 3 - if (n_plots > 12) pages <- 4 - if (pages != 0) { - ppp <- n_plots %/% pages - - if (n_plots %% pages != 0) { - ppp <- ppp + 1 - while (ppp * (pages - 1) >= n_plots) pages <- pages - 1 - } - # Configure layout matrix - c <- r <- trunc(sqrt(ppp)) - if (c<1) r <- c <- 1 - if (c*r < ppp) c <- c + 1 - if (c*r < ppp) r <- r + 1 - - .pardefault <- par(no.readonly=T) - on.exit(par(.pardefault)) - oldpar <- par(mfrow = c(r,c)) - - } else { ppp <- 1; oldpar <- par()} - - all_ys <- lapply(seq_len(n_plots), function(x){ - if(log_scale){ - log( data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == levels(data_train$series)[x]) %>% - dplyr::pull(y) + 1) - } else { - data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == levels(data_train$series)[x]) %>% - dplyr::pull(y) - } - }) - - for(i in 1:n_plots){ - s_name <- levels(data_train$series)[i] - - truth <- data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% - dplyr::filter(series == s_name) %>% - dplyr::select(time, y) %>% - dplyr::distinct() %>% - dplyr::arrange(time) %>% - dplyr::pull(y) - - if(log_scale){ - truth <- log(truth + 1) - ylab <- paste0('log(', y,' + 1)') - } else { - ylab <- paste0(y) - } - - plot(1, type = "n", bty = 'L', - xlab = 'Time', - ylab = ylab, - xaxt = 'n', - ylim = range(unlist(all_ys), na.rm = TRUE), - xlim = c(0, length(c(truth)))) - axis(side = 1, - at = floor(seq(0, max(data_train$time) - - (min(data_train$time)-1), - length.out = 6)), - labels = floor(seq(min(data_train$time), - max(data_train$time), - length.out = 6))) - title(s_name, line = 0) - - if(n_plots > 1){ - if(lines){ - for(x in 1:n_plots){ - lines(all_ys[[x]], lwd = 1.85, col = 'grey85') - } - } else { - for(x in 1:n_plots){ - points(all_ys[[x]], col = 'grey85', pch = 16) - } - } + # create tibble + dat <- dplyr::as_tibble(data_train) %>% + dplyr::distinct(time, y, series) - } - - if(lines){ - lines(x = 1:length(truth), y = truth, lwd = 3, col = "white") - lines(x = 1:length(truth), y = truth, lwd = 2.5, col = "#8F2727") - } else { - points(x = 1:length(truth), y = truth, cex = 1.2, col = "white", pch = 16) - points(x = 1:length(truth), y = truth, cex = 0.9, col = "#8F2727", pch = 16) - } - - box(bty = 'L', lwd = 2) + # log transform and y-axis labels + if(log_scale){ + dat$y <- log(dat$y + 1) + ylab <- paste0('log(', y,' + 1)') + } else { + ylab <- paste0(y) + } + # create faceted time serires + plot_ts <- dat %>% + ggplot2::ggplot(ggplot2::aes(time, y)) + + ggplot2::facet_wrap(~ series) + + ggplot2::labs(x = "Time", + y = ylab) + + ggplot2::theme_bw() + + if (lines) { + plot_ts <- plot_ts + ggplot2::geom_line(colour = "#8F2727", + linewidth = 0.75) + } else { + plot_ts <- plot_ts + ggplot2::geom_point(colour = "#8F2727") } - layout(1) + plot_ts + # multiple plots for one time series } else { + # series name s_name <- levels(data_train$series)[series] - truth <- data.frame(y = data_train$y, - series = data_train$series, - time = data_train$time) %>% + + # training data + dat <- dplyr::as_tibble(data_train) %>% dplyr::filter(series == s_name) %>% - dplyr::select(time, y) %>% - dplyr::distinct() %>% - dplyr::arrange(time) %>% - dplyr::pull(y) + dplyr::distinct(time, y) %>% + dplyr::mutate(data = "train") + + # optionally bind test data + if (!missing(data_test)) { + dat <- dplyr::bind_rows(dat, + dplyr::as_tibble(data_test) %>% + dplyr::filter(series == s_name) %>% + dplyr::distinct(time, y) %>% + dplyr::mutate(data = "test")) + } - if(log_scale){ - truth <- log(truth + 1) + # log-transform y + if (log_scale) { + truth$y <- log(truth$y + 1) ylab <- paste0('log(', y,' + 1)') } else { ylab <- paste0(y) } - .pardefault <- par(no.readonly=T) - on.exit(par(.pardefault)) - layout(matrix(1:4, nrow = 2, byrow = TRUE)) - - if(!missing(data_test)){ - test <- data.frame(y = data_test$y, - series = data_test$series, - time = data_test$time) %>% - dplyr::filter(series == s_name) %>% - dplyr::select(time, y) %>% - dplyr::distinct() %>% - dplyr::arrange(time) %>% - dplyr::pull(y) - - plot(1, type = "n", bty = 'L', - xlab = '', - ylab = '', - xaxt = 'n', - ylim = range(c(truth, test), na.rm = TRUE), - xlim = c(0, length(c(truth, test)))) - axis(side = 1, - at = floor(seq(0, max(data_test$time) - - (min(data_train$time)-1), - length.out = 6)), - labels = floor(seq(min(data_train$time), - max(data_test$time), - length.out = 6))) - - title('Time series', line = 0) - title(ylab = ylab, line = 1.85) - title(xlab = "Time", line = 1.85) - - if(lines){ - lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727") - lines(x = (length(truth)+1):length(c(truth, test)), - y = test, lwd = 2, col = "black") + # time series + if (!missing(data_test)) { + plot_ts <- dat %>% + ggplot2::ggplot(ggplot2::aes(time, y, colour = data)) + + ggplot2::geom_vline(xintercept = dat %>% + dplyr::filter(data == "test") %>% + dplyr::pull(time) %>% + min(), + linetype = "dashed", + colour = "black") + + ggplot2::scale_colour_manual(values = c("black", "#8F2727")) + + ggplot2::labs(title = "Time series", + x = "Time", + y = ylab) + + ggplot2::theme_bw() + if (lines) { + plot_ts <- plot_ts + ggplot2::geom_line(show.legend = F, + linewidth = 0.75) } else { - points(x = 1:length(truth), y = truth, pch = 16, col = "#8F2727") - points(x = (length(truth)+1):length(c(truth, test)), - y = test, pch = 16, col = "black") - } - - abline(v = length(truth)+1, col = '#FFFFFF60', lwd = 2.85) - abline(v = length(truth)+1, col = 'black', lwd = 2.5, lty = 'dashed') - box(bty = 'L', lwd = 2) - - if(missing(n_bins)){ - n_bins <- max(c(length(hist(c(truth, test), plot = F)$breaks), - 20)) - } - - hist(c(truth, test), border = "#8F2727", - lwd = 2, - freq = FALSE, - breaks = n_bins, - col = "#C79999", - ylab = '', - xlab = '', - main = '') - title('Histogram', line = 0) - title(ylab = 'Density', line = 1.85) - title(xlab = paste0(y), line = 1.85) - - acf(c(truth, test), - na.action = na.pass, bty = 'L', - lwd = 2.5, ci.col = 'black', col = "#8F2727", - main = '', ylab = '', xlab = '') - acf1 <- acf(c(truth, test), plot = F, - na.action = na.pass) - clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used) - abline(h = clim, col = '#FFFFFF', lwd = 2.85) - abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed') - abline(h = -clim, col = '#FFFFFF', lwd = 2.85) - abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed') - box(bty = 'L', lwd = 2) - title('ACF', line = 0) - title(ylab = 'Autocorrelation', line = 1.85) - title(xlab = 'Lag', line = 1.85) - - ecdf_plotdat = function(vals, x){ - func <- ecdf(vals) - func(x) + plot_ts <- plot_ts + ggplot2::geom_point(show.legend = F) } - - plot_x <- seq(from = min(c(truth, test), na.rm = T), - to = max(c(truth, test), na.rm = T), - length.out = 100) - - plot(1, type = "n", bty = 'L', - xlab = '', - ylab = '', - xlim = c(min(plot_x), max(plot_x)), - ylim = c(0, 1)) - title('CDF', line = 0) - title(ylab = 'Empirical CDF', line = 1.85) - title(xlab = paste0(y), line = 1.85) - lines(x = plot_x, - y = ecdf_plotdat(c(truth, test), - plot_x), - col = "#8F2727", - lwd = 2.5) - box(bty = 'L', lwd = 2) - } else { - plot(1, type = "n", bty = 'L', - xlab = '', - ylab = '', - xaxt = 'n', - ylim = range(c(truth), na.rm = TRUE), - xlim = c(0, length(c(truth)))) - if(max(data_train$time < 6)){ - axis(side = 1, - at = 0:(max(data_train$time) - 1), - labels = 1:max(data_train$time)) - } else { - axis(side = 1, - at = floor(seq(0, max(data_train$time) - - (min(data_train$time)-1), - length.out = 6)), - labels = floor(seq(min(data_train$time), - max(data_train$time), - length.out = 6))) - } - - title('Time series', line = 0) - title(ylab = ylab, line = 1.85) - title(xlab = "Time", line = 1.85) - - if(lines){ - lines(x = 1:length(truth), y = truth, lwd = 2, col = "#8F2727") + plot_ts <- dat %>% + ggplot2::ggplot(ggplot2::aes(time, y)) + + ggplot2::labs(title = "Time series", + x = "Time", + y = ylab) + + ggplot2::theme_bw() + if (lines) { + plot_ts <- plot_ts + ggplot2::geom_line(colour = "#8F2727", + linewidth = 0.75) } else { - points(x = 1:length(truth), y = truth, pch = 16, col = "#8F2727") - } - - box(bty = 'L', lwd = 2) - - if(missing(n_bins)){ - n_bins <- max(c(length(hist(c(truth), plot = F)$breaks), - 20)) + plot_ts <- plot_ts + ggplot2::geom_point(colour = "#8F2727") } - - hist(c(truth), border = "#8F2727", - lwd = 2, - freq = FALSE, - breaks = n_bins, - col = "#C79999", - ylab = '', - xlab = '', - main = '') - title('Histogram', line = 0) - title(ylab = 'Density', line = 1.85) - title(xlab = paste0(y), line = 1.85) - - - acf(c(truth), - na.action = na.pass, bty = 'L', - lwd = 2.5, ci.col = 'black', col = "#8F2727", - main = '', ylab = '', xlab = '') - acf1 <- acf(c(truth), plot = F, - na.action = na.pass) - clim <- qnorm((1 + .95)/2)/sqrt(acf1$n.used) - abline(h = clim, col = '#FFFFFF', lwd = 2.85) - abline(h = clim, col = 'black', lwd = 2.5, lty = 'dashed') - abline(h = -clim, col = '#FFFFFF', lwd = 2.85) - abline(h = -clim, col = 'black', lwd = 2.5, lty = 'dashed') - box(bty = 'L', lwd = 2) - title('ACF', line = 0) - title(ylab = 'Autocorrelation', line = 1.85) - title(xlab = 'Lag', line = 1.85) - - - ecdf_plotdat = function(vals, x){ - func <- ecdf(vals) - func(x) - } - - plot_x <- seq(from = min(truth, na.rm = T), - to = max(truth, na.rm = T), - length.out = 100) - plot(1, type = "n", bty = 'L', - xlab = '', - ylab = '', - xlim = c(min(plot_x), max(plot_x)), - ylim = c(0, 1)) - title('CDF', line = 0) - title(ylab = 'Empirical CDF', line = 1.85) - title(xlab = paste0(y), line = 1.85) - lines(x = plot_x, - y = ecdf_plotdat(truth, - plot_x), - col = "#8F2727", - lwd = 2.5) - box(bty = 'L', lwd = 2) } + # histogram + if(missing(n_bins)){ + n_bins <- max(c(length(hist(c(dat$y), plot = F)$breaks), + 20)) + } + plot_hist <- dat %>% + ggplot2::ggplot(ggplot2::aes(y)) + + ggplot2::geom_histogram(bins = n_bins, fill = "#8F2727", + col = 'white') + + ggplot2::labs(title = "Histogram", + x = "y", + y = "Count") + + ggplot2::theme_bw() + + # ACF + acf_y <- acf(dat$y, plot = F, na.action = na.pass) + plot_acf <- data.frame(acf = acf_y$acf[,,1], + lag = acf_y$lag[,1,1]) %>% + ggplot2::ggplot(ggplot2::aes(x = lag, y = 0, yend = acf)) + + ggplot2::geom_hline(yintercept = c(-1, 1) * qnorm((1 + 0.95) / 2) / sqrt(acf_y$n.used), + linetype = "dashed") + + ggplot2::geom_segment(colour = "#8F2727", + linewidth = 0.75) + + ggplot2::labs(title = "ACF", + x = "Lag", + y = "Autocorrelation") + + ggplot2::theme_bw() + + # ECDF plot + range_y <- range(dat$y, na.rm = T) + plot_ecdf <- data.frame(x = seq(range_y[1], range_y[2], length.out = 100)) %>% + dplyr::mutate(y = ecdf(dat$y)(x)) %>% + ggplot2::ggplot(ggplot2::aes(x, y)) + + ggplot2::geom_line(colour = "#8F2727", + linewidth = 0.75) + + ggplot2::scale_y_continuous(limits = c(0, 1)) + + ggplot2::labs(title = "CDF", + x = "y", + y = "Empirical CDF") + + ggplot2::theme_bw() + + # plot + patchwork::wrap_plots(plot_ts, + plot_hist, + plot_acf, + plot_ecdf, + ncol = 2, + nrow = 2, + byrow = T) } - - layout(1) } diff --git a/README.md b/README.md index dbbf9ea6..daec7711 100644 --- a/README.md +++ b/README.md @@ -243,37 +243,38 @@ summary(lynx_mvgam) #> #> GAM coefficient (beta) estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> (Intercept) 6.400 6.60 6.900 1.00 788 -#> s(season).1 -0.600 -0.13 0.380 1.00 1208 -#> s(season).2 0.740 1.30 2.000 1.01 772 -#> s(season).3 1.200 1.90 2.500 1.00 908 -#> s(season).4 -0.062 0.54 1.100 1.00 1262 -#> s(season).5 -1.300 -0.71 -0.062 1.00 910 -#> s(season).6 -1.300 -0.55 0.085 1.00 670 -#> s(season).7 0.047 0.71 1.400 1.01 970 -#> s(season).8 0.640 1.40 2.100 1.00 821 -#> s(season).9 -0.360 0.23 0.810 1.00 995 -#> s(season).10 -1.400 -0.86 -0.370 1.00 1184 +#> (Intercept) 6.400 6.60 6.900 1.01 765 +#> s(season).1 -0.580 -0.12 0.370 1.00 960 +#> s(season).2 0.730 1.30 1.900 1.00 972 +#> s(season).3 1.200 1.90 2.600 1.01 739 +#> s(season).4 -0.110 0.54 1.200 1.00 848 +#> s(season).5 -1.300 -0.69 -0.062 1.00 747 +#> s(season).6 -1.300 -0.56 0.110 1.00 1024 +#> s(season).7 0.041 0.70 1.400 1.00 934 +#> s(season).8 0.650 1.40 2.100 1.01 700 +#> s(season).9 -0.360 0.21 0.830 1.00 731 +#> s(season).10 -1.400 -0.88 -0.360 1.00 1074 #> #> Approximate significance of GAM smooths: #> edf Ref.df Chi.sq p-value -#> s(season) 9.96 10 50.6 <2e-16 *** +#> s(season) 9.96 10 51.7 <2e-16 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> Latent trend parameter AR estimates: #> 2.5% 50% 97.5% Rhat n_eff -#> ar1[1] 0.59 0.83 0.98 1 711 -#> sigma[1] 0.39 0.48 0.61 1 724 +#> ar1[1] 0.61 0.83 0.97 1.01 669 +#> sigma[1] 0.39 0.48 0.60 1.00 794 #> #> Stan MCMC diagnostics: #> n_eff / iter looks reasonable for all parameters #> Rhat looks reasonable for all parameters #> 0 of 2000 iterations ended with a divergence (0%) -#> 0 of 2000 iterations saturated the maximum tree depth of 10 (0%) +#> 1 of 2000 iterations saturated the maximum tree depth of 10 (0.05%) +#> *Run with max_treedepth set to a larger value to avoid saturation #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 22 12:05:29 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Fri Nov 22 1:11:35 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) @@ -415,7 +416,7 @@ plot(lynx_mvgam, type = 'forecast', newdata = lynx_test) Plotting forecast distributions using mvgam in R #> Out of sample DRPS: - #> 2474.07104475 + #> 2467.599535 And the estimated latent trend component, again using the more flexible `plot_mvgam_...()` option to show first derivatives of the estimated @@ -574,7 +575,7 @@ summary(mod, include_betas = FALSE) #> 0 of 2000 iterations saturated the maximum tree depth of 10 (0%) #> E-FMI indicated no pathological behavior #> -#> Samples were drawn using NUTS(diag_e) at Fri Nov 22 12:06:48 PM 2024. +#> Samples were drawn using NUTS(diag_e) at Fri Nov 22 1:12:54 PM 2024. #> For each parameter, n_eff is a crude measure of effective sample size, #> and Rhat is the potential scale reduction factor on split MCMC chains #> (at convergence, Rhat = 1) diff --git a/docs/reference/figures/README-beta_sim-1.png b/docs/reference/figures/README-beta_sim-1.png index c82d4210..e39cf9f4 100644 Binary files a/docs/reference/figures/README-beta_sim-1.png and b/docs/reference/figures/README-beta_sim-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-12-1.png b/docs/reference/figures/README-unnamed-chunk-12-1.png index 8a5e9af4..1e306527 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-12-1.png and b/docs/reference/figures/README-unnamed-chunk-12-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-13-1.png b/docs/reference/figures/README-unnamed-chunk-13-1.png index ab543e99..b424594d 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-13-1.png and b/docs/reference/figures/README-unnamed-chunk-13-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-14-1.png b/docs/reference/figures/README-unnamed-chunk-14-1.png index ac1d1bbb..da9cd6a1 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-14-1.png and b/docs/reference/figures/README-unnamed-chunk-14-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-15-1.png b/docs/reference/figures/README-unnamed-chunk-15-1.png index fedb893d..f6742ea6 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-15-1.png and b/docs/reference/figures/README-unnamed-chunk-15-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-16-1.png b/docs/reference/figures/README-unnamed-chunk-16-1.png index 302e3b13..c66cf177 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-16-1.png and b/docs/reference/figures/README-unnamed-chunk-16-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-17-1.png b/docs/reference/figures/README-unnamed-chunk-17-1.png index f09caae0..c99430d0 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-17-1.png and b/docs/reference/figures/README-unnamed-chunk-17-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-18-1.png b/docs/reference/figures/README-unnamed-chunk-18-1.png index 82b35e48..4b3ee8b8 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-18-1.png and b/docs/reference/figures/README-unnamed-chunk-18-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-19-1.png b/docs/reference/figures/README-unnamed-chunk-19-1.png index e4eb7845..b94bfa59 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-19-1.png and b/docs/reference/figures/README-unnamed-chunk-19-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-20-1.png b/docs/reference/figures/README-unnamed-chunk-20-1.png index e208435c..39dad857 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-20-1.png and b/docs/reference/figures/README-unnamed-chunk-20-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-21-1.png b/docs/reference/figures/README-unnamed-chunk-21-1.png index 249bff6d..9cf40aa5 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-21-1.png and b/docs/reference/figures/README-unnamed-chunk-21-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-22-1.png b/docs/reference/figures/README-unnamed-chunk-22-1.png index 0d4f897c..63ee45e3 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-22-1.png and b/docs/reference/figures/README-unnamed-chunk-22-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-23-1.png b/docs/reference/figures/README-unnamed-chunk-23-1.png index 48cacabe..40a9c459 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-23-1.png and b/docs/reference/figures/README-unnamed-chunk-23-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-24-1.png b/docs/reference/figures/README-unnamed-chunk-24-1.png index ed0830a9..dafbad43 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-24-1.png and b/docs/reference/figures/README-unnamed-chunk-24-1.png differ diff --git a/docs/reference/figures/README-unnamed-chunk-8-1.png b/docs/reference/figures/README-unnamed-chunk-8-1.png index d5d13117..f1e45ee8 100644 Binary files a/docs/reference/figures/README-unnamed-chunk-8-1.png and b/docs/reference/figures/README-unnamed-chunk-8-1.png differ diff --git a/docs/reference/mvgam-package.html b/docs/reference/mvgam-package.html index 29d8c977..bde7b542 100644 --- a/docs/reference/mvgam-package.html +++ b/docs/reference/mvgam-package.html @@ -79,6 +79,7 @@

See alsoAuthor

Maintainer: Nicholas J Clark nicholas.j.clark1214@gmail.com (ORCID)

Other contributors: