Skip to content

Commit

Permalink
Merge pull request #418 from spsanderson/development
Browse files Browse the repository at this point in the history
Fixes #417
  • Loading branch information
spsanderson authored Apr 19, 2024
2 parents eed9d67 + 81aeea4 commit a7e762e
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export(tidy_inverse_weibull)
export(tidy_kurtosis_vec)
export(tidy_logistic)
export(tidy_lognormal)
export(tidy_mcmc_sampling)
export(tidy_mixture_density)
export(tidy_multi_dist_autoplot)
export(tidy_multi_single_dist)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ None
1. Fix #405 - Add function `quantile_normalization()` to normalize data using quantiles.
2. Fix #409 - Add function `check_duplilcate_rows()` to check for duplicate rows in a data frame.
3. Fix #414 - Add function `util_chisquare_param_estimate()` to estimate the parameters of the chi-square distribution.
4. Fix #417 - Add function `tidy_mcmc_sampling()` to sample from a distribution using MCMC.
This outputs the function sampled data and a diagnostic plot.

## Minor Fixes and Improvements
1. Fix #401 - Update `tidy_multi_single_dist()` to respect the `.return_tibble` parameter
Expand Down
110 changes: 110 additions & 0 deletions R/utils-mcmc-sampling.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#' Tidy MCMC Sampling
#'
#' Perform MCMC sampling and return tidy data and a plot.
#'
#' @family Utility
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description
#' This function performs Markov Chain Monte Carlo (MCMC) sampling on the input
#' data and returns tidy data and a plot representing the results.
#'
#' @details
#' The function takes a data vector as input and performs MCMC sampling with the
#' specified number of simulations. It applies user-defined functions to each
#' MCMC sample and to the cumulative MCMC samples. The resulting data is
#' formatted in a tidy format, suitable for further analysis. Additionally, a
#' plot is generated to visualize the MCMC samples and cumulative statistics.
#'
#' @param .x The data vector for MCMC sampling.
#' @param .fns The function(s) to apply to each MCMC sample. Default is "mean".
#' @param .cum_fns The function(s) to apply to the cumulative MCMC samples. Default is "cmean".
#' @param .num_sims The number of simulations. Default is 2000.
#'
#' @return A list containing tidy data and a plot.
#'
#' @examples
#' # Generate MCMC samples
#' set.seed(123)
#' data <- rnorm(100)
#' result <- tidy_mcmc_sampling(data, "median", "cmedian", 500)
#' result
#'
#' @rdname tidy_mcmc_sampling
NULL

#' @name tidy_mcmc_sampling
#' @export
tidy_mcmc_sampling <- function(.x, .fns = "mean", .cum_fns = "cmean",
.num_sims = 2000) {

# Error handling for data argument
if (!is.vector(.x)) {
rlang::abort(
message = "Error: '.x' argument must be a vector.",
use_cli_format = TRUE
)
}

# Error handling for function arguments
if (!exists(.fns)) {
rlang::abort(
message = "Error: '.fns' argument must be a valid function name. Make sure
any necessary libraries are loaded.",
use_cli_format = TRUE
)
}

if (!exists(.cum_fns)) {
rlang::abort(
message = "Error: '.cum_fns' argument must be a valid function name. Make sure
any necessary libraries are loaded.",
use_cli_format = TRUE
)
}

# Validate number of simulations
nsims <- ifelse(.num_sims > 0, as.integer(.num_sims), 1L)

fns <- match.fun(.fns)
fns_name <- as.character(.fns)
cum_fns <- match.fun(.cum_fns)
cum_fns_name <- as.character(.cum_fns)
nsims <- as.integer(.num_sims)
fctr_lvl_nms <- c(
paste0(".sample_", fns_name),
paste0(".cum_stat_", cum_fns_name)
)

df <- TidyDensity::tidy_bootstrap(.x = .x, .num_sims = nsims) |>
dplyr::mutate(.sample = purrr::map(bootstrap_samples, \(x) fns(x))) |>
dplyr::select(sim_number, .sample) |>
tidyr::unnest(cols = .sample) |>
dplyr::rename_with(~paste0(., "_", fns_name), .cols = .sample)

mcmc_data <- df |>
dplyr::mutate(.cum_stat = cum_fns(df[[2]])) |>
dplyr::rename_with(~paste0(., "_", cum_fns_name), .cols = .cum_stat) |>
tidyr::pivot_longer(-sim_number) |>
dplyr::mutate(name = factor(name, levels = fctr_lvl_nms))

plt <- mcmc_data |>
ggplot2::ggplot(ggplot2::aes(x = as.numeric(sim_number), y = value)) +
ggplot2::facet_wrap(~name, scales = "free") +
ggplot2::geom_line() +
ggplot2::labs(
y = "Value",
x = "Simulation Number",
title = "MCMC Sampling"
) +
ggplot2::theme_minimal()

# Return
mcmc_list <- list(
mcmc_data = mcmc_data,
plt = plt
)

return(mcmc_list)
}

0 comments on commit a7e762e

Please sign in to comment.