Skip to content

Commit

Permalink
Merge pull request #214 from spsanderson/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
spsanderson authored Jun 13, 2022
2 parents df21181 + a7b158a commit dc72a1b
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 80 deletions.
15 changes: 15 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# TidyDensity (development version)

## Breaking Changes
None

## New Features
None

## Minor Fixes and Improvments
1. Fix #210 - Fix param_grid order on internal which affected attributes and thus
the display of the order of the parameters.
2. Fix #211 - Add High and Low CI to `tidy_distribution_summary_tbl()`
3. Fix #213 - Use `purrr::compact()` on the list of distributions passed in order
to prevent the issue occurring in #212
4. Fix #212 - Make `tidy_distribution_comparison()` more robust in terms of handling
bad or erroneous data.

# TidyDensity 1.2.0

## Breaking Changes
Expand Down
2 changes: 2 additions & 0 deletions R/combine-tidy-distributions-tbl.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ tidy_combine_distributions <- function(...){
# Add distributions to a list
dist_list <- list(...)

dist_list <- purrr::compact(dist_list)

# Checks ----
if (length(dist_list) < 2){
rlang::abort(
Expand Down
2 changes: 1 addition & 1 deletion R/random-tidy-uniform.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ tidy_uniform <- function(.n = 50, .min = 0, .max = 1, .num_sims = 1) {
tidyr::unnest(cols = c(x, y, d, p, q)) %>%
dplyr::ungroup()

param_grid <- dplyr::tibble(.max, .min)
param_grid <- dplyr::tibble(.min, .max)

# Attach descriptive attributes to tibble
attr(df, "distribution_family_type") <- "continuous"
Expand Down
4 changes: 3 additions & 1 deletion R/tidy_distribution_summary_tbl.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ tidy_distribution_summary_tbl <- function(.data, ...) {
kurtosis = tidy_kurtosis_vec(y),
range = tidy_range_statistic(y),
iqr = stats::IQR(y),
variance = stats::var(y)
variance = stats::var(y),
ci_low = ci_lo(y),
ci_high = ci_hi(y)
) %>%
dplyr::ungroup()

Expand Down
232 changes: 154 additions & 78 deletions R/util-distribution-comparison.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,96 +66,172 @@ tidy_distribution_comparison <- function(.x, .distribution_type = "continuous"){

# Get parameter estimates for distributions
if (dist_type == "continuous"){
b <- util_beta_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "NIST_MME") %>%
dplyr::select(dist_type, shape1, shape2) %>%
purrr::set_names("dist_type", "param_1", "param_2")

c <- util_cauchy_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, location, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2")

e <- util_exponential_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, rate) %>%
dplyr::mutate(param_2 = NA) %>%
purrr::set_names("dist_type", "param_1", "param_2")

g <- util_gamma_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "NIST_MME") %>%
dplyr::select(dist_type, shape, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2")

l <- util_logistic_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "EnvStats_MME") %>%
dplyr::select(dist_type, location, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2")

ln <- util_lognormal_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "EnvStats_MME") %>%
dplyr::select(dist_type, mean_log, sd_log) %>%
purrr::set_names("dist_type", "param_1", "param_2")

p <- util_pareto_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "MLE") %>%
dplyr::select(dist_type, shape, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2")

u <- util_uniform_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "NIST_MME") %>%
dplyr::select(dist_type, min_est, max_est) %>%
purrr::set_names("dist_type", "param_1", "param_2")

w <- util_weibull_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, shape, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2")
b <- try(util_beta_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "NIST_MME") %>%
dplyr::select(dist_type, shape1, shape2) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(b, "try-error")){
tb <- tidy_beta(.n = n, .shape1 = round(b[[2]], 2), .shape2 = round(b[[3]], 2))
}

c <- try(util_cauchy_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, location, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(c, "try-error")){
tc <- tidy_cauchy(.n = n, .location = round(c[[2]], 2), .scale = round(c[[3]], 2))
}

e <- try(util_exponential_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, rate) %>%
dplyr::mutate(param_2 = NA) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(e, "try-error")){
te <- tidy_exponential(.n = n, .rate = round(e[[2]], 2))
}

g <- try(util_gamma_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "NIST_MME") %>%
dplyr::select(dist_type, shape, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(g, "try-error")){
tg <- tidy_gamma(.n = n, .shape = round(g[[2]], 2), .scale = round(g[[3]], 2))
}

l <- try(util_logistic_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "EnvStats_MME") %>%
dplyr::select(dist_type, location, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(l, "try-error")){
tl <- tidy_logistic(.n = n, .location = round(l[[2]], 2), .scale = round(l[[3]], 2))
}

ln <- try(util_lognormal_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "EnvStats_MME") %>%
dplyr::select(dist_type, mean_log, sd_log) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(ln, "try-error")){
tln <- tidy_lognormal(.n = n, .meanlog = round(ln[[2]], 2), .sdlog = round(ln[[3]], 2))
}

p <- try(util_pareto_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "MLE") %>%
dplyr::select(dist_type, shape, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(p, "try-error")){
tp <- tidy_pareto(.n = n, .shape = round(p[[2]], 2), .scale = round(p[[3]], 2))
}

u <- try(util_uniform_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "NIST_MME") %>%
dplyr::select(dist_type, min_est, max_est) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(u, "try-error")){
tu <- tidy_uniform(.n = n, .min = round(u[[2]], 2), .max = round(u[[3]], 2))
}

w <- try(util_weibull_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, shape, scale) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(w, "try-error")){
tw <- tidy_weibull(.n = n, .shape = round(w[[2]], 2), .scale = round(w[[3]], 2))
}

comp_tbl <- tidy_combine_distributions(
tidy_empirical(x_term, .distribution_type = dist_type),
tidy_beta(.n = n, .shape1 = round(b[[2]], 2), .shape2 = round(b[[3]], 2)),
tidy_cauchy(.n = n, .location = round(c[[2]], 2), .scale = round(c[[3]], 2)),
tidy_exponential(.n = n, .rate = round(e[[2]], 2)),
tidy_gamma(.n = n, .shape = round(g[[2]], 2), .scale = round(g[[3]], 2)),
tidy_logistic(.n = n, .location = round(l[[2]], 2), .scale = round(l[[3]], 2)),
tidy_lognormal(.n = n, .meanlog = round(ln[[2]], 2), .sdlog = round(ln[[3]], 2)),
tidy_pareto(.n = n, .shape = round(p[[2]], 2), .scale = round(p[[3]], 2)),
tidy_uniform(.n = n, .min = round(u[[2]], 2), .max = round(u[[3]], 2)),
tidy_weibull(.n = n, .shape = round(w[[2]], 2), .scale = round(w[[3]], 2))
if (exists("tb") && nrow(tb) > 0){tb},
if (exists("tc") && nrow(tb) > 0){tc},
if (exists("te") && nrow(tb) > 0){te},
if (exists("tg") && nrow(tb) > 0){tg},
if (exists("tl") && nrow(tb) > 0){tl},
if (exists("tln") && nrow(tb) > 0){tln},
if (exists("tp") && nrow(tb) > 0){tp},
if (exists("tu") && nrow(tb) > 0){tu},
if (exists("tw") && nrow(tb) > 0){tw}
)

# comp_tbl <- tidy_combine_distributions(
# tidy_empirical(x_term, .distribution_type = dist_type),
# tidy_beta(.n = n, .shape1 = round(b[[2]], 2), .shape2 = round(b[[3]], 2)),
# tidy_cauchy(.n = n, .location = round(c[[2]], 2), .scale = round(c[[3]], 2)),
# tidy_exponential(.n = n, .rate = round(e[[2]], 2)),
# tidy_gamma(.n = n, .shape = round(g[[2]], 2), .scale = round(g[[3]], 2)),
# tidy_logistic(.n = n, .location = round(l[[2]], 2), .scale = round(l[[3]], 2)),
# tidy_lognormal(.n = n, .meanlog = round(ln[[2]], 2), .sdlog = round(ln[[3]], 2)),
# tidy_pareto(.n = n, .shape = round(p[[2]], 2), .scale = round(p[[3]], 2)),
# tidy_uniform(.n = n, .min = round(u[[2]], 2), .max = round(u[[3]], 2)),
# tidy_weibull(.n = n, .shape = round(w[[2]], 2), .scale = round(w[[3]], 2))
# )
} else {
bn <- util_binomial_param_estimate(trunc(tidy_scale_zero_one_vec(x_term)))$parameter_tbl %>%
dplyr::select(dist_type, size, prob) %>%
purrr::set_names("dist_type", "param_1", "param_2")

ge <- util_geometric_param_estimate(x_term)$parameter_tbl %>%
dplyr::filter(method == "EnvStats_MME") %>%
dplyr::select(dist_type, shape) %>%
dplyr::mutate(param_2 = NA) %>%
purrr::set_names("dist_type", "param_1", "param_2")

h <- util_hypergeometric_param_estimate(.x = x_term, .total = n, .k = n)$parameter_tbl %>%
tidyr::drop_na() %>%
dplyr::slice(1) %>%
dplyr::select(dist_type, m, total) %>%
purrr::set_names("dist_type", "param_1", "param_2")

po <- util_poisson_param_estimate(x_term)$parameter_tbl %>%
dplyr::select(dist_type, lambda) %>%
dplyr::mutate(param_2 = NA) %>%
purrr::set_names("dist_type", "param_1", "param_2")
bn <- try(util_binomial_param_estimate(trunc(tidy_scale_zero_one_vec(x_term)))$parameter_tbl %>%
dplyr::select(dist_type, size, prob) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

comp_tbl <- tidy_combine_distributions(
tidy_empirical(.x = x_term, .distribution_type = dist_type),
tidy_binomial(.n = n, .size = round(bn[[2]], 2), .prob = round(bn[[3]], 2)),
tidy_geometric(.n = n, .prob = round(ge[[2]], 2)),
tidy_hypergeometric(
if (!inherits(bn, "try-error")){
tb <- tidy_binomial(.n = n, .size = round(bn[[2]], 2), .prob = round(bn[[3]], 2))
}

ge <- try(util_geometric_param_estimate(trunc(x_term))$parameter_tbl %>%
dplyr::filter(method == "EnvStats_MME") %>%
dplyr::select(dist_type, shape) %>%
dplyr::mutate(param_2 = NA) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(ge, "try-error")){
tg <- tidy_geometric(.n = n, .prob = round(ge[[2]], 2))
}

h <- try(util_hypergeometric_param_estimate(.x = trunc(x_term), .total = n, .k = n)$parameter_tbl %>%
tidyr::drop_na() %>%
dplyr::slice(1) %>%
dplyr::select(dist_type, m, total) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(h, "try-error")){
th <- tidy_hypergeometric(
.n = n,
.m = trunc(h[[2]]),
.nn = n - trunc(h[[2]]),
.k = trunc(h[[2]])
),
tidy_poisson(.n = n, .lambda = round(po[[2]], 2))
)
}

po <- try(util_poisson_param_estimate(trunc(x_term))$parameter_tbl %>%
dplyr::select(dist_type, lambda) %>%
dplyr::mutate(param_2 = NA) %>%
purrr::set_names("dist_type", "param_1", "param_2"))

if (!inherits(po, "try-error")){
tp <- tidy_poisson(.n = n, .lambda = round(po[[2]], 2))
}

comp_tbl <- tidy_combine_distributions(
tidy_empirical(.x = x_term, .distribution_type = dist_type),
if (exists("tb") && nrow(tb) > 0){tb},
if (exists("tg") && nrow(tb) > 0){tg},
if (exists("th") && nrow(tb) > 0){th},
if (exists("tp") && nrow(tb) > 0){tp}
)
# comp_tbl <- tidy_combine_distributions(
# tidy_empirical(.x = x_term, .distribution_type = dist_type),
# tidy_binomial(.n = n, .size = round(bn[[2]], 2), .prob = round(bn[[3]], 2)),
# tidy_geometric(.n = n, .prob = round(ge[[2]], 2)),
# tidy_hypergeometric(
# .n = n,
# .m = trunc(h[[2]]),
# .nn = n - trunc(h[[2]]),
# .k = trunc(h[[2]])
# ),
# tidy_poisson(.n = n, .lambda = round(po[[2]], 2))
# )

}

Expand Down

0 comments on commit dc72a1b

Please sign in to comment.