From 66e41dfa656e43cb039f19c32786ff4fff6b3531 Mon Sep 17 00:00:00 2001 From: mitchelloharawild Date: Fri, 13 Sep 2024 23:36:37 +1000 Subject: [PATCH] Added g-k distribution Ref: #84 --- DESCRIPTION | 3 +- NAMESPACE | 7 ++ NEWS.md | 4 ++ R/dist_gk.R | 117 ++++++++++++++++++++++++++++++++++ man/dist_gk.Rd | 78 +++++++++++++++++++++++ tests/testthat/test-dist-gk.R | 36 +++++++++++ 6 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 R/dist_gk.R create mode 100644 man/dist_gk.Rd create mode 100644 tests/testthat/test-dist-gk.R diff --git a/DESCRIPTION b/DESCRIPTION index 0374cf6e..dd0cae20 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,8 @@ Imports: numDeriv, utils, lifecycle, - pillar + pillar, + gk Suggests: testthat (>= 2.1.0), covr, diff --git a/NAMESPACE b/NAMESPACE index a90671a8..79f6317c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ S3method(cdf,dist_exponential) S3method(cdf,dist_f) S3method(cdf,dist_gamma) S3method(cdf,dist_geometric) +S3method(cdf,dist_gk) S3method(cdf,dist_gumbel) S3method(cdf,dist_hypergeometric) S3method(cdf,dist_inflated) @@ -108,6 +109,7 @@ S3method(density,dist_exponential) S3method(density,dist_f) S3method(density,dist_gamma) S3method(density,dist_geometric) +S3method(density,dist_gk) S3method(density,dist_gumbel) S3method(density,dist_hypergeometric) S3method(density,dist_inflated) @@ -154,6 +156,7 @@ S3method(format,dist_exponential) S3method(format,dist_f) S3method(format,dist_gamma) S3method(format,dist_geometric) +S3method(format,dist_gk) S3method(format,dist_gumbel) S3method(format,dist_hypergeometric) S3method(format,dist_inflated) @@ -199,6 +202,7 @@ S3method(generate,dist_exponential) S3method(generate,dist_f) S3method(generate,dist_gamma) S3method(generate,dist_geometric) +S3method(generate,dist_gk) S3method(generate,dist_gumbel) S3method(generate,dist_hypergeometric) S3method(generate,dist_inflated) @@ -272,6 +276,7 @@ S3method(log_density,dist_exponential) S3method(log_density,dist_f) S3method(log_density,dist_gamma) S3method(log_density,dist_geometric) +S3method(log_density,dist_gk) S3method(log_density,dist_gumbel) S3method(log_density,dist_hypergeometric) S3method(log_density,dist_inverse_exponential) @@ -360,6 +365,7 @@ S3method(quantile,dist_exponential) S3method(quantile,dist_f) S3method(quantile,dist_gamma) S3method(quantile,dist_geometric) +S3method(quantile,dist_gk) S3method(quantile,dist_gumbel) S3method(quantile,dist_hypergeometric) S3method(quantile,dist_inflated) @@ -455,6 +461,7 @@ export(dist_exponential) export(dist_f) export(dist_gamma) export(dist_geometric) +export(dist_gk) export(dist_gumbel) export(dist_hypergeometric) export(dist_inflated) diff --git a/NEWS.md b/NEWS.md index 136e5d42..80381c0f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,10 @@ * `support()` now shows whether the interval of support is open or closed (@venpopov, #97) +### Probability distributions + +* Added `dist_gk()` for the g-and-k distributions. + ## Improvements * `dist_mixture()` now displays the components of the mixture when the output diff --git a/R/dist_gk.R b/R/dist_gk.R new file mode 100644 index 00000000..c846003a --- /dev/null +++ b/R/dist_gk.R @@ -0,0 +1,117 @@ +#' The g-and-k Distribution +#' +#' @description +#' `r lifecycle::badge('stable')` +#' +#' The g-and-k distribution is a flexible distribution often used to model univariate data. +#' It is particularly known for its ability to handle skewness and heavy-tailed behavior. +#' +#' @inheritParams gk::dgk +#' +#' @details +#' +#' We recommend reading this documentation on +#' , where the math +#' will render nicely. +#' +#' In the following, let \eqn{X} be a g-k random variable with parameters +#' `A`, `B`, `g`, `k`, and `c`. +#' +#' **Support**: \eqn{(-\infty, \infty)} +#' +#' **Mean**: Not available in closed form. +#' +#' **Variance**: Not available in closed form. +#' +#' **Probability density function (p.d.f)**: +#' +#' The g-k distribution does not have a closed-form expression for its density. Instead, +#' it is defined through its quantile function: +#' +#' \deqn{ +#' Q(u) = A + B \left( 1 + c \frac{1 - \exp(-gz(u))}{1 + \exp(-gz(u))} \right) (1 + z(u)^2)^k z(u) +#' }{ +#' Q(u) = A + B * (1 + c * ((1 - exp(-g * z(u))) / (1 + exp(-g * z(u))))) * (1 + z(u)^2)^k * z(u) +#' } +#' +#' where \eqn{z(u) = \Phi^{-1}(u)}, the standard normal quantile of u. +#' +#' **Cumulative distribution function (c.d.f)**: +#' +#' The cumulative distribution function is typically evaluated numerically due to the lack +#' of a closed-form expression. +#' +#' @seealso [gk::gk] +#' +#' @examples +#' dist <- dist_gk(A = 0, B = 1, g = 0, k = 0.5) +#' dist +#' +#' @examplesIf requireNamespace("gk", quietly = TRUE) +#' mean(dist) +#' variance(dist) +#' support(dist) +#' generate(dist, 10) +#' +#' density(dist, 2) +#' density(dist, 2, log = TRUE) +#' +#' cdf(dist, 4) +#' +#' quantile(dist, 0.7) +#' +#' @name dist_gk +#' @export +dist_gk <- function(A, B, g, k, c = 0.8){ + A <- vec_cast(A, double()) + B <- vec_cast(B, double()) + g <- vec_cast(g, double()) + k <- vec_cast(k, double()) + c <- vec_cast(c, double()) + if(any(B <= 0)){ + abort("The B parameter (scale) of the gk distribution must be strictly positive.") + } + new_dist(A = A, B = B, g = g, k = k, c = c, class = "dist_gk") +} + +#' @export +format.dist_gk <- function(x, digits = 2, ...){ + sprintf( + "gk(A = %s, B = %s, g = %s, k = %s%s)", + format(x[["A"]], digits = digits, ...), + format(x[["B"]], digits = digits, ...), + format(x[["g"]], digits = digits, ...), + format(x[["k"]], digits = digits, ...), + if (x[["c"]]==0.8) "" else paste0(", c = ", format(x[["c"]], digits = digits, ...)) + ) +} + +#' @export +density.dist_gk <- function(x, at, ...){ + require_package("gk") + gk::dgk(at, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) +} + +#' @export +log_density.dist_gk <- function(x, at, ...){ + require_package("gk") + gk::dgk(at, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]], log = TRUE) +} + +#' @export +quantile.dist_gk <- function(x, p, ...){ + require_package("gk") + gk::qgk(p, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) +} + +#' @export +cdf.dist_gk <- function(x, q, ...){ + require_package("gk") + gk::pgk(q, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) +} + +#' @export +generate.dist_gk <- function(x, times, ...){ + require_package("gk") + gk::rgk(times, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) +} diff --git a/man/dist_gk.Rd b/man/dist_gk.Rd new file mode 100644 index 00000000..b5856ca1 --- /dev/null +++ b/man/dist_gk.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist_gk.R +\name{dist_gk} +\alias{dist_gk} +\title{The g-and-k Distribution} +\usage{ +dist_gk(A, B, g, k, c = 0.8) +} +\arguments{ +\item{A}{Vector of A (location) parameters.} + +\item{B}{Vector of B (scale) parameters. Must be positive.} + +\item{g}{Vector of g parameters.} + +\item{k}{Vector of k parameters. Must be at least -0.5.} + +\item{c}{Vector of c parameters. Often fixed at 0.8 which is the default.} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} + +The g-and-k distribution is a flexible distribution often used to model univariate data. +It is particularly known for its ability to handle skewness and heavy-tailed behavior. +} +\details{ +We recommend reading this documentation on +\url{https://pkg.mitchelloharawild.com/distributional/}, where the math +will render nicely. + +In the following, let \eqn{X} be a g-k random variable with parameters +\code{A}, \code{B}, \code{g}, \code{k}, and \code{c}. + +\strong{Support}: \eqn{(-\infty, \infty)} + +\strong{Mean}: Not available in closed form. + +\strong{Variance}: Not available in closed form. + +\strong{Probability density function (p.d.f)}: + +The g-k distribution does not have a closed-form expression for its density. Instead, +it is defined through its quantile function: + +\deqn{ + Q(u) = A + B \left( 1 + c \frac{1 - \exp(-gz(u))}{1 + \exp(-gz(u))} \right) (1 + z(u)^2)^k z(u) + }{ + Q(u) = A + B * (1 + c * ((1 - exp(-g * z(u))) / (1 + exp(-g * z(u))))) * (1 + z(u)^2)^k * z(u) + } + +where \eqn{z(u) = \Phi^{-1}(u)}, the standard normal quantile of u. + +\strong{Cumulative distribution function (c.d.f)}: + +The cumulative distribution function is typically evaluated numerically due to the lack +of a closed-form expression. +} +\examples{ +dist <- dist_gk(A = 0, B = 1, g = 0, k = 0.5) +dist + +\dontshow{if (requireNamespace("gk", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +mean(dist) +variance(dist) +support(dist) +generate(dist, 10) + +density(dist, 2) +density(dist, 2, log = TRUE) + +cdf(dist, 4) + +quantile(dist, 0.7) +\dontshow{\}) # examplesIf} +} +\seealso{ +\link[gk:gk]{gk::gk} +} diff --git a/tests/testthat/test-dist-gk.R b/tests/testthat/test-dist-gk.R new file mode 100644 index 00000000..b9c01d05 --- /dev/null +++ b/tests/testthat/test-dist-gk.R @@ -0,0 +1,36 @@ +test_that("g-k distribution", { + # Define g-k distribution parameters + A <- 0 + B <- 1 + g <- 0 + k <- 0.5 + c <- 0.8 + dist <- dist_gk(A, B, g, k, c) + + # Check formatting + expect_equal(format(dist), "gk(A = 0, B = 1, g = 0, k = 0.5)") + + # Require package installed + skip_if_not_installed("gk", "0.1.0") + + # quantiles + expect_equal(quantile(dist, 0.1), gk::qgk(0.1, A, B, g, k, c)) + expect_equal(quantile(dist, 0.5), gk::qgk(0.5, A, B, g, k, c)) + + # pdf + expect_equal(density(dist, 0), gk::dgk(0, A, B, g, k, c)) + expect_equal(density(dist, 3), gk::dgk(3, A, B, g, k, c)) + + # cdf + expect_equal(cdf(dist, 0), gk::pgk(0, A, B, g, k, c)) + expect_equal(cdf(dist, 3), gk::pgk(3, A, B, g, k, c)) + + # F(Finv(a)) ~= a + expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) + + # Generate random samples + set.seed(123) + samples <- generate(dist, 10) + set.seed(123) + expect_equal(samples[[1L]], gk::rgk(10, A, B, g, k, c)) +})