Skip to content

Commit

Permalink
cleaning package code
Browse files Browse the repository at this point in the history
  • Loading branch information
markolalovic committed Jun 6, 2024
1 parent 586b7d1 commit 7c437a6
Show file tree
Hide file tree
Showing 16 changed files with 129 additions and 152 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@
^data$
^man/figures/*\.png$
^CODE_OF_CONDUCT.md
^fix_site.py
^tests/informal$
Empty file removed .nojekyll
Empty file.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Description: Provides an easy framework to simulate survey data commonly analyze
The package also allows for the estimation of parameters from existing survey data to replicate it more accurately.
URL: https://lalovic.io/responsesR, https://github.com/markolalovic/responsesR
BugReports: https://github.com/markolalovic/responsesR/issues
License: MIT + file LICENSE
License: GPL-3
Encoding: UTF-8
LazyData: true
Depends: R (>= 3.3)
Expand Down
21 changes: 0 additions & 21 deletions LICENSE

This file was deleted.

27 changes: 14 additions & 13 deletions R/estimation_of_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' variables assuming that the latent variables are normally distributed.
#'
#' @export
#' @param data survey responses, where the columns correspond to individual items.
#' @param data responses, where the columns correspond to individual items.
#' Apart from this, `data` can be of almost any class such as
#' "data.frame" "matrix" or "array".
#' @param K number of response categories, a vector or a number
Expand All @@ -25,8 +25,8 @@ estimate_parameters <- function(data, K, gamma1=0) {
if (is.numeric(K)) {
K <- rep(K, nitems)
}
mat = matrix(data=NA, nrow=nitems, ncol=2)
for (i in 1:ncol(data)) {
mat <- matrix(data=NA, nrow=nitems, ncol=2)
for (i in seq_len(ncol(data))) {
pk <- prop.table(table(data[,i]))
estimates <- estimate_mu_sd(pk, K[i], gamma1[i])
mat[i, ] <- estimates
Expand Down Expand Up @@ -62,14 +62,14 @@ estimate_mu_sd <- function(pk, K, gamma1=0, trace=FALSE) {
cp <- c("mu"=x[1], "sd"=x[2], "gamma1"=gamma1)
sim <- simulate_responses(K, cp)
xk <- c(-Inf, sim$xk, Inf)
pk <- sapply(as.character(1:K), function(k) { # pad missing levels
pk <- vapply(as.character(seq_len(K)), function(k) { # pad missing levels
ifelse(k %in% names(pk), pk[[k]], 0)
})
}, numeric(1))

if (abs(gamma1) > 0) { # X ~ skew normal
if (!requireNamespace("sn", quietly = TRUE)) {
stop(
"Package \"sn\" must be installed estimate parameters for skew responses.
"Package \"sn\" must be installed to estimate skew normal parameters.
Please run:\n\n \tinstall.packages(\"sn\")\n\n",
call. = FALSE)
}
Expand Down Expand Up @@ -114,16 +114,16 @@ estimate_mu_sd <- function(pk, K, gamma1=0, trace=FALSE) {
}
}
f <- function(x) { # function to find roots
matrix(sapply(1:K, function(k) hk(x, k)))
matrix(vapply(seq_len(K), function(k) { hk(x, k) }, numeric(1)))
}
Df <- function(x) { # Jacobian column wise
matrix(c(sapply(1:K, function(k) dhk_u(x, k)),
sapply(1:K, function(k) dhk_v(x, k))),
matrix(c(vapply(seq_len(K), function(k) { dhk_u(x, k) }, numeric(1)),
vapply(seq_len(K), function(k) { dhk_v(x, k) }, numeric(1))),
ncol=2)
}
x_trace <- c(x[1])
y_trace <- c(x[2])
for (i in 1:niters) {
for (i in seq_len(niters)) {
b <- f(x) # evaluate f
A <- Df(x) # evaluate Jacobian
A.svd <- svd(A) # can be unstable, use SVD
Expand Down Expand Up @@ -163,12 +163,13 @@ plot_contour <- function(f, x_trace, y_trace) {
xgrid <- seq(-3, 3, length.out = xlen) # -5, 5
ygrid <- seq(0.1, 3, length.out = ylen) # 0.1, 10
zvals <- matrix(NA, ncol = xlen, nrow = ylen)
for (i in 1:xlen) {
for (j in 1:ylen) {
for (i in seq_len(xlen)) {
for (j in seq_len(ylen)) {
zvals[i, j] <- norm(f(matrix(c(xgrid[i], ygrid[j]))) , "2")
}
}
graphics::contour(x = xgrid, y = ygrid, z = zvals, col="gray42", xlab = "u", ylab = "v")
graphics::contour(x = xgrid, y = ygrid, z = zvals,
col="gray42", xlab = "u", ylab = "v")
graphics::grid(col = "lightgray", lty = "dotted")
graphics::points(x_trace, y_trace, pch=20, col="blue")
}
22 changes: 2 additions & 20 deletions R/helper_functions.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,13 @@
#' Calculate variance of probabilities `pk`
#'
#' @param pk vector of probabilities
#' @return variance of `pk`
get_pk_var <- function(pk) {
domain <- (1:length(pk))
m <- get_pk_mean(pk)
return(sum(pk * (domain - m)^2))
}

#' Variance of a skew normal distribution
#'
#' @param alpha determines the shape
#' @return variance of a skew-normal distribution
var_skew_normal <- function(alpha) {
return(1 - 2*(delta_skew_normal(alpha)^2)/pi)
}

#' Pad missing levels with zeros
#'
#' @export
#' @param pk proportions or probabilities across possible responses
#' @param K number of response categories
#' @return table of proportions across all possible responses
pad_levels <- function(pk, K) {
pk <- sapply(as.character(1:K), function(k) {
pk <- vapply(as.character(seq_len(K)), function(k) {
ifelse(k %in% names(pk), pk[[k]], 0)
})
}, numeric(1))
return(pk)
}

Expand Down
53 changes: 33 additions & 20 deletions R/simulation_of_responses.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Get responses
#'
#' Generates a sample of random responses based on parameters of latent variables.
#' Returns a sample of random responses based on parameters of latent variables.
#'
#' @export
#' @param n number of observations
Expand All @@ -20,15 +20,17 @@
#' @examples
#' data <- get_responses(n = 5, mu = c(0, 1, 2))
get_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5, R=0) {
inputs <- list(mu, sd, gamma1, K)
nitems <- max(lengths(inputs))
raw_inputs <- list(mu, sd, gamma1, K)
nitems <- max(lengths(raw_inputs))

if (nitems == 1) {
return(get_univariate_responses(n, mu, sd, gamma1, K))
}

# single inputs are repeated to create a vector
inputs <- sapply(inputs, function(input) rep(input, length.out = nitems))
# single raw_inputs are repeated to create a vector
inputs <- vapply(raw_inputs,
function(input) { rep(input, length.out=nitems) },
numeric(nitems))
colnames(inputs) <- c("mu", "sd", "gamma1", "K")

# conditions to deal with correlation input `R`
Expand All @@ -42,7 +44,7 @@ get_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5, R=0) {
get_univariate_responses(
n, input[["mu"]], input[["sd"]], input[["gamma1"]], input[["K"]])
})
colnames(data) <- sapply(1:nitems, function(i) paste("Y", i, sep = ""))
colnames(data) <- sapply(seq_len(nitems), function(i) paste("Y", i, sep=""))
return(data)
}
if (case_2(R) == 1) { # random correlation matrix is used
Expand All @@ -56,7 +58,8 @@ get_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5, R=0) {
} else {
# a correlation matrix must be provided
if(case_4(R) != 1)
stop("Error: correlation R can be a number, \"random\", or a correlation matrix.")
stop("Error: correlation R can be a number, \"random\",
or a correlation matrix.")
}
sigma <- cor2cov(R, inputs[, "sd"])

Expand Down Expand Up @@ -84,10 +87,10 @@ get_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5, R=0) {
c("mu"=0, "sd"=1, "gamma1"=input[["gamma1"]]))
})

data <- sapply(1:nitems, function(i) {
data <- sapply(seq_len(nitems), function(i) {
findInterval(lat[, i], c(-Inf, sims[[i]]$xk, Inf))
})
colnames(data) <- sapply(1:nitems, function(i) paste("Y", i, sep = ""))
colnames(data) <- sapply(seq_len(nitems), function(i) paste("Y", i, sep = ""))
return(data)
}

Expand All @@ -105,7 +108,7 @@ get_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5, R=0) {
#' discrete random variable from which samples are taken.
get_univariate_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5) {
sim <- simulate_responses(K, c("mu" = mu, "sd" = sd, "gamma1" = gamma1))
data <- sample(x = 1:K, size = n, replace = TRUE, prob = sim$pk)
data <- sample(x = seq_len(K), size = n, replace = TRUE, prob = sim$pk)
return(data)
}

Expand All @@ -117,7 +120,7 @@ get_univariate_responses <- function(n=10, mu=0, sd=1, gamma1=0, K=5) {
#'
#' @export
#' @param K number of response categories
#' @param params parameters `xi`, `omega` and `alpha` of skew-normal distribution
#' @param params skew-normal parameters `xi`, `omega` and `alpha`
#' @return lists of probabilities `pk` and cut points `xk`
#' @examples
#' simulate_responses(K = 5, params = c("mu"=0, "sd"=1, "gamma1"=0))
Expand All @@ -141,7 +144,7 @@ simulate_responses <- function(K, params) {

#' Implementation of Lloyd's algorithm
#'
#' Given a probability density function `fX` of a continuous random variable `X`,
#' Given a probability density function `fX` of a random variable `X`,
#' the function returns a discrete random variable with `K` possible outcomes
#' that best approximates `X` in the mean-square or L2 sense by minimizing MSE.
#'
Expand All @@ -157,7 +160,7 @@ run_Lloyd <- function(fX, K) {
niters <- 100 # enough for convergence
pk_means <- c()
pk_MS_errors <- c()
for (i in 1:niters) {
for (i in seq_len(niters)) {
xk <- get_new_xk(rk) # calculate the new cut points
rk <- get_new_rk(xk, fX) # calculate the new representatives
## calculate estimated probabilities, means and distortion measure
Expand All @@ -179,7 +182,7 @@ run_Lloyd <- function(fX, K) {
get_new_xk <- function(rk) {
K <- (length(rk) - 1) # generalized to arbitrary number of points
xk <- rep(0, K)
for (k in 1:K) {
for (k in seq_len(K)) {
xk[k] <- (rk[k] + rk[k+1])/2
}
return(xk)
Expand All @@ -200,7 +203,7 @@ get_new_rk <- function(xk, fX) {
f_below <- function(x) {
fX(x)
}
for (k in 1:K) {
for (k in seq_len(K)) {
result_above <- stats::integrate(f_above, lower=xk[k], upper=xk[k+1])[[1]]
result_below <- stats::integrate(f_below, lower=xk[k], upper=xk[k+1])[[1]]
rk[k] <- result_above/result_below
Expand All @@ -217,7 +220,7 @@ get_pk <- function(xk, fX) {
K <- length(xk) + 1
xk <- c(-Inf, xk, Inf)
pk <- rep(0, K)
for (k in 1:K) {
for (k in seq_len(K)) {
lower_bound <- xk[k]
upper_bound <- xk[k+1]
pk[k] <- stats::integrate(fX, lower=lower_bound, upper=upper_bound)[[1]]
Expand All @@ -230,7 +233,7 @@ get_pk <- function(xk, fX) {
#' @param pk vector of probabilities `pk`
#' @return mean of `pk`
get_pk_mean <- function(pk) {
domain <- (1:length(pk))
domain <- seq_len(length(pk))
return(sum(pk * domain))
}

Expand All @@ -244,11 +247,12 @@ get_MSE <- function(xk, rk, fX) {
K <- length(rk) # generalized for testing
xk <- c(-Inf, xk, Inf)
mse <- 0
for (k in 1:K) {
for (k in seq_len(K)) {
lower_bound <- xk[k]
upper_bound <- xk[k+1]
integrand <- function(x) { ((x - rk[k])^2)*fX(x) }
mse <- mse + stats::integrate(integrand, lower=lower_bound, upper=upper_bound)[[1]]
mse <- mse + stats::integrate(integrand,
lower=lower_bound, upper=upper_bound)[[1]]
}
return(mse)
}
Expand All @@ -263,7 +267,8 @@ get_MSE <- function(xk, rk, fX) {
#' @return density at `x`
#' @seealso [sn::dsn()]
d_skew_normal <- function(x, xi=0, omega=1, alpha=0) {
return(2/omega*stats::dnorm((x - xi)/omega)*stats::pnorm(alpha*(x - xi)/omega))
return(2/omega*stats::dnorm((x - xi)/omega) *
stats::pnorm(alpha*(x - xi)/omega))
}

#' The mean of skew normal distribution
Expand All @@ -282,6 +287,14 @@ delta_skew_normal <- function(alpha) {
return(alpha / (sqrt(1 + alpha^2)))
}

#' Variance of a skew normal distribution
#'
#' @param alpha determines the shape
#' @return variance of a skew-normal distribution
var_skew_normal <- function(alpha) {
return(1 - 2*(delta_skew_normal(alpha)^2)/pi)
}

#' Convert parameters
#'
#' Converts from centered parameters to direct parameters appearing in
Expand Down
42 changes: 0 additions & 42 deletions README_files/figure-gfm/unnamed-chunk-4-1.svg

This file was deleted.

5 changes: 4 additions & 1 deletion tests/informal/discretization_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ plot_density <- function(K, params) {

params_dp <- convert_params(params)
x <- seq(-3, 3, length = 200)
y <- d_skew_normal(x, params_dp[["xi"]], params_dp[["omega"]], params_dp[["alpha"]])
y <- d_skew_normal(x,
params_dp[["xi"]],
params_dp[["omega"]],
params_dp[["alpha"]])

dd <- data.frame(x=x, y=y)
eps <- 0.008
Expand Down
1 change: 0 additions & 1 deletion tests/informal/sn_univariate_test.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
setwd("/home/marko/repos/responsesR/R/")
source("simulation_of_responses.R")
source("helper_functions.R")

Expand Down
14 changes: 13 additions & 1 deletion tests/testthat/test_discretization.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ test_that("discretization of N(0,1) with 5 categories gives expected result", {
expect_equal(sim$pk, pk, tolerance=0.1)
})

test_that("discretization of N(0,1) with 4 categories gives expected results", {
test_that("discretization of N(0,1) with 4 categories gives expected results", {
sim <- simulate_responses(K=4, params=c("mu"=0, "sd"=1, "gamma1"=0))
expected_sim <- list(
"pk"=c(0.16, 0.34, 0.34, 0.16), # expected probabilities
Expand Down Expand Up @@ -34,3 +34,15 @@ test_that("discretization of skew-normal with mu=-1, sd=0.5, gamma1=0.5
)
expect_equal(sim, expected_sim, tolerance=0.1)
})

test_that("scaling and shifting skew normal parameters works", {
expect_equal(delta_skew_normal(2), 2 / (sqrt(1 + 4)), tolerance=0.1)
expect_equal(mean_skew_normal(2),
delta_skew_normal(2) * sqrt(2/pi), tolerance=0.1)
expect_equal(var_skew_normal(2), 0.5, tolerance=0.1)

x <- c(-0.98, 0, 0.98)
dp <- c("xi"=0.5, "omega"=1, "alpha"=-1)
x_scaled_and_shifted <- c(-1.48, -0.50, 0.48)
expect_equal(scale_and_shift(x, dp), x_scaled_and_shifted)
})
Loading

0 comments on commit 7c437a6

Please sign in to comment.