prepare for cran
markolalovic committed Jun 22, 2024
1 parent 3d98066 commit e96ba73
Showing 45 changed files with 680 additions and 21,925 deletions.
441 changes: 0 additions & 441 deletions NEWS.html

This file was deleted.

55 changes: 26 additions & 29 deletions NEWS.md
@@ -1,33 +1,30 @@
# latent2likert 0.1.0 (Release date: 2020-10-17)

## Version 0.1.0
- Release date: 2020-10-17
- Initial release (development version).
- Tested on platforms: x86_64-pc-linux-gnu (64-bit) and x86_64-w64-mingw32 (64-bit).

# latent2likert 1.0.0 (Release date: 2024-03-28)

- The option to generate correlated Likert scale items was added to the function `rLikert()`.
- New function `estimate_parameters()` was added that allows for the estimation of parameters from existing survey data to replicate it more accurately.
- Issues related to the dependencies have been resolved.
- This version now only imports mvtnorm and the standard R packages stats and graphics, which are typically included in R releases.
- The sn package is necessary only when generating correlated responses based on the skew normal distribution.
- Added user prompts for installing the sn package when necessary.

# latent2likert 1.1.0 (Release date: 2024-06-06)

- A minor update of functions and vignettes.

# latent2likert 1.1.1 (Release date: 2024-06-06)

- The package was renamed from responsesR to latent2likert to capture the essence of converting latent variables into Likert scale responses.
- Improvements to the package website and documentation.

# latent2likert 1.2.1 (Release date: 2024-06-12)

- Refactored code for improved modularity and maintainability.
- Modularized core functions for better readability.
## Version 1.0.0
- Release date: 2024-03-28
- Added the option to generate correlated Likert scale items in the `rlikert()` function.
- Added the `estimate_params()` function for estimating parameters from existing survey data.
- Resolved dependency issues.
- Reduced dependencies to mvtnorm and the standard R packages stats and graphics.
- The sn package is now required only for generating correlated responses using a skew normal distribution.
- Added user prompts to install the sn package when needed.

## Version 1.1.0
- Release date: 2024-06-06
- Minor updates to functions and vignettes.

## Version 1.1.1
- Release date: 2024-06-06
- Renamed the package from `responsesR` to `latent2likert` to better reflect its purpose of converting latent variables into Likert scale responses.
- Improved the package website and documentation.
- **Note:** The codebase is under development, and finer details may change.

## Version 1.2.1
- Release date: 2024-06-12
- Refactored code for enhanced modularity and maintainability.
- Modularized core functions for improved readability.
- Improved the structure and organization of the codebase.
- Improved error handling of different cases for correlations input.
- Updated the `estimate_params()` function for estimating latent parameters from survey data.
- The codebase is currently in development, and finer details may change.

- Enhanced error handling for various correlation input scenarios.
59 changes: 31 additions & 28 deletions R/discretization.R
@@ -18,14 +18,15 @@
#' @examples
#' discretize_density(density_fn = stats::dnorm, n_levels = 5)
#' discretize_density(density_fn = function(x) {
#' 2 * stats::dnorm(x) * stats::pnorm(0.5 * x)}, n_levels = 4)
#' @details
#' The function addresses the problem of transforming a continuous random
#' variable \eqn{X} into a discrete random variable \eqn{Y} with minimal
#' 2 * stats::dnorm(x) * stats::pnorm(0.5 * x)
#' }, n_levels = 4)
#' @details
#' The function addresses the problem of transforming a continuous random
#' variable \eqn{X} into a discrete random variable \eqn{Y} with minimal
#' distortion. Distortion is measured as mean-squared error (MSE):
#' \deqn{
#' \text{E}\left[ (X - Y)^2 \right] =
#' \sum_{k=1}^{K} \int_{x_{k-1}}^{x_{k}} f_{X}(x)
#' \text{E}\left[ (X - Y)^2 \right] =
#' \sum_{k=1}^{K} \int_{x_{k-1}}^{x_{k}} f_{X}(x)
#' \left( x - r_{k} \right)^2 \, dx
#' }
#' where:
@@ -38,41 +39,41 @@
#' }
#' This problem is solved using the following iterative procedure:
#' \describe{
#' \item{\eqn{1.}}{Start with an arbitrary initial set of representation
#' \item{\eqn{1.}}{Start with an arbitrary initial set of representation
#' points: \eqn{r_{1} < r_{2} < \dots < r_{K}}.}
#' \item{\eqn{2.}}{Repeat the following steps until the improvement in MSE
#' \item{\eqn{2.}}{Repeat the following steps until the improvement in MSE
#' falls below given \eqn{\varepsilon}.}
#' \item{\eqn{3.}}{Calculate endpoints as \eqn{x_{k} = (r_{k+1} + r_{k})/2}
#' for each \eqn{k = 1, \dots, K-1} and set \eqn{x_{0}} and \eqn{x_{K}} to
#' for each \eqn{k = 1, \dots, K-1} and set \eqn{x_{0}} and \eqn{x_{K}} to
#' \eqn{-\infty} and \eqn{\infty}, respectively.}
#' \item{\eqn{4.}}{Update representation points by setting \eqn{r_{k}}
#' \item{\eqn{4.}}{Update representation points by setting \eqn{r_{k}}
#' equal to the conditional mean of \eqn{X} given \eqn{X \in (x_{k-1}, x_{k})}
#' for each \eqn{k = 1, \dots, K}.}
#' }
#'
#'
#' With each execution of step \eqn{(3)} and step \eqn{(4)}, the MSE decreases
#' or remains the same. As MSE is nonnegative, it approaches a limit.
#' The algorithm terminates when the improvement in MSE is less than a given
#' \eqn{\varepsilon > 0}, ensuring convergence after a finite number
#' or remains the same. As MSE is nonnegative, it approaches a limit.
#' The algorithm terminates when the improvement in MSE is less than a given
#' \eqn{\varepsilon > 0}, ensuring convergence after a finite number
#' of iterations.
#'
#' This procedure is known as Lloyd-Max's algorithm, initially used for scalar
#' quantization and closely related to the k-means algorithm. Local convergence
#' has been proven for log-concave density functions by Kieffer. Many common
#' probability distributions are log-concave including the normal and skew
#'
#' This procedure is known as Lloyd-Max's algorithm, initially used for scalar
#' quantization and closely related to the k-means algorithm. Local convergence
#' has been proven for log-concave density functions by Kieffer. Many common
#' probability distributions are log-concave including the normal and skew
#' normal distribution, as shown by Azzalini.
#'
#' @references
#' Azzalini, A. (1985).
#' A class of distributions which includes the normal ones.
#' Azzalini, A. (1985).
#' A class of distributions which includes the normal ones.
#' \emph{Scandinavian Journal of Statistics} \bold{12(2)}, 171–178.
#'
#'
#' Kieffer, J. (1983).
#' Uniqueness of locally optimal quantizer for log-concave density and convex
#' Uniqueness of locally optimal quantizer for log-concave density and convex
#' error function.
#' \emph{IEEE Transactions on Information Theory} \bold{29}, 42–47.
#'
#' Lloyd, S. (1982).
#'
#' Lloyd, S. (1982).
#' Least squares quantization in PCM.
#' \emph{IEEE Transactions on Information Theory} \bold{28 (2)}, 129–137.
#'
@@ -134,7 +135,7 @@ update_epresentatives <- function(density_fn, midp) {

#' Compute Distortion
#'
#' Computes the distortion (mean-squared error) given the midpoints and
#' Computes the distortion (mean-squared error) given the midpoints and
#' representation points.
#'
#' @param density_fn probability density function.
@@ -148,7 +149,8 @@ compute_distortion <- function(density_fn, midp, repr) {
endp <- c(-Inf, midp, Inf)
mse <- vapply(seq_len(n_levels), function(k) {
stats::integrate(function(x) {
density_fn(x) * (x - repr[k])^2}, endp[k], endp[k + 1])[[1]]
density_fn(x) * (x - repr[k])^2
}, endp[k], endp[k + 1])[[1]]
}, numeric(1))
return(sum(mse))
}
@@ -166,7 +168,8 @@ calc_probs <- function(density_fn, endp) {
n_levels <- length(endp) - 1
prob <- vapply(seq_len(n_levels), function(k) {
stats::integrate(function(x) {
density_fn(x)}, endp[k], endp[k + 1])[[1]]
density_fn(x)
}, endp[k], endp[k + 1])[[1]]
}, numeric(1))
return(prob)
}
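
The Lloyd-Max procedure documented in `discretize_density()` above can be illustrated with a short, self-contained R sketch. This is not the package's implementation; it only walks through steps 1–4 of the iteration for a standard normal latent density, and the function name `lloyd_max_sketch` is hypothetical.

```r
# Minimal sketch of the Lloyd-Max iteration described in the roxygen details
# above (illustrative only, not the package's implementation).
lloyd_max_sketch <- function(density_fn = stats::dnorm, n_levels = 5,
                             eps = 1e-6, maxit = 100) {
  # Step 1: arbitrary increasing initial representation points r_1 < ... < r_K
  repr <- seq(-2, 2, length.out = n_levels)
  mse_old <- Inf
  for (iter in seq_len(maxit)) {
    # Step 3: endpoints are midpoints of adjacent representation points,
    # with x_0 = -Inf and x_K = Inf
    endp <- c(-Inf, (repr[-1] + repr[-n_levels]) / 2, Inf)
    # Step 4: each r_k becomes the conditional mean of X on (x_{k-1}, x_k)
    repr <- vapply(seq_len(n_levels), function(k) {
      num <- stats::integrate(function(x) x * density_fn(x),
                              endp[k], endp[k + 1])$value
      den <- stats::integrate(density_fn, endp[k], endp[k + 1])$value
      num / den
    }, numeric(1))
    # Step 2: stop once the improvement in MSE falls below eps
    mse <- sum(vapply(seq_len(n_levels), function(k) {
      stats::integrate(function(x) density_fn(x) * (x - repr[k])^2,
                       endp[k], endp[k + 1])$value
    }, numeric(1)))
    if (mse_old - mse < eps) break
    mse_old <- mse
  }
  list(endp = endp, repr = repr, mse = mse)
}

lloyd_max_sketch(n_levels = 5)
```

Integrating `density_fn` over each resulting interval then yields the response probabilities, which is essentially what `calc_probs()` in the diff above does.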
75 changes: 20 additions & 55 deletions R/estimation.R
@@ -46,7 +46,7 @@
#' * Estimates the probabilities \eqn{p_{k}} for each item.
#' * Computes the estimates of \eqn{\xi} and \eqn{\omega} for each item.
#' * Combines the estimated parameters for all items into a table.
#'
#'
#' @seealso \code{\link{discretize_density}} for details on calculating
#' the endpoints, and \code{\link{part_bfi}} for example of the survey data.
#' @export
@@ -78,26 +78,26 @@ estimate_params <- function(data, n_levels, skew = 0) {
}

#' Estimate mean and standard deviation
#'
#' Estimates the mean and standard deviation of a latent variable given the
#'
#' Estimates the mean and standard deviation of a latent variable given the
#' discrete probabilities of its observed Likert scale responses.
#'
#'
#' @param prob vector of probabilities for each response category.
#' @param n_levels number of response categories for the Likert scale item.
#' @param skew marginal skewness of the latent variable, defaults to 0.
#' @param eps tolerance for convergence, defaults to 1e-6.
#' @param maxit maximum number of iterations, defaults to 100.
#'
#' @return A numeric vector with two elements: the estimated mean and
#'
#' @return A numeric vector with two elements: the estimated mean and
#' standard deviation.
#'
#'
#' @details
#' This function uses an iterative algorithm to solve the system of non-linear
#' equations that describe the relationship between the continuous latent
#' equations that describe the relationship between the continuous latent
#' variable and the observed discrete probability distribution of Likert scale
#' responses. The algorithm ensures stability by reparameterizing the system
#' and applying constraints to prevent stepping into invalid regions.
#'
#'
#' @noRd
estimate_mean_and_sd <- function(prob, n_levels, skew = 0,
eps = 1e-6, maxit = 100) {
@@ -134,14 +134,14 @@ estimate_mean_and_sd <- function(prob, n_levels, skew = 0,

#' Initialize CDF and PDF Functions
#'
#' Initializes the cumulative distribution function (CDF) and probability
#' Initializes the cumulative distribution function (CDF) and probability
#' density function (PDF) based on the skewness parameter.
#'
#' @param skew numeric value representing the skewness of the distribution.
#'
#' @return A list containing the CDF and PDF functions appropriate for the
#'
#' @return A list containing the CDF and PDF functions appropriate for the
#' given skewness.
#'
#'
#' @noRd
initialize_distributions <- function(skew) {
if (abs(skew) > 0) {
@@ -163,15 +163,15 @@
#'
#' @param x numeric vector of current estimates for the location and scaling
#' parameters.
#' @param endp numeric vector of endpoints defining the boundaries of the
#' @param endp numeric vector of endpoints defining the boundaries of the
#' response categories.
#' @param prob numeric vector of probabilities for each response category.
#' @param cdf_X function representing the cumulative distribution function
#' (CDF) of the latent variable.
#'
#'
#' @return A matrix of differences between the CDF evaluated at the endpoints
#' and the observed probabilities.
#'
#'
#' @noRd
fn <- function(x, endp, prob, cdf_X) {
u <- x[1]
@@ -184,16 +184,16 @@ fn <- function(x, endp, prob, cdf_X) {
#'
#' Computes the Jacobian matrix used in the iterative root-finding process.
#'
#' @param x numeric vector of current estimates for the location and scaling
#' @param x numeric vector of current estimates for the location and scaling
#' parameters.
#' @param endp numeric vector of endpoints defining the boundaries of the
#' @param endp numeric vector of endpoints defining the boundaries of the
#' response categories.
#' @param pdf_X function representing the probability density function (PDF)
#' of the latent variable.
#'
#'
#' @return A matrix representing the Jacobian of the system of equations with
#' respect to the parameters.
#'
#'
#' @noRd
jac <- function(x, endp, pdf_X) {
u <- x[1]
@@ -209,38 +209,3 @@ jac <- function(x, endp, pdf_X) {

return(cbind(du, dv))
}

#' Plot Contour
#'
#' Plots the contour of the objective function values over a grid
#' of parameter values. It visualizes the norm of the function \code{fn}
#' for different values of \code{u} (mean) and \code{v} (1/standard deviation)
#' and overlays the trace of parameter updates during the optimization process.
#'
#' @param fn objective function to be minimized.
#' @param endp endpoints of intervals that partition the continuous domain.
#' @param prob discrete probability distribution.
#' @param cdf_X cumulative distribution function of the latent variable.
#' @param trace matrix of parameter updates.
#' @noRd
plot_contour <- function(fn, endp, prob, cdf_X, trace) {
xlen <- 50
ylen <- 50
xgrid <- seq(-3, 3, length.out = xlen) # Range for mean (mu)
ygrid <- seq(0.1, 3, length.out = ylen) # Range for 1/sd
zvals <- matrix(NA, ncol = xlen, nrow = ylen)
for (i in seq_len(xlen)) {
for (j in seq_len(ylen)) {
zvals[i, j] <- norm(fn(
matrix(c(xgrid[i], ygrid[j])),
endp, prob, cdf_X
), "2")
}
}
graphics::contour(
x = xgrid, y = ygrid, z = zvals,
col = "gray42", xlab = "u = mu", ylab = "v = 1/sd"
)
graphics::grid(col = "lightgray", lty = "dotted")
graphics::points(trace[1, ], trace[2, ], pch = 20, col = "blue")
}
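
The deleted `plot_contour()` helper and the `fn()`/`jac()` documentation above describe the system being solved: with u = mu and v = 1/sd, the latent normal CDF evaluated at the category endpoints should match the cumulative observed proportions. The package solves this with a Newton-type update built from `fn()` and `jac()`; as a rough stand-in, the same system can be tackled with a generic optimizer. Everything in the sketch below (the names, the `exp` reparameterization, the use of `stats::optim`) is an assumption for illustration, not the package's code.

```r
# Rough sketch of the estimation idea: choose u = mean and v = 1/sd so that
# the model-implied cumulative probabilities at the category endpoints match
# the observed ones (illustrative only, not the package's root finder).
estimate_mean_sd_sketch <- function(prob, endp) {
  # endp: x_0 = -Inf < x_1 < ... < x_K = Inf (e.g. from discretize_density());
  # prob: observed proportions for the K response categories
  target <- cumsum(prob)[-length(prob)]            # P(Y <= k), k = 1..K-1
  obj <- function(par) {
    u <- par[1]                                    # latent mean
    v <- exp(par[2])                               # latent 1/sd, kept positive
    model <- stats::pnorm(v * (endp[2:(length(endp) - 1)] - u))
    sum((model - target)^2)
  }
  fit <- stats::optim(c(0, 0), obj)
  c(mean = fit$par[1], sd = 1 / exp(fit$par[2]))
}

# Hypothetical example with 5 categories and made-up proportions:
endp <- c(-Inf, -1.5, -0.5, 0.5, 1.5, Inf)
estimate_mean_sd_sketch(prob = c(0.05, 0.2, 0.4, 0.25, 0.1), endp = endp)
```

The `exp()` reparameterization plays the role of the constraints mentioned in the `estimate_mean_and_sd()` details, keeping the scale parameter from stepping into invalid regions.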
2 changes: 1 addition & 1 deletion R/part_bfi.R
@@ -23,7 +23,7 @@
#' \item{A5}{Make people feel at ease.}
#' \item{gender}{Gender of the respondent.}
#' }
#' @source {International Personality Item Pool (ipip.ori.org)}
#' @source {International Personality Item Pool ({https://ipip.ori.org})}
#' @source {https://search.r-project.org/CRAN/refmans/psychTools/html/bfi.html}
#' @references
#' Revelle, W. (2024).
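
Since the `estimate_params()` documentation points to `part_bfi` as example survey data, a usage sketch along the following lines seems natural. The 6-point response scale and the column selection are assumptions based on the original bfi data, not something stated in this diff.

```r
# Hypothetical usage of estimate_params() with the bundled part_bfi data,
# assuming the agreeableness items A1..A5 were recorded on a 6-point scale.
library(latent2likert)
data("part_bfi")
vars <- c("A1", "A2", "A3", "A4", "A5")
estimate_params(data = part_bfi[, vars], n_levels = 6)
```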