diff --git a/.Rbuildignore b/.Rbuildignore index 5f4ca01..c5df164 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,4 @@ ^app\.R$ ^init\.R$ ^deployment\.R$ +data-raw/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 381885c..55125cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: plotmipca Title: Plot mi-pca results -Version: 1.0.0 +Version: 2.0.0 Authors@R: person("Edoardo", "Costantini", , "e.costantini@tilburguniversity.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9581-9913")) diff --git a/NAMESPACE b/NAMESPACE index 4c71830..87e43f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,9 @@ # Generated by roxygen2: do not edit by hand -export(plotMids) -export(plotResults) +export(plot_case) +export(plot_simulation) +export(plot_trace) +export(server) +export(start_app) +export(ui_call) import(dplyr) -import(ggplot2) -import(lattice) -import(mice) -import(pkgload) -import(shiny) -import(shinyWidgets) -import(shinybrowser) diff --git a/R/app.R b/R/app.R deleted file mode 100644 index bf1f2cb..0000000 --- a/R/app.R +++ /dev/null @@ -1,417 +0,0 @@ -# Project: plotmipca -# Objective: Function to run app -# Author: Edoardo Costantini -# Created: 2022-12-02 -# Modified: 2022-12-17 -# Notes: - -#' plotResults -#' -#' Starts a Shiny app to interact with the results of the \href{https://github.com/EdoardoCostantini/mi-pca}{mi-pca} project. -#' -#' @details -#' The interface of the Shiny app allows you to change the values of the following simulation study experimental factors: -#' -#' - Number of observed items -#' - Presence of latent structure (if yes, 7 latent variables are used) -#' - Discrete levels (number of categories for discretized items) -#' - Proportion of noise variables -#' - Missing data treatments: -#' -#' - MI-PCR-ALL: mi-pca using pca on all data, after a round of single imputation -#' - MI-PCR-ALL (oracle): mi-pca using pca on all data computed on the originally fully observed data -#' - MI-PCR-AUX: mi-pca using pca only on auxiliary variables -#' - MI-PCR-VBV: mi-pca using pca only on a variable-by-variable basis -#' - MI-QP: mice using the quickpred function from the R package mice to select the predictors for the univariate imputation models via the correlation-based threshold -#' - MI-OR: mice with oracle knowledge on which predictors to use (the univariate imputation models included the other variables under imputation and the predictors that were used to impose missingness) -#' - MI-MI: mice with minimal missing data models (only variables under imputation used as predictors in the imputation models) -#' - CC: complete case analysis -#' - OG: original fully-observed data -#' -#' - Number of principal components -#' - Outcome measure: -#' -#' - (percent relative) bias -#' - CIC (confidence interval coverage) -#' - CIW (average confidence interval) -#' - mcsd (standard deviation of the estimate across the monte carlo simulations) -#' -#' - Parameter (which statistic for which variable) -#' -#' @export -#' @import shiny -#' @import shinybrowser -#' @import dplyr -#' @import ggplot2 -#' @import shinyWidgets -#' @import pkgload -#' @return Shiny app UI. -#' -plotResults <- function() { - - # Set up ------------------------------------------------------------------- - - # Graph structure - plot_x_axis <- "K" - moderator <- "npc" - grid_x_axis <- "method" - grid_y_axis <- "pj" - - - # UI ----------------------------------------------------------------------- - - ui <- fluidPage( - fluidRow( - column( - 4, - hr(), - h4("Data generation"), - radioButtons("j", - "Number of observed items", - choices = unique(dataResults$j), - selected = unique(dataResults$j)[1], - inline = TRUE - ), - radioButtons("lv", - "Latent structure", - choices = rev(unique(dataResults$lv)), - selected = TRUE, - inline = TRUE - ), - checkboxGroupInput("K", - "Discrete levels", - inline = TRUE, - choices = levels(dataResults$K), - selected = levels(dataResults$K)[c(1, 3, 5)] - ), - checkboxGroupInput("pj", - "Proportion of noise variables", - inline = TRUE, - choices = unique(dataResults$pj), - selected = unique(dataResults$pj)[c(1, 4)] - ), - ), - column( - 4, - hr(), - h4("Outcome measures"), - selectInput("par", - "Parameter", - choices = levels(dataResults$par), - selected = "z1 correlation z2" - ), - radioButtons("plot_y_axis", - "Outcome measure", - choices = c("bias", "CIC", "CIW", "mcsd"), - inline = TRUE - ), - ), - column( - 4, - hr(), - h4("Missing data treatments"), - checkboxGroupInput("method", - "Methods", - choices = levels(dataResults$method), - selected = levels(dataResults$method)[c(1, 3:5)], - inline = TRUE - ), - shinyWidgets::sliderTextInput( - inputId = "npc", - label = "Number of principal components (NPC)", - hide_min_max = TRUE, - choices = sort(unique(dataResults$npc)), - selected = c(0, 10), - grid = TRUE - ), - ) - ), - - # Silent extraction of size - shinybrowser::detect(), - hr(), - plotOutput("plot"), - ) - - # Server ------------------------------------------------------------------- - - server <- function(input, output, session) { - # Width of the page - observe({ - if (shinybrowser::get_width() < 768) { - updateCheckboxGroupInput(session, - inputId = "method", - label = "Methods", - selected = levels(dataResults$method)[4] - ) - } - }) - - # Plot - output$plot <- renderPlot(res = 96, height = 500, { - dataResults %>% - filter( - j == input$j, - par == input$par, - lv == input$lv, - method %in% input$method, - npc <= input$npc[2], - npc >= input$npc[1], - K %in% input$K, - pj %in% input$pj - ) %>% - mutate(npc = factor(npc)) %>% - ggplot(aes_string( - x = plot_x_axis, - y = input$plot_y_axis, - group = "npc", - fill = "NPC" - )) + - geom_bar( - stat = "identity", - position = "dodge", - colour = "black", - size = .25 - ) + - scale_fill_manual( - values = c(gray.colors(2, start = 0.5, end = 0.8), "white") - ) + - facet_grid(reformulate(grid_x_axis, grid_y_axis), - labeller = labeller(.rows = label_both, .cols = label_value), - scales = "free", - switch = "y" - ) + - theme( - # Text - text = element_text(size = 12), - strip.text.y.right = element_text(angle = 0), - # Legend - legend.position = "right", - # Backgorund - panel.background = element_rect(fill = NA, color = "gray") - ) + - labs( - x = plot_x_axis, - y = input$plot_y_axis - ) - }) - } - - # Run app ------------------------------------------------------------------ - - shinyApp(ui, server) - -} - -#' plotMids -#' -#' Starts a Shiny app to check trace plots for multiple imputation convergence for the \href{https://github.com/EdoardoCostantini/mi-spcr}{mi-spcr} project. -#' @param study. A unit character vector taking value "sim" or "fdd" (simulation study and case study, respectively) -#' @details -#' The interface of the Shiny app allows you to change the values of the following simulation study experimental factors: -#' -#' - Missing data treatment used (see names in the interface):.cols -#' -#' - for the simulation study: -#' -#' - MIMI: mice with minimal missing data models (only variables under imputation used as predictors in the imputation models) -#' - MIOP: mice using the quickpred function from the R package mice (Van Buuren and Groothuis-Oudshoorn, 2011) to select the predictors for the univariate imputation models via the correlation-based threshold -#' - MIOR: mice with oracle knowledge on which predictors to use (the univariate imputation models included the other variables under imputation and the predictors that were used to impose missingness) -#' - aux: mi-pca using pca only on auxiliary variables -#' - vbv: mi-pca using pca only on a variable-by-variable basis -#' - all: mi-pca using pca on all data, after a round of single imputation -#' -#' - for the case study: -#' -#' - expert: mice specified by an expert -#' - si4auxall: convergence check for single imputation used for pre-processing for mi-pca-all -#' - pcraux: mi-pca using pca only on auxiliary variables -#' - vbv: mi-pca using pca only on a variable-by-variable basis -#' - default: mice specified with default arguments -#' -#' - Imputed variable -#' - Number of iterations used -#' -#' @export -#' @import shiny -#' @import shinybrowser -#' @import dplyr -#' @import ggplot2 -#' @import shinyWidgets -#' @import pkgload -#' @import mice -#' @import lattice -#' @return Shiny app UI. -#' -plotMids <- function(study = c("sim", "fdd")[1]) { - - if (study == "sim") { - mids_interest <- dataMids$sim - - # Method names - methods <- c("MIMI", "MIOP", "MIOR", "aux", "vbv") - - # Plot tag - plot_tag <- " for the simulation study" - } - if (study == "fdd") { - mids_interest <- dataMids$fdd - - # Method names - methods <- c("expert", "si4auxall", "pcraux", "vbv", "default") - - # Plot tag - plot_tag <- " for the fireworks disaster study" - } - - # UI ----------------------------------------------------------------------- - - ui <- fluidPage( - h1(paste0(" Trace plots for convergence checks", plot_tag)), - fluidRow( - - # Missing data treatments ------------------------------------------ - column( - 3, - hr(), - selectInput("method", - "Imputation method:", - choices = methods, - selected = methods[1] - ), - ), - - # Number of pcs ---------------------------------------------------- - - column( - 3, - hr(), - selectInput( - inputId = "var", - label = "Variable", - choices = rownames(mids_interest[[1]]$chainMean[,,1]), - selected = rownames(mids_interest[[1]]$chainMean[, , 1])[1] - ), - ), - - # Number of iterations --------------------------------------------- - - column( - 3, - hr(), - shinyWidgets::sliderTextInput( - inputId = "iters", - label = "Iteration range", - hide_min_max = TRUE, - choices = 0:100, - selected = c(0, 25), - grid = FALSE - ), - ), - ), - hr(), - plotOutput("plot"), - - # Silent extraction of size - shinybrowser::detect(), - ) - - # Server ------------------------------------------------------------------- - - server <- function(input, output, session) { - - output$plot <- renderPlot( - res = 96, - height = 750, - { - # Define condition to plot based on inputs - if (study == "sim") { - cnd_id <- grep(input$method, names(mids_interest)) - # cnd_id <- grep(methods[1], names(mids_interest)) - } - if (study == "fdd") { - cnd_id <- grep(input$method, names(mids_interest)) - } - - # Work with simple object name - x <- mids_interest[[cnd_id]] - - # Default arguments that you could change in MICE - type <- "l" - col <- 1:10 - lty <- 1 - theme <- mice::mice.theme() - layout <- c(2, 1) - - # Extract objects I need - mn <- x$chainMean - sm <- sqrt(x$chainVar) - m <- x$m - it <- x$iteration - - # select subset of non-missing entries - # obs <- apply(!(is.nan(mn) | is.na(mn)), 1, all) - # varlist <- names(obs)[obs] - varlist <- input$var - # varlist <- "z1" - - # Prepare objects for plotting - mn <- matrix(aperm(mn[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it) - sm <- matrix(aperm(sm[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it) - adm <- expand.grid(seq_len(it), seq_len(m), c("mean", "sd")) - data <- cbind(adm, rbind(mn, sm)) - colnames(data) <- c(".it", ".m", ".ms", varlist) - - # Create formula - formula <- as.formula(paste0( - paste0(varlist, collapse = "+"), - "~.it|.ms" - )) - - # Dummy to trick R CMD check - .m <- NULL - rm(.m) - - # Load function to obtain the correct plot arrangement - strip.combined <- function(which.given, which.panel, factor.levels, ...) { - if (which.given == 1) { - lattice::panel.rect(0, 0, 1, 1, - col = theme$strip.background$col, border = 1 - ) - lattice::panel.text( - x = 0, y = 0.5, pos = 4, - lab = factor.levels[which.panel[which.given]] - ) - } - if (which.given == 2) { - lattice::panel.text( - x = 1, y = 0.5, pos = 2, - lab = factor.levels[which.panel[which.given]] - ) - } - } - - # Make plot - lattice::xyplot( - x = formula, data = data, groups = .m, - type = type, lty = lty, col = col, layout = layout, - scales = list( - y = list(relation = "free"), - x = list(alternating = FALSE) - ), - as.table = TRUE, - xlim = c(input$iters[1] - 1, input$iters[2] + 1), - xlab = "Iteration", - ylab = "", - strip = strip.combined, - par.strip.text = list(lines = 0.5), - ) - } - ) - } - - # Run app ------------------------------------------------------------------ - - shinyApp(ui, server) - -} \ No newline at end of file diff --git a/R/data.R b/R/data.R index 07e7476..8208b91 100644 --- a/R/data.R +++ b/R/data.R @@ -2,7 +2,7 @@ # Objective: container for data documentation # Author: Edoardo Costantini # Created: 2022-12-17 -# Modified: 2022-12-17 +# Modified: 2023-04-30 #' dataResults #' @@ -40,13 +40,24 @@ #' @format A data frame with 12108 rows and 21 columns NULL -#' dataMids +#' mids_sim #' #' A list of (simplified) mids objects and condition descriptions used to obtain the trace plots for desired conditions. #' #' @docType data #' @keywords datasets -#' @name dataMids -#' @usage data(dataMids) -#' @format A list containing simplified mids for (1) the simulation study and (2) the data application +#' @name mids_sim +#' @usage data(mids_sim) +#' @format A list containing simplified mids for the simulation study +NULL + +#' mids_case +#' +#' A list of (simplified) mids objects and condition descriptions used to obtain the trace plots for desired conditions. +#' +#' @docType data +#' @keywords datasets +#' @name mids_case +#' @usage data(mids_case) +#' @format A list containing simplified mids for the case study NULL \ No newline at end of file diff --git a/R/plot_case.R b/R/plot_case.R new file mode 100644 index 0000000..236d7df --- /dev/null +++ b/R/plot_case.R @@ -0,0 +1,46 @@ +#' Plot case study results +#' +#' Generate the main plot for the case study. +#' +#' @param results object containing results produced by the simulation study +#' @param y dependent variable in the substantive analysis model +#' @return Returns the ggplot +#' @author Edoardo Costantini, 2023 +#' @examples +#' # Define example inputs +#' results <- dataFdd +#' y <- "PTSD-RI parent score" +#' +#' # Use the function +#' plot_case( +#' results = dataFdd, +#' y = "yp" +#' ) +#' +#' @export +plot_case <- function(results, y) { + # Plot YP + ggplot2::ggplot( + data = results, + ggplot2::aes( + x = Time, + y = .data[[y]], + group = rep + ) + ) + + ggplot2::facet_grid( + rows = ggplot2::vars(trt), + cols = ggplot2::vars(imp), + scales = "free" + ) + + ggplot2::geom_line(linewidth = .1) + + ggplot2::theme( + # Text + text = ggplot2::element_text(size = 10), + # Legend + legend.position = "bottom", + # Background + panel.border = ggplot2::element_rect(fill = NA, color = "gray"), + panel.background = ggplot2::element_rect(fill = NA) + ) +} \ No newline at end of file diff --git a/R/plot_simulation.R b/R/plot_simulation.R new file mode 100644 index 0000000..cbdada1 --- /dev/null +++ b/R/plot_simulation.R @@ -0,0 +1,99 @@ +#' Plot simulation results +#' +#' Generate the main plot for the simulation study. +#' +#' @param results object containing results produced by the simulation study +#' @param n_items experimental factor value: number of items +#' @param parameter estimated parameter of interest +#' @param latent_structure experimental factor value: whether the latent structure is imposed or not +#' @param method_vector experimental factor value: imputation methods considered +#' @param npc_range experimental factor value: number of components considered +#' @param categories experimental factor value: number of categories of discretized variables +#' @param outcome performance measure to plot +#' @return Returns the ggplot +#' @author Edoardo Costantini, 2023 +#' @examples +#' # Define example inputs +#' results <- dataResults +#' n_items <- unique(dataResults$j)[1] +#' parameter <- levels(dataResults$par)[15] +#' latent_structure <- unique(dataResults$lv)[2] +#' method_vector <- levels(dataResults$method)[c(1, 3:5)] +#' npc_range <- c(0, 10) +#' categories <- levels(dataResults$K)[c(1, 3, 5)] +#' prop_noise <- unique(dataResults$pj)[c(1, 4)] +#' outcome <- c("bias", "CIC", "CIW", "mcsd")[1] +#' +#' # Use the function +#' plot_simulation( +#' results = dataResults, +#' n_items = unique(dataResults$j)[1], +#' parameter = levels(dataResults$par)[15], +#' latent_structure = unique(dataResults$lv)[2], +#' method_vector = levels(dataResults$method)[c(1, 3:5)], +#' npc_range = c(0, 10), +#' categories = levels(dataResults$K)[c(1, 3, 5)], +#' prop_noise = unique(dataResults$pj)[c(1, 4)], +#' outcome = c("bias", "CIC", "CIW", "mcsd")[1] +#' ) +#' +#' @export +plot_simulation <- function(results, n_items, parameter, latent_structure, method_vector, npc_range, categories, prop_noise, outcome) { + # Filter the data as requested + results_filtered <- results %>% + dplyr::filter( + j == n_items, + par == parameter, + lv == latent_structure, + method %in% method_vector, + npc <= npc_range[2], + npc >= npc_range[1], + K %in% categories, + pj %in% prop_noise + ) + + # Make NPCs a factors + results_ready <- results_filtered %>% + dplyr::mutate(npc = factor(npc)) + + # Make plot + results_ready %>% + ggplot2::ggplot( + ggplot2::aes_string( + x = "K", + y = outcome, + group = "npc", + fill = "NPC" + ) + ) + + ggplot2::geom_bar( + stat = "identity", + position = "dodge", + colour = "black", + size = .25 + ) + + ggplot2::scale_fill_manual( + values = c(grDevices::gray.colors(2, start = 0.5, end = 0.8), "white") + ) + + ggplot2::facet_grid(reformulate("method", "pj"), + labeller = ggplot2::labeller( + .rows = ggplot2::label_both, + .cols = ggplot2::label_value + ), + scales = "free", + switch = "y" + ) + + ggplot2::theme( + # Text + text = ggplot2::element_text(size = 12), + strip.text.y.right = ggplot2::element_text(angle = 0), + # Legend + legend.position = "right", + # Backgorund + panel.background = ggplot2::element_rect(fill = NA, color = "gray") + ) + + ggplot2::labs( + x = "K", + y = outcome + ) +} \ No newline at end of file diff --git a/R/plot_trace.R b/R/plot_trace.R new file mode 100644 index 0000000..3a28f13 --- /dev/null +++ b/R/plot_trace.R @@ -0,0 +1,107 @@ +#' Trace plots +#' +#' Generate trace plots to study convergence of imputation methods +#' +#' @param mids_data object containing imputation history produced by the simulation study +#' @param method unit character vector naming the method to display +#' @param var unit character vector naming the variable to display +#' @param iters numeric vector containing the iteration bounds (from-to) to plot +#' @return Returns the lattice plot +#' @author Edoardo Costantini, 2023 +#' @examples +#' # Define example inputs +#' mids_data <- mids_case +#' method <- "expert" +#' layout <- c(2, 3) +#' iters <- c(0, 25) +#' +#' # Use the function +#' plot_trace( +#' mids_data = mids_sim, +#' method = "MIMI", +#' layout <- c(2, 4), +#' iters = c(0, 100) +#' ) +#' +#' @export +plot_trace <- function(mids_data, method, iters = c(0, 25), layout = c(2, 1)) { + # Define the condition we are working with + cnd_id <- grep(method, names(mids_data)) + + # Extract the mids object of interest + x <- mids_data[[cnd_id]] + + # Default arguments that you could change in MICE + type <- "l" + col <- 1:10 + lty <- 1 + theme <- mice::mice.theme() + + # Extract objects I need + mn <- x$chainMean + sm <- sqrt(x$chainVar) + m <- x$m + it <- x$iteration + + # select subset of non-missing entries + obs <- apply(!(is.nan(mn) | is.na(mn)), 1, all) + varlist <- names(obs)[obs] + + # Prepare objects for plotting + mn <- matrix(aperm(mn[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it) + sm <- matrix(aperm(sm[varlist, , , drop = FALSE], c(2, 3, 1)), nrow = m * it) + adm <- expand.grid(seq_len(it), seq_len(m), c("mean", "sd")) + data <- cbind(adm, rbind(mn, sm)) + colnames(data) <- c(".it", ".m", ".ms", varlist) + + # Create formula + formula <- as.formula(paste0( + paste0(varlist, collapse = "+"), + "~.it|.ms" + )) + + # Dummy to trick R CMD check + .m <- NULL + rm(.m) + + # Load function to obtain the correct plot arrangement + strip.combined <- function(which.given, which.panel, factor.levels, ...) { + if (which.given == 1) { + lattice::panel.rect(0, 0, 1, 1, + col = theme$strip.background$col, border = 1 + ) + lattice::panel.text( + x = 0, y = 0.5, pos = 4, + lab = factor.levels[which.panel[which.given]] + ) + } + if (which.given == 2) { + lattice::panel.text( + x = 1, y = 0.5, pos = 2, + lab = factor.levels[which.panel[which.given]] + ) + } + } + + # Make plot + lattice::xyplot( + x = formula, + data = data, + groups = .m, + type = type, + lty = lty, + col = col, + layout = layout, + scales = list( + y = list(relation = "free"), + x = list(alternating = FALSE) + ), + as.table = TRUE, + xlim = c(iters[1] - 1, iters[2] + 1), + xlab = "Iteration", + ylab = "", + strip = strip.combined, + par.strip.text = list(lines = 0.5) + ) + +} \ No newline at end of file diff --git a/R/server.R b/R/server.R new file mode 100644 index 0000000..b413c05 --- /dev/null +++ b/R/server.R @@ -0,0 +1,83 @@ +#' server +#' +#' server function for the shiny app +#' +#' @param input set of inputs coming from ui +#' @param output set of outputs +#' @param session session info and status +#' @author Edoardo Costantini, 2023 +#' @export +server <- function(input, output, session) { + # Width of the page + observe({ + if (shinybrowser::get_width() < 768) { + updateCheckboxGroupInput(session, + inputId = "method", + label = "Methods", + selected = levels(dataResults$method)[4] + ) + } + }) + + # Simulation study plot ------------------------------------------------ + + output$plot <- renderPlot( + res = 96, + height = 725, + { + plot_simulation( + results = dataResults, + n_items = input$j, + parameter = input$par, + latent_structure = input$lv, + method_vector = input$method, + npc_range = input$npc, + categories = input$K, + prop_noise = input$pj, + outcome = input$plot_y_axis + ) + } + ) + + # Simulation study traceplots ------------------------------------------ + + output$mids_sim_plot <- renderPlot( + res = 96, + height = 725, + { + plot_trace( + mids_data = mids_sim, + method = input$conv_sim_method, + layout = c(2, 4), + iters = input$conv_sim_iters + ) + } + ) + + # > Case study: results ---------------------------------------------------- + output$case_plot_res <- renderPlot( + res = 96, + height = 725, + { + plot_case( + results = dataFdd, + y = input$res_case_dv + ) + } + ) + + # > Case study traceplots ---------------------------------------------- + + output$mids_case_plot <- renderPlot( + res = 96, + height = 725, + { + plot_trace( + mids_data = mids_case, + method = input$conv_case_method, + layout = c(2, 6), + iters = input$conv_case_iters + ) + } + ) +} diff --git a/R/start_app.R b/R/start_app.R new file mode 100644 index 0000000..f11874f --- /dev/null +++ b/R/start_app.R @@ -0,0 +1,43 @@ +#' start_app +#' +#' Starts a Shiny app to interact with the results of the \href{https://github.com/EdoardoCostantini/mi-pca}{mi-pca} project. +#' +#' @details +#' The interface of the Shiny app allows you to change the values of the following simulation study experimental factors: +#' +#' - Number of observed items +#' - Presence of latent structure (if yes, 7 latent variables are used) +#' - Discrete levels (number of categories for discretized items) +#' - Proportion of noise variables +#' - Missing data treatments: +#' +#' - MI-PCR-ALL: mi-pca using pca on all data, after a round of single imputation +#' - MI-PCR-ALL (oracle): mi-pca using pca on all data computed on the originally fully observed data +#' - MI-PCR-AUX: mi-pca using pca only on auxiliary variables +#' - MI-PCR-VBV: mi-pca using pca only on a variable-by-variable basis +#' - MI-QP: mice using the quickpred function from the R package mice to select the predictors for the univariate imputation models via the correlation-based threshold +#' - MI-OR: mice with oracle knowledge on which predictors to use (the univariate imputation models included the other variables under imputation and the predictors that were used to impose missingness) +#' - MI-MI: mice with minimal missing data models (only variables under imputation used as predictors in the imputation models) +#' - CC: complete case analysis +#' - OG: original fully-observed data +#' +#' - Number of principal components +#' - Outcome measure: +#' +#' - (percent relative) bias +#' - CIC (confidence interval coverage) +#' - CIW (average confidence interval) +#' - mcsd (standard deviation of the estimate across the monte carlo simulations) +#' +#' - Parameter (which statistic for which variable) +#' +#' @export +#' @import dplyr +#' @return Shiny app UI. +#' +start_app <- function() { + shiny::shinyApp( + ui = ui_call(), + server = server + ) +} \ No newline at end of file diff --git a/R/ui.R b/R/ui.R new file mode 100644 index 0000000..d1be10f --- /dev/null +++ b/R/ui.R @@ -0,0 +1,230 @@ +#' User interface call +#' +#' Calls the definition of the user interface and returns it as an output +#' +#' @return UI object that can be passed directly to shiny::shinyApp() +#' @author Edoardo Costantini, 2023 +#' @export +ui_call <- function() { + # Define ui object + ui <- shiny::fluidPage( + + # App title + shiny::titlePanel( + shiny::h1( + 'Solving the "many variables" problem in MICE with principal component regression', + align = "center" + ) + ), + shiny::column( + width = 10, + offset = 1, + # Create tabs for different plotting aspects + shiny::tabsetPanel( + type = "tabs", + selected = "About this Shiny app", + shiny::tabPanel( + title = "About this Shiny app", + shiny::column( + width = 8, + offset = 2, + shiny::HTML( + "
+ This Shiny app accompanies the paper: +
+
+ Costantini, E., Lang, K. M., Sijtsma, K., & Reeskens, T. (2022). Solving the 'many variables' problem in MICE with principal component regression. arXiv preprint arXiv:2206.15107. +
+
+ With this app, the user can: + + For questions and feedback, feel free to send me an email. + " + ) + ) + ), + shiny::tabPanel( + title = "Module 1: Simulation study", + shiny::fluidRow( + shiny::column( + width = 3, + # Simulation study: Description -------------------- + shiny::HTML( + "
+ This tab allows you to plot the results of the simulation study reported in the article. + You change the values of the experimental factors to plot the results you are most interested in. +
+
" + ), + shiny::navlistPanel( + widths = c(11, 12), + shiny::tabPanel( + title = "1. Data generation", + shiny::radioButtons("j", + "Number of observed items", + choices = unique(dataResults$j), + selected = unique(dataResults$j)[1], + inline = TRUE + ), + shiny::radioButtons("lv", + "Latent structure", + choices = rev(unique(dataResults$lv)), + selected = TRUE, + inline = TRUE + ), + shiny::checkboxGroupInput("K", + "Discrete levels", + inline = TRUE, + choices = levels(dataResults$K), + selected = levels(dataResults$K)[c(1, 3, 5)] + ), + shiny::checkboxGroupInput("pj", + "Proportion of noise variables", + inline = TRUE, + choices = unique(dataResults$pj), + selected = unique(dataResults$pj) + ) + ), + shiny::tabPanel( + title = "2. Missing data treatments", + shiny::checkboxGroupInput("method", + "Methods", + choices = levels(dataResults$method), + selected = levels(dataResults$method)[c(1, 3:5, 8)], + inline = TRUE + ), + shinyWidgets::sliderTextInput( + inputId = "npc", + label = "Number of principal components (NPC)", + hide_min_max = TRUE, + choices = sort(unique(dataResults$npc)), + selected = c(0, 10), + grid = TRUE + ) + ), + shiny::tabPanel( + title = "3. Simulation outcomes", + shiny::selectInput("par", + "Parameter", + choices = levels(dataResults$par), + selected = "z1 correlation z2" + ), + shiny::radioButtons("plot_y_axis", + "Outcome measure", + choices = c("bias", "CIC", "CIW", "mcsd"), + inline = TRUE + ) + ) + ) + ), + shiny::column( + width = 9, + shiny::plotOutput("plot"), + style = "border-left: 1px solid; border-left-color: #DDDDDD" + ) + ) + ), + shiny::tabPanel( + title = "Module 2: Case study", + shiny::fluidRow( + shiny::column( + width = 3, + shiny::HTML( + "
+ This tab allows you to plot the results of the case study reported in the article. +
+
+ " + ), + shiny::selectInput("res_case_dv", + "Dependent variable", + choices = c("PTSD-RI parent score", "PTSD-RI children score"), + selected = "PTSD-RI parent score" + ) + ), + shiny::column( + width = 9, + shiny::plotOutput("case_plot_res"), + style = "border-left: 1px solid; border-left-color: #DDDDDD" + ) + ) + ), + shiny::tabPanel( + title = "Module 3: Simulation study convergence", + shiny::fluidRow( + shiny::column( + width = 3, + shiny::HTML( + "
+ This tab allows you to interact with the trace plots for the imputation methods used in the simulation study. +
+
+ " + ), + shiny::selectInput("conv_sim_method", + "Imputation method:", + choices = c("MI-PCA-AUX", "MI-PCA-VBV", "MI-MI", "MI-OP", "MI-OR"), + selected = "MI-PCA-AUX" + ), + shinyWidgets::sliderTextInput( + inputId = "conv_sim_iters", + label = "Iteration range", + hide_min_max = TRUE, + choices = 0:100, + selected = c(0, 25), + grid = FALSE + ) + ), + shiny::column( + width = 9, + shiny::plotOutput("mids_sim_plot"), + style = "border-left: 1px solid; border-left-color: #DDDDDD" + ) + ) + ), + shiny::tabPanel( + title = "Module 4: Case study convergence", + shiny::fluidRow( + shiny::column( + width = 3, + # Case study convergence: description -------------- + shiny::HTML( + "
+ This tab allows you to interact with the trace plots for the imputation methods used in the case study. +
+
+ " + ), + shiny::selectInput("conv_case_method", + "Imputation method:", + choices = c("MI-PCA-AUX", "MI-PCA-VBV", "SI-PCA-ALL", "Default", "Expert"), + selected = "MI-PCA-AUX" + ), + shinyWidgets::sliderTextInput( + inputId = "conv_case_iters", + label = "Iteration range", + hide_min_max = TRUE, + choices = 0:100, + selected = c(0, 25), + grid = FALSE + ), + ), + shiny::column( + width = 9, + shiny::plotOutput("mids_case_plot"), + style = "border-left: 1px solid; border-left-color: #DDDDDD" + ) + ) + ) + ) + ) + ) + + # Return ui object + return(ui) +} \ No newline at end of file diff --git a/README.md b/README.md index e5e785d..b951c4e 100644 --- a/README.md +++ b/README.md @@ -48,24 +48,10 @@ To start the shiny apps and interact with the plots, open an R session and load library("plotmipca") ``` -### Simulation study results - -When you run the following R function: - -``` -plotmipca::plotResults() -``` - -a shiny app is started that allows you to plot the results of the simulation study. -The help file shows how to use and read the plots produced by the app. - -### Convergence plots - -When you run the following R function: +Then, run the following command in the R console: ``` -plotmipca::plotMids() +start_app() ``` -a shiny app is started that allows you to study the trace plots for MI imputation algorithms used in the simulation study. -The help file shows how to use and read the plots produced by the app. +The app interface will explain how to interact with it. \ No newline at end of file diff --git a/app.R b/app.R index 1bdc35b..cc84878 100644 --- a/app.R +++ b/app.R @@ -6,4 +6,4 @@ # Notes: pkgload::load_all(export_all = FALSE, helpers = FALSE, attach_testthat = FALSE) -plotmipca::plotResults() \ No newline at end of file +plotmipca::start_app() \ No newline at end of file diff --git a/data-raw/20211220_144954_res.rds b/data-raw/20211220_144954_res.rds new file mode 100644 index 0000000..6b141b2 Binary files /dev/null and b/data-raw/20211220_144954_res.rds differ diff --git a/data-raw/convergence-default.rds b/data-raw/convergence-default.rds new file mode 100644 index 0000000..ab5c6f4 Binary files /dev/null and b/data-raw/convergence-default.rds differ diff --git a/data-raw/convergence-expert.rds b/data-raw/convergence-expert.rds new file mode 100644 index 0000000..a08ff06 Binary files /dev/null and b/data-raw/convergence-expert.rds differ diff --git a/data-raw/convergence-pcraux.rds b/data-raw/convergence-pcraux.rds new file mode 100644 index 0000000..e9f319d Binary files /dev/null and b/data-raw/convergence-pcraux.rds differ diff --git a/data-raw/convergence-si-4-aux-all.rds b/data-raw/convergence-si-4-aux-all.rds new file mode 100644 index 0000000..83370cf Binary files /dev/null and b/data-raw/convergence-si-4-aux-all.rds differ diff --git a/data-raw/convergence-sim2-p242.rds b/data-raw/convergence-sim2-p242.rds new file mode 100644 index 0000000..b7d2857 Binary files /dev/null and b/data-raw/convergence-sim2-p242.rds differ diff --git a/data-raw/convergence-vbv.rds b/data-raw/convergence-vbv.rds new file mode 100644 index 0000000..b4d3797 Binary files /dev/null and b/data-raw/convergence-vbv.rds differ diff --git a/data-raw/process-data.R b/data-raw/process-data.R new file mode 100644 index 0000000..b033f89 --- /dev/null +++ b/data-raw/process-data.R @@ -0,0 +1,101 @@ +# Project: plotmipca +# Objective: Prepare and deploy fdd results for app +# Author: Edoardo Costantini +# Created: 2023-04-29 +# Modified: 2023-04-29 +# Notes: + +# Load data -------------------------------------------------------------------- + +# Load mids for convergence checks +mids_sim <- readRDS("./data-raw/convergence-sim2-p242.rds") +mids_caseStudy_expert <- readRDS("./data-raw/convergence-expert.rds") +mids_caseStudy_si4auxall <- readRDS("./data-raw/convergence-si-4-aux-all.rds") +mids_caseStudy_pcraux <- readRDS("./data-raw/convergence-pcraux.rds") +mids_caseStudy_vbv <- readRDS("./data-raw/convergence-vbv.rds") +mids_caseStudy_default <- readRDS("./data-raw/convergence-default.rds") + +# results for FDD +dataFdd <- readRDS("./data-raw/20211220_144954_res.rds") + +# Case study: results ---------------------------------------------------------- + +# Rename methods to match what is used in the paper +dataFdd$imp <- factor(dataFdd$imp, + levels = c( + "expert", + "pcr.aux", + "pcr.all", + "pcr.vbv", + "default" + ), + labels = c( + "Expert", + "MI-PCR-AUX", + "MI-PCR-ALL", + "MI-PCR-VBV", + "Default" + ) +) + +# Give meaningful names to the variables +names(dataFdd)[4] <- "Time" +names(dataFdd)[5] <- "PTSD-RI parent score" +names(dataFdd)[6] <- "PTSD-RI children score" + +# Rename outcome measures as used in the paper +levels(dataFdd$trt) <- c("EMDR", "CBT") + +# Rename time points as used in the paper +dataFdd$Time <- factor(dataFdd$Time, labels = c("T1", "T2", "T3")) + +# Use the data +usethis::use_data(dataFdd, overwrite = TRUE) + +# Mids ------------------------------------------------------------------------- + +# Replace names of methods in mids sim +names(mids_sim) <- gsub("MIMI", "MI-MI", names(mids_sim)) +names(mids_sim) <- gsub("MIOP", "MI-OP", names(mids_sim)) +names(mids_sim) <- gsub("MIOR", "MI-OR", names(mids_sim)) +names(mids_sim) <- gsub("aux", "MI-PCA-AUX", names(mids_sim)) +names(mids_sim) <- gsub("vbv", "MI-PCA-VBV", names(mids_sim)) + +# Subset important elements from mids sim +for (i in 1:length(mids_sim)) { + temp <- mids_sim[[i]][c("chainMean", "chainVar", "m", "iteration")] + temp[["chainMean"]] <- temp[["chainMean"]][1:4, , ] + temp[["chainVar"]] <- temp[["chainVar"]][1:4, , ] + mids_sim[[i]] <- temp +} + +# Subset important elements from mids case study +mids_case <- list( + expert = mids_caseStudy_expert[c("chainMean", "chainVar", "m", "iteration")], + si4auxall = mids_caseStudy_si4auxall[c("chainMean", "chainVar", "m", "iteration")], + pcraux = mids_caseStudy_pcraux$mids[c("chainMean", "chainVar", "m", "iteration")], + vbv = mids_caseStudy_vbv[c("chainMean", "chainVar", "m", "iteration")], + deafult = mids_caseStudy_default[c("chainMean", "chainVar", "m", "iteration")] +) + +for (i in 1:length(mids_case)) { + temp <- mids_case[[i]] + + # Subset correct variables + temp[["chainMean"]] <- temp[["chainMean"]][c("yp1", "yp2", "yp3", "yc1", "yc2", "yc3"), , ] + temp[["chainVar"]] <- temp[["chainVar"]][c("yp1", "yp2", "yp3", "yc1", "yc2", "yc3"), , ] + + # Store output + mids_case[[i]] <- temp +} + +# Replace names of methods in mids sim +names(mids_case) <- gsub("expert", "Expert", names(mids_case)) +names(mids_case) <- gsub("si4auxall", "SI-PCA-ALL", names(mids_case)) +names(mids_case) <- gsub("pcraux", "MI-PCA-AUX", names(mids_case)) +names(mids_case) <- gsub("vbv", "MI-PCA-VBV", names(mids_case)) +names(mids_case) <- gsub("deafult", "Default", names(mids_case)) + +# Use data in the package +usethis::use_data(mids_sim, overwrite = TRUE) +usethis::use_data(mids_case, overwrite = TRUE) diff --git a/data/dataFdd.rda b/data/dataFdd.rda new file mode 100644 index 0000000..ff96335 Binary files /dev/null and b/data/dataFdd.rda differ diff --git a/data/dataMids.rda b/data/dataMids.rda deleted file mode 100644 index 27afc5c..0000000 Binary files a/data/dataMids.rda and /dev/null differ diff --git a/data/mids_case.rda b/data/mids_case.rda new file mode 100644 index 0000000..beaa9d9 Binary files /dev/null and b/data/mids_case.rda differ diff --git a/data/mids_sim.rda b/data/mids_sim.rda new file mode 100644 index 0000000..87665a1 Binary files /dev/null and b/data/mids_sim.rda differ diff --git a/man/dataMids.Rd b/man/mids_case.Rd similarity index 63% rename from man/dataMids.Rd rename to man/mids_case.Rd index 8542155..9a2559b 100644 --- a/man/dataMids.Rd +++ b/man/mids_case.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} -\name{dataMids} -\alias{dataMids} -\title{dataMids} +\name{mids_case} +\alias{mids_case} +\title{mids_case} \format{ -A list containing simplified mids for (1) the simulation study and (2) the data application +A list containing simplified mids for the case study } \usage{ -data(dataMids) +data(mids_case) } \description{ A list of (simplified) mids objects and condition descriptions used to obtain the trace plots for desired conditions. diff --git a/man/mids_sim.Rd b/man/mids_sim.Rd new file mode 100644 index 0000000..3e67dfe --- /dev/null +++ b/man/mids_sim.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{mids_sim} +\alias{mids_sim} +\title{mids_sim} +\format{ +A list containing simplified mids for the simulation study +} +\usage{ +data(mids_sim) +} +\description{ +A list of (simplified) mids objects and condition descriptions used to obtain the trace plots for desired conditions. +} +\keyword{datasets} diff --git a/man/plotMids.Rd b/man/plotMids.Rd deleted file mode 100644 index a78990d..0000000 --- a/man/plotMids.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/app.R -\name{plotMids} -\alias{plotMids} -\title{plotMids} -\usage{ -plotMids(study = c("sim", "fdd")[1]) -} -\arguments{ -\item{study.}{A unit character vector taking value "sim" or "fdd" (simulation study and case study, respectively)} -} -\value{ -Shiny app UI. -} -\description{ -Starts a Shiny app to check trace plots for multiple imputation convergence for the \href{https://github.com/EdoardoCostantini/mi-spcr}{mi-spcr} project. -} -\details{ -The interface of the Shiny app allows you to change the values of the following simulation study experimental factors: -\itemize{ -\item Missing data treatment used (see names in the interface):.cols -\itemize{ -\item for the simulation study: -\itemize{ -\item MIMI: mice with minimal missing data models (only variables under imputation used as predictors in the imputation models) -\item MIOP: mice using the quickpred function from the R package mice (Van Buuren and Groothuis-Oudshoorn, 2011) to select the predictors for the univariate imputation models via the correlation-based threshold -\item MIOR: mice with oracle knowledge on which predictors to use (the univariate imputation models included the other variables under imputation and the predictors that were used to impose missingness) -\item aux: mi-pca using pca only on auxiliary variables -\item vbv: mi-pca using pca only on a variable-by-variable basis -\item all: mi-pca using pca on all data, after a round of single imputation -} -\item for the case study: -\itemize{ -\item expert: mice specified by an expert -\item si4auxall: convergence check for single imputation used for pre-processing for mi-pca-all -\item pcraux: mi-pca using pca only on auxiliary variables -\item vbv: mi-pca using pca only on a variable-by-variable basis -\item default: mice specified with default arguments -} -} -\item Imputed variable -\item Number of iterations used -} -} diff --git a/man/plot_case.Rd b/man/plot_case.Rd new file mode 100644 index 0000000..7ae7b82 --- /dev/null +++ b/man/plot_case.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_case.R +\name{plot_case} +\alias{plot_case} +\title{Plot case study results} +\usage{ +plot_case(results, y) +} +\arguments{ +\item{results}{object containing results produced by the simulation study} + +\item{y}{dependent variable in the substantive analysis model} +} +\value{ +Returns the ggplot +} +\description{ +Generate the main plot for the case study. +} +\examples{ +# Define example inputs +results <- dataFdd +y <- "PTSD-RI parent score" + +# Use the function +plot_case( + results = dataFdd, + y = "yp" +) + +} +\author{ +Edoardo Costantini, 2023 +} diff --git a/man/plot_simulation.Rd b/man/plot_simulation.Rd new file mode 100644 index 0000000..a00a80f --- /dev/null +++ b/man/plot_simulation.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_simulation.R +\name{plot_simulation} +\alias{plot_simulation} +\title{Plot simulation results} +\usage{ +plot_simulation( + results, + n_items, + parameter, + latent_structure, + method_vector, + npc_range, + categories, + prop_noise, + outcome +) +} +\arguments{ +\item{results}{object containing results produced by the simulation study} + +\item{n_items}{experimental factor value: number of items} + +\item{parameter}{estimated parameter of interest} + +\item{latent_structure}{experimental factor value: whether the latent structure is imposed or not} + +\item{method_vector}{experimental factor value: imputation methods considered} + +\item{npc_range}{experimental factor value: number of components considered} + +\item{categories}{experimental factor value: number of categories of discretized variables} + +\item{outcome}{performance measure to plot} +} +\value{ +Returns the ggplot +} +\description{ +Generate the main plot for the simulation study. +} +\examples{ +# Define example inputs +results <- dataResults +n_items <- unique(dataResults$j)[1] +parameter <- levels(dataResults$par)[15] +latent_structure <- unique(dataResults$lv)[2] +method_vector <- levels(dataResults$method)[c(1, 3:5)] +npc_range <- c(0, 10) +categories <- levels(dataResults$K)[c(1, 3, 5)] +prop_noise <- unique(dataResults$pj)[c(1, 4)] +outcome <- c("bias", "CIC", "CIW", "mcsd")[1] + +# Use the function +plot_simulation( + results = dataResults, + n_items = unique(dataResults$j)[1], + parameter = levels(dataResults$par)[15], + latent_structure = unique(dataResults$lv)[2], + method_vector = levels(dataResults$method)[c(1, 3:5)], + npc_range = c(0, 10), + categories = levels(dataResults$K)[c(1, 3, 5)], + prop_noise = unique(dataResults$pj)[c(1, 4)], + outcome = c("bias", "CIC", "CIW", "mcsd")[1] +) + +} +\author{ +Edoardo Costantini, 2023 +} diff --git a/man/plot_trace.Rd b/man/plot_trace.Rd new file mode 100644 index 0000000..c16bca1 --- /dev/null +++ b/man/plot_trace.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_trace.R +\name{plot_trace} +\alias{plot_trace} +\title{Trace plots} +\usage{ +plot_trace(mids_data, method, iters = c(0, 25), layout = c(2, 1)) +} +\arguments{ +\item{mids_data}{object containing imputation history produced by the simulation study} + +\item{method}{unit character vector naming the method to display} + +\item{iters}{numeric vector containing the iteration bounds (from-to) to plot} + +\item{var}{unit character vector naming the variable to display} +} +\value{ +Returns the lattice plot +} +\description{ +Generate trace plots to study convergence of imputation methods +} +\examples{ +# Define example inputs +mids_data <- mids_case +method <- "expert" +layout <- c(2, 3) +iters <- c(0, 25) + +# Use the function +plot_trace( + mids_data = mids_sim, + method = "MIMI", + layout <- c(2, 4), + iters = c(0, 100) +) + +} +\author{ +Edoardo Costantini, 2023 +} diff --git a/man/server.Rd b/man/server.Rd new file mode 100644 index 0000000..f977e6d --- /dev/null +++ b/man/server.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/server.R +\name{server} +\alias{server} +\title{server} +\usage{ +server(input, output, session) +} +\arguments{ +\item{input}{set of inputs coming from ui} + +\item{output}{set of outputs} + +\item{session}{session info and status} +} +\description{ +server function for the shiny app +} +\author{ +Edoardo Costantini, 2023 +} diff --git a/man/plotResults.Rd b/man/start_app.Rd similarity index 94% rename from man/plotResults.Rd rename to man/start_app.Rd index cabe0c0..ea6eaff 100644 --- a/man/plotResults.Rd +++ b/man/start_app.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/app.R -\name{plotResults} -\alias{plotResults} -\title{plotResults} +% Please edit documentation in R/start_app.R +\name{start_app} +\alias{start_app} +\title{start_app} \usage{ -plotResults() +start_app() } \value{ Shiny app UI. diff --git a/man/ui_call.Rd b/man/ui_call.Rd new file mode 100644 index 0000000..3260bba --- /dev/null +++ b/man/ui_call.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ui.R +\name{ui_call} +\alias{ui_call} +\title{User interface call} +\usage{ +ui_call() +} +\value{ +UI object that can be passed directly to shiny::shinyApp() +} +\description{ +Calls the definition of the user interface and returns it as an output +} +\author{ +Edoardo Costantini, 2023 +}