diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..d5e02f7 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,34 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: + - '**' # This matches all branches + pull_request: + +name: R-CMD-check.yaml + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.gitignore b/.gitignore index ab3dca7..c833b6f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ src/*.so src/*.dll *.Rcheck/ analysis/ +*.o +*.so diff --git a/R/RcppExports.R b/R/RcppExports.R index 8146c4b..9dcc4b2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -8,10 +8,10 @@ NULL NULL .pairwise_col_diff_cpp <- function(x, col_names) { - .Call('_biomarkerPanels_pairwise_col_diff_cpp', PACKAGE = 'biomarkerPanels', x, col_names) + .Call(`_biomarkerPanels_pairwise_col_diff_cpp`, x, col_names) } .pairwise_ratios_cpp <- function(x, feature_col, feature_name, other_names) { - .Call('_biomarkerPanels_pairwise_ratios_cpp', PACKAGE = 'biomarkerPanels', x, feature_col, feature_name, other_names) + .Call(`_biomarkerPanels_pairwise_ratios_cpp`, x, feature_col, feature_name, other_names) } diff --git 
a/data-raw/simulate_gene_expression.R b/data-raw/simulated_gene_expression.R similarity index 94% rename from data-raw/simulate_gene_expression.R rename to data-raw/simulated_gene_expression.R index 3b7a100..f2de497 100644 --- a/data-raw/simulate_gene_expression.R +++ b/data-raw/simulated_gene_expression.R @@ -1,3 +1,5 @@ +## code to prepare `simulated_gene_expression` dataset goes here + #!/usr/bin/env Rscript # Simulate four log-scale gene expression datasets with dataset-specific shifts @@ -5,7 +7,7 @@ set.seed(20240220) -n_features <- 5000L +n_features <- 500L n_samples <- 100L n_datasets <- 4L dataset_ids <- seq_len(n_datasets) @@ -81,7 +83,7 @@ for (i in dataset_ids) { names(x_list) <- sprintf("x%d", dataset_ids) names(y_list) <- sprintf("y%d", dataset_ids) -output <- list( +simulated_gene_expression <- list( metadata = list( seed = 20240220, n_samples = n_samples, @@ -95,6 +97,5 @@ output <- list( y_list = y_list ) -saveRDS(output, file = "simulated_gene_expression.Rds") - -cat("Simulation complete. Saved to simulated_gene_expression.Rds\n") +usethis::use_data(simulated_gene_expression, compress = "xz") +cat("Simulation complete. Saved to data/simulated_gene_expression.rda\n") diff --git a/data/simulated_gene_expression.rda b/data/simulated_gene_expression.rda new file mode 100644 index 0000000..f67e077 Binary files /dev/null and b/data/simulated_gene_expression.rda differ
diff --git a/moo.Rproj b/moo.Rproj new file mode 100644 index 0000000..9b5fffd --- /dev/null +++ b/moo.Rproj @@ -0,0 +1,18 @@ +Version: 1.0 +ProjectId: a48d3ba8-5f77-4365-83ec-cb7d9519c322 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/simulated_gene_expression.Rds b/simulated_gene_expression.Rds deleted file mode 100644 index 5643c40..0000000 Binary files a/simulated_gene_expression.Rds and /dev/null differ diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd index 169017e..418ed08 100644 --- a/vignettes/getting-started.Rmd +++ b/vignettes/getting-started.Rmd @@ -6,8 +6,10 @@ output: toc: true vignette: > %\VignetteIndexEntry{Getting Started with biomarkerPanels} - %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console --- ```{r setup, include=FALSE} @@ -53,28 +55,37 @@ package build, these files would live in `inst/extdata/`; here we reuse the fixture directly. 
```{r load-fixture} -fixture_candidates <- c( - system.file("extdata", "fake_gene_expression.Rds", package = "biomarkerPanels"), - system.file("test-data", "fake_gene_expression.Rds", package = "biomarkerPanels"), - file.path("..", "tests", "data", "fake_gene_expression.Rds"), - file.path("..", "..", "tests", "data", "fake_gene_expression.Rds") -) -fixture_candidates <- fixture_candidates[nzchar(fixture_candidates)] -fixture_path <- NULL -for (candidate in fixture_candidates) { - if (file.exists(candidate)) { - fixture_path <- candidate - break - } -} -if (is.null(fixture_path)) { - stop( - "Unable to locate `fake_gene_expression.Rds`. ", - "Regenerate it via `data-raw/simulate_gene_expression.R` if needed.", - call. = FALSE - ) -} -cohorts <- readRDS(fixture_path) +# fixture_path <- system.file("test-data", "fake_gene_expression.Rds", package = "biomarkerPanels") +# if (fixture_path == "") { +# fixture_path <- file.path("./tests", "data", "fake_gene_expression.Rds") +# } +# stopifnot(file.exists(fixture_path)) # didn't exist for some reason. +# cohorts <- readRDS(fixture_path) +data(simulated_gene_expression, package = "biomarkerPanels") +cohorts <- simulated_gene_expression + +# fixture_candidates <- c( +# system.file("extdata", "fake_gene_expression.Rds", package = "biomarkerPanels"), +# system.file("test-data", "fake_gene_expression.Rds", package = "biomarkerPanels"), +# file.path("..", "tests", "data", "fake_gene_expression.Rds"), +# file.path("..", "..", "tests", "data", "fake_gene_expression.Rds") +# ) +# fixture_candidates <- fixture_candidates[nzchar(fixture_candidates)] +# fixture_path <- NULL +# for (candidate in fixture_candidates) { +# if (file.exists(candidate)) { +# fixture_path <- candidate +# break +# } +# } +# if (is.null(fixture_path)) { +# stop( +# "Unable to locate `fake_gene_expression.Rds`. ", +# "Regenerate it via `data-raw/simulate_gene_expression.R` if needed.", +# call. 
= FALSE +# ) +# } +# cohorts <- readRDS(fixture_path) lapply(cohorts$x_list, dim) colnames(cohorts$x_list[[1]])[1:6]