diff --git a/.github/.gitignore b/.github/.gitignore
new file mode 100644
index 0000000..2d19fc7
--- /dev/null
+++ b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
new file mode 100644
index 0000000..d5e02f7
--- /dev/null
+++ b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,34 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: 
+      - '**' # Matches every branch, including names that contain '/'
+  pull_request: # no branch filter, so every pull request triggers the workflow
+
+name: R-CMD-check.yaml
+
+permissions: read-all # the workflow token only gets read access
+
+jobs:
+  R-CMD-check:
+    runs-on: ubuntu-latest # single runner; no OS or R-version matrix
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} # expose the workflow token under the GITHUB_PAT name
+      R_KEEP_PKG_SOURCE: yes
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck # rcmdcheck is installed on top of the package's own dependencies
+          needs: check
+
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' # R character vector of build flags: skip the PDF manual, compact vignette PDFs with gs+qpdf
diff --git a/.gitignore b/.gitignore
index ab3dca7..c833b6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,5 @@ src/*.so
 src/*.dll
 *.Rcheck/
 analysis/
+*.o
+*.so
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 8146c4b..9dcc4b2 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -8,10 +8,10 @@ NULL
 NULL
 
 .pairwise_col_diff_cpp <- function(x, col_names) {
-    .Call('_biomarkerPanels_pairwise_col_diff_cpp', PACKAGE = 'biomarkerPanels', x, col_names)
+    .Call(`_biomarkerPanels_pairwise_col_diff_cpp`, x, col_names)
 }
 
 .pairwise_ratios_cpp <- function(x, feature_col, feature_name, other_names) {
-    .Call('_biomarkerPanels_pairwise_ratios_cpp', PACKAGE = 'biomarkerPanels', x, feature_col, feature_name, other_names)
+    .Call(`_biomarkerPanels_pairwise_ratios_cpp`, x, feature_col, feature_name, other_names)
 }
 
diff --git a/data-raw/simulate_gene_expression.R b/data-raw/simulated_gene_expression.R
similarity index 94%
rename from data-raw/simulate_gene_expression.R
rename to data-raw/simulated_gene_expression.R
index 3b7a100..f2de497 100644
--- a/data-raw/simulate_gene_expression.R
+++ b/data-raw/simulated_gene_expression.R
@@ -1,3 +1,5 @@
 #!/usr/bin/env Rscript
 
+## Code to prepare the `simulated_gene_expression` dataset.
+
 # Simulate four log-scale gene expression datasets with dataset-specific shifts
@@ -5,7 +7,7 @@
 
 set.seed(20240220)
 
-n_features <- 5000L
+n_features <- 500L
 n_samples <- 100L
 n_datasets <- 4L
 dataset_ids <- seq_len(n_datasets)
@@ -81,7 +83,7 @@ for (i in dataset_ids) {
 names(x_list) <- sprintf("x%d", dataset_ids)
 names(y_list) <- sprintf("y%d", dataset_ids)
 
-output <- list(
+simulated_gene_expression <- list(
   metadata = list(
     seed = 20240220,
     n_samples = n_samples,
@@ -95,6 +97,5 @@ output <- list(
   y_list = y_list
 )
 
-saveRDS(output, file = "simulated_gene_expression.Rds")
-
-cat("Simulation complete. Saved to simulated_gene_expression.Rds\n")
+usethis::use_data(simulated_gene_expression, compress = "xz", overwrite = TRUE)
+cat("Simulation complete. Saved to data/simulated_gene_expression.rda\n")
diff --git a/data/simulated_gene_expression.rda b/data/simulated_gene_expression.rda
new file mode 100644
index 0000000..f67e077
Binary files /dev/null and b/data/simulated_gene_expression.rda differ
diff --git a/man/optimize_panel.Rd b/man/optimize_panel.Rd
index f93f129..fe85ef8 100644
--- a/man/optimize_panel.Rd
+++ b/man/optimize_panel.Rd
@@ -47,6 +47,17 @@ mappings).}
 from \code{\link[=min_metric_constraint]{min_metric_constraint()}}) that must evaluate to \code{TRUE} for a candidate
 solution to be considered feasible.}
 
+\item{cohort_aggregator}{Transformation applied to cohort feature matrices
+prior to alignment. Defaults to \code{"pairwise_ratios"}, which generates
+pairwise within-cohort contrasts via \code{\link[=pairwise_col_diff]{pairwise_col_diff()}} to dampen
+distributional shifts across sites. Future work: support additional
+harmonisation strategies (e.g., empirical Bayes, domain adversarial
+mappings).}
+
+\item{constraints}{Optional list of constraint descriptors (e.g.,
+from \code{\link[=min_metric_constraint]{min_metric_constraint()}}) that must evaluate to \code{TRUE} for a candidate
+solution to be considered feasible.}
+
 \item{scoring_fn}{Function producing per-sample scores from the selected
 features. Signature:
 \verb{function(x_selected, selected_features, truth, cohort = NULL, ...)}.}
diff --git a/moo.Rproj b/moo.Rproj
new file mode 100644
index 0000000..9b5fffd
--- /dev/null
+++ b/moo.Rproj
@@ -0,0 +1,18 @@
+Version: 1.0
+ProjectId: a48d3ba8-5f77-4365-83ec-cb7d9519c322
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/simulated_gene_expression.Rds b/simulated_gene_expression.Rds
deleted file mode 100644
index 5643c40..0000000
Binary files a/simulated_gene_expression.Rds and /dev/null differ
diff --git a/vignettes/getting-started.Rmd b/vignettes/getting-started.Rmd
index 169017e..418ed08 100644
--- a/vignettes/getting-started.Rmd
+++ b/vignettes/getting-started.Rmd
@@ -6,8 +6,10 @@ output:
     toc: true
 vignette: >
   %\VignetteIndexEntry{Getting Started with biomarkerPanels}
-  %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options: 
+  chunk_output_type: console
 ---
 
 ```{r setup, include=FALSE}
@@ -53,28 +55,9 @@ package build, these files would live in `inst/extdata/`; here we reuse the
 fixture directly.
 
 ```{r load-fixture}
-fixture_candidates <- c(
-  system.file("extdata", "fake_gene_expression.Rds", package = "biomarkerPanels"),
-  system.file("test-data", "fake_gene_expression.Rds", package = "biomarkerPanels"),
-  file.path("..", "tests", "data", "fake_gene_expression.Rds"),
-  file.path("..", "..", "tests", "data", "fake_gene_expression.Rds")
-)
-fixture_candidates <- fixture_candidates[nzchar(fixture_candidates)]
-fixture_path <- NULL
-for (candidate in fixture_candidates) {
-  if (file.exists(candidate)) {
-    fixture_path <- candidate
-    break
-  }
-}
-if (is.null(fixture_path)) {
-  stop(
-    "Unable to locate `fake_gene_expression.Rds`. ",
-    "Regenerate it via `data-raw/simulate_gene_expression.R` if needed.",
-    call. = FALSE
-  )
-}
-cohorts <- readRDS(fixture_path)
+# Bundled dataset; regenerate with `data-raw/simulated_gene_expression.R`.
+data(simulated_gene_expression, package = "biomarkerPanels")
+cohorts <- simulated_gene_expression
 
 lapply(cohorts$x_list, dim)
 colnames(cohorts$x_list[[1]])[1:6]