cytomining · shntnu · Jul 2, 2025 · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025
diff --git a/.gitignore b/.gitignore
@@ -159,6 +159,11 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-examples/data/
+examples/data/*
+utils/data/*
 examples/cache/
-.vscode/
+.vscode/
+examples/results/
+CLAUDE.md
+.claude/settings.local.json
+utils/output/
diff --git a/utils/configs/activity_analysis.yaml b/utils/configs/activity_analysis.yaml
@@ -0,0 +1,58 @@
+# Configuration for phenotypic activity analysis
+# Matches the phenotypic_activity.ipynb example
+
+data:
+  path: "data/2016_04_01_a549_48hr_batch1_plateSQ00014812.csv"
+  metadata_regex: "^Metadata"  # start-anchored: matches names beginning with "Metadata"
+
+preprocessing:
+  # Note: the notebook also removes constant columns. No step is listed for that
+  # here because the runner handles it differently, with similar results.
+
+  # Assign reference index for controls (DMSO)
+  - type: apply_assign_reference
+    params:
+      condition: "Metadata_broad_sample == 'DMSO'"
+      reference_col: "Metadata_reference_index"
+      default_value: -1
+
+  # Example: Add a column for high-dose EGFR inhibitors
+  - type: add_column_from_query
+    params:
+      query: '(Metadata_moa == "EGFR inhibitor") & (Metadata_mmoles_per_liter > 1)'
+      column_name: "Metadata_is_high_dose_EGFR_inhibitor"
+      fill_value: false  # Optional: fill NaN values (e.g., when moa or concentration is missing)
+
+average_precision:
+  params:
+    # Positive pairs: replicates of the same compound
+    pos_sameby: ["Metadata_broad_sample", "Metadata_reference_index"]
+    pos_diffby: []
+
+    # Negative pairs: compound vs control
+    neg_sameby: []
+    neg_diffby: ["Metadata_broad_sample", "Metadata_reference_index"]
+
+    # Using default distance (cosine) as in notebook
+
+mean_average_precision:
+  params:
+    sameby: ["Metadata_broad_sample"]  # Group by compound
+    null_size: 1000000                 # As used in notebook
+    threshold: 0.05                    # NOTE(review): presumably the p-value cutoff - confirm
+    seed: 0                            # As used in notebook
+
+output:
+  path: "data/activity_map_runner.csv"  # also the activity_csv input of consistency_analysis.yaml
+  save_ap_scores: true  # Save AP scores to match notebook output
+
+plotting:
+  enabled: true
+  path: "output/map_activity_plot.png"
+  format: "png"  # or pdf, svg, etc.
+  title: "Phenotypic Activity Assessment"
+  xlabel: "mAP"
+  ylabel: "-log10(p-value)"
+  annotation_prefix: "Phenotypically active"
+  figsize: [8, 6]
+  dpi: 100
diff --git a/utils/configs/consistency_analysis.yaml b/utils/configs/consistency_analysis.yaml
@@ -0,0 +1,67 @@
+# Configuration for phenotypic consistency analysis
+# Matches the phenotypic_consistency.ipynb example
+
+data:
+  path: "data/2016_04_01_a549_48hr_batch1_plateSQ00014812.csv"
+  metadata_regex: "^Metadata"
+
+preprocessing:
+  # Keep only the compounds found active by the activity analysis
+  - type: filter_active
+    params:
+      activity_csv: "data/activity_map_runner.csv"  # same path as output.path in activity_analysis.yaml
+      on_column: "Metadata_broad_sample"
+
+  # Drop rows whose target is missing (the notebook does this implicitly through its query)
+  - type: dropna
+    params:
+      columns: ["Metadata_target"]
+
+  # Aggregate replicates by taking median of features (as done in notebook)
+  - type: aggregate_replicates
+    params:
+      groupby: ["Metadata_broad_sample", "Metadata_target"]
+
+  # Split the pipe-separated target values into lists for multilabel analysis
+  - type: split_multilabel
+    params:
+      column: "Metadata_target"
+      separator: "|"
+
+average_precision:
+  # Use multilabel since a compound can have several targets (split into lists by split_multilabel above)
+  multilabel: true
+
+  params:
+    # Positive pairs: compounds sharing the same target
+    pos_sameby: ["Metadata_target"]
+    pos_diffby: []
+
+    # Negative pairs: compounds with different targets
+    neg_sameby: []
+    neg_diffby: ["Metadata_target"]
+
+    # Column that carries the multiple labels (the one split in preprocessing)
+    multilabel_col: "Metadata_target"
+
+mean_average_precision:
+  params:
+    sameby: ["Metadata_target"]  # Group by target
+    null_size: 1000000           # As used in notebook
+    threshold: 0.05              # NOTE(review): presumably the p-value cutoff - confirm
+    seed: 0                      # As used in notebook
+
+output:
+  path: "data/target_maps_runner.csv"
+  save_ap_scores: false  # AP scores are not saved for this analysis
+
+plotting:
+  enabled: true
+  path: "output/map_consistency_plot.png"
+  format: "png"  # or pdf, svg, etc.
+  title: "Phenotypic Consistency Assessment"
+  xlabel: "mAP"
+  ylabel: "-log10(p-value)"
+  annotation_prefix: "Phenotypically consistent"
+  figsize: [8, 6]
+  dpi: 100