Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

examples/data/
examples/data/*
utils/data/*
examples/cache/
.vscode/
.vscode/
examples/results/
CLAUDE.md
.claude/settings.local.json
utils/output/
58 changes: 58 additions & 0 deletions utils/configs/activity_analysis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Configuration for the phenotypic activity analysis.
# Mirrors the phenotypic_activity.ipynb example notebook.
#
# Layout: one top-level stanza per pipeline stage
# (data -> preprocessing -> average_precision -> mean_average_precision
#  -> output -> plotting). Every stanza is a nested mapping; the
# preprocessing stanza is an ordered list of {type, params} steps.

data:
  # Input profile table; columns whose names match metadata_regex are
  # treated as metadata.
  path: "data/2016_04_01_a549_48hr_batch1_plateSQ00014812.csv"
  metadata_regex: "^Metadata"

preprocessing:
  # The notebook removes constant columns. There is no explicit step for
  # that here: per the original note it is handled differently in the
  # runner, with similar results.

  # Assign a reference index for the controls (DMSO).
  - type: apply_assign_reference
    params:
      condition: "Metadata_broad_sample == 'DMSO'"
      reference_col: "Metadata_reference_index"
      default_value: -1

  # Example: add a column flagging high-dose EGFR inhibitors.
  - type: add_column_from_query
    params:
      query: '(Metadata_moa == "EGFR inhibitor") & (Metadata_mmoles_per_liter > 1)'
      column_name: "Metadata_is_high_dose_EGFR_inhibitor"
      # Optional: value used where the query yields NaN
      # (e.g. when moa or concentration is missing).
      fill_value: false

average_precision:
  # The default distance (cosine) is used, as in the notebook, so no
  # distance is set here.
  params:
    # Positive pairs: replicates of the same compound.
    pos_sameby: ["Metadata_broad_sample", "Metadata_reference_index"]
    pos_diffby: []

    # Negative pairs: compound vs control.
    neg_sameby: []
    neg_diffby: ["Metadata_broad_sample", "Metadata_reference_index"]

mean_average_precision:
  params:
    sameby: ["Metadata_broad_sample"]  # Group by compound
    null_size: 1000000  # As used in notebook
    threshold: 0.05
    seed: 0  # As used in notebook

output:
  path: "data/activity_map_runner.csv"
  save_ap_scores: true  # Save AP scores to match notebook output

plotting:
  enabled: true
  path: "output/map_activity_plot.png"
  format: "png"  # or pdf, svg, etc.
  title: "Phenotypic Activity Assessment"
  xlabel: "mAP"
  ylabel: "-log10(p-value)"
  annotation_prefix: "Phenotypically active"
  figsize: [8, 6]
  dpi: 100
67 changes: 67 additions & 0 deletions utils/configs/consistency_analysis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Configuration for the phenotypic consistency analysis.
# Mirrors the phenotypic_consistency.ipynb example notebook.
#
# Layout: one top-level stanza per pipeline stage
# (data -> preprocessing -> average_precision -> mean_average_precision
#  -> output -> plotting). Every stanza is a nested mapping; the
# preprocessing stanza is an ordered list of {type, params} steps.

data:
  # Input profile table; columns whose names match metadata_regex are
  # treated as metadata.
  path: "data/2016_04_01_a549_48hr_batch1_plateSQ00014812.csv"
  metadata_regex: "^Metadata"

preprocessing:
  # Keep only the compounds found active by the activity analysis.
  # activity_csv is the same path the activity analysis config writes
  # its output to, so that analysis has to be run first.
  - type: filter_active
    params:
      activity_csv: "data/activity_map_runner.csv"
      on_column: "Metadata_broad_sample"

  # Remove rows with missing targets (implicit in the notebook via query).
  - type: dropna
    params:
      columns: ["Metadata_target"]

  # Aggregate replicates by taking the median of features
  # (as done in the notebook).
  - type: aggregate_replicates
    params:
      groupby: ["Metadata_broad_sample", "Metadata_target"]

  # Split the pipe-separated target values into lists for multilabel
  # analysis.
  - type: split_multilabel
    params:
      column: "Metadata_target"
      separator: "|"

average_precision:
  # Use multilabel since compounds have multiple targets (separated by |).
  multilabel: true

  params:
    # Positive pairs: compounds sharing the same target.
    pos_sameby: ["Metadata_target"]
    pos_diffby: []

    # Negative pairs: compounds with different targets.
    neg_sameby: []
    neg_diffby: ["Metadata_target"]

    # For multilabel analysis, specify the column.
    # NOTE(review): nesting was reconstructed from flattened text; confirm
    # that multilabel_col belongs under params rather than beside multilabel.
    multilabel_col: "Metadata_target"

mean_average_precision:
  params:
    sameby: ["Metadata_target"]  # Group by target
    null_size: 1000000  # As used in notebook
    threshold: 0.05
    seed: 0  # As used in notebook

output:
  path: "data/target_maps_runner.csv"
  save_ap_scores: false

plotting:
  enabled: true
  path: "output/map_consistency_plot.png"
  format: "png"  # or pdf, svg, etc.
  title: "Phenotypic Consistency Assessment"
  xlabel: "mAP"
  ylabel: "-log10(p-value)"
  annotation_prefix: "Phenotypically consistent"
  figsize: [8, 6]
  dpi: 100
Loading