From 229e059483aac9ec5967c85ee41c9e2c197505bc Mon Sep 17 00:00:00 2001 From: Brian Clarke <9725212+bfclarke@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:08:36 +0100 Subject: [PATCH] scale gene impairment scores to fill full dosage range for REGENIE --- deeprvat/deeprvat/associate.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py index e1f64fdc..ad6560c7 100644 --- a/deeprvat/deeprvat/associate.py +++ b/deeprvat/deeprvat/associate.py @@ -451,7 +451,14 @@ def make_regenie_input_( ) as f: for i in trange(n_genes): varid = f"pseudovariant_gene_{ensgids[i]}" - this_burdens = burdens[:, i] # Rescale scores to be in range (0, 2) + this_burdens = burdens[:, i] + + # Rescale scores to fill out range [0, 1] (making dosages in [0, 2]) + min_burden = np.min(this_burdens) + max_burden = np.max(this_burdens) + this_burdens = (this_burdens - min_burden) / (max_burden - min_burden) + + # REGENIE assumes by default genotypes are stored alt-first genotypes = np.stack( (this_burdens, np.zeros(this_burdens.shape), 1 - this_burdens), axis=1,