Skip to content

Commit 2b762c6

Browse files
authored
Add gather --allow-partial, improve related error message (#588)
* improve error message * Add --allow-partial to gather * use it for the other call as well * ignore coverage on line that should never happen * Make it possible to do both RHFE and RBFE
1 parent 844d5de commit 2b762c6

File tree

2 files changed

+100
-16
lines changed

2 files changed

+100
-16
lines changed

openfecli/commands/gather.py

+72-14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from openfecli import OFECommandPlugin
66
from openfecli.clicktypes import HyphenAwareChoice
77
import pathlib
8+
import warnings
89

910

1011
def _get_column(val):
@@ -85,40 +86,90 @@ def legacy_get_type(res_fn):
8586
return 'complex'
8687

8788

88-
def _get_ddgs(legs):
89+
def _generate_bad_legs_error_message(set_vals, ligpair):
90+
expected_rbfe = {'complex', 'solvent'}
91+
expected_rhfe = {'solvent', 'vacuum'}
92+
maybe_rhfe = bool(set_vals & expected_rhfe)
93+
maybe_rbfe = bool(set_vals & expected_rbfe)
94+
if maybe_rhfe and not maybe_rbfe:
95+
msg = (
96+
"This appears to be an RHFE calculation, but we're "
97+
f"missing {expected_rhfe - set_vals} runs for the "
98+
f"edge with ligands {ligpair}."
99+
)
100+
elif maybe_rbfe and not maybe_rhfe:
101+
msg = (
102+
"This appears to be an RBFE calculation, but we're "
103+
f"missing {expected_rbfe - set_vals} runs for the "
104+
f"edge with ligands {ligpair}."
105+
)
106+
elif maybe_rbfe and maybe_rhfe:
107+
msg = (
108+
"Unable to determine whether this is an RBFE "
109+
f"or an RHFE calculation. Found legs {set_vals} "
110+
f"for ligands {ligpair}. Those ligands are missing one "
111+
f"of: {(expected_rhfe | expected_rbfe) - set_vals}."
112+
)
113+
else: # -no-cov-
114+
# this should never happen
115+
msg = (
116+
"Something went very wrong while determining the type "
117+
f"of RFE calculation. For the ligand pair {ligpair}, "
118+
f"we found legs labelled {set_vals}. We expected either "
119+
f"{expected_rhfe} or {expected_rbfe}."
120+
)
121+
122+
msg += (
123+
"\n\nYou can force partial gathering of results, without "
124+
"problematic edges, by using the --allow-partial flag of the gather "
125+
"command. Note that this may cause problems with predicting "
126+
"absolute free energies from the relative free energies."
127+
)
128+
return msg
129+
130+
131+
def _get_ddgs(legs, error_on_missing=True):
89132
import numpy as np
90133
DDGs = []
91134
for ligpair, vals in sorted(legs.items()):
135+
set_vals = set(vals)
92136
DDGbind = None
93137
DDGhyd = None
94138
bind_unc = None
95139
hyd_unc = None
96140

97-
if 'complex' in vals and 'solvent' in vals:
141+
do_rbfe = (len(set_vals & {'complex', 'solvent'}) == 2)
142+
do_rhfe = (len(set_vals & {'vacuum', 'solvent'}) == 2)
143+
144+
if do_rbfe:
98145
DG1_mag, DG1_unc = vals['complex']
99146
DG2_mag, DG2_unc = vals['solvent']
100147
if not ((DG1_mag is None) or (DG2_mag is None)):
101148
# DDG(2,1)bind = DG(1->2)complex - DG(1->2)solvent
102149
DDGbind = (DG1_mag - DG2_mag).m
103150
bind_unc = np.sqrt(np.sum(np.square([DG1_unc.m, DG2_unc.m])))
104-
elif 'solvent' in vals and 'vacuum' in vals:
151+
152+
if do_rhfe:
105153
DG1_mag, DG1_unc = vals['solvent']
106154
DG2_mag, DG2_unc = vals['vacuum']
107155
if not ((DG1_mag is None) or (DG2_mag is None)):
108156
DDGhyd = (DG1_mag - DG2_mag).m
109157
hyd_unc = np.sqrt(np.sum(np.square([DG1_unc.m, DG2_unc.m])))
110-
else:
111-
raise RuntimeError("Unable to determine type of RFE calculation "
112-
f"for edges with labels {list(vals)} for "
113-
f"ligands {ligpair}")
158+
159+
if not do_rbfe and not do_rhfe:
160+
msg = _generate_bad_legs_error_message(set_vals, ligpair)
161+
if error_on_missing:
162+
raise RuntimeError(msg)
163+
else:
164+
warnings.warn(msg)
114165

115166
DDGs.append((*ligpair, DDGbind, bind_unc, DDGhyd, hyd_unc))
116167

117168
return DDGs
118169

119170

120-
def _write_ddg(legs, writer):
121-
DDGs = _get_ddgs(legs)
171+
def _write_ddg(legs, writer, allow_partial):
172+
DDGs = _get_ddgs(legs, error_on_missing=not allow_partial)
122173
writer.writerow(["ligand_i", "ligand_j", "DDG(i->j) (kcal/mol)",
123174
"uncertainty (kcal/mol)"])
124175
for ligA, ligB, DDGbind, bind_unc, DDGhyd, hyd_unc in DDGs:
@@ -133,7 +184,7 @@ def _write_ddg(legs, writer):
133184
writer.writerow([ligA, ligB, DDGhyd, hyd_unc])
134185

135186

136-
def _write_dg_raw(legs, writer):
187+
def _write_dg_raw(legs, writer, allow_partial):
137188
writer.writerow(["leg", "ligand_i", "ligand_j", "DG(i->j) (kcal/mol)",
138189
"uncertainty (kcal/mol)"])
139190
for ligpair, vals in sorted(legs.items()):
@@ -146,11 +197,11 @@ def _write_dg_raw(legs, writer):
146197
writer.writerow([simtype, *ligpair, m, u])
147198

148199

149-
def _write_dg_mle(legs, writer):
200+
def _write_dg_mle(legs, writer, allow_partial):
150201
import networkx as nx
151202
import numpy as np
152203
from cinnabar.stats import mle
153-
DDGs = _get_ddgs(legs)
204+
DDGs = _get_ddgs(legs, error_on_missing=not allow_partial)
154205
MLEs = []
155206
# 4b) perform MLE
156207
g = nx.DiGraph()
@@ -219,7 +270,14 @@ def _write_dg_mle(legs, writer):
219270
@click.option('output', '-o',
220271
type=click.File(mode='w'),
221272
default='-')
222-
def gather(rootdir, output, report):
273+
@click.option(
274+
'--allow-partial', is_flag=True, default=False,
275+
help=(
276+
"Do not raise errors if results are missing parts for some edges. "
277+
"(Skip those edges and issue warning instead.)"
278+
)
279+
)
280+
def gather(rootdir, output, report, allow_partial):
223281
"""Gather simulation result jsons of relative calculations to a tsv file
224282
225283
This walks ROOTDIR recursively and finds all result JSON files from the
@@ -287,7 +345,7 @@ def gather(rootdir, output, report):
287345
'ddg': _write_ddg,
288346
'dg-raw': _write_dg_raw,
289347
}[report.lower()]
290-
writing_func(legs, writer)
348+
writing_func(legs, writer, allow_partial)
291349

292350

293351
PLUGIN = OFECommandPlugin(

openfecli/tests/commands/test_gather.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import pytest
77

88
from openfecli.commands.gather import (
9-
gather, format_estimate_uncertainty, _get_column
9+
gather, format_estimate_uncertainty, _get_column,
10+
_generate_bad_legs_error_message,
1011
)
1112

1213
@pytest.mark.parametrize('est,unc,unc_prec,est_str,unc_str', [
@@ -108,6 +109,21 @@ def test_gather(results_dir, report):
108109
assert set(expected.split(b'\n')) == actual_lines
109110

110111

112+
@pytest.mark.parametrize('include', ['complex', 'solvent', 'vacuum'])
def test_generate_bad_legs_error_message(include):
    """Check the diagnosis produced when only a single leg is present.

    A lone 'complex' leg can only be RBFE and a lone 'vacuum' leg can only
    be RHFE; both are missing 'solvent'. A lone 'solvent' leg fits either
    type, so the message must list both 'complex' and 'vacuum' as possibly
    missing.
    """
    expected = {
        'complex': ("appears to be an RBFE", "missing {'solvent'}"),
        'vacuum': ("appears to be an RHFE", "missing {'solvent'}"),
        # Previously this case only looked for "'solvent'", the leg that is
        # present, so the missing 'vacuum' leg was never checked.
        'solvent': ("whether this is an RBFE or an RHFE",
                    "Found legs {'solvent'}", "'complex'", "'vacuum'"),
    }[include]
    set_vals = {include}
    ligpair = {'lig1', 'lig2'}
    msg = _generate_bad_legs_error_message(set_vals, ligpair)
    for string in expected:
        assert string in msg
    # Every variant of the message ends with the --allow-partial hint.
    assert "--allow-partial" in msg
125+
126+
111127
def test_missing_leg_error(results_dir):
112128
file_to_remove = "easy_rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json"
113129
(pathlib.Path("results") / file_to_remove).unlink()
@@ -116,6 +132,16 @@ def test_missing_leg_error(results_dir):
116132
result = runner.invoke(gather, ['results'] + ['-o', '-'])
117133
assert result.exit_code == 1
118134
assert isinstance(result.exception, RuntimeError)
119-
assert "labels ['solvent']" in str(result.exception)
135+
assert "Unable to determine" in str(result.exception)
120136
assert "'lig_ejm_31'" in str(result.exception)
121137
assert "'lig_ejm_42'" in str(result.exception)
138+
139+
140+
def test_missing_leg_allow_partial(results_dir):
    """With --allow-partial, gather exits with code 0 despite a missing leg."""
    # Delete one result file (the complex leg, judging by its name) so that
    # one edge is left incomplete.
    # NOTE(review): presumably the results_dir fixture provides ./results.
    missing_leg = "easy_rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json"
    pathlib.Path("results", missing_leg).unlink()

    cli_args = ['results', '--allow-partial', '-o', '-']
    outcome = CliRunner().invoke(gather, cli_args)
    assert outcome.exit_code == 0

0 commit comments

Comments
 (0)