From d23d95d9bc1b5790139e460c5e134d682fc6f2c9 Mon Sep 17 00:00:00 2001 From: Aaron Virshup Date: Wed, 5 Oct 2016 18:13:55 -0700 Subject: [PATCH] Make `assign_forcefield` work in non-notebook environments --- moldesign/_tests/helpers.py | 11 +-- moldesign/_tests/test_mm.py | 96 +++++++++++++++++++++++++++ moldesign/helpers/pdb.py | 40 +++++++++++ moldesign/interfaces/ambertools.py | 80 +++++++++++++++++++++- moldesign/molecules/README.md | 9 ++- moldesign/widgets/parameterization.py | 57 +--------------- 6 files changed, 226 insertions(+), 67 deletions(-) create mode 100644 moldesign/_tests/test_mm.py diff --git a/moldesign/_tests/helpers.py b/moldesign/_tests/helpers.py index b210754..5c505cd 100644 --- a/moldesign/_tests/helpers.py +++ b/moldesign/_tests/helpers.py @@ -2,10 +2,10 @@ from moldesign import units as u import numpy as np -DEFSTEP = 0.0000005*u.angstrom +DEFSTEP = 0.000005*u.angstrom -def num_grad(mol, fn, step=DEFSTEP, fnargs=None, fnkwargs=None): +def num_grad(mol, fn, step=DEFSTEP, atoms=None, fnargs=None, fnkwargs=None): grad = None origpos = mol.positions.copy() if fnargs is None: @@ -13,7 +13,10 @@ def num_grad(mol, fn, step=DEFSTEP, fnargs=None, fnkwargs=None): if fnkwargs is None: fnkwargs = dict() - for iatom, atom in enumerate(mol.atoms): + if atoms is None: + atoms = mol.atoms + + for iatom, atom in enumerate(atoms): for idim in xrange(3): atom.position[idim] += step vplus = fn(*fnargs, **fnkwargs) @@ -22,7 +25,7 @@ def num_grad(mol, fn, step=DEFSTEP, fnargs=None, fnkwargs=None): mol.positions = origpos # reset positions if grad is None: - grad = np.zeros(mol.positions.shape) * vplus.units/mol.positions.units + grad = np.zeros((len(atoms), 3)) * vplus.units/mol.positions.units grad[iatom, idim] = (vplus - vminus) / (2.0*step) return grad diff --git a/moldesign/_tests/test_mm.py b/moldesign/_tests/test_mm.py new file mode 100644 index 0000000..493f9de --- /dev/null +++ b/moldesign/_tests/test_mm.py @@ -0,0 +1,96 @@ +import random + +import pytest +import numpy as np + +import moldesign as mdt +from moldesign import units as u + +from . import helpers + + +registered_types = {} + +def typedfixture(*types, **kwargs): + """This is a decorator that lets us associate fixtures with one or more arbitrary types. + We'll later use this type to determine what tests to run on the result""" + + def fixture_wrapper(func): + for t in types: + registered_types.setdefault(t, []).append(func.__name__) + return pytest.fixture(**kwargs)(func) + + return fixture_wrapper + + +@pytest.fixture +def small_molecule(): + return mdt.from_smiles('CNCOS(=O)C') + + +@typedfixture('mdready') +def parameterize_zeros(small_molecule): + params = mdt.parameterize(small_molecule, charges='zero') + mol = mdt.assign_forcefield(small_molecule, parameters=params) + mol.set_energy_model(mdt.models.ForceField) + return mol + + +@typedfixture('mdready') +def parameterize_am1bcc(small_molecule): + params = mdt.parameterize(small_molecule, charges='am1-bcc', ffname='gaff') + mol = mdt.assign_forcefield(small_molecule, parameters=params) + mol.set_energy_model(mdt.models.ForceField) + return mol + + +@typedfixture('mdready') +def protein_default_amber_forcefield(): + mol = mdt.from_pdb('1YU8') + newmol = mdt.assign_forcefield(mol) + newmol.set_energy_model(mdt.models.ForceField) + return newmol + + +@typedfixture('mdready') +def gaff_model_gasteiger(small_molecule): + small_molecule.set_energy_model(mdt.models.GAFF, charges='gasteiger') + return small_molecule + + +@pytest.mark.parametrize('objkey', registered_types['mdready']) +def test_properties(objkey, request): + mol = request.getfuncargvalue(objkey) + energy = mol.calculate_potential_energy() + forces = mol.calculate_forces() + assert forces.shape == mol.positions.shape + + +@pytest.mark.parametrize('objkey', registered_types['mdready']) +def test_forces(objkey, request): + mol = request.getfuncargvalue(objkey) + + if mol.num_atoms > 20: + atoms = random.sample(mol.atoms, 20) + else: + atoms = mol.atoms + + anagrad = -mol.calculate_forces().defunits_value() + numgrad = helpers.num_grad(mol, + mol.calculate_potential_energy, + atoms=atoms, + step=0.005*u.angstrom + ).defunits_value() + testgrad = np.array([anagrad[a.index] for a in atoms]) + np.testing.assert_allclose(testgrad, numgrad, rtol=1.0, atol=1.0e-5) + # note: rtol doesn't work here, because the force varies too slowly in some directions and + # too quickly in others. So we just use an absolute error cutoff instead + + +@pytest.mark.parametrize('objkey', registered_types['mdready']) +def test_minimize(objkey, request): + mol = request.getfuncargvalue(objkey) + e1 = mol.calculate_potential_energy() + mol = request.getfuncargvalue(objkey) + traj = mol.minimize() + assert mol.calculate_potential_energy() < e1 diff --git a/moldesign/helpers/pdb.py b/moldesign/helpers/pdb.py index 71a3c43..6e8da63 100644 --- a/moldesign/helpers/pdb.py +++ b/moldesign/helpers/pdb.py @@ -107,6 +107,46 @@ def warn_assemblies(mol, assemblies): ' to build one of the above assemblies' +def guess_atnum_from_name(s): + """ Guess an atomic number given a name string (usually 1-3 characters). + + Args: + s (str): atomic number + + Returns: + int: atomic number + + Raises: + KeyError: if atomic number can't be determined + + Examples: + >>> guess_atnum_from_name('C') + 6 + >>> guess_atnum_from_name('C1') + 6 + >>> guess_atnum_from_name('cl3') + 17 + >>> guess_atnum_from_name('CL') + 17 + """ + try: # the unmodified string + return mdt.data.ATOMIC_NUMBERS[s] + except KeyError: + pass + + cleaned = ''.join((c.upper() if i==0 else c.lower()) + for i,c in enumerate(s) + if c.isalpha()) + + try: # just the letters, with proper capitalization + return mdt.data.ATOMIC_NUMBERS[cleaned] + except KeyError: + pass + + # otherwise, just the first letter + return mdt.data.ATOMIC_NUMBERS[cleaned[0]] + + def get_conect_records(pdbfile): """Parse a PDB file, return CONECT records diff --git a/moldesign/interfaces/ambertools.py b/moldesign/interfaces/ambertools.py index e546fbb..9980bf5 100644 --- a/moldesign/interfaces/ambertools.py +++ b/moldesign/interfaces/ambertools.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections -import warnings +import re + +import traitlets import moldesign as mdt import pyccc @@ -259,8 +261,15 @@ def assign_forcefield(mol, **kwargs): else: newmol = None - report = ParameterizationDisplay(job, mol, molout=newmol) - uibase.display_log(report, title='ERRORS/WARNINGS', show=True) + errors = _parse_tleap_errors(job, mol) + + try: + report = ParameterizationDisplay(errors, mol, molout=newmol) + uibase.display_log(report, title='ERRORS/WARNINGS', show=True) + except traitlets.TraitError: + print 'Forcefield assignment: %s' % ('Success' if newmol is not None else 'Failure') + for err in errors: + print err.desc if newmol is not None: return newmol @@ -293,6 +302,9 @@ def parameterize(mol, charges='esp', ffname='gaff2', **kwargs): forcefield parameters for other systems that contain this molecule """ assert mol.num_residues == 1 + if mol.residues[0].resname is None: + mol.residues[0].resname = 'UNL' + print 'Assigned residue name "UNL" to %s' % mol resname = mol.residues[0].resname if charges == 'am1-bcc' and 'am1-bcc' not in mol.properties: @@ -340,3 +352,65 @@ def finish_job(j): return mdt.compute.run_job(job, _return_result=True, **kwargs) +ATOMSPEC = re.compile(r'\.R<(\S+) (\d+)>\.A<(\S+) (\d+)>') + + +def _parse_tleap_errors(job, molin): + from moldesign.widgets.parameterization import UnusualBond, UnknownAtom, UnknownResidue + + # TODO: special messages for known problems (e.g. histidine) + msg = [] + unknown_res = set() # so we can print only one error per unkonwn residue + lineiter = iter(job.stdout.split('\n')) + offset = utils.if_not_none(molin.residues[0].pdbindex, 0) + reslookup = {str(i+offset): r for i,r in enumerate(molin.residues)} + + def _atom_from_re(s): + resname, residx, atomname, atomidx = s + r = reslookup[residx] + a = r[atomname] + return a + + def unusual_bond(l): + atomre1, atomre2 = ATOMSPEC.findall(l) + try: + a1, a2 = _atom_from_re(atomre1), _atom_from_re(atomre2) + except KeyError: + a1 = a2 = None + r1 = reslookup[atomre1[1]] + r2 = reslookup[atomre2[1]] + return UnusualBond(l, (a1, a2), (r1, r2)) + + while True: + try: + line = lineiter.next() + except StopIteration: + break + + fields = line.split() + if fields[0:2] == ['Unknown','residue:']: + # EX: "Unknown residue: 3TE number: 499 type: Terminal/beginning" + res = molin.residues[int(fields[4])] + unknown_res.add(res) + msg.append(UnknownResidue(line,res)) + + elif fields[:4] == 'Warning: Close contact of'.split(): + # EX: "Warning: Close contact of 1.028366 angstroms between .R.A and .R.A

" + msg.append(unusual_bond(line)) + + elif fields[:6] == 'WARNING: There is a bond of'.split(): + # Matches two lines, EX: + # "WARNING: There is a bond of 34.397700 angstroms between:" + # "------- .R.A and .R.A

" + nextline = lineiter.next() + msg.append(unusual_bond(line + nextline)) + + elif fields[:5] == 'Created a new atom named:'.split(): + # EX: "Created a new atom named: P within residue: .R" + residue = reslookup[fields[-1][:-1]] + if residue in unknown_res: continue # suppress atoms from an unknown res ... + atom = residue[fields[5]] + msg.append(UnknownAtom(line, residue, atom)) + + + return msg \ No newline at end of file diff --git a/moldesign/molecules/README.md b/moldesign/molecules/README.md index a49879c..292b879 100644 --- a/moldesign/molecules/README.md +++ b/moldesign/molecules/README.md @@ -1,5 +1,6 @@ -## Structure subpackage -The `moldesign.molecule` subpackage contains class definitions for atomic, molecular, biomolecular, and trajectory data structures. +## Molecule subpackage +The `moldesign.molecule` subpackage contains class definitions for molecular data, +including atomic, molecular, biomolecular, and trajectory data structures. ### Naming and indexing conventions @@ -33,6 +34,4 @@ atom.pdbindex | | | residue.pdbindex atom.e ``` #### Small molecules -Small molecules can come from a variety of sources with a variety of different metadata available. If a given molecule is provided with PDB-type metadata, we'll name and index it according to the biomolecule conventions above. - -Other formats (like XYZ files or SMILES strings) don't contain as much metadata. For molecules created from these formats, a chain ("Z") and residue ("UNL1") will be automatically created. If the atom names are just the names of the elements (e.g., all carbon atoms are named C), atom.name will be automatically assigned as `"%s%d" % (atom.elem, atom.index)`. \ No newline at end of file +Small molecules can come from a variety of sources with a variety of different metadata available. If a given molecule is provided with PDB-type metadata, we'll name and index it according to the biomolecule conventions above. Otherwise, a 'placeholder' residue and chain will be created to hold the atoms. If the atom names are just the names of the elements (e.g., all carbon atoms are named C), atom.name will be automatically assigned as `"%s%d" % (atom.elem, atom.index)`. \ No newline at end of file diff --git a/moldesign/widgets/parameterization.py b/moldesign/widgets/parameterization.py index f6be813..a71e4ac 100644 --- a/moldesign/widgets/parameterization.py +++ b/moldesign/widgets/parameterization.py @@ -31,14 +31,11 @@ class ParameterizationDisplay(ipy.Box): - ATOMSPEC = re.compile(r'\.R<(\S+) (\d+)>\.A<(\S+) (\d+)>') - def __init__(self, job, molin, molout=None): + def __init__(self, errormessages, molin, molout=None): self.molin = molin self.molout = molout - self.job = job - self.msg = [] - self.parse_errors(self.job) + self.msg = errormessages self.status = ipy.HTML('

Forcefield assignment: %s

' % ('Success' if molout else 'FAILED')) @@ -67,56 +64,6 @@ def switch_display(self, d): new.show(self.viewer) self.errmsg.value = new.desc - def parse_errors(self, job): - # TODO: special messages for known problems (e.g. histidine) - unknown_res = set() - lineiter = iter(job.stdout.split('\n')) - reslookup = {str(i + self.molin.residues[0].pdbindex): r for i,r in enumerate(self.molin.residues)} - - def _atom_from_re(s): - resname, residx, atomname, atomidx = s - r = reslookup[residx] - a = r[atomname] - return a - - def unusual_bond(l): - atomre1, atomre2 = self.ATOMSPEC.findall(l) - try: - a1, a2 = _atom_from_re(atomre1), _atom_from_re(atomre2) - except KeyError: - a1 = a2 = None - r1 = reslookup[atomre1[1]] - r2 = reslookup[atomre2[1]] - self.msg.append(UnusualBond(l, (a1, a2), (r1, r2))) - - while True: - try: line = lineiter.next() - except StopIteration: break - - fields = line.split() - if fields[0:2] == ['Unknown','residue:']: - # EX: "Unknown residue: 3TE number: 499 type: Terminal/beginning" - res = self.molin.residues[int(fields[4])] - self.msg.append(UnknownResidue(line,res)) - unknown_res.add(res) - - elif fields[:4] == 'Warning: Close contact of'.split(): - # EX: "Warning: Close contact of 1.028366 angstroms between .R.A and .R.A

" - unusual_bond(line) - - elif fields[:6] == 'WARNING: There is a bond of'.split(): - # Matches two lines, EX: - # "WARNING: There is a bond of 34.397700 angstroms between:" - # "------- .R.A and .R.A

" - nextline = lineiter.next() - unusual_bond(line + nextline) - - elif fields[:5] == 'Created a new atom named:'.split(): - # EX: "Created a new atom named: P within residue: .R" - residue = reslookup[fields[-1][:-1]] - if residue in unknown_res: continue # suppress atoms from an unknown res ... - atom = residue[fields[5]] - self.msg.append(UnknownAtom(line, residue, atom)) class ForceFieldMessage(object):