diff --git a/moldesign/_tests/test_atoms.py b/moldesign/_tests/test_atoms.py index 4e9c617..af653d5 100644 --- a/moldesign/_tests/test_atoms.py +++ b/moldesign/_tests/test_atoms.py @@ -233,21 +233,21 @@ def assert_not_bonded(mol, a1, a2): def assert_consistent_bond(mol, a1, a2, order): assert a1 in a2.bond_graph assert a2 in a1.bond_graph - for bond in a1.bonds: - if bond.a2 is a2: - assert bond.order == order - break - else: - assert False, "bond not found in atom.bonds" - - for bond in a2.bonds: - if bond.a1 is a1: - assert bond.order == order - break - else: - assert False, "bond not found in atom.bonds" + _unique_bond_check(a1, a2, order) + _unique_bond_check(a2, a1, order) assert a1.bond_graph[a2] == a2.bond_graph[a1] == order assert mol.bond_graph[a1][a2] == mol.bond_graph[a2][a1] == order +def _unique_bond_check(a1, a2, order): + found = False + for bond in a1.bonds: + if bond.partner(a1) is a2: + assert not found, '%s appeared twice' % bond + assert bond.order == order + found = True + + if a1.num_bonds > 0 and not found: + assert False, "bond not found in atom.bonds" + diff --git a/moldesign/_tests/test_pdb_processing.py b/moldesign/_tests/test_pdb_processing.py index e970e78..7ce8f7e 100644 --- a/moldesign/_tests/test_pdb_processing.py +++ b/moldesign/_tests/test_pdb_processing.py @@ -46,7 +46,7 @@ def pdb_1hpk_roundtrip(pdb_1hpk): @pytest.mark.parametrize('mol', 'pdb_1hpk pdb_1hpk_roundtrip'.split()) def test_1hpk(request, mol): mol = request.getfixturevalue(mol) - mol = mdt.interfaces.ambertools._prep_for_tleap(mol) + mol = mdt.interfaces.tleap_interface._prep_for_tleap(mol) for residx in (0, 21, 49, 61, 73, 78): residue = mol.residues[residx] assert residue.resname == 'CYX' diff --git a/moldesign/exceptions.py b/moldesign/exceptions.py index 1c2fe94..1e68d4e 100644 --- a/moldesign/exceptions.py +++ b/moldesign/exceptions.py @@ -41,3 +41,13 @@ class QMConvergenceError(Exception): """ Raised when an iterative QM calculation (typically SCF) fails to converge """ pass + + +class ForcefieldAssignmentError(Exception): + """ Class that define displays for common errors in assigning a forcefield + """ + def __init__(self, msg, errors, mol=None, job=None): + self.args = [msg] + self.errors = errors + self.mol = mol + self.job = job diff --git a/moldesign/forcefields/errors.py b/moldesign/forcefields/errors.py index 6565799..070c830 100644 --- a/moldesign/forcefields/errors.py +++ b/moldesign/forcefields/errors.py @@ -16,29 +16,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -""" -Class that define displays for common errors in assigning a forcefield -""" -import cgi +import html from .. import utils from .. import units as u +from ..helpers.widgets import nbmolviz_installed def show_parameterization_results(errormessages, molin, molout=None): print('Forcefield assignment: %s' % ('Success' if molout is not None else 'Failure')) - for err in errormessages: - print(utils.html_to_text(err.desc)) - - -class ForcefieldAssignmentError(Exception): - def __init__(self, messages, mol, molout=None): - self.errors = messages - - def draw(self): - # TODO: get this working again - raise NotImplementedError() + if not nbmolviz_installed: + for err in errormessages: + print(utils.html_to_text(err.desc)) + else: + from nbmolviz.uielements.logwidget import display_log + from nbmolviz.widgets.parameterization import ParameterizationDisplay + report = ParameterizationDisplay(errormessages, molin, molout) + display_log(report, title='ERRORS/WARNINGS', show=True) class ForceFieldMessage(object): @@ -144,8 +138,12 @@ def __init__(self, message, atoms, residues): self.atoms[1], self.atoms[0].distance(self.atoms[1])) else: - self.short = 'WARN: Unusual bond - atoms not shown' - self.desc = 'TLeap message:
%s' % cgi.escape(self.message) + if self.residues[0] is self.residues[1]: + self.short = 'WARN: Unusual bond length in %s' % self.residues[0] + else: + self.short = 'WARN: unusual bond length between %s and %s' % (self.residues[0], + self.residues[1]) + self.desc = 'TLeap message:
%s' % html.escape(self.message) def show(self, viewer): if self._has_atoms: @@ -165,4 +163,4 @@ def show(self, viewer): def unshow(self, viewer): viewer.ribbon(opacity=0.7, render=False) if self._shape: viewer.remove(self._shape) - self._shape = None \ No newline at end of file + self._shape = None diff --git a/moldesign/forcefields/forcefieldbase.py b/moldesign/forcefields/forcefieldbase.py index ac1fc4d..4370f47 100644 --- a/moldesign/forcefields/forcefieldbase.py +++ b/moldesign/forcefields/forcefieldbase.py @@ -18,10 +18,10 @@ # limitations under the License. import moldesign as mdt -from .errors import ForcefieldAssignmentError, show_parameterization_results +from ..exceptions import ForcefieldAssignmentError +from .errors import show_parameterization_results from ..utils import exports - class Forcefield(object): """ Abstract class for biomolecular forcefield definitions such as amber14sb or charm22. @@ -122,31 +122,33 @@ def __init__(self, fflines, file_list=None): def assign(self, mol): newmol = self.create_prepped_molecule(mol) if newmol.num_atoms != mol.num_atoms: - # TODO: much better error message here - raise ForcefieldAssignmentError() + raise ForcefieldAssignmentError( + "Can't assign forcefield - this molecule is missing atoms. " + "Use `ForceField.create_prepped_molecule` to add them automatically.", + []) else: # TODO: much more rigorous consistency checking newmol.ff.copy_to(mol) def create_prepped_molecule(self, mol, display=True): - from ..interfaces import ambertools + from ..interfaces import tleap_interface - clean_molecule = ambertools._prep_for_tleap(mol) + clean_molecule = tleap_interface._prep_for_tleap(mol) - job = ambertools._run_tleap_assignment(clean_molecule, self._fflines, self._file_list) + job = tleap_interface._run_tleap_assignment(clean_molecule, self._fflines, self._file_list) if 'output.inpcrd' in job.get_output(): prmtop = job.get_output('output.prmtop') inpcrd = job.get_output('output.inpcrd') - params = ambertools.AmberParameters(prmtop, inpcrd, job) + params = tleap_interface.AmberParameters(prmtop, inpcrd, job) m = mdt.read_amber(params.prmtop, params.inpcrd) newmol = mdt.helpers.restore_topology(m, mol) newmol.ff = mdt.forcefields.ForcefieldParams(newmol, params) else: newmol = None - errors = ambertools._parse_tleap_errors(job, clean_molecule) + errors = tleap_interface._parse_tleap_errors(job, clean_molecule) show_parameterization_results(errors, clean_molecule, molout=newmol) @@ -154,7 +156,8 @@ def create_prepped_molecule(self, mol, display=True): return newmol else: raise ForcefieldAssignmentError( - 'TLeap failed to assign force field parameters for %s' % mol, job) + 'TLeap failed to assign force field parameters for %s' % mol, + errors, job=job, mol=mol) def add_ff(self, ff): self._fflines.extend(ff._fflines) diff --git a/moldesign/interfaces/__init__.py b/moldesign/interfaces/__init__.py index c994cb0..d483353 100644 --- a/moldesign/interfaces/__init__.py +++ b/moldesign/interfaces/__init__.py @@ -8,7 +8,10 @@ from . import pdbfixer_interface from . import pyscf_interface from . import symmol_interface +from . import tleap_interface +# These statements only import functions for python object conversion, +# i.e. mol_to_[pkg] and [pkg]_to_mol from .biopython_interface import * from .openbabel import * from .openmm import * diff --git a/moldesign/interfaces/ambertools.py b/moldesign/interfaces/ambertools.py index 3ab0f00..5674a12 100644 --- a/moldesign/interfaces/ambertools.py +++ b/moldesign/interfaces/ambertools.py @@ -16,52 +16,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from past.builtins import basestring -import re -import os -import tempfile - import moldesign as mdt import pyccc from .. import compute, utils from .. import units as u -from .. import forcefields from ..molecules import AtomicProperties IMAGE = 'ambertools' -class AmberParameters(object): - def __getstate__(self): - state = self.__dict__.copy() - state['job'] = None - return state - - def __init__(self, prmtop, inpcrd, job=None): - self.prmtop = prmtop - self.inpcrd = inpcrd - self.job = job - - def to_parmed(self): - import parmed - prmtoppath = os.path.join(tempfile.mkdtemp(), 'prmtop') - self.prmtop.put(prmtoppath) - pmd = parmed.load_file(prmtoppath) - return pmd - - -class ExtraAmberParameters(object): - def __getstate__(self): - state = self.__dict__.copy() - state['job'] = None - return state - - def __init__(self, lib, frcmod, job=None): - self.lib = lib - self.frcmod = frcmod - self.job = job - - @utils.kwargs_from(mdt.compute.run_job) def calc_am1_bcc_charges(mol, **kwargs): """Calculate am1 bcc charges @@ -199,262 +162,3 @@ def finish_job(job): return mdt.compute.run_job(job, _return_result=True, **kwargs) - -@utils.kwargs_from(compute.run_job) -def _run_tleap_assignment(mol, leapcmds, files=None, **kwargs): - """ - Drives tleap to create a prmtop and inpcrd file. Specifically uses the AmberTools 16 - tleap distribution. - - Defaults are as recommended in the ambertools manual. - - Args: - mol (moldesign.Molecule): Molecule to set up - leapcmds (List[str]): list of the commands to load the forcefields - files (List[pyccc.FileReference]): (optional) list of additional files - to send - **kwargs: keyword arguments to :meth:`compute.run_job` - - References: - Ambertools Manual, http://ambermd.org/doc12/Amber16.pdf. See page 33 for forcefield - recommendations. - """ - leapstr = leapcmds[:] - inputs = {} - if files is not None: - inputs.update(files) - inputs['input.pdb'] = mol.write(format='pdb') - - leapstr.append('mol = loadpdb input.pdb\n' - "check mol\n" - "saveamberparm mol output.prmtop output.inpcrd\n" - "savepdb mol output.pdb\n" - "quit\n") - - inputs['input.leap'] = '\n'.join(leapstr) - - job = pyccc.Job(image=compute.get_image_path(IMAGE), - command='tleap -f input.leap', - inputs=inputs, - name="tleap, %s" % mol.name) - - return compute.run_job(job, **kwargs) - - -def _prep_for_tleap(mol): - """ Returns a modified *copy* that's been modified for input to tleap - - Makes the following modifications: - 1. Reassigns all residue IDs - 2. Assigns tleap-appropriate cysteine resnames - """ - change = False - clean = mdt.Molecule(mol.atoms) - for residue in clean.residues: - residue.pdbindex = residue.index+1 - - if residue.resname == 'CYS': # deal with cysteine states - if 'SG' not in residue.atoms or 'HG' in residue.atoms: - continue # sulfur's missing, we'll let tleap create it - else: - sulfur = residue.atoms['SG'] - - if sulfur.formal_charge == -1*u.q_e: - residue.resname = 'CYM' - change = True - continue - - # check for a reasonable hybridization state - if sulfur.formal_charge != 0 or sulfur.num_bonds not in (1, 2): - raise ValueError("Unknown sulfur hybridization state for %s" - % sulfur) - - # check for a disulfide bond - for otheratom in sulfur.bonded_atoms: - if otheratom.residue is not residue: - if otheratom.name != 'SG' or otheratom.residue.resname not in ('CYS', 'CYX'): - raise ValueError('Unknown bond from cysteine sulfur (%s)' % sulfur) - - # if we're here, this is a cystine with a disulfide bond - print('INFO: disulfide bond detected. Renaming %s from CYS to CYX' % residue) - sulfur.residue.resname = 'CYX' - - clean._rebuild_from_atoms() - - return clean - - -@utils.kwargs_from(mdt.compute.run_job) -def create_ff_parameters(mol, charges='esp', baseff='gaff2', **kwargs): - """Parameterize ``mol``, typically using GAFF parameters. - - This will both assign a forcefield to the molecule (at ``mol.ff``) and produce the parameters - so that they can be used in other systems (e.g., so that this molecule can be simulated - embedded in a larger protein) - - Note: - 'am1-bcc' and 'gasteiger' partial charges will be automatically computed if necessary. - Other charge types must be precomputed. - - Args: - mol (moldesign.Molecule): - charges (str or dict): what partial charges to use? Can be a dict (``{atom:charge}``) OR - a string, in which case charges will be read from - ``mol.properties.[charges name]``; typical values will be 'esp', 'mulliken', - 'am1-bcc', etc. Use 'zero' to set all charges to 0 (for QM/MM and testing) - baseff (str): Name of the gaff-like forcefield file (default: gaff2) - - Returns: - TLeapForcefield: Forcefield object for this residue - """ - # Check that there's only 1 residue, give it a name - assert mol.num_residues == 1 - if mol.residues[0].resname is None: - mol.residues[0].resname = 'UNL' - print('Assigned residue name "UNL" to %s' % mol) - resname = mol.residues[0].resname - - # check that atoms have unique names - if len(set(atom.name for atom in mol.atoms)) != mol.num_atoms: - raise ValueError('This molecule does not have uniquely named atoms, cannot assign FF') - - if charges == 'am1-bcc' and 'am1-bcc' not in mol.properties: - calc_am1_bcc_charges(mol) - elif charges == 'gasteiger' and 'gasteiger' not in mol.properties: - calc_gasteiger_charges(mol) - elif charges == 'esp' and 'esp' not in mol.properties: - # TODO: use NWChem ESP to calculate - raise NotImplementedError() - - if charges == 'zero': - charge_array = [0.0 for atom in mol.atoms] - elif isinstance(charges, basestring): - charge_array = u.array([mol.properties[charges][atom] for atom in mol.atoms]) - if not charge_array.dimensionless: # implicitly convert floats to fundamental charge units - charge_array = charge_array.to(u.q_e).magnitude - else: - charge_array = [charges[atom] for atom in mol.atoms] - - inputs = {'mol.mol2': mol.write(format='mol2'), - 'mol.charges': '\n'.join(map(str, charge_array))} - - cmds = ['antechamber -i mol.mol2 -fi mol2 -o mol_charged.mol2 ' - ' -fo mol2 -c rc -cf mol.charges -rn %s' % resname, - 'parmchk -i mol_charged.mol2 -f mol2 -o mol.frcmod', - 'tleap -f leap.in', - 'sed -e "s/tempresname/%s/g" mol_rename.lib > mol.lib' % resname] - - base_forcefield = forcefields.TLeapLib(baseff) - inputs['leap.in'] = '\n'.join(["source leaprc.%s"%baseff, - "tempresname = loadmol2 mol_charged.mol2", - "fmod = loadamberparams mol.frcmod", - "check tempresname", - "saveoff tempresname mol_rename.lib", - "saveamberparm tempresname mol.prmtop mol.inpcrd", - "quit\n"]) - - def finish_job(j): - leapcmds = ['source leaprc.gaff2'] - files = {} - for fname, f in j.glob_output("*.lib").items(): - leapcmds.append('loadoff %s' % fname) - files[fname] = f - for fname, f in j.glob_output("*.frcmod").items(): - leapcmds.append('loadAmberParams %s' % fname) - files[fname] = f - - param = forcefields.TLeapForcefield(leapcmds, files) - param.add_ff(base_forcefield) - param.assign(mol) - return param - - job = pyccc.Job(image=mdt.compute.get_image_path(IMAGE), - command=' && '.join(cmds), - inputs=inputs, - when_finished=finish_job, - name="GAFF assignment: %s" % mol.name) - - return mdt.compute.run_job(job, _return_result=True, **kwargs) - - -ATOMSPEC = re.compile(r'\.R<(\S+) ([\-0-9]+)>\.A<(\S+) ([\-0-9]+)>') - - -def _parse_tleap_errors(job, molin): - # TODO: special messages for known problems (e.g. histidine) - msg = [] - unknown_res = set() # so we can print only one error per unkonwn residue - lineiter = iter(job.stdout.split('\n')) - offset = utils.if_not_none(molin.residues[0].pdbindex, 1) - reslookup = {str(i+offset): r for i,r in enumerate(molin.residues)} - - def _atom_from_re(s): - resname, residx, atomname, atomidx = s - r = reslookup[residx] - a = r[atomname] - return a - - def unusual_bond(l): - atomre1, atomre2 = ATOMSPEC.findall(l) - try: - a1, a2 = _atom_from_re(atomre1), _atom_from_re(atomre2) - except KeyError: - a1 = a2 = None - r1 = reslookup[atomre1[1]] - r2 = reslookup[atomre2[1]] - return forcefields.errors.UnusualBond(l, (a1, a2), (r1, r2)) - - def _parse_tleap_logline(line): - fields = line.split() - if fields[0:2] == ['Unknown', 'residue:']: - # EX: "Unknown residue: 3TE number: 499 type: Terminal/beginning" - res = molin.residues[int(fields[4])] - unknown_res.add(res) - return forcefields.errors.UnknownResidue(line, res) - - elif fields[:4] == 'Warning: Close contact of'.split(): - # EX: "Warning: Close contact of 1.028366 angstroms between .R.A and .R.A

" - return unusual_bond(line) - - elif fields[:6] == 'WARNING: There is a bond of'.split(): - # Matches two lines, EX: - # "WARNING: There is a bond of 34.397700 angstroms between:" - # "------- .R.A and .R.A

" - nextline = next(lineiter) - return unusual_bond(line+nextline) - - elif fields[:5] == 'Created a new atom named:'.split(): - # EX: "Created a new atom named: P within residue: .R" - residue = reslookup[fields[-1][:-1]] - if residue in unknown_res: - return None # suppress atoms from an unknown res ... - atom = residue[fields[5]] - return forcefields.errors.UnknownAtom(line, residue, atom) - - elif (fields[:5] == '** No torsion terms for'.split() or - fields[:5] == 'Could not find angle parameter:'.split()): - # EX: " ** No torsion terms for ca-ce-c3-hc" - return forcefields.errors.MissingTerms(line.strip()) - - else: # ignore this line - return None - - while True: - try: - line = next(lineiter) - except StopIteration: - break - - try: - errmsg = _parse_tleap_logline(line) - except (KeyError, ValueError): - print("WARNING: failed to process TLeap message '%s'" % line) - msg.append(forcefields.errors.ForceFieldMessage(line)) - - else: - if errmsg is not None: - msg.append(errmsg) - - return msg - - diff --git a/moldesign/interfaces/tleap_interface.py b/moldesign/interfaces/tleap_interface.py new file mode 100644 index 0000000..5284e5f --- /dev/null +++ b/moldesign/interfaces/tleap_interface.py @@ -0,0 +1,323 @@ +from __future__ import print_function, absolute_import, division +from future.builtins import * +from future import standard_library +standard_library.install_aliases() + +# Copyright 2017 Autodesk Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from past.builtins import basestring +import os +import re +import tempfile + +import pyccc + +import moldesign as mdt +from . import ambertools +from .. import units as u +from .. import compute +from .. import utils +from .. import forcefields + +IMAGE = 'ambertools' + + +@utils.kwargs_from(mdt.compute.run_job) +def create_ff_parameters(mol, charges='esp', baseff='gaff2', **kwargs): + """Parameterize ``mol``, typically using GAFF parameters. + + This will both assign a forcefield to the molecule (at ``mol.ff``) and produce the parameters + so that they can be used in other systems (e.g., so that this molecule can be simulated + embedded in a larger protein) + + Note: + 'am1-bcc' and 'gasteiger' partial charges will be automatically computed if necessary. + Other charge types must be precomputed. + + Args: + mol (moldesign.Molecule): + charges (str or dict): what partial charges to use? Can be a dict (``{atom:charge}``) OR + a string, in which case charges will be read from + ``mol.properties.[charges name]``; typical values will be 'esp', 'mulliken', + 'am1-bcc', etc. Use 'zero' to set all charges to 0 (for QM/MM and testing) + baseff (str): Name of the gaff-like forcefield file (default: gaff2) + + Returns: + TLeapForcefield: Forcefield object for this residue + """ + # Check that there's only 1 residue, give it a name + assert mol.num_residues == 1 + if mol.residues[0].resname is None: + mol.residues[0].resname = 'UNL' + print('Assigned residue name "UNL" to %s' % mol) + resname = mol.residues[0].resname + + # check that atoms have unique names + if len(set(atom.name for atom in mol.atoms)) != mol.num_atoms: + raise ValueError('This molecule does not have uniquely named atoms, cannot assign FF') + + if charges == 'am1-bcc' and 'am1-bcc' not in mol.properties: + ambertools.calc_am1_bcc_charges(mol) + elif charges == 'gasteiger' and 'gasteiger' not in mol.properties: + ambertools.calc_gasteiger_charges(mol) + elif charges == 'esp' and 'esp' not in mol.properties: + # TODO: use NWChem ESP to calculate + raise NotImplementedError() + + if charges == 'zero': + charge_array = [0.0 for atom in mol.atoms] + elif isinstance(charges, basestring): + charge_array = u.array([mol.properties[charges][atom] for atom in mol.atoms]) + if not charge_array.dimensionless: # implicitly convert floats to fundamental charge units + charge_array = charge_array.to(u.q_e).magnitude + else: + charge_array = [charges[atom] for atom in mol.atoms] + + inputs = {'mol.mol2': mol.write(format='mol2'), + 'mol.charges': '\n'.join(map(str, charge_array))} + + cmds = ['antechamber -i mol.mol2 -fi mol2 -o mol_charged.mol2 ' + ' -fo mol2 -c rc -cf mol.charges -rn %s' % resname, + 'parmchk -i mol_charged.mol2 -f mol2 -o mol.frcmod', + 'tleap -f leap.in', + 'sed -e "s/tempresname/%s/g" mol_rename.lib > mol.lib' % resname] + + base_forcefield = forcefields.TLeapLib(baseff) + inputs['leap.in'] = '\n'.join(["source leaprc.%s" % baseff, + "tempresname = loadmol2 mol_charged.mol2", + "fmod = loadamberparams mol.frcmod", + "check tempresname", + "saveoff tempresname mol_rename.lib", + "saveamberparm tempresname mol.prmtop mol.inpcrd", + "quit\n"]) + + def finish_job(j): + leapcmds = ['source leaprc.gaff2'] + files = {} + for fname, f in j.glob_output("*.lib").items(): + leapcmds.append('loadoff %s' % fname) + files[fname] = f + for fname, f in j.glob_output("*.frcmod").items(): + leapcmds.append('loadAmberParams %s' % fname) + files[fname] = f + + param = forcefields.TLeapForcefield(leapcmds, files) + param.add_ff(base_forcefield) + param.assign(mol) + return param + + job = pyccc.Job(image=mdt.compute.get_image_path(IMAGE), + command=' && '.join(cmds), + inputs=inputs, + when_finished=finish_job, + name="GAFF assignment: %s" % mol.name) + + return mdt.compute.run_job(job, _return_result=True, **kwargs) + + +class AmberParameters(object): + """ Forcefield parameters for a system in amber ``prmtop`` format + """ + def __getstate__(self): + state = self.__dict__.copy() + state['job'] = None + return state + + def __init__(self, prmtop, inpcrd, job=None): + self.prmtop = prmtop + self.inpcrd = inpcrd + self.job = job + + def to_parmed(self): + import parmed + prmtoppath = os.path.join(tempfile.mkdtemp(), 'prmtop') + self.prmtop.put(prmtoppath) + pmd = parmed.load_file(prmtoppath) + return pmd + + +@utils.kwargs_from(compute.run_job) +def _run_tleap_assignment(mol, leapcmds, files=None, **kwargs): + """ + Drives tleap to create a prmtop and inpcrd file. Specifically uses the AmberTools 16 + tleap distribution. + + Defaults are as recommended in the ambertools manual. + + Args: + mol (moldesign.Molecule): Molecule to set up + leapcmds (List[str]): list of the commands to load the forcefields + files (List[pyccc.FileReference]): (optional) list of additional files + to send + **kwargs: keyword arguments to :meth:`compute.run_job` + + References: + Ambertools Manual, http://ambermd.org/doc12/Amber16.pdf. See page 33 for forcefield + recommendations. + """ + leapstr = leapcmds[:] + inputs = {} + if files is not None: + inputs.update(files) + inputs['input.pdb'] = mol.write(format='pdb') + + leapstr.append('mol = loadpdb input.pdb\n' + "check mol\n" + "saveamberparm mol output.prmtop output.inpcrd\n" + "savepdb mol output.pdb\n" + "quit\n") + + inputs['input.leap'] = '\n'.join(leapstr) + + job = pyccc.Job(image=compute.get_image_path(IMAGE), + command='tleap -f input.leap', + inputs=inputs, + name="tleap, %s" % mol.name) + + return compute.run_job(job, **kwargs) + + +def _prep_for_tleap(mol): + """ Returns a modified *copy* that's been modified for input to tleap + + Makes the following modifications: + 1. Reassigns all residue IDs + 2. Assigns tleap-appropriate cysteine resnames + """ + change = False + clean = mdt.Molecule(mol.atoms) + for residue in clean.residues: + residue.pdbindex = residue.index+1 + + if residue.resname == 'CYS': # deal with cysteine states + if 'SG' not in residue.atoms or 'HG' in residue.atoms: + continue # sulfur's missing, we'll let tleap create it + else: + sulfur = residue.atoms['SG'] + + if sulfur.formal_charge == -1*u.q_e: + residue.resname = 'CYM' + change = True + continue + + # check for a reasonable hybridization state + if sulfur.formal_charge != 0 or sulfur.num_bonds not in (1, 2): + raise ValueError("Unknown sulfur hybridization state for %s" + % sulfur) + + # check for a disulfide bond + for otheratom in sulfur.bonded_atoms: + if otheratom.residue is not residue: + if otheratom.name != 'SG' or otheratom.residue.resname not in ('CYS', 'CYX'): + raise ValueError('Unknown bond from cysteine sulfur (%s)' % sulfur) + + # if we're here, this is a cystine with a disulfide bond + print('INFO: disulfide bond detected. Renaming %s from CYS to CYX' % residue) + sulfur.residue.resname = 'CYX' + + clean._rebuild_from_atoms() + + return clean + + +ATOMSPEC = re.compile(r'\.R<(\S+) ([\-0-9]+)>\.A<(\S+) ([\-0-9]+)>') + + +def _parse_tleap_errors(job, molin): + # TODO: special messages for known problems (e.g. histidine) + msg = [] + unknown_res = set() # so we can print only one error per unkonwn residue + lineiter = iter(job.stdout.split('\n')) + offset = utils.if_not_none(molin.residues[0].pdbindex, 1) + reslookup = {str(i+offset): r for i,r in enumerate(molin.residues)} + + def _atom_from_re(s): + resname, residx, atomname, atomidx = s + r = reslookup[residx] + a = r[atomname] + return a + + def unusual_bond(l): + atomre1, atomre2 = ATOMSPEC.findall(l) + try: + a1, a2 = _atom_from_re(atomre1), _atom_from_re(atomre2) + except KeyError: + a1 = a2 = None + r1 = reslookup[atomre1[1]] + r2 = reslookup[atomre2[1]] + return forcefields.errors.UnusualBond(l, (a1, a2), (r1, r2)) + + def _parse_tleap_logline(line): + fields = line.split() + if fields[0:2] == ['Unknown', 'residue:']: + # EX: "Unknown residue: 3TE number: 499 type: Terminal/beginning" + res = molin.residues[int(fields[4])] + unknown_res.add(res) + return forcefields.errors.UnknownResidue(line, res) + + elif fields[:4] == 'Warning: Close contact of'.split(): + # EX: "Warning: Close contact of 1.028366 angstroms between .R.A and .R.A

" + return unusual_bond(line) + + elif fields[:6] == 'WARNING: There is a bond of'.split(): + # Matches two lines, EX: + # "WARNING: There is a bond of 34.397700 angstroms between:" + # "------- .R.A and .R.A

" + nextline = next(lineiter) + return unusual_bond(line+nextline) + + elif fields[:5] == 'Created a new atom named:'.split(): + # EX: "Created a new atom named: P within residue: .R" + residue = reslookup[fields[-1][:-1]] + if residue in unknown_res: + return None # suppress atoms from an unknown res ... + atom = residue[fields[5]] + return forcefields.errors.UnknownAtom(line, residue, atom) + + elif fields[:2] == ('FATAL:', 'Atom'): + # EX: "FATAL: Atom .R.A does not have a type." + assert fields[-5:] == "does not have a type.".split() + atom = _atom_from_re(ATOMSPEC.findall(line)[0]) + return forcefields.errors.UnknownAtom(line, atom.residue, atom) + + elif (fields[:5] == '** No torsion terms for'.split() or + fields[:5] == 'Could not find angle parameter:'.split() or + fields[:5] == 'Could not find bond parameter for:'.split()): + # EX: " ** No torsion terms for ca-ce-c3-hc" + # EX: "Could not find bond parameter for: -" + # EX: "Could not find angle parameter: - -" + return forcefields.errors.MissingTerms(line.strip()) + + else: # ignore this line + return None + + while True: + try: + line = next(lineiter) + except StopIteration: + break + + try: + errmsg = _parse_tleap_logline(line) + except (KeyError, ValueError): + print("WARNING: failed to process TLeap message '%s'" % line) + msg.append(forcefields.errors.ForceFieldMessage(line)) + + else: + if errmsg is not None: + msg.append(errmsg) + + return msg + + diff --git a/moldesign/tools/topology.py b/moldesign/tools/topology.py index d023821..53e0673 100644 --- a/moldesign/tools/topology.py +++ b/moldesign/tools/topology.py @@ -27,7 +27,7 @@ from moldesign.interfaces.openbabel import add_hydrogen, guess_bond_orders from moldesign.interfaces.pdbfixer_interface import mutate_residues, add_water -from moldesign.interfaces.ambertools import create_ff_parameters +from moldesign.interfaces.tleap_interface import create_ff_parameters from moldesign.interfaces.ambertools import calc_am1_bcc_charges, calc_gasteiger_charges _pkgall.extend(('add_hydrogen guess_bond_orders mutate_residues add_water'