Add CONECT records to PDB files

Autodesk · Feb 28, 2017 · b43ff62 · b43ff62
1 parent 48a4b72
commit b43ff62
Show file tree

Hide file tree

Showing 4 changed files with 98 additions and 4 deletions.
diff --git a/moldesign/_tests/test_io.py b/moldesign/_tests/test_io.py
@@ -163,7 +163,7 @@ def test_1kbu_assembly_build(key, request):
                                   mol.chains[asym.num_chains].positions.defunits_value())
 
 
-@pytest.mark.parametrize('fmt', 'smiles pdb mol2 sdf inchi'.split())
+@pytest.mark.parametrize('fmt', 'smiles pdb mol2 sdf inchi mmcif'.split())
 def test_topology_preserved_in_serialization(bipyridine_smiles, fmt):
     """ Test that bond topology is preserved even if it doesn't make sense from distances
     """

diff --git a/moldesign/helpers/pdb.py b/moldesign/helpers/pdb.py
@@ -93,6 +93,76 @@ def insert_ter_records(mol, pdbfile):
     else:
         return newf
 
+CONECT = 'CONECT %4d'
+
+def insert_conect_records(mol, pdbfile, write_to=None):
+    """ Inserts TER records to indicate the end of the biopolymeric part of a chain
+
+    Put CONECT records at the end of a pdb file that doesn't have them
+
+    Args:
+        mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes
+        pdbfile (TextIO OR str): pdb file (file-like or string)
+
+    Returns:
+        cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be
+         returned as the same type passed (i.e., as a filelike buffer or as a string)
+    """
+    pairs = _get_conect_pairs(mol)
+
+    if isinstance(pdbfile, basestring):
+        pdbfile = StringIO(pdbfile)
+
+    # First, identify where to insert records (if anywhere)
+    ter_residues = set()
+    for chain in mol.chains:
+        if chain.type == 'protein':
+            ter_residues.add((chain.pdbname, chain.c_terminal.pdbindex))
+        elif chain.type == 'dna':
+            ter_residues.add((chain.pdbname, chain.threeprime_end.pdbindex))
+
+    # insert the records (if necessary)
+    if write_to is None:
+        newf = StringIO()
+    else:
+        newf = write_to
+    pdbfile.seek(0)
+
+    for line in pdbfile:
+        if line.split() == ['END']:
+            for a1idx in pairs:
+                for istart in xrange(0, len(pairs[a1idx]), 4):
+                    pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4])
+                    newf.write(CONECT % a1idx + pairindices + '\n')
+
+        newf.write(line)
+
+
+def _get_conect_pairs(mol):
+    bonds = {}
+
+    for residue in mol.residues:
+        # intra-residue bonds
+        if not residue.is_standard_residue:
+            for bond in residue.bonds:
+                if bond.order <= 1:
+                    order = 1
+                else:
+                    order = bond.order
+                for i in xrange(order):
+                    bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)
+
+        # inter-residue bonds
+        try:
+            r2 = residue.next_residue
+        except (StopIteration, NotImplemented, NotImplementedError):
+            continue
+        if not (residue.is_standard_residue and r2.is_standard_residue):
+            for bond in residue.bonds_to(r2):
+                bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)
+
+    return bonds
+
 
 def warn_assemblies(mol, assemblies):
     """ Print a warning message if the PDB structure contains a biomolecular assembly

diff --git a/moldesign/interfaces/parmed_interface.py b/moldesign/interfaces/parmed_interface.py
@@ -15,6 +15,7 @@
 
 import itertools
 import parmed
+from StringIO import StringIO
 
 import moldesign as mdt
 from moldesign import units as u
@@ -70,7 +71,9 @@ def write_pdb(mol, fileobj):
         for obj in itertools.chain(pmedmol.atoms, pmedmol.residues):
             obj.number = _ProtectFloordiv(obj.number)
 
-    pmedmol.write_pdb(fileobj, renumber=not protect_numbers)
+    tempfile = StringIO()
+    pmedmol.write_pdb(tempfile, renumber=not protect_numbers)
+    mdt.helpers.insert_conect_records(mol, tempfile, write_to=fileobj)
 
 
 class _ProtectFloordiv(int):
@@ -226,6 +229,8 @@ def _parmed_to_ff(topo, atom_map):
 def _reassign_chains(f, mol):
     """ Change chain ID assignments to the mmCIF standard (parmed uses author assignments)
 
+    If the required fields don't exist, a copy of the molecule is returned unchanged.
+
     Args:
         f (file): mmcif file/stream
         mol (moldesign.Molecule): molecule with default parmed assignemnts
@@ -235,8 +240,11 @@ def _reassign_chains(f, mol):
     """
     data = mdt.interfaces.biopython_interface.get_mmcif_data(f)
     f.seek(0)
-    newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+
-                         data['_pdbx_nonpoly_scheme.asym_id'])
+    try:
+        newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+
+                             data['_pdbx_nonpoly_scheme.asym_id'])
+    except KeyError:
+        return mol.copy(name=mol.name)
     newchains = {name: mdt.Chain(name) for name in newchain_names}
 
     residue_iterator = itertools.chain(

diff --git a/moldesign/molecules/atomcollections.py b/moldesign/molecules/atomcollections.py
@@ -446,6 +446,22 @@ def bonded_atoms(self):
                     atoms.append(nbr)
         return atoms
 
+    def bonds_to(self, other):
+        """ Returns list of bonds between this object and another one
+
+        Args:
+            other (AtomContainer): other object
+
+        Returns:
+            List[moldesign.Bond]: bonds between this object and another
+        """
+        bonds = []
+        otheratoms = set(other.atoms)
+        for bond in self.internal_bonds:
+            if bond.a1 in otheratoms or bond.a2 in otheratoms:
+                bonds.append(bond)
+        return bonds
+
 
 @toplevel
 class AtomList(AtomContainer, list):  # order is important, list will override methods otherwise