Get CONECTs to work with arbitrary serial numbers (part of #138)

Autodesk · Mar 1, 2017 · 9206984 · 9206984
1 parent b43ff62
commit 9206984
Show file tree

Hide file tree

Showing 3 changed files with 75 additions and 63 deletions.
diff --git a/moldesign/helpers/pdb.py b/moldesign/helpers/pdb.py
@@ -93,53 +93,14 @@ def insert_ter_records(mol, pdbfile):
     else:
         return newf
 
-CONECT = 'CONECT %4d'
 
-def insert_conect_records(mol, pdbfile, write_to=None):
-    """ Inserts TER records to indicate the end of the biopolymeric part of a chain
-
-    Put CONECT records at the end of a pdb file that doesn't have them
-
-    Args:
-        mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes
-        pdbfile (TextIO OR str): pdb file (file-like or string)
+def get_conect_pairs(mol):
+    """ Returns a dicitonary of HETATM bonds for a PDB CONECT record
 
-    Returns:
-        cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be
-         returned as the same type passed (i.e., as a filelike buffer or as a string)
+    Note that this doesn't return the text records themselves, because they need
+    to reference a specific PDB sequence number
     """
-    pairs = _get_conect_pairs(mol)
-
-    if isinstance(pdbfile, basestring):
-        pdbfile = StringIO(pdbfile)
-
-    # First, identify where to insert records (if anywhere)
-    ter_residues = set()
-    for chain in mol.chains:
-        if chain.type == 'protein':
-            ter_residues.add((chain.pdbname, chain.c_terminal.pdbindex))
-        elif chain.type == 'dna':
-            ter_residues.add((chain.pdbname, chain.threeprime_end.pdbindex))
-
-    # insert the records (if necessary)
-    if write_to is None:
-        newf = StringIO()
-    else:
-        newf = write_to
-    pdbfile.seek(0)
-
-    for line in pdbfile:
-        if line.split() == ['END']:
-            for a1idx in pairs:
-                for istart in xrange(0, len(pairs[a1idx]), 4):
-                    pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4])
-                    newf.write(CONECT % a1idx + pairindices + '\n')
-
-        newf.write(line)
-
-
-def _get_conect_pairs(mol):
-    bonds = {}
+    conects = collections.OrderedDict()
 
     for residue in mol.residues:
         # intra-residue bonds
@@ -150,18 +111,18 @@ def _get_conect_pairs(mol):
                 else:
                     order = bond.order
                 for i in xrange(order):
-                    bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)
+                    conects.setdefault(bond.a1, []).append(bond.a2)
 
         # inter-residue bonds
         try:
             r2 = residue.next_residue
-        except (StopIteration, NotImplemented, NotImplementedError):
+        except (StopIteration, KeyError, NotImplementedError):
             continue
         if not (residue.is_standard_residue and r2.is_standard_residue):
             for bond in residue.bonds_to(r2):
-                bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)
+                conects.setdefault(bond.a1, []).append(bond.a2)
 
-    return bonds
+    return conects
 
 
 def warn_assemblies(mol, assemblies):

diff --git a/moldesign/interfaces/parmed_interface.py b/moldesign/interfaces/parmed_interface.py
@@ -59,21 +59,18 @@ def read_pdb(f):
 def write_pdb(mol, fileobj):
     pmedmol = mol_to_parmed(mol)
 
-    # Check if we have appropriate indices for the PDB file
-    for obj in itertools.chain(mol.atoms, mol.residues):
-        if obj.pdbindex is None:
-            protect_numbers = False
-            break
-    else:
-        protect_numbers = True
-
-    if protect_numbers:  # a hack to prevent parmed from changing negative numbers
-        for obj in itertools.chain(pmedmol.atoms, pmedmol.residues):
-            obj.number = _ProtectFloordiv(obj.number)
+    # Assign fixed indices
+    # TODO: respect the original pdbindex values as much as possible
+    for iatom, atom in enumerate(pmedmol.atoms):
+        atom.number = iatom + 1
+    for ires, residue in enumerate(pmedmol.residues):
+        residue.number = ires + 1
+    for obj in itertools.chain(pmedmol.atoms, pmedmol.residues):
+        obj.number = _ProtectFloordiv(obj.number)
 
     tempfile = StringIO()
-    pmedmol.write_pdb(tempfile, renumber=not protect_numbers)
-    mdt.helpers.insert_conect_records(mol, tempfile, write_to=fileobj)
+    pmedmol.write_pdb(tempfile, renumber=False)
+    _insert_conect_records(mol, pmedmol, tempfile, write_to=fileobj)
 
 
 class _ProtectFloordiv(int):
@@ -89,6 +86,50 @@ def __nonzero__(self):
         return True
 
 
+CONECT = 'CONECT %4d'
+
+def _insert_conect_records(mol, pmdmol, pdbfile, write_to=None):
+    """ Inserts TER records to indicate the end of the biopolymeric part of a chain
+
+    Put CONECT records at the end of a pdb file that doesn't have them
+
+    Args:
+        mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes
+        pdbfile (TextIO OR str): pdb file (file-like or string)
+
+    Returns:
+        cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be
+         returned as the same type passed (i.e., as a filelike buffer or as a string)
+    """
+    conect_bonds = mdt.helpers.get_conect_pairs(mol)
+
+    def get_atomseq(atom):
+        return pmdmol.atoms[atom.index].number
+
+    pairs = collections.OrderedDict()
+    for atom, nbrs in conect_bonds.iteritems():
+        pairs[get_atomseq(atom)] = map(get_atomseq, nbrs)
+
+    if isinstance(pdbfile, basestring):
+        pdbfile = StringIO(pdbfile)
+
+    if write_to is None:
+        newf = StringIO()
+    else:
+        newf = write_to
+    pdbfile.seek(0)
+
+    for line in pdbfile:
+        if line.split() == ['END']:
+            for a1idx in pairs:
+                for istart in xrange(0, len(pairs[a1idx]), 4):
+                    pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4])
+                    newf.write(CONECT % a1idx + pairindices + '\n')
+
+        newf.write(line)
+
+
+
 def write_mmcif(mol, fileobj):
     mol_to_parmed(mol).write_cif(fileobj)
 

diff --git a/moldesign/molecules/molecule.py b/moldesign/molecules/molecule.py
@@ -487,8 +487,16 @@ def _assign_residue_indices(self):
         default_residue = self._defres
         default_chain = self._defchain
         num_biores = 0
+        conflicts = set()
+
+        last_pdb_idx = None
 
         for atom in self.atoms:
+            if last_pdb_idx is not None and atom.pdbindex <= last_pdb_idx:
+                atom.pdbindex = last_pdb_idx + 1
+                conflicts.add('atom numbers')
+            last_pdb_idx = atom.pdbindex
+
             # if atom has no chain/residue, assign defaults
             if atom.residue is None:
                 atom.residue = default_residue
@@ -510,8 +518,7 @@ def _assign_residue_indices(self):
                     atom.chain.name = chr(ord(atom.chain.name)+1)
 
                 if oldname != atom.chain.name:
-                    print 'Warning: chain ID conflict. Renamed Chain %s -> %s' % (
-                        oldname, atom.chain.name)
+                    conflicts.add('chain')
                 self.chains.add(atom.chain)
             else:
                 assert atom.chain.molecule is self
@@ -526,6 +533,9 @@ def _assign_residue_indices(self):
             else:
                 assert atom.chain.molecule is self
 
+        if conflicts:
+            print 'WARNING: %s indices modified due to name clashes' % (
+                ', '.join(conflicts))
         self.is_biomolecule = (num_biores >= 2)
 
     def __eq__(self, other):