Skip to content

Commit

Permalink
Add CONECT records to PDB files
Browse files Browse the repository at this point in the history
  • Loading branch information
avirshup committed Feb 28, 2017
1 parent 48a4b72 commit b43ff62
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 4 deletions.
2 changes: 1 addition & 1 deletion moldesign/_tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def test_1kbu_assembly_build(key, request):
mol.chains[asym.num_chains].positions.defunits_value())


@pytest.mark.parametrize('fmt', 'smiles pdb mol2 sdf inchi'.split())
@pytest.mark.parametrize('fmt', 'smiles pdb mol2 sdf inchi mmcif'.split())
def test_topology_preserved_in_serialization(bipyridine_smiles, fmt):
""" Test that bond topology is preserved even if it doesn't make sense from distances
"""
Expand Down
70 changes: 70 additions & 0 deletions moldesign/helpers/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,76 @@ def insert_ter_records(mol, pdbfile):
else:
return newf

CONECT = 'CONECT %4d'

def insert_conect_records(mol, pdbfile, write_to=None):
""" Inserts TER records to indicate the end of the biopolymeric part of a chain
Put CONECT records at the end of a pdb file that doesn't have them
Args:
mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes
pdbfile (TextIO OR str): pdb file (file-like or string)
Returns:
cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be
returned as the same type passed (i.e., as a filelike buffer or as a string)
"""
pairs = _get_conect_pairs(mol)

if isinstance(pdbfile, basestring):
pdbfile = StringIO(pdbfile)

# First, identify where to insert records (if anywhere)
ter_residues = set()
for chain in mol.chains:
if chain.type == 'protein':
ter_residues.add((chain.pdbname, chain.c_terminal.pdbindex))
elif chain.type == 'dna':
ter_residues.add((chain.pdbname, chain.threeprime_end.pdbindex))

# insert the records (if necessary)
if write_to is None:
newf = StringIO()
else:
newf = write_to
pdbfile.seek(0)

for line in pdbfile:
if line.split() == ['END']:
for a1idx in pairs:
for istart in xrange(0, len(pairs[a1idx]), 4):
pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4])
newf.write(CONECT % a1idx + pairindices + '\n')

newf.write(line)


def _get_conect_pairs(mol):
bonds = {}

for residue in mol.residues:
# intra-residue bonds
if not residue.is_standard_residue:
for bond in residue.bonds:
if bond.order <= 1:
order = 1
else:
order = bond.order
for i in xrange(order):
bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)

# inter-residue bonds
try:
r2 = residue.next_residue
except (StopIteration, NotImplemented, NotImplementedError):
continue
if not (residue.is_standard_residue and r2.is_standard_residue):
for bond in residue.bonds_to(r2):
bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)

return bonds


def warn_assemblies(mol, assemblies):
""" Print a warning message if the PDB structure contains a biomolecular assembly
Expand Down
14 changes: 11 additions & 3 deletions moldesign/interfaces/parmed_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import itertools
import parmed
from StringIO import StringIO

import moldesign as mdt
from moldesign import units as u
Expand Down Expand Up @@ -70,7 +71,9 @@ def write_pdb(mol, fileobj):
for obj in itertools.chain(pmedmol.atoms, pmedmol.residues):
obj.number = _ProtectFloordiv(obj.number)

pmedmol.write_pdb(fileobj, renumber=not protect_numbers)
tempfile = StringIO()
pmedmol.write_pdb(tempfile, renumber=not protect_numbers)
mdt.helpers.insert_conect_records(mol, tempfile, write_to=fileobj)


class _ProtectFloordiv(int):
Expand Down Expand Up @@ -226,6 +229,8 @@ def _parmed_to_ff(topo, atom_map):
def _reassign_chains(f, mol):
""" Change chain ID assignments to the mmCIF standard (parmed uses author assignments)
If the required fields don't exist, a copy of the molecule is returned unchanged.
Args:
f (file): mmcif file/stream
mol (moldesign.Molecule): molecule with default parmed assignemnts
Expand All @@ -235,8 +240,11 @@ def _reassign_chains(f, mol):
"""
data = mdt.interfaces.biopython_interface.get_mmcif_data(f)
f.seek(0)
newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+
data['_pdbx_nonpoly_scheme.asym_id'])
try:
newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+
data['_pdbx_nonpoly_scheme.asym_id'])
except KeyError:
return mol.copy(name=mol.name)
newchains = {name: mdt.Chain(name) for name in newchain_names}

residue_iterator = itertools.chain(
Expand Down
16 changes: 16 additions & 0 deletions moldesign/molecules/atomcollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,22 @@ def bonded_atoms(self):
atoms.append(nbr)
return atoms

def bonds_to(self, other):
""" Returns list of bonds between this object and another one
Args:
other (AtomContainer): other object
Returns:
List[moldesign.Bond]: bonds between this object and another
"""
bonds = []
otheratoms = set(other.atoms)
for bond in self.internal_bonds:
if bond.a1 in otheratoms or bond.a2 in otheratoms:
bonds.append(bond)
return bonds


@toplevel
class AtomList(AtomContainer, list): # order is important, list will override methods otherwise
Expand Down

0 comments on commit b43ff62

Please sign in to comment.