Skip to content

Commit

Permalink
Get CONECTs to work with arbitrary serial numbers (part of #138)
Browse files Browse the repository at this point in the history
  • Loading branch information
avirshup committed Mar 1, 2017
1 parent b43ff62 commit 9206984
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 63 deletions.
57 changes: 9 additions & 48 deletions moldesign/helpers/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,53 +93,14 @@ def insert_ter_records(mol, pdbfile):
else:
return newf

CONECT = 'CONECT %4d'

def insert_conect_records(mol, pdbfile, write_to=None):
""" Inserts TER records to indicate the end of the biopolymeric part of a chain
Put CONECT records at the end of a pdb file that doesn't have them
Args:
mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes
pdbfile (TextIO OR str): pdb file (file-like or string)
def get_conect_pairs(mol):
""" Returns a dicitonary of HETATM bonds for a PDB CONECT record
Returns:
cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be
returned as the same type passed (i.e., as a filelike buffer or as a string)
Note that this doesn't return the text records themselves, because they need
to reference a specific PDB sequence number
"""
pairs = _get_conect_pairs(mol)

if isinstance(pdbfile, basestring):
pdbfile = StringIO(pdbfile)

# First, identify where to insert records (if anywhere)
ter_residues = set()
for chain in mol.chains:
if chain.type == 'protein':
ter_residues.add((chain.pdbname, chain.c_terminal.pdbindex))
elif chain.type == 'dna':
ter_residues.add((chain.pdbname, chain.threeprime_end.pdbindex))

# insert the records (if necessary)
if write_to is None:
newf = StringIO()
else:
newf = write_to
pdbfile.seek(0)

for line in pdbfile:
if line.split() == ['END']:
for a1idx in pairs:
for istart in xrange(0, len(pairs[a1idx]), 4):
pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4])
newf.write(CONECT % a1idx + pairindices + '\n')

newf.write(line)


def _get_conect_pairs(mol):
bonds = {}
conects = collections.OrderedDict()

for residue in mol.residues:
# intra-residue bonds
Expand All @@ -150,18 +111,18 @@ def _get_conect_pairs(mol):
else:
order = bond.order
for i in xrange(order):
bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)
conects.setdefault(bond.a1, []).append(bond.a2)

# inter-residue bonds
try:
r2 = residue.next_residue
except (StopIteration, NotImplemented, NotImplementedError):
except (StopIteration, KeyError, NotImplementedError):
continue
if not (residue.is_standard_residue and r2.is_standard_residue):
for bond in residue.bonds_to(r2):
bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1)
conects.setdefault(bond.a1, []).append(bond.a2)

return bonds
return conects


def warn_assemblies(mol, assemblies):
Expand Down
67 changes: 54 additions & 13 deletions moldesign/interfaces/parmed_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,21 +59,18 @@ def read_pdb(f):
def write_pdb(mol, fileobj):
pmedmol = mol_to_parmed(mol)

# Check if we have appropriate indices for the PDB file
for obj in itertools.chain(mol.atoms, mol.residues):
if obj.pdbindex is None:
protect_numbers = False
break
else:
protect_numbers = True

if protect_numbers: # a hack to prevent parmed from changing negative numbers
for obj in itertools.chain(pmedmol.atoms, pmedmol.residues):
obj.number = _ProtectFloordiv(obj.number)
# Assign fixed indices
# TODO: respect the original pdbindex values as much as possible
for iatom, atom in enumerate(pmedmol.atoms):
atom.number = iatom + 1
for ires, residue in enumerate(pmedmol.residues):
residue.number = ires + 1
for obj in itertools.chain(pmedmol.atoms, pmedmol.residues):
obj.number = _ProtectFloordiv(obj.number)

tempfile = StringIO()
pmedmol.write_pdb(tempfile, renumber=not protect_numbers)
mdt.helpers.insert_conect_records(mol, tempfile, write_to=fileobj)
pmedmol.write_pdb(tempfile, renumber=False)
_insert_conect_records(mol, pmedmol, tempfile, write_to=fileobj)


class _ProtectFloordiv(int):
Expand All @@ -89,6 +86,50 @@ def __nonzero__(self):
return True


CONECT = 'CONECT %4d'

def _insert_conect_records(mol, pmdmol, pdbfile, write_to=None):
""" Inserts TER records to indicate the end of the biopolymeric part of a chain
Put CONECT records at the end of a pdb file that doesn't have them
Args:
mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes
pdbfile (TextIO OR str): pdb file (file-like or string)
Returns:
cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be
returned as the same type passed (i.e., as a filelike buffer or as a string)
"""
conect_bonds = mdt.helpers.get_conect_pairs(mol)

def get_atomseq(atom):
return pmdmol.atoms[atom.index].number

pairs = collections.OrderedDict()
for atom, nbrs in conect_bonds.iteritems():
pairs[get_atomseq(atom)] = map(get_atomseq, nbrs)

if isinstance(pdbfile, basestring):
pdbfile = StringIO(pdbfile)

if write_to is None:
newf = StringIO()
else:
newf = write_to
pdbfile.seek(0)

for line in pdbfile:
if line.split() == ['END']:
for a1idx in pairs:
for istart in xrange(0, len(pairs[a1idx]), 4):
pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4])
newf.write(CONECT % a1idx + pairindices + '\n')

newf.write(line)



def write_mmcif(mol, fileobj):
mol_to_parmed(mol).write_cif(fileobj)

Expand Down
14 changes: 12 additions & 2 deletions moldesign/molecules/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,8 +487,16 @@ def _assign_residue_indices(self):
default_residue = self._defres
default_chain = self._defchain
num_biores = 0
conflicts = set()

last_pdb_idx = None

for atom in self.atoms:
if last_pdb_idx is not None and atom.pdbindex <= last_pdb_idx:
atom.pdbindex = last_pdb_idx + 1
conflicts.add('atom numbers')
last_pdb_idx = atom.pdbindex

# if atom has no chain/residue, assign defaults
if atom.residue is None:
atom.residue = default_residue
Expand All @@ -510,8 +518,7 @@ def _assign_residue_indices(self):
atom.chain.name = chr(ord(atom.chain.name)+1)

if oldname != atom.chain.name:
print 'Warning: chain ID conflict. Renamed Chain %s -> %s' % (
oldname, atom.chain.name)
conflicts.add('chain')
self.chains.add(atom.chain)
else:
assert atom.chain.molecule is self
Expand All @@ -526,6 +533,9 @@ def _assign_residue_indices(self):
else:
assert atom.chain.molecule is self

if conflicts:
print 'WARNING: %s indices modified due to name clashes' % (
', '.join(conflicts))
self.is_biomolecule = (num_biores >= 2)

def __eq__(self, other):
Expand Down

0 comments on commit 9206984

Please sign in to comment.