From 920698496b701b5584c8d0795d6645819997e953 Mon Sep 17 00:00:00 2001 From: Aaron Virshup Date: Tue, 28 Feb 2017 23:24:51 -0800 Subject: [PATCH] Get CONECTs to work with arbitrary serial numbers (part of #138) --- moldesign/helpers/pdb.py | 57 ++++---------------- moldesign/interfaces/parmed_interface.py | 67 +++++++++++++++++++----- moldesign/molecules/molecule.py | 14 ++++- 3 files changed, 75 insertions(+), 63 deletions(-) diff --git a/moldesign/helpers/pdb.py b/moldesign/helpers/pdb.py index 7a2872e..137e9cd 100644 --- a/moldesign/helpers/pdb.py +++ b/moldesign/helpers/pdb.py @@ -93,53 +93,14 @@ def insert_ter_records(mol, pdbfile): else: return newf -CONECT = 'CONECT %4d' -def insert_conect_records(mol, pdbfile, write_to=None): - """ Inserts TER records to indicate the end of the biopolymeric part of a chain - - Put CONECT records at the end of a pdb file that doesn't have them - - Args: - mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes - pdbfile (TextIO OR str): pdb file (file-like or string) +def get_conect_pairs(mol): + """ Returns a dicitonary of HETATM bonds for a PDB CONECT record - Returns: - cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be - returned as the same type passed (i.e., as a filelike buffer or as a string) + Note that this doesn't return the text records themselves, because they need + to reference a specific PDB sequence number """ - pairs = _get_conect_pairs(mol) - - if isinstance(pdbfile, basestring): - pdbfile = StringIO(pdbfile) - - # First, identify where to insert records (if anywhere) - ter_residues = set() - for chain in mol.chains: - if chain.type == 'protein': - ter_residues.add((chain.pdbname, chain.c_terminal.pdbindex)) - elif chain.type == 'dna': - ter_residues.add((chain.pdbname, chain.threeprime_end.pdbindex)) - - # insert the records (if necessary) - if write_to is None: - newf = StringIO() - else: - newf = write_to - pdbfile.seek(0) - - for line in pdbfile: - if line.split() == ['END']: - for a1idx in pairs: - for istart in xrange(0, len(pairs[a1idx]), 4): - pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4]) - newf.write(CONECT % a1idx + pairindices + '\n') - - newf.write(line) - - -def _get_conect_pairs(mol): - bonds = {} + conects = collections.OrderedDict() for residue in mol.residues: # intra-residue bonds @@ -150,18 +111,18 @@ def _get_conect_pairs(mol): else: order = bond.order for i in xrange(order): - bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1) + conects.setdefault(bond.a1, []).append(bond.a2) # inter-residue bonds try: r2 = residue.next_residue - except (StopIteration, NotImplemented, NotImplementedError): + except (StopIteration, KeyError, NotImplementedError): continue if not (residue.is_standard_residue and r2.is_standard_residue): for bond in residue.bonds_to(r2): - bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1) + conects.setdefault(bond.a1, []).append(bond.a2) - return bonds + return conects def warn_assemblies(mol, assemblies): diff --git a/moldesign/interfaces/parmed_interface.py b/moldesign/interfaces/parmed_interface.py index 71805da..bdf3883 100644 --- a/moldesign/interfaces/parmed_interface.py +++ b/moldesign/interfaces/parmed_interface.py @@ -59,21 +59,18 @@ def read_pdb(f): def write_pdb(mol, fileobj): pmedmol = mol_to_parmed(mol) - # Check if we have appropriate indices for the PDB file - for obj in itertools.chain(mol.atoms, mol.residues): - if obj.pdbindex is None: - protect_numbers = False - break - else: - protect_numbers = True - - if protect_numbers: # a hack to prevent parmed from changing negative numbers - for obj in itertools.chain(pmedmol.atoms, pmedmol.residues): - obj.number = _ProtectFloordiv(obj.number) + # Assign fixed indices + # TODO: respect the original pdbindex values as much as possible + for iatom, atom in enumerate(pmedmol.atoms): + atom.number = iatom + 1 + for ires, residue in enumerate(pmedmol.residues): + residue.number = ires + 1 + for obj in itertools.chain(pmedmol.atoms, pmedmol.residues): + obj.number = _ProtectFloordiv(obj.number) tempfile = StringIO() - pmedmol.write_pdb(tempfile, renumber=not protect_numbers) - mdt.helpers.insert_conect_records(mol, tempfile, write_to=fileobj) + pmedmol.write_pdb(tempfile, renumber=False) + _insert_conect_records(mol, pmedmol, tempfile, write_to=fileobj) class _ProtectFloordiv(int): @@ -89,6 +86,50 @@ def __nonzero__(self): return True +CONECT = 'CONECT %4d' + +def _insert_conect_records(mol, pmdmol, pdbfile, write_to=None): + """ Inserts TER records to indicate the end of the biopolymeric part of a chain + + Put CONECT records at the end of a pdb file that doesn't have them + + Args: + mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes + pdbfile (TextIO OR str): pdb file (file-like or string) + + Returns: + cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be + returned as the same type passed (i.e., as a filelike buffer or as a string) + """ + conect_bonds = mdt.helpers.get_conect_pairs(mol) + + def get_atomseq(atom): + return pmdmol.atoms[atom.index].number + + pairs = collections.OrderedDict() + for atom, nbrs in conect_bonds.iteritems(): + pairs[get_atomseq(atom)] = map(get_atomseq, nbrs) + + if isinstance(pdbfile, basestring): + pdbfile = StringIO(pdbfile) + + if write_to is None: + newf = StringIO() + else: + newf = write_to + pdbfile.seek(0) + + for line in pdbfile: + if line.split() == ['END']: + for a1idx in pairs: + for istart in xrange(0, len(pairs[a1idx]), 4): + pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4]) + newf.write(CONECT % a1idx + pairindices + '\n') + + newf.write(line) + + + def write_mmcif(mol, fileobj): mol_to_parmed(mol).write_cif(fileobj) diff --git a/moldesign/molecules/molecule.py b/moldesign/molecules/molecule.py index 454b898..95956d7 100644 --- a/moldesign/molecules/molecule.py +++ b/moldesign/molecules/molecule.py @@ -487,8 +487,16 @@ def _assign_residue_indices(self): default_residue = self._defres default_chain = self._defchain num_biores = 0 + conflicts = set() + + last_pdb_idx = None for atom in self.atoms: + if last_pdb_idx is not None and atom.pdbindex <= last_pdb_idx: + atom.pdbindex = last_pdb_idx + 1 + conflicts.add('atom numbers') + last_pdb_idx = atom.pdbindex + # if atom has no chain/residue, assign defaults if atom.residue is None: atom.residue = default_residue @@ -510,8 +518,7 @@ def _assign_residue_indices(self): atom.chain.name = chr(ord(atom.chain.name)+1) if oldname != atom.chain.name: - print 'Warning: chain ID conflict. Renamed Chain %s -> %s' % ( - oldname, atom.chain.name) + conflicts.add('chain') self.chains.add(atom.chain) else: assert atom.chain.molecule is self @@ -526,6 +533,9 @@ def _assign_residue_indices(self): else: assert atom.chain.molecule is self + if conflicts: + print 'WARNING: %s indices modified due to name clashes' % ( + ', '.join(conflicts)) self.is_biomolecule = (num_biores >= 2) def __eq__(self, other):