diff --git a/moldesign/_tests/test_io.py b/moldesign/_tests/test_io.py index 06a5490..d1c3628 100644 --- a/moldesign/_tests/test_io.py +++ b/moldesign/_tests/test_io.py @@ -163,7 +163,7 @@ def test_1kbu_assembly_build(key, request): mol.chains[asym.num_chains].positions.defunits_value()) -@pytest.mark.parametrize('fmt', 'smiles pdb mol2 sdf inchi'.split()) +@pytest.mark.parametrize('fmt', 'smiles pdb mol2 sdf inchi mmcif'.split()) def test_topology_preserved_in_serialization(bipyridine_smiles, fmt): """ Test that bond topology is preserved even if it doesn't make sense from distances """ diff --git a/moldesign/helpers/pdb.py b/moldesign/helpers/pdb.py index f5c8a58..7a2872e 100644 --- a/moldesign/helpers/pdb.py +++ b/moldesign/helpers/pdb.py @@ -93,6 +93,76 @@ def insert_ter_records(mol, pdbfile): else: return newf +CONECT = 'CONECT %4d' + +def insert_conect_records(mol, pdbfile, write_to=None): + """ Inserts TER records to indicate the end of the biopolymeric part of a chain + + Put CONECT records at the end of a pdb file that doesn't have them + + Args: + mol (moldesign.Molecule): the MDT version of the molecule that pdbfile describes + pdbfile (TextIO OR str): pdb file (file-like or string) + + Returns: + cStringIO.StringIO OR str: copy of the pdb file with added TER records - it will be + returned as the same type passed (i.e., as a filelike buffer or as a string) + """ + pairs = _get_conect_pairs(mol) + + if isinstance(pdbfile, basestring): + pdbfile = StringIO(pdbfile) + + # First, identify where to insert records (if anywhere) + ter_residues = set() + for chain in mol.chains: + if chain.type == 'protein': + ter_residues.add((chain.pdbname, chain.c_terminal.pdbindex)) + elif chain.type == 'dna': + ter_residues.add((chain.pdbname, chain.threeprime_end.pdbindex)) + + # insert the records (if necessary) + if write_to is None: + newf = StringIO() + else: + newf = write_to + pdbfile.seek(0) + + for line in pdbfile: + if line.split() == ['END']: + for a1idx in pairs: + for istart in xrange(0, len(pairs[a1idx]), 4): + pairindices = ''.join(("%5d" % idx) for idx in pairs[a1idx][istart:istart+4]) + newf.write(CONECT % a1idx + pairindices + '\n') + + newf.write(line) + + +def _get_conect_pairs(mol): + bonds = {} + + for residue in mol.residues: + # intra-residue bonds + if not residue.is_standard_residue: + for bond in residue.bonds: + if bond.order <= 1: + order = 1 + else: + order = bond.order + for i in xrange(order): + bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1) + + # inter-residue bonds + try: + r2 = residue.next_residue + except (StopIteration, NotImplemented, NotImplementedError): + continue + if not (residue.is_standard_residue and r2.is_standard_residue): + for bond in residue.bonds_to(r2): + bonds.setdefault(bond.a1.index+1, []).append(bond.a2.index+1) + + return bonds + def warn_assemblies(mol, assemblies): """ Print a warning message if the PDB structure contains a biomolecular assembly diff --git a/moldesign/interfaces/parmed_interface.py b/moldesign/interfaces/parmed_interface.py index f272b49..71805da 100644 --- a/moldesign/interfaces/parmed_interface.py +++ b/moldesign/interfaces/parmed_interface.py @@ -15,6 +15,7 @@ import itertools import parmed +from StringIO import StringIO import moldesign as mdt from moldesign import units as u @@ -70,7 +71,9 @@ def write_pdb(mol, fileobj): for obj in itertools.chain(pmedmol.atoms, pmedmol.residues): obj.number = _ProtectFloordiv(obj.number) - pmedmol.write_pdb(fileobj, renumber=not protect_numbers) + tempfile = StringIO() + pmedmol.write_pdb(tempfile, renumber=not protect_numbers) + mdt.helpers.insert_conect_records(mol, tempfile, write_to=fileobj) class _ProtectFloordiv(int): @@ -226,6 +229,8 @@ def _parmed_to_ff(topo, atom_map): def _reassign_chains(f, mol): """ Change chain ID assignments to the mmCIF standard (parmed uses author assignments) + If the required fields don't exist, a copy of the molecule is returned unchanged. + Args: f (file): mmcif file/stream mol (moldesign.Molecule): molecule with default parmed assignemnts @@ -235,8 +240,11 @@ def _reassign_chains(f, mol): """ data = mdt.interfaces.biopython_interface.get_mmcif_data(f) f.seek(0) - newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+ - data['_pdbx_nonpoly_scheme.asym_id']) + try: + newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+ + data['_pdbx_nonpoly_scheme.asym_id']) + except KeyError: + return mol.copy(name=mol.name) newchains = {name: mdt.Chain(name) for name in newchain_names} residue_iterator = itertools.chain( diff --git a/moldesign/molecules/atomcollections.py b/moldesign/molecules/atomcollections.py index b3b9f4b..14e6621 100644 --- a/moldesign/molecules/atomcollections.py +++ b/moldesign/molecules/atomcollections.py @@ -446,6 +446,22 @@ def bonded_atoms(self): atoms.append(nbr) return atoms + def bonds_to(self, other): + """ Returns list of bonds between this object and another one + + Args: + other (AtomContainer): other object + + Returns: + List[moldesign.Bond]: bonds between this object and another + """ + bonds = [] + otheratoms = set(other.atoms) + for bond in self.internal_bonds: + if bond.a1 in otheratoms or bond.a2 in otheratoms: + bonds.append(bond) + return bonds + @toplevel class AtomList(AtomContainer, list): # order is important, list will override methods otherwise