diff --git a/moldesign/_tests/data/2p8w.cif.bz2 b/moldesign/_tests/data/2p8w.cif.bz2 new file mode 100644 index 0000000..eaf765d Binary files /dev/null and b/moldesign/_tests/data/2p8w.cif.bz2 differ diff --git a/moldesign/_tests/data/3b5x.cif.bz2 b/moldesign/_tests/data/3b5x.cif.bz2 new file mode 100644 index 0000000..05dd98b Binary files /dev/null and b/moldesign/_tests/data/3b5x.cif.bz2 differ diff --git a/moldesign/_tests/test_pdb_processing.py b/moldesign/_tests/test_pdb_processing.py index 26f10e3..9401493 100644 --- a/moldesign/_tests/test_pdb_processing.py +++ b/moldesign/_tests/test_pdb_processing.py @@ -123,6 +123,20 @@ def test_numeric_residue_name_1PYN(request, mol): assert list(contents.keys())[0] == '941' +def test_single_chain_2p8w(): + mol = mdt.read(get_data_path('2p8w.cif.bz2')) + assert mol.num_chains == 3 + assert mol.chains['C'].num_residues == 1 + assert mol.chains['C'].residues['GNP843'].num_atoms == 32 + + +def test_missing_atoms_3b5x(): + mol = mdt.read(get_data_path('3b5x.cif.bz2')) + assert mol.num_chains == 2 + assert mol.num_atoms == 1144 + assert mol.num_residues == 1144 + + MISSINGRES_2JAJ = [('A', 'GLY', -4), ('A', 'PRO', -3), ('A', 'LEU', -2), ('A', 'GLY', -1), ('A', 'MET', 0), ('A', 'ALA', 1), ('A', 'GLY', 2), ('A', 'LEU', 3), ('A', 'GLY', 4), ('A', 'HIS', 5), ('A', 'PRO', 6), ('A', 'ALA', 7), diff --git a/moldesign/interfaces/parmed_interface.py b/moldesign/interfaces/parmed_interface.py index b5fe99f..2663265 100644 --- a/moldesign/interfaces/parmed_interface.py +++ b/moldesign/interfaces/parmed_interface.py @@ -261,23 +261,26 @@ def _reassign_chains(f, mol): """ data = mdt.interfaces.biopython_interface.get_mmcif_data(f) f.seek(0) + try: - newchain_names = set(data['_pdbx_poly_seq_scheme.asym_id']+ - data['_pdbx_nonpoly_scheme.asym_id']) + poly_seq_ids = _aslist(data['_pdbx_poly_seq_scheme.asym_id']) + nonpoly_ids = _aslist(data['_pdbx_nonpoly_scheme.asym_id']) except KeyError: return mol.copy(name=mol.name) + + newchain_names = set(poly_seq_ids + nonpoly_ids) newchains = {name: mdt.Chain(name) for name in newchain_names} residue_iterator = itertools.chain( - zip(data['_pdbx_poly_seq_scheme.mon_id'], - data['_pdbx_poly_seq_scheme.pdb_seq_num'], - data['_pdbx_poly_seq_scheme.pdb_strand_id'], - data['_pdbx_poly_seq_scheme.asym_id']), + zip(_aslist(data['_pdbx_poly_seq_scheme.mon_id']), + _aslist(data['_pdbx_poly_seq_scheme.pdb_seq_num']), + _aslist(data['_pdbx_poly_seq_scheme.pdb_strand_id']), + _aslist(data['_pdbx_poly_seq_scheme.asym_id'])), - zip(data['_pdbx_nonpoly_scheme.mon_id'], - data['_pdbx_nonpoly_scheme.pdb_seq_num'], - data['_pdbx_nonpoly_scheme.pdb_strand_id'], - data['_pdbx_nonpoly_scheme.asym_id'])) + zip(_aslist(data['_pdbx_nonpoly_scheme.mon_id']), + _aslist(data['_pdbx_nonpoly_scheme.pdb_seq_num']), + _aslist(data['_pdbx_nonpoly_scheme.pdb_strand_id']), + _aslist(data['_pdbx_nonpoly_scheme.asym_id']))) reschains = {(rname, ridx, rchain): newchains[chainid] for rname, ridx, rchain, chainid in residue_iterator} @@ -290,3 +293,9 @@ def _reassign_chains(f, mol): return mdt.Molecule(mol.atoms, name=mol.name, metadata=mol.metadata) + +def _aslist(l): + if isinstance(l, list): + return l + else: + return [l] diff --git a/moldesign/molecules/residue.py b/moldesign/molecules/residue.py index 4bcc05e..b8325e9 100644 --- a/moldesign/molecules/residue.py +++ b/moldesign/molecules/residue.py @@ -247,11 +247,10 @@ def _is_ending_residue(self): except KeyError: # If we're here, the residue is missing some atoms. We'll fall back to checking the # next residues in line - if self.index == len(self.molecule.residues): + if self.index == len(self.molecule.residues) - 1: return True else: - print('WARNING: %s is missing expected atoms. Attempting to infer chain end' % \ - self) + print('WARNING: %s is missing expected atoms. Attempting to infer chain end' % self) nextres = self.molecule.residues[self.index + 1] return not self._same_polymer(nextres)