diff --git a/rmgpy/molecule/converter.pxd b/rmgpy/molecule/converter.pxd index 25752cfd2d..18ab765abd 100644 --- a/rmgpy/molecule/converter.pxd +++ b/rmgpy/molecule/converter.pxd @@ -29,7 +29,7 @@ cimport rmgpy.molecule.molecule as mm cimport rmgpy.molecule.element as elements -cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*, bint save_order=?) +cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*, bint save_order=?, int X=?) cpdef mm.Molecule from_rdkit_mol(mm.Molecule mol, object rdkitmol, bint raise_atomtype_exception=?) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index e5e619a2b7..0905a675f3 100644 --- a/rmgpy/molecule/converter.py +++ b/rmgpy/molecule/converter.py @@ -49,11 +49,13 @@ from rmgpy.exceptions import DependencyError -def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_order=False): +def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_order=False, X=0): """ Convert a molecular structure to a RDKit rdmol object. Uses `RDKit `_ to perform the conversion. Perceives aromaticity and, unless remove_h==False, removes Hydrogen atoms. + X is the atomic number or symbol to use for surface sites + (default is 0, which works well for SMILES, but you might want 78 (Pt) for InChIs). If return_mapping==True then it also returns a dictionary mapping the atoms to RDKit's atom indices. @@ -69,7 +71,7 @@ def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_o rdkitmol = Chem.rdchem.EditableMol(Chem.rdchem.Mol()) for index, atom in enumerate(mol.vertices): if atom.element.symbol == 'X': - rd_atom = Chem.rdchem.Atom('Pt') # not sure how to do this with linear scaling when this might not be Pt + rd_atom = Chem.rdchem.Atom(X) # set in the function call. Default 0 else: rd_atom = Chem.rdchem.Atom(atom.element.symbol) if atom.element.isotope != -1: diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py index 59e49d8522..2337435d8c 100644 --- a/rmgpy/molecule/inchi.py +++ b/rmgpy/molecule/inchi.py @@ -602,7 +602,7 @@ def create_augmented_layers(mol): else: molcopy = mol.copy(deep=True) - rdkitmol = to_rdkit_mol(molcopy, remove_h=True) + rdkitmol = to_rdkit_mol(molcopy, remove_h=True, X=78) _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon') # suppress stereo warnings hydrogens = [at for at in molcopy.atoms if at.number == 1] diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 731d8d8d8e..1a7add7bfc 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -373,7 +373,7 @@ def _rdkit_translator(input_object, identifier_type, mol=None): if identifier_type == 'smi': rdkitmol = to_rdkit_mol(input_object, sanitize=False) else: - rdkitmol = to_rdkit_mol(input_object, sanitize=True) + rdkitmol = to_rdkit_mol(input_object, sanitize=True, X=78) # Use Pt (78) for surface sites. if identifier_type == 'inchi': output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon') elif identifier_type == 'inchikey': @@ -441,6 +441,9 @@ def _openbabel_translator(input_object, identifier_type, mol=None): raise ValueError('Unexpected identifier type {0}.'.format(identifier_type)) obmol = to_ob_mol(input_object) output = ob_conversion.WriteString(obmol).strip() + if identifier_type == 'smi': + # If we use * for surface sites, RDKit can read it again. + output = output.replace('[Pt]','*') else: raise ValueError('Unexpected input format. Should be a Molecule or a string.') diff --git a/test/rmgpy/molecule/converterTest.py b/test/rmgpy/molecule/converterTest.py index d9395a90a3..e9d36e9eb5 100644 --- a/test/rmgpy/molecule/converterTest.py +++ b/test/rmgpy/molecule/converterTest.py @@ -169,3 +169,17 @@ def test_ob_round_trip(self): new_mol = from_ob_mol(Molecule(), ob_mol) assert mol.is_isomorphic(new_mol) or self.test_Hbond_free_mol.is_isomorphic(new_mol) assert mol.get_element_count() == new_mol.get_element_count() + + def test_rdkit_adsorbate_round_trip(self): + """Test conversion to and from RDKitMol for adsorbates""" + adsorbates = [ + Molecule().from_smiles("C*"), + Molecule().from_smiles("[*H]"), + Molecule().from_smiles("CC(=*)CO"), + Molecule().from_smiles("*COC*"), + ] + for mol in adsorbates: + rdkit_mol = to_rdkit_mol(mol) + new_mol = from_rdkit_mol(Molecule(), rdkit_mol) + assert mol.is_isomorphic(new_mol) + assert mol.get_element_count() == new_mol.get_element_count() diff --git a/test/rmgpy/molecule/translatorTest.py b/test/rmgpy/molecule/translatorTest.py index cb62d9dc97..e0a911e5e2 100644 --- a/test/rmgpy/molecule/translatorTest.py +++ b/test/rmgpy/molecule/translatorTest.py @@ -863,8 +863,86 @@ def test_aromatics(self): assert smiles2 != smiles3 assert smiles1 != smiles3 + def test_smiles_rdkit_adsorbates(self): + """ + Test that we can get from an adsorbate + to SMILES and back again using RDKit + """ + def compare(adjlist, smiles, alternatives=[]): + "the adjlist should convert to smiles, but the alternative smiles should give the same molecule" + molecule = Molecule().from_adjacency_list(adjlist) + assert to_smiles(molecule, backend="rdkit") == smiles, f"Expected {smiles}, got {to_smiles(molecule, backend='rdkit')}" + molecule2 = Molecule() + from_smiles(molecule2, smiles, backend="rdkit") + assert molecule2.is_isomorphic(molecule), f"Expected {molecule.to_smiles()} got {molecule2.to_smiles()}" + for alt in alternatives: + from_smiles(molecule2, alt, backend="rdkit") + assert molecule2.is_isomorphic(molecule), f"Expected {molecule.to_smiles()} got {molecule2.to_smiles()}" + + + adjlist = """ + 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} + 2 X u0 p0 c0 {1,S} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + 5 H u0 p0 c0 {1,S} + """ + smiles = '*C' + compare(adjlist, smiles, alternatives=['C*']) + + adjlist = """ + 1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S} + 2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S} + 3 X u0 p0 c0 {2,S} + 4 H u0 p0 c0 {1,S} + 5 H u0 p0 c0 {1,S} + 6 H u0 p0 c0 {1,S} + 7 H u0 p0 c0 {2,S} + 8 H u0 p0 c0 {2,S} + """ + smiles = '*CC' + compare(adjlist, smiles, alternatives=['CC*', 'C(*)C']) + + adjlist = """ + 1 C u0 p0 c0 {2,D} {3,S} {4,S} + 2 X u0 p0 c0 {1,D} + 3 H u0 p0 c0 {1,S} + 4 H u0 p0 c0 {1,S} + """ + smiles = '*=C' + compare(adjlist, smiles, alternatives=['C=*']) + + adjlist = """ + 1 X u0 p0 c0 {2,S} + 2 H u0 p0 c0 {1,S} + """ + smiles = '*[H]' + compare(adjlist, smiles, alternatives=['[*H]', '[H]*']) + + adjlist = """ + 1 X u0 p0 c0 {2,D} + 2 O u0 p2 c0 {1,D} + """ + smiles = '*=O' + compare(adjlist, smiles, alternatives=['O=*']) + + adjlist = """ + 1 X u0 p0 c0 {2,S} + 2 C u0 p0 c0 {1,S} {3,S} {6,S} {7,S} + 3 O u0 p2 c0 {2,S} {4,S} + 4 C u0 p0 c0 {3,S} {5,S} {8,S} {9,S} + 5 X u0 p0 c0 {4,S} + 6 H u0 p0 c0 {2,S} + 7 H u0 p0 c0 {2,S} + 8 H u0 p0 c0 {4,S} + 9 H u0 p0 c0 {4,S} + """ + smiles = '*COC*' + compare(adjlist, smiles) + + def test_surface_molecule_rdkit(self): - """Test InChI generation for a surface molecule using RDKit""" + """Test SMILES generation for a surface molecule using RDKit""" mol = Molecule().from_adjacency_list( """ 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} @@ -874,12 +952,11 @@ def test_surface_molecule_rdkit(self): 5 X u0 p0 c0 {1,S} """ ) - smiles = "C[Pt]" - + smiles = "*C" assert to_smiles(mol, backend="rdkit") == smiles def test_surface_molecule_ob(self): - """Test InChI generation for a surface molecule using OpenBabel""" + """Test SMILES generation for a surface molecule using OpenBabel""" mol = Molecule().from_adjacency_list( """ 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S} @@ -889,10 +966,31 @@ def test_surface_molecule_ob(self): 5 X u0 p0 c0 {1,S} """ ) - smiles = "C[Pt]" - + smiles = "C*" assert to_smiles(mol, backend="openbabel") == smiles + def test_smiles_adsorbates_round_trip(self): + """ + Test that we can get from an adsorbate SMILES and back again, + by whatever back-end is chosen automatically. + """ + def check(s0): + m = Molecule(smiles=s0) + s1 = m.to_smiles() + m2 = Molecule(smiles=s1) + assert m.is_isomorphic(m2) + s2 = m2.to_smiles() + assert s1 == s2 + check("*C") + check("*CC") + check("*=C") + check("*[H]") + check("*=O") + check("*COC*") + check("*CNC") + check("*C1CCCC1") + check("*N") + check("*N(Cl)") class ParsingTest: def setup_class(self):