ReactionMechanismGenerator · rwest · Aug 2, 2024 · Aug 2, 2024 · Aug 2, 2024 · Aug 2, 2024
diff --git a/rmgpy/molecule/converter.pxd b/rmgpy/molecule/converter.pxd
@@ -29,7 +29,7 @@ cimport rmgpy.molecule.molecule as mm
 cimport rmgpy.molecule.element as elements
 
 
-cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*, bint save_order=?)
+cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*, bint save_order=?, int X=?)
 
 cpdef mm.Molecule from_rdkit_mol(mm.Molecule mol, object rdkitmol, bint raise_atomtype_exception=?)
 

diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py
@@ -49,11 +49,13 @@
 from rmgpy.exceptions import DependencyError
 
 
-def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_order=False):
+def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_order=False, X=0):
     """
     Convert a molecular structure to a RDKit rdmol object. Uses
     `RDKit <http://rdkit.org/>`_ to perform the conversion.
     Perceives aromaticity and, unless remove_h==False, removes Hydrogen atoms.
+    X is the atomic number or symbol to use for surface sites
+    (default is 0, which works well for SMILES, but you might want 78 (Pt) for InChIs).
 
     If return_mapping==True then it also returns a dictionary mapping the
     atoms to RDKit's atom indices.
@@ -69,7 +71,7 @@ def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_o
     rdkitmol = Chem.rdchem.EditableMol(Chem.rdchem.Mol())
     for index, atom in enumerate(mol.vertices):
         if atom.element.symbol == 'X':
-            rd_atom = Chem.rdchem.Atom('Pt')  # not sure how to do this with linear scaling when this might not be Pt
+            rd_atom = Chem.rdchem.Atom(X) # set in the function call. Default 0
         else:
             rd_atom = Chem.rdchem.Atom(atom.element.symbol)
         if atom.element.isotope != -1:

diff --git a/rmgpy/molecule/inchi.py b/rmgpy/molecule/inchi.py
@@ -602,7 +602,7 @@ def create_augmented_layers(mol):
     else:
         molcopy = mol.copy(deep=True)
 
-        rdkitmol = to_rdkit_mol(molcopy, remove_h=True)
+        rdkitmol = to_rdkit_mol(molcopy, remove_h=True, X=78)
         _, auxinfo = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon')  # suppress stereo warnings
 
         hydrogens = [at for at in molcopy.atoms if at.number == 1]

diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py
@@ -373,7 +373,7 @@ def _rdkit_translator(input_object, identifier_type, mol=None):
         if identifier_type == 'smi':
             rdkitmol = to_rdkit_mol(input_object, sanitize=False)
         else:
-            rdkitmol = to_rdkit_mol(input_object, sanitize=True)
+            rdkitmol = to_rdkit_mol(input_object, sanitize=True, X=78) # Use Pt (78) for surface sites.
         if identifier_type == 'inchi':
             output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon')
         elif identifier_type == 'inchikey':
@@ -441,6 +441,9 @@ def _openbabel_translator(input_object, identifier_type, mol=None):
             raise ValueError('Unexpected identifier type {0}.'.format(identifier_type))
         obmol = to_ob_mol(input_object)
         output = ob_conversion.WriteString(obmol).strip()
+        if identifier_type == 'smi':
+            # If we use * for surface sites, RDKit can read it again.
+            output = output.replace('[Pt]','*')
     else:
         raise ValueError('Unexpected input format. Should be a Molecule or a string.')
 

diff --git a/test/rmgpy/molecule/converterTest.py b/test/rmgpy/molecule/converterTest.py
@@ -169,3 +169,17 @@ def test_ob_round_trip(self):
             new_mol = from_ob_mol(Molecule(), ob_mol)
             assert mol.is_isomorphic(new_mol) or self.test_Hbond_free_mol.is_isomorphic(new_mol)
             assert mol.get_element_count() == new_mol.get_element_count()
+
+    def test_rdkit_adsorbate_round_trip(self):
+        """Test conversion to and from RDKitMol for adsorbates"""
+        adsorbates = [
+            Molecule().from_smiles("C*"),
+            Molecule().from_smiles("[*H]"),
+            Molecule().from_smiles("CC(=*)CO"),
+            Molecule().from_smiles("*COC*"),
+        ]
+        for mol in adsorbates:
+            rdkit_mol = to_rdkit_mol(mol)
+            new_mol = from_rdkit_mol(Molecule(), rdkit_mol)
+            assert mol.is_isomorphic(new_mol)
+            assert mol.get_element_count() == new_mol.get_element_count()
diff --git a/test/rmgpy/molecule/translatorTest.py b/test/rmgpy/molecule/translatorTest.py
@@ -863,8 +863,86 @@ def test_aromatics(self):
         assert smiles2 != smiles3
         assert smiles1 != smiles3
 
+    def test_smiles_rdkit_adsorbates(self):
+        """
+        Test that we can get from an adsorbate 
+        to SMILES and back again using RDKit
+        """
+        def compare(adjlist, smiles, alternatives=[]):
+            "the adjlist should convert to smiles, but the alternative smiles should give the same molecule"
+            molecule = Molecule().from_adjacency_list(adjlist)
+            assert to_smiles(molecule, backend="rdkit") == smiles, f"Expected {smiles}, got {to_smiles(molecule, backend='rdkit')}"
+            molecule2 = Molecule()
+            from_smiles(molecule2, smiles, backend="rdkit")
+            assert molecule2.is_isomorphic(molecule), f"Expected {molecule.to_smiles()} got {molecule2.to_smiles()}"
+            for alt in alternatives:
+                from_smiles(molecule2, alt, backend="rdkit")
+                assert molecule2.is_isomorphic(molecule), f"Expected {molecule.to_smiles()} got {molecule2.to_smiles()}"
+
+
+        adjlist = """
+        1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
+        2 X u0 p0 c0 {1,S}
+        3 H u0 p0 c0 {1,S}
+        4 H u0 p0 c0 {1,S}
+        5 H u0 p0 c0 {1,S}
+        """
+        smiles = '*C'
+        compare(adjlist, smiles, alternatives=['C*'])
+
+        adjlist = """
+        1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
+        2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
+        3 X u0 p0 c0 {2,S}
+        4 H u0 p0 c0 {1,S}
+        5 H u0 p0 c0 {1,S}
+        6 H u0 p0 c0 {1,S}
+        7 H u0 p0 c0 {2,S}
+        8 H u0 p0 c0 {2,S}
+        """
+        smiles = '*CC'
+        compare(adjlist, smiles, alternatives=['CC*', 'C(*)C'])
+
+        adjlist = """
+        1 C u0 p0 c0 {2,D} {3,S} {4,S}
+        2 X u0 p0 c0 {1,D}
+        3 H u0 p0 c0 {1,S}
+        4 H u0 p0 c0 {1,S}
+        """
+        smiles = '*=C'
+        compare(adjlist, smiles, alternatives=['C=*'])
+
+        adjlist = """
+        1 X u0 p0 c0 {2,S}
+        2 H u0 p0 c0 {1,S}
+        """
+        smiles = '*[H]'
+        compare(adjlist, smiles, alternatives=['[*H]', '[H]*'])
+
+        adjlist = """
+        1 X u0 p0 c0 {2,D}
+        2 O u0 p2 c0 {1,D}
+        """
+        smiles = '*=O'
+        compare(adjlist, smiles, alternatives=['O=*'])
+
+        adjlist = """
+        1 X u0 p0 c0 {2,S}
+        2 C u0 p0 c0 {1,S} {3,S} {6,S} {7,S}
+        3 O u0 p2 c0 {2,S} {4,S}
+        4 C u0 p0 c0 {3,S} {5,S} {8,S} {9,S}
+        5 X u0 p0 c0 {4,S}
+        6 H u0 p0 c0 {2,S}
+        7 H u0 p0 c0 {2,S}
+        8 H u0 p0 c0 {4,S}
+        9 H u0 p0 c0 {4,S}
+        """
+        smiles = '*COC*'
+        compare(adjlist, smiles)
+
+
     def test_surface_molecule_rdkit(self):
-        """Test InChI generation for a surface molecule using RDKit"""
+        """Test SMILES generation for a surface molecule using RDKit"""
         mol = Molecule().from_adjacency_list(
             """
 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
@@ -874,12 +952,11 @@ def test_surface_molecule_rdkit(self):
 5 X u0 p0 c0 {1,S}
 """
         )
-        smiles = "C[Pt]"
-
+        smiles = "*C"
         assert to_smiles(mol, backend="rdkit") == smiles
 
     def test_surface_molecule_ob(self):
-        """Test InChI generation for a surface molecule using OpenBabel"""
+        """Test SMILES generation for a surface molecule using OpenBabel"""
         mol = Molecule().from_adjacency_list(
             """
 1 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
@@ -889,10 +966,31 @@ def test_surface_molecule_ob(self):
 5 X u0 p0 c0 {1,S}
 """
         )
-        smiles = "C[Pt]"
-
+        smiles = "C*"
         assert to_smiles(mol, backend="openbabel") == smiles
 
+    def test_smiles_adsorbates_round_trip(self):
+        """
+        Test that we can get from an adsorbate SMILES and back again,
+        by whatever back-end is chosen automatically.
+        """
+        def check(s0):
+            m = Molecule(smiles=s0)
+            s1 = m.to_smiles()
+            m2 = Molecule(smiles=s1)
+            assert m.is_isomorphic(m2)
+            s2 = m2.to_smiles()
+            assert s1 == s2
+        check("*C")
+        check("*CC")
+        check("*=C")
+        check("*[H]")
+        check("*=O")
+        check("*COC*")
+        check("*CNC")
+        check("*C1CCCC1")
+        check("*N")
+        check("*N(Cl)")
 
 class ParsingTest:
     def setup_class(self):