diff --git a/arkane/encorr/bacTest.py b/arkane/encorr/bacTest.py index f01f5b6cc5..adde151977 100644 --- a/arkane/encorr/bacTest.py +++ b/arkane/encorr/bacTest.py @@ -38,7 +38,7 @@ from collections import Counter import numpy as np -import pybel +from openbabel import pybel from rmgpy import settings from rmgpy.molecule import Molecule diff --git a/arkane/encorr/data.py b/arkane/encorr/data.py index cadbc3af8d..0d5307a5e4 100644 --- a/arkane/encorr/data.py +++ b/arkane/encorr/data.py @@ -42,7 +42,7 @@ from typing import Callable, Iterable, List, Sequence, Set, Union import numpy as np -import pybel +from openbabel import pybel from rmgpy import settings from rmgpy.molecule import Atom, Bond, get_element diff --git a/arkane/encorr/dataTest.py b/arkane/encorr/dataTest.py index 11a18a12ed..7a837bce18 100644 --- a/arkane/encorr/dataTest.py +++ b/arkane/encorr/dataTest.py @@ -35,7 +35,7 @@ from collections import Counter import numpy as np -import pybel +from openbabel import pybel from rmgpy.molecule import Molecule as RMGMolecule diff --git a/documentation/source/users/rmg/species.rst b/documentation/source/users/rmg/species.rst index 40052c1ee4..2b20a45cc0 100644 --- a/documentation/source/users/rmg/species.rst +++ b/documentation/source/users/rmg/species.rst @@ -42,7 +42,9 @@ the same species: :: structure=InChI("InChI=1S/CH4/h1H4"), - +Be careful using the SMILES shorthand with lowercase letters for aromatics, +radicals, or double bonds, because these can be ambiguous and the resulting +molecule may depend on the version of OpenBabel, RDKit, and RMG in use. To quickly generate any adjacency list, or to generate an adjacency list from other types of molecular representations such as SMILES, InChI, or even common species names, use the Molecule Search tool found here: http://rmg.mit.edu/molecule_search diff --git a/environment.yml b/environment.yml index f2af2f5af1..e9203b551a 100644 --- a/environment.yml +++ b/environment.yml @@ -28,7 +28,7 @@ dependencies: - nose - rmg::numdifftools - numpy >=1.10.0 - - openbabel + - conda-forge::openbabel >= 3 - pandas - psutil - rmg::pydas >=1.0.2 diff --git a/rmgpy/molecule/converter.pxd b/rmgpy/molecule/converter.pxd index fd6a79cb7c..b44a5df8de 100644 --- a/rmgpy/molecule/converter.pxd +++ b/rmgpy/molecule/converter.pxd @@ -26,6 +26,7 @@ ############################################################################### cimport rmgpy.molecule.molecule as mm +cimport rmgpy.molecule.element as elements cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index a3ca1e008a..2dfceaa8d7 100644 --- a/rmgpy/molecule/converter.py +++ b/rmgpy/molecule/converter.py @@ -39,7 +39,7 @@ from rdkit import Chem # Test if openbabel is installed try: - import openbabel + from openbabel import openbabel except ImportError: openbabel = None @@ -233,6 +233,7 @@ def to_ob_mol(mol, return_mapping=False): if atom.element.isotope != -1: a.SetIsotope(atom.element.isotope) a.SetFormalCharge(atom.charge) + # a.SetImplicitHCount(0) # the default is 0 ob_atom_ids[atom] = a.GetId() orders = {1: 1, 2: 2, 3: 3, 4: 4, 1.5: 5} for atom1 in mol.vertices: @@ -257,11 +258,21 @@ def from_ob_mol(mol, obmol, raise_atomtype_exception=True): """ Convert a OpenBabel Mol object `obmol` to a molecular structure. Uses `OpenBabel `_ to perform the conversion. + + It estimates radical placement based on undervalence of atoms, + and assumes overall spin multiplicity is radical count + 1 """ # Below are the declared variables for cythonizing the module - # cython.declare(i=cython.int) - # cython.declare(radical_electrons=cython.int, charge=cython.int, lone_pairs=cython.int) - # cython.declare(atom=mm.Atom, atom1=mm.Atom, atom2=mm.Atom, bond=mm.Bond) + cython.declare( + number=cython.int, + isotope=cython.int, + element=elements.Element, + charge=cython.int, + valence=cython.int, + radical_electrons=cython.int, + atom=mm.Atom, + ) + if openbabel is None: raise DependencyError('OpenBabel is not installed. Please install or use RDKit.') @@ -279,8 +290,10 @@ def from_ob_mol(mol, obmol, raise_atomtype_exception=True): element = elements.get_element(number, isotope or -1) # Process charge charge = obatom.GetFormalCharge() - obatom_multiplicity = obatom.GetSpinMultiplicity() - radical_electrons = obatom_multiplicity - 1 if obatom_multiplicity != 0 else 0 + # Calculate the radical electrons due to undervalence, + # ignoring whatever may be set on obatom.GetSpinMultiplicity() + valence = obatom.GetTotalValence() + radical_electrons = openbabel.GetTypicalValence(number, valence, charge) - valence atom = mm.Atom(element, radical_electrons, charge, '', 0) mol.vertices.append(atom) diff --git a/rmgpy/molecule/pathfinderTest.py b/rmgpy/molecule/pathfinderTest.py index 011c91ba5e..de75e30f94 100644 --- a/rmgpy/molecule/pathfinderTest.py +++ b/rmgpy/molecule/pathfinderTest.py @@ -311,15 +311,14 @@ def test_c5h6o2(self): expected_idx_path = [3, 2, 4, 6] self.assertEquals(idx_path, expected_idx_path) - def test_c6h6o4(self): - inchi = "InChI=1S/C6H6O4/c1-2-4-9-6(7)3-5-10-8/h2-3H,1,5H2" + def test_c8h14o4(self): + inchi = "InChI=1S/C8H14O4S/c1-3-6-13(2,11)7-8(9)4-5-12-10/h3,6H,1,4-5,7H2,2H3,(H-,10,11)" mol = Molecule().from_inchi(inchi) start = mol.atoms[0] path = find_butadiene_end_with_charge(start) idx_path = [mol.atoms.index(atom) + 1 for atom in path[0::2]] - - expected_idx_path = [1, 2, 4, 9] - self.assertEquals(idx_path, expected_idx_path) + expected_idx_path = [1, 3, 6, 13] + self.assertEqual(idx_path, expected_idx_path) def test_c6h6o6(self): inchi = "InChI=1S/C6H6O6/c7-6(2-5-12-9)10-3-1-4-11-8/h1,7H,4-5H2" @@ -399,9 +398,9 @@ def test_allyl_radical(self): self.assertTrue(paths) def test_nitrogenated_birad(self): - smiles = '[CH]=C[N]' + smiles = '[N]C=[CH]' mol = Molecule().from_smiles(smiles) - paths = find_allyl_delocalization_paths(mol.atoms[3]) + paths = find_allyl_delocalization_paths(mol.atoms[0]) self.assertTrue(paths) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 9d8a7e0d98..5b9d584faf 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -39,7 +39,7 @@ from rdkit import Chem # Test if openbabel is installed try: - import openbabel + from openbabel import openbabel except ImportError: BACKENDS = ['rdkit'] else: @@ -394,7 +394,10 @@ def _openbabel_translator(input_object, identifier_type, mol=None): obmol = openbabel.OBMol() ob_conversion.ReadString(obmol, input_object) obmol.AddHydrogens() - obmol.AssignSpinMultiplicity(True) + # In OpenBabel 3+ the function obmol.AssignSpinMultiplicity(True) does nothing. + # We could write our own method here and call obatom.SetSpinMultiplicity on + # each atom, but instead we will leave them blank for now and fix them + # in the from_ob_mol() method. if mol is None: mol = mm.Molecule() output = from_ob_mol(mol, obmol) diff --git a/rmgpy/molecule/translatorTest.py b/rmgpy/molecule/translatorTest.py index e92e8c05d8..d75f3a4508 100644 --- a/rmgpy/molecule/translatorTest.py +++ b/rmgpy/molecule/translatorTest.py @@ -1338,7 +1338,17 @@ def test_c3h4o4(self): u_indices = [4, 5] self.compare(inchi, u_indices) + @work_in_progress def test_c6h6o4(self): + """ + This test used to pass with OpenBabel < 3.0, but I think the inchi is invalid? + or at least not standard. + OpenBabel reports: + Problems/mismatches: Mobile-H( Hydrogens: Locations or number, Number; Charge(s): Do not match) + and cactus.nci.nih.gov converts it to InChI=1S/C6H7O4/c1-2-4-9-6(7)3-5-10-8/h2-3,8H,1,5H2/q+1 + which at least doesn't make OpenBabel complain. However, both have a net charge + and cause RMG to crash. I'm not sure what the molecule was ever supposed to represent. + """ inchi = 'InChI=1S/C6H6O4/c1-2-4-9-6(7)3-5-10-8/h2-3H,1,5H2' u_indices = [1, 3, 4, 8] self.compare(inchi, u_indices) diff --git a/utilities.py b/utilities.py index e52c89e887..1a014e9d7a 100644 --- a/utilities.py +++ b/utilities.py @@ -102,7 +102,7 @@ def _check_openbabel(): missing = False try: - import openbabel + from openbabel import openbabel except ImportError: print('{0:<30}{1}'.format('OpenBabel', 'Not found. Necessary for SMILES/InChI functionality for nitrogen compounds.'))