diff --git a/.conda/meta.yaml b/.conda/meta.yaml index 20466c88d3..bb0cf0be93 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -64,7 +64,6 @@ requirements: - conda-forge::gprof2dot - conda-forge::numdifftools - conda-forge::quantities !=0.16.0,!=0.16.1 - - conda-forge::ringdecomposerlib-python - rmg::pydas >=1.0.3 - rmg::pydqed >=1.0.3 - rmg::symmetry @@ -114,7 +113,6 @@ requirements: - conda-forge::gprof2dot - conda-forge::numdifftools - conda-forge::quantities !=0.16.0,!=0.16.1 - - conda-forge::ringdecomposerlib-python - rmg::pydas >=1.0.3 - rmg::pydqed >=1.0.3 - rmg::symmetry @@ -165,7 +163,6 @@ test: - conda-forge::gprof2dot - conda-forge::numdifftools - conda-forge::quantities !=0.16.0,!=0.16.1 - - conda-forge::ringdecomposerlib-python - rmg::pydas >=1.0.3 - rmg::pydqed >=1.0.3 - rmg::symmetry diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index e815da269d..260101d46e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -63,7 +63,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9"] + python-version: ["3.9", "3.10", "3.11"] os: [macos-13, macos-latest, ubuntu-latest] include-rms: ["", "with RMS"] exclude: diff --git a/.github/workflows/conda_build.yml b/.github/workflows/conda_build.yml index 7c6a7b32f5..4be38424af 100644 --- a/.github/workflows/conda_build.yml +++ b/.github/workflows/conda_build.yml @@ -17,7 +17,7 @@ jobs: matrix: os: [ubuntu-latest, macos-13, macos-latest] numpy-version: ["1.26"] - python-version: ["3.9"] + python-version: ["3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} name: Build ${{ matrix.os }} Python ${{ matrix.python-version }} Numpy ${{ matrix.numpy-version }} defaults: diff --git a/arkane/encorr/isodesmic.py b/arkane/encorr/isodesmic.py index f4f39d0d85..75e52e657a 100644 --- a/arkane/encorr/isodesmic.py +++ b/arkane/encorr/isodesmic.py @@ -413,7 +413,7 @@ def _get_ring_constraints( self, species: ErrorCancelingSpecies ) -> List[GenericConstraint]: ring_features = [] - rings = species.molecule.get_smallest_set_of_smallest_rings() + rings = species.molecule.get_symmetrized_smallest_set_of_smallest_rings() for ring in rings: ring_features.append(GenericConstraint(constraint_str=f"{len(ring)}_ring")) diff --git a/environment.yml b/environment.yml index da840c1ab4..200c853c60 100644 --- a/environment.yml +++ b/environment.yml @@ -84,7 +84,6 @@ dependencies: # bug in quantities, see: # https://github.com/ReactionMechanismGenerator/RMG-Py/pull/2694#issuecomment-2489286263 - conda-forge::quantities !=0.16.0,!=0.16.1 - - conda-forge::ringdecomposerlib-python # packages we maintain - rmg::pydas >=1.0.3 diff --git a/rmgpy/data/solvation.py b/rmgpy/data/solvation.py index b95f708f02..c816e13ba7 100644 --- a/rmgpy/data/solvation.py +++ b/rmgpy/data/solvation.py @@ -1623,7 +1623,7 @@ def estimate_radical_solute_data_via_hbi(self, molecule, stable_solute_data_esti # Take C1=CC=C([O])C(O)=C1 as an example, we need to remove the interation of OH-OH, then add the interaction of Oj-OH. # For now, we only apply this part to cyclic structure because we only have radical interaction data for aromatic radical. if saturated_struct.is_cyclic(): - sssr = saturated_struct.get_smallest_set_of_smallest_rings() + sssr = saturated_struct.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: @@ -1631,7 +1631,7 @@ def estimate_radical_solute_data_via_hbi(self, molecule, stable_solute_data_esti saturated_struct, {'*1': atomPair[0], '*2': atomPair[1]}) except KeyError: pass - sssr = molecule.get_smallest_set_of_smallest_rings() + sssr = molecule.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: @@ -1707,7 +1707,7 @@ def estimate_halogen_solute_data(self, molecule, stable_solute_data_estimator): # Remove all of the long distance interactions of the replaced structure. Then add the long interactions of the halogenated molecule. if replaced_struct.is_cyclic(): - sssr = replaced_struct.get_smallest_set_of_smallest_rings() + sssr = replaced_struct.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: @@ -1715,7 +1715,7 @@ def estimate_halogen_solute_data(self, molecule, stable_solute_data_estimator): replaced_struct, {'*1': atomPair[0], '*2': atomPair[1]}) except KeyError: pass - sssr = molecule.get_smallest_set_of_smallest_rings() + sssr = molecule.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: @@ -1799,7 +1799,7 @@ def compute_group_additivity_solute(self, molecule): # In my opinion, it's cleaner to do it in the current way. # WIPWIPWIPWIPWIPWIPWIP ######################################### WIPWIPWIPWIPWIPWIPWIP if cyclic: - sssr = molecule.get_smallest_set_of_smallest_rings() + sssr = molecule.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py index 4bd050f529..5e10424fe8 100644 --- a/rmgpy/data/thermo.py +++ b/rmgpy/data/thermo.py @@ -346,7 +346,7 @@ def is_bicyclic(polyring): returns True if it's a bicyclic, False otherwise """ submol, _ = convert_ring_to_sub_molecule(polyring) - sssr = submol.get_smallest_set_of_smallest_rings() + sssr = submol.get_symmetrized_smallest_set_of_smallest_rings() return len(sssr) == 2 @@ -466,8 +466,8 @@ def is_ring_partial_matched(ring, matched_group): return True else: submol_ring, _ = convert_ring_to_sub_molecule(ring) - sssr = submol_ring.get_smallest_set_of_smallest_rings() - sssr_grp = matched_group.get_smallest_set_of_smallest_rings() + sssr = submol_ring.get_symmetrized_smallest_set_of_smallest_rings() + sssr_grp = matched_group.make_sample_molecule().get_symmetrized_smallest_set_of_smallest_rings() if sorted([len(sr) for sr in sssr]) == sorted([len(sr_grp) for sr_grp in sssr_grp]): return False else: @@ -2141,7 +2141,7 @@ def estimate_radical_thermo_via_hbi(self, molecule, stable_thermo_estimator): # Take C1=CC=C([O])C(O)=C1 as an example, we need to remove the interation of OH-OH, then add the interaction of Oj-OH. # For now, we only apply this part to cyclic structure because we only have radical interaction data for aromatic radical. if saturated_struct.is_cyclic(): - sssr = saturated_struct.get_smallest_set_of_smallest_rings() + sssr = saturated_struct.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: @@ -2149,7 +2149,7 @@ def estimate_radical_thermo_via_hbi(self, molecule, stable_thermo_estimator): saturated_struct, {'*1': atomPair[0], '*2': atomPair[1]}) except KeyError: pass - sssr = molecule.get_smallest_set_of_smallest_rings() + sssr = molecule.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: @@ -2272,7 +2272,7 @@ def compute_group_additivity_thermo(self, molecule): # In my opinion, it's cleaner to do it in the current way. # WIPWIPWIPWIPWIPWIPWIP ######################################### WIPWIPWIPWIPWIPWIPWIP if cyclic: - sssr = molecule.get_smallest_set_of_smallest_rings() + sssr = molecule.get_symmetrized_smallest_set_of_smallest_rings() for ring in sssr: for atomPair in itertools.permutations(ring, 2): try: diff --git a/rmgpy/molecule/adjlist.py b/rmgpy/molecule/adjlist.py index 852d7bc9d0..8e654d0334 100644 --- a/rmgpy/molecule/adjlist.py +++ b/rmgpy/molecule/adjlist.py @@ -1093,7 +1093,7 @@ def to_adjacency_list(atoms, multiplicity, metal='', facet='', label=None, group # numbers if doesn't work try: adjlist += bond.get_order_str() - except ValueError: + except (ValueError, TypeError): adjlist += str(bond.get_order_num()) adjlist += '}' diff --git a/rmgpy/molecule/converter.pxd b/rmgpy/molecule/converter.pxd index 25752cfd2d..a6ec0f9ee2 100644 --- a/rmgpy/molecule/converter.pxd +++ b/rmgpy/molecule/converter.pxd @@ -29,7 +29,7 @@ cimport rmgpy.molecule.molecule as mm cimport rmgpy.molecule.element as elements -cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*, bint save_order=?) +cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, object sanitize=*, bint save_order=?, bint ignore_bond_orders=?) cpdef mm.Molecule from_rdkit_mol(mm.Molecule mol, object rdkitmol, bint raise_atomtype_exception=?) diff --git a/rmgpy/molecule/converter.py b/rmgpy/molecule/converter.py index 7182b20499..ae005cb25b 100644 --- a/rmgpy/molecule/converter.py +++ b/rmgpy/molecule/converter.py @@ -38,6 +38,7 @@ import cython # Assume that rdkit is installed from rdkit import Chem +from rdkit.Chem.rdchem import KekulizeException, AtomKekulizeException # Test if openbabel is installed try: from openbabel import openbabel @@ -49,7 +50,8 @@ from rmgpy.exceptions import DependencyError -def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_order=False): +def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, + save_order=False, ignore_bond_orders=False): """ Convert a molecular structure to a RDKit rdmol object. Uses `RDKit `_ to perform the conversion. @@ -57,47 +59,52 @@ def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_o If return_mapping==True then it also returns a dictionary mapping the atoms to RDKit's atom indices. + + If ignore_bond_orders==True, all bonds are converted to unknown bonds, and + sanitization is skipped. This is helpful when all you want is ring perception, + for example. Must also set sanitize=False. """ - from rmgpy.molecule.fragment import Fragment + if ignore_bond_orders and sanitize: + raise ValueError("If ignore_bond_orders is True, sanitize must be False") + from rmgpy.molecule.fragment import Fragment, CuttingLabel # Sort the atoms before converting to ensure output is consistent # between different runs if not save_order: mol.sort_atoms() atoms = mol.vertices rd_atom_indices = {} # dictionary of RDKit atom indices - label_dict = {} # store label of atom for Framgent + label_dict = {} # For fragment cutting labels. Key is rdkit atom index, value is label string rdkitmol = Chem.rdchem.EditableMol(Chem.rdchem.Mol()) for index, atom in enumerate(mol.vertices): if atom.element.symbol == 'X': rd_atom = Chem.rdchem.Atom('Pt') # not sure how to do this with linear scaling when this might not be Pt + elif atom.element.symbol in ['R', 'L']: + rd_atom = Chem.rdchem.Atom(0) else: rd_atom = Chem.rdchem.Atom(atom.element.symbol) if atom.element.isotope != -1: rd_atom.SetIsotope(atom.element.isotope) rd_atom.SetNumRadicalElectrons(atom.radical_electrons) rd_atom.SetFormalCharge(atom.charge) - if atom.element.symbol == 'C' and atom.lone_pairs == 1 and mol.multiplicity == 1: rd_atom.SetNumRadicalElectrons( - 2) + if atom.element.symbol == 'C' and atom.lone_pairs == 1 and mol.multiplicity == 1: + rd_atom.SetNumRadicalElectrons(2) rdkitmol.AddAtom(rd_atom) if remove_h and atom.symbol == 'H': pass else: rd_atom_indices[atom] = index - # Check if a cutting label is present. If preserve this so that it is added to the SMILES string - # Fragment's representative species is Molecule (with CuttingLabel replaced by Si but label as CuttingLabel) - # so we use detect_cutting_label to check atom.label - _, cutting_label_list = Fragment().detect_cutting_label(atom.label) - if cutting_label_list != []: - saved_index = index - label = atom.label - if label in label_dict: - label_dict[label].append(saved_index) - else: - label_dict[label] = [saved_index] + # Save cutting labels to add to the SMILES string + if atom.label and atom.label in ('R', 'L'): + label_dict[index] = atom.label + rd_bonds = Chem.rdchem.BondType - orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE, 'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC, - 'Q': rd_bonds.QUADRUPLE} + # no vdW bond in RDKit, so "ZERO" or "OTHER" might be OK + orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE, + 'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC, + 'Q': rd_bonds.QUADRUPLE, 'vdW': rd_bonds.ZERO, + 'H': rd_bonds.HYDROGEN, 'R': rd_bonds.UNSPECIFIED, + None: rd_bonds.UNSPECIFIED} # Add the bonds for atom1 in mol.vertices: for atom2, bond in atom1.edges.items(): @@ -106,23 +113,31 @@ def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_o index1 = atoms.index(atom1) index2 = atoms.index(atom2) if index1 < index2: - order_string = bond.get_order_str() - order = orders[order_string] + if ignore_bond_orders: + order = rd_bonds.UNSPECIFIED + else: + order_string = bond.get_order_str() + order = orders[order_string] rdkitmol.AddBond(index1, index2, order) # Make editable mol into a mol and rectify the molecule rdkitmol = rdkitmol.GetMol() - if label_dict: - for label, ind_list in label_dict.items(): - for ind in ind_list: - Chem.SetSupplementalSmilesLabel(rdkitmol.GetAtomWithIdx(ind), label) + for index, label in label_dict.items(): + Chem.SetSupplementalSmilesLabel(rdkitmol.GetAtomWithIdx(index), label) for atom in rdkitmol.GetAtoms(): if atom.GetAtomicNum() > 1: atom.SetNoImplicit(True) - if sanitize: - Chem.SanitizeMol(rdkitmol) if remove_h: - rdkitmol = Chem.RemoveHs(rdkitmol, sanitize=sanitize) + rdkitmol = Chem.RemoveHs(rdkitmol, sanitize=False) # skip sanitization here, do it later if requested + if sanitize == True: + Chem.SanitizeMol(rdkitmol) + elif sanitize == "partial": + try: + Chem.SanitizeMol(rdkitmol, sanitizeOps=Chem.SANITIZE_ALL ^ Chem.SANITIZE_PROPERTIES) + except (KekulizeException, AtomKekulizeException): + logging.warning("Kekulization failed; sanitizing without Kekulize") + Chem.SanitizeMol(rdkitmol, sanitizeOps=Chem.SANITIZE_ALL ^ Chem.SANITIZE_PROPERTIES ^ Chem.SANITIZE_KEKULIZE) + if return_mapping: return rdkitmol, rd_atom_indices return rdkitmol diff --git a/rmgpy/molecule/draw.py b/rmgpy/molecule/draw.py index ad6f0e6070..1f631238c9 100644 --- a/rmgpy/molecule/draw.py +++ b/rmgpy/molecule/draw.py @@ -155,7 +155,7 @@ def clear(self): self.surface = None self.cr = None - def draw(self, molecule, file_format, target=None): + def draw(self, molecule, file_format, target=None, use_rdkit=True): """ Draw the given `molecule` using the given image `file_format` - pdf, svg, ps, or png. If `path` is given, the drawing is saved to that location on disk. The @@ -165,6 +165,9 @@ def draw(self, molecule, file_format, target=None): This function returns the Cairo surface and context used to create the drawing, as well as a bounding box for the molecule being drawn as the tuple (`left`, `top`, `width`, `height`). + + If `use_rdkit` is True, then the RDKit 2D coordinate generation is used to generate the coordinates. + If `use_rdkit` is False, then the molecule is drawn using our (deprecated) original algorithm. """ # The Cairo 2D graphics library (and its Python wrapper) is required for @@ -219,13 +222,13 @@ def draw(self, molecule, file_format, target=None): if molecule.contains_surface_site(): try: self._connect_surface_sites() - self._generate_coordinates() + self._generate_coordinates(use_rdkit=use_rdkit) self._disconnect_surface_sites() except AdsorbateDrawingError as e: self._disconnect_surface_sites() - self._generate_coordinates(fix_surface_sites=False) + self._generate_coordinates(fix_surface_sites=False, use_rdkit=use_rdkit) else: - self._generate_coordinates() + self._generate_coordinates(use_rdkit=use_rdkit) self._replace_bonds(old_bond_dictionary) # Generate labels to use @@ -324,7 +327,7 @@ def _find_ring_groups(self): """ # Find all of the cycles in the molecule - self.cycles = self.molecule.get_smallest_set_of_smallest_rings() + self.cycles = self.molecule.get_symmetrized_smallest_set_of_smallest_rings() self.ringSystems = [] # If the molecule contains cycles, find them and group them @@ -341,7 +344,7 @@ def _find_ring_groups(self): if not found: self.ringSystems.append([cycle]) - def _generate_coordinates(self, fix_surface_sites=True): + def _generate_coordinates(self, fix_surface_sites=True, use_rdkit=True): """ Generate the 2D coordinates to be used when drawing the current molecule. The function uses rdKits 2D coordinate generation. @@ -372,15 +375,35 @@ def _generate_coordinates(self, fix_surface_sites=True): self.coordinates[1, :] = [0.5, 0.0] return self.coordinates - # Decide whether we can use RDKit or have to generate coordinates ourselves - for atom in self.molecule.atoms: - if atom.charge != 0: - use_rdkit = False - break - else: # didn't break - use_rdkit = True + if use_rdkit == True: + # Use RDKit 2D coordinate generation: + # Generate the RDkit molecule from the RDkit molecule, saving mapping + # in order to match the atoms in the rdmol with the atoms in the + # RMG molecule (which is required to extract coordinates). + rdmol, rd_atom_idx = self.molecule.to_rdkit_mol(remove_h=False, + return_mapping=True, + sanitize="partial") + + AllChem.Compute2DCoords(rdmol) + + # Extract the coordinates from each atom. + rd_conformer = rdmol.GetConformer(0) + for atom in atoms: + index = rd_atom_idx[atom] + point = rd_conformer.GetAtomPosition(index) + coordinates[index, :] = [point.x * 0.6, point.y * 0.6] + + # RDKit generates some molecules more vertically than horizontally, + # Especially linear ones. This will reflect any molecule taller than + # it is wide across the line y=x + ranges = np.ptp(coordinates, axis=0) + if ranges[1] > ranges[0]: + temp = np.copy(coordinates) + coordinates[:, 0] = temp[:, 1] + coordinates[:, 1] = temp[:, 0] - if not use_rdkit: + else: + logging.warning("Using deprecated molecule drawing algorithm; undesired behavior may occur. Consider using use_rdkit=True.") if len(self.cycles) > 0: # Cyclic molecule backbone = self._find_cyclic_backbone() @@ -438,32 +461,6 @@ def _generate_coordinates(self, fix_surface_sites=True): # minimize likelihood of overlap self._generate_neighbor_coordinates(backbone) - else: - # Use RDKit 2D coordinate generation: - - # Generate the RDkit molecule from the RDkit molecule, use geometry - # in order to match the atoms in the rdmol with the atoms in the - # RMG molecule (which is required to extract coordinates). - self.geometry = Geometry(None, None, self.molecule, None) - - rdmol, rd_atom_idx = self.geometry.rd_build() - AllChem.Compute2DCoords(rdmol) - - # Extract the coordinates from each atom. - for atom in atoms: - index = rd_atom_idx[atom] - point = rdmol.GetConformer(0).GetAtomPosition(index) - coordinates[index, :] = [point.x * 0.6, point.y * 0.6] - - # RDKit generates some molecules more vertically than horizontally, - # Especially linear ones. This will reflect any molecule taller than - # it is wide across the line y=x - ranges = np.ptp(coordinates, axis=0) - if ranges[1] > ranges[0]: - temp = np.copy(coordinates) - coordinates[:, 0] = temp[:, 1] - coordinates[:, 1] = temp[:, 0] - # For surface species if fix_surface_sites and self.molecule.contains_surface_site(): if len(self.molecule.atoms) == 1: diff --git a/rmgpy/molecule/filtration.py b/rmgpy/molecule/filtration.py index dc310f036f..3b435512e0 100644 --- a/rmgpy/molecule/filtration.py +++ b/rmgpy/molecule/filtration.py @@ -401,7 +401,7 @@ def aromaticity_filtration(mol_list, features): # Look for structures that don't have standard SDSDSD bond orders for mol in other_list: # Check all 6 membered rings - rings = [ring for ring in mol.get_relevant_cycles() if len(ring) == 6] + rings = [ring for ring in mol.get_symmetrized_smallest_set_of_smallest_rings() if len(ring) == 6] for ring in rings: bond_list = mol.get_edges_in_cycle(ring) bond_orders = ''.join([bond.get_order_str() for bond in bond_list]) diff --git a/rmgpy/molecule/fragment.py b/rmgpy/molecule/fragment.py index d4fa0acf4d..c0db0a98f1 100644 --- a/rmgpy/molecule/fragment.py +++ b/rmgpy/molecule/fragment.py @@ -490,51 +490,47 @@ def get_formula(self): return formula - def to_rdkit_mol(self, remove_h=False, return_mapping=True, save_order=False): + def to_rdkit_mol(self, *args, **kwargs): """ - Convert a molecular structure to a RDKit rdmol object. + Convert a Fragment structure to a RDKit rdmol object. + + Uses Fragment.get_representative_molecule to get a representative molecule, + and then uses converter.to_rdkit_mol to do the conversion. + + Could change the order of atoms in the Fragment (self) to match the order of + representative_molecule.atoms, though that's probably the same. """ - if remove_h: - # because we're replacing - # cutting labels with hydrogens - # so do not allow removeHs to be True - raise "Currently fragment to_rdkit_mol only allows keeping all the hydrogens." - mol0, mapping = self.get_representative_molecule("minimal", update=False) + if kwargs.get('remove_h', False): + # because we're replacing cutting labels with hydrogens + # do not allow removeHs to be True + raise ValueError("Currently Fragment to_rdkit_mol only allows keeping all the hydrogens.") - rdmol, rdAtomIdx_mol0 = converter.to_rdkit_mol( - mol0, - remove_h=remove_h, - return_mapping=return_mapping, - sanitize=True, - save_order=save_order, - ) + representative_molecule, fragment_to_mol_mapping = self.get_representative_molecule("minimal", update=False) - rdAtomIdx_frag = {} - for frag_atom, mol0_atom in mapping.items(): - rd_idx = rdAtomIdx_mol0[mol0_atom] - rdAtomIdx_frag[frag_atom] = rd_idx - - # sync the order of fragment vertices with the order - # of mol0.atoms since mol0.atoms is changed/sorted in - # converter.to_rdkit_mol(). - # Since the rdmol's atoms order is same as the order of mol0's atoms, - # the synchronization between fragment.atoms order and mol0.atoms order - # is necessary to make sure the order of fragment vertices - # reflects the order of rdmol's atoms - vertices_order = [] - for v in self.vertices: - a = mapping[v] - idx = mol0.atoms.index(a) - vertices_order.append((v, idx)) + new_kwargs = {**kwargs} # make a copy of kwargs to avoid modifying the original one + new_kwargs["remove_h"] = False + new_kwargs["return_mapping"] = True # override user if needed + new_kwargs["save_order"] = True # override user if needed + rdmol, molecule_to_rdindex_mapping = converter.to_rdkit_mol( + representative_molecule, + *args, + **new_kwargs + ) - adapted_vertices = [ - tup[0] for tup in sorted(vertices_order, key=lambda tup: tup[1]) - ] + fragment_to_rdindex_mapping = {} + for fragment_atom, molecule_atom in fragment_to_mol_mapping.items(): + rdmol_index = molecule_to_rdindex_mapping[molecule_atom] + fragment_to_rdindex_mapping[fragment_atom] = rdmol_index - self.vertices = adapted_vertices + # Sort the Fragment (self) vertices according to the order of the RDKit molecule's atoms + # (rwest is not sure this is needed, but it was here so he's leaving it) + self.vertices.sort(key=lambda v: fragment_to_rdindex_mapping[v]) - return rdmol, rdAtomIdx_frag + if kwargs.get("return_mapping", False): + return rdmol, fragment_to_rdindex_mapping + else: + return rdmol def from_adjacency_list( self, @@ -587,7 +583,7 @@ def get_aromatic_rings(self, rings=None, save_order=False): """ Returns all aromatic rings as a list of atoms and a list of bonds. - Identifies rings using `Graph.get_smallest_set_of_smallest_rings()`, then uses RDKit to perceive aromaticity. + Identifies rings, then uses RDKit to perceive aromaticity. RDKit uses an atom-based pi-electron counting algorithm to check aromaticity based on Huckel's Rule. Therefore, this method identifies "true" aromaticity, rather than simply the RMG bond type. @@ -600,7 +596,7 @@ def get_aromatic_rings(self, rings=None, save_order=False): AROMATIC = BondType.AROMATIC if rings is None: - rings = self.get_relevant_cycles() + rings = self.get_symmetrized_smallest_set_of_smallest_rings() rings = [ring for ring in rings if len(ring) == 6] if not rings: return [], [] @@ -681,9 +677,8 @@ def to_smiles(self): mol_repr.atoms = smiles_before.vertices mol_repr.update() smiles_after = mol_repr.to_smiles() - import re - smiles = re.sub(r"\[Si-3\]", "", smiles_after) + smiles = smiles_after.replace("[Si-3]", "") return smiles @@ -911,7 +906,7 @@ def sliceitup_arom(self, molecule, size_threshold=None): _, cutting_label_list = self.detect_cutting_label(molecule_smiles) # transfer to rdkit molecule for substruct matching f = self.from_smiles_like_string(molecule_smiles) - molecule_to_cut, rdAtomIdx_frag = f.to_rdkit_mol() + molecule_to_cut, rdAtomIdx_frag = f.to_rdkit_mol(remove_h=False, return_mapping=True) # replace CuttingLabel to special Atom (metal) in rdkit for atom, idx in rdAtomIdx_frag.items(): @@ -1037,7 +1032,7 @@ def sliceitup_aliph(self, molecule, size_threshold=None): _, cutting_label_list = self.detect_cutting_label(molecule_smiles) # transfer to rdkit molecule for substruct matching f = self.from_smiles_like_string(molecule_smiles) - molecule_to_cut, rdAtomIdx_frag = f.to_rdkit_mol() + molecule_to_cut, rdAtomIdx_frag = f.to_rdkit_mol(remove_h=False, return_mapping=True) # replace CuttingLabel to special Atom (metal) in rdkit for atom, idx in rdAtomIdx_frag.items(): @@ -1297,8 +1292,9 @@ def from_smiles_like_string(self, smiles_like_string): self.from_rdkit_mol(rdkitmol, atom_replace_dict) return self - - def detect_cutting_label(self, smiles): + + @staticmethod + def detect_cutting_label(smiles): import re from rmgpy.molecule.element import element_list @@ -1805,6 +1801,12 @@ def assign_representative_species(self): self.species_repr.symmetry_number = self.symmetry_number def get_representative_molecule(self, mode="minimal", update=True): + """ + Generate a representative molecule from the fragment. + The representative molecule is generated by replacing all CuttingLabel + atoms with hydrogen atoms. + """ + if mode != "minimal": raise RuntimeError( 'Fragment.get_representative_molecule onyl supports mode="minimal"' diff --git a/rmgpy/molecule/graph.pxd b/rmgpy/molecule/graph.pxd index 696aa0b07d..31499a9fb5 100644 --- a/rmgpy/molecule/graph.pxd +++ b/rmgpy/molecule/graph.pxd @@ -127,16 +127,6 @@ cdef class Graph(object): cpdef bint _is_chain_in_cycle(self, list chain) except -2 cpdef list get_all_cyclic_vertices(self) - - cpdef list get_all_polycyclic_vertices(self) - - cpdef list get_polycycles(self) - - cpdef list get_monocycles(self) - - cpdef tuple get_disparate_cycles(self) - - cpdef tuple _merge_cycles(self, list cycle_sets) cpdef list get_all_cycles(self, Vertex starting_vertex) @@ -146,13 +136,7 @@ cdef class Graph(object): cpdef list _explore_cycles_recursively(self, list chain, list cycles) - cpdef list get_smallest_set_of_smallest_rings(self) - - cpdef list get_relevant_cycles(self) - cpdef list sort_cyclic_vertices(self, list vertices) - - cpdef int get_max_cycle_overlap(self) cpdef list get_largest_ring(self, Vertex vertex) diff --git a/rmgpy/molecule/graph.pyx b/rmgpy/molecule/graph.pyx index 207470c4e5..89760d7c4a 100644 --- a/rmgpy/molecule/graph.pyx +++ b/rmgpy/molecule/graph.pyx @@ -35,8 +35,6 @@ are the components of a graph. import itertools -import py_rdl - from rmgpy.molecule.vf2 cimport VF2 ################################################################################ @@ -623,178 +621,6 @@ cdef class Graph(object): cyclic_vertices.append(vertex) return cyclic_vertices - cpdef list get_all_polycyclic_vertices(self): - """ - Return all vertices belonging to two or more cycles, fused or spirocyclic. - """ - cdef list sssr, vertices, polycyclic_vertices - sssr = self.get_smallest_set_of_smallest_rings() - polycyclic_vertices = [] - if sssr: - vertices = [] - for cycle in sssr: - for vertex in cycle: - if vertex not in vertices: - vertices.append(vertex) - else: - if vertex not in polycyclic_vertices: - polycyclic_vertices.append(vertex) - return polycyclic_vertices - - cpdef list get_polycycles(self): - """ - Return a list of cycles that are polycyclic. - In other words, merge the cycles which are fused or spirocyclic into - a single polycyclic cycle, and return only those cycles. - Cycles which are not polycyclic are not returned. - """ - cdef list polycyclic_vertices, continuous_cycles, sssr - cdef set polycyclic_cycle - cdef Vertex vertex - - sssr = self.get_smallest_set_of_smallest_rings() - if not sssr: - return [] - - polycyclic_vertices = self.get_all_polycyclic_vertices() - - if not polycyclic_vertices: - # no polycyclic vertices detected - return [] - else: - # polycyclic vertices found, merge cycles together - # that have common polycyclic vertices - continuous_cycles = [] - for vertex in polycyclic_vertices: - # First check if it is in any existing continuous cycles - for cycle in continuous_cycles: - if vertex in cycle: - polycyclic_cycle = cycle - break - else: - # Otherwise create a new cycle - polycyclic_cycle = set() - continuous_cycles.append(polycyclic_cycle) - - for cycle in sssr: - if vertex in cycle: - polycyclic_cycle.update(cycle) - - # convert each set to a list - continuous_cycles = [list(cycle) for cycle in continuous_cycles] - return continuous_cycles - - cpdef list get_monocycles(self): - """ - Return a list of cycles that are monocyclic. - """ - cdef list polycyclic_vertices, sssr, monocyclic_cycles, polycyclic_sssr - cdef Vertex vertex - - sssr = self.get_smallest_set_of_smallest_rings() - if not sssr: - return [] - - polycyclic_vertices = self.get_all_polycyclic_vertices() - - if not polycyclic_vertices: - # No polycyclic_vertices detected, all the rings from get_smallest_set_of_smallest_rings - # are monocyclic - return sssr - - polycyclic_sssr = [] - for vertex in polycyclic_vertices: - for cycle in sssr: - if vertex in cycle: - if cycle not in polycyclic_sssr: - polycyclic_sssr.append(cycle) - - # remove the polycyclic cycles from the list of SSSR, leaving behind just the monocyclics - monocyclic_cycles = sssr - for cycle in polycyclic_sssr: - monocyclic_cycles.remove(cycle) - return monocyclic_cycles - - cpdef tuple get_disparate_cycles(self): - """ - Get all disjoint monocyclic and polycyclic cycle clusters in the molecule. - Takes the RC and recursively merges all cycles which share vertices. - - Returns: monocyclic_cycles, polycyclic_cycles - """ - cdef list rc, cycle_list, cycle_sets, monocyclic_cycles, polycyclic_cycles - cdef set cycle_set - - rc = self.get_relevant_cycles() - - if not rc: - return [], [] - - # Convert cycles to sets - cycle_sets = [set(cycle_list) for cycle_list in rc] - - # Merge connected cycles - monocyclic_cycles, polycyclic_cycles = self._merge_cycles(cycle_sets) - - # Convert cycles back to lists - monocyclic_cycles = [list(cycle_set) for cycle_set in monocyclic_cycles] - polycyclic_cycles = [list(cycle_set) for cycle_set in polycyclic_cycles] - - return monocyclic_cycles, polycyclic_cycles - - cpdef tuple _merge_cycles(self, list cycle_sets): - """ - Recursively merges cycles that share common atoms. - - Returns one list with unmerged cycles and one list with merged cycles. - """ - cdef list unmerged_cycles, merged_cycles, matched, u, m - cdef set cycle, m_cycle, u_cycle - cdef bint merged, new - - unmerged_cycles = [] - merged_cycles = [] - - # Loop through each cycle - for cycle in cycle_sets: - merged = False - new = False - - # Check if it's attached to an existing merged cycle - for m_cycle in merged_cycles: - if not m_cycle.isdisjoint(cycle): - m_cycle.update(cycle) - merged = True - # It should only match one merged cycle, so we can break here - break - else: - # If it doesn't match any existing merged cycles, initiate a new one - m_cycle = cycle.copy() - new = True - - # Check if the new merged cycle is attached to any of the unmerged cycles - matched = [] - for i, u_cycle in enumerate(unmerged_cycles): - if not m_cycle.isdisjoint(u_cycle): - m_cycle.update(u_cycle) - matched.append(i) - merged = True - # Remove matched cycles from list of unmerged cycles - for i in reversed(matched): - del unmerged_cycles[i] - - if merged and new: - merged_cycles.append(m_cycle) - elif not merged: - unmerged_cycles.append(cycle) - - # If any rings were successfully merged, try to merge further - if len(merged_cycles) > 1: - u, m = self._merge_cycles(merged_cycles) - merged_cycles = u + m - - return unmerged_cycles, merged_cycles - cpdef list get_all_cycles(self, Vertex starting_vertex): """ Given a starting vertex, returns a list of all the cycles containing @@ -969,71 +795,6 @@ cdef class Graph(object): # At this point we should have discovered all of the cycles involving the current chain return cycles - cpdef list get_smallest_set_of_smallest_rings(self): - """ - Returns the smallest set of smallest rings as a list of lists. - Uses RingDecomposerLib for ring perception. - - Kolodzik, A.; Urbaczek, S.; Rarey, M. - Unique Ring Families: A Chemically Meaningful Description - of Molecular Ring Topologies. - J. Chem. Inf. Model., 2012, 52 (8), pp 2013-2021 - - Flachsenberg, F.; Andresen, N.; Rarey, M. - RingDecomposerLib: An Open-Source Implementation of - Unique Ring Families and Other Cycle Bases. - J. Chem. Inf. Model., 2017, 57 (2), pp 122-126 - """ - cdef list sssr - cdef object graph, data, cycle - - graph = py_rdl.Graph.from_edges( - self.get_all_edges(), - _get_edge_vertex1, - _get_edge_vertex2, - ) - - data = py_rdl.wrapper.DataInternal(graph.get_nof_nodes(), graph.get_edges().keys()) - data.calculate() - - sssr = [] - for cycle in data.get_sssr(): - sssr.append(self.sort_cyclic_vertices([graph.get_node_for_index(i) for i in cycle.nodes])) - - return sssr - - cpdef list get_relevant_cycles(self): - """ - Returns the set of relevant cycles as a list of lists. - Uses RingDecomposerLib for ring perception. - - Kolodzik, A.; Urbaczek, S.; Rarey, M. - Unique Ring Families: A Chemically Meaningful Description - of Molecular Ring Topologies. - J. Chem. Inf. Model., 2012, 52 (8), pp 2013-2021 - - Flachsenberg, F.; Andresen, N.; Rarey, M. - RingDecomposerLib: An Open-Source Implementation of - Unique Ring Families and Other Cycle Bases. - J. Chem. Inf. Model., 2017, 57 (2), pp 122-126 - """ - cdef list rc - cdef object graph, data, cycle - - graph = py_rdl.Graph.from_edges( - self.get_all_edges(), - _get_edge_vertex1, - _get_edge_vertex2, - ) - - data = py_rdl.wrapper.DataInternal(graph.get_nof_nodes(), graph.get_edges().keys()) - data.calculate() - - rc = [] - for cycle in data.get_rcs(): - rc.append(self.sort_cyclic_vertices([graph.get_node_for_index(i) for i in cycle.nodes])) - - return rc cpdef list sort_cyclic_vertices(self, list vertices): """ @@ -1062,24 +823,6 @@ cdef class Graph(object): return ordered - cpdef int get_max_cycle_overlap(self): - """ - Return the maximum number of vertices that are shared between - any two cycles in the graph. For example, if there are only - disparate monocycles or no cycles, the maximum overlap is zero; - if there are "spiro" cycles, it is one; if there are "fused" - cycles, it is two; and if there are "bridged" cycles, it is - three. - """ - cdef list cycles - cdef int max_overlap, overlap, i, j - - cycles = self.get_smallest_set_of_smallest_rings() - max_overlap = 0 - for i, j in itertools.combinations(range(len(cycles)), 2): - overlap = len(set(cycles[i]) & set(cycles[j])) - max_overlap = max(overlap, max_overlap) - return max_overlap cpdef list get_largest_ring(self, Vertex vertex): """ diff --git a/rmgpy/molecule/molecule.pxd b/rmgpy/molecule/molecule.pxd index 8bbf3a5aa6..2c001a46fb 100644 --- a/rmgpy/molecule/molecule.pxd +++ b/rmgpy/molecule/molecule.pxd @@ -307,4 +307,22 @@ cdef class Molecule(Graph): cpdef list get_desorbed_molecules(self) + cpdef list get_smallest_set_of_smallest_rings(self) # deprecated + + cpdef list get_symmetrized_smallest_set_of_smallest_rings(self) + + cpdef list get_relevant_cycles(self) # deprecated + + cpdef list get_all_polycyclic_vertices(self) + + cpdef list get_polycycles(self) + + cpdef list get_monocycles(self) + + cpdef tuple get_disparate_cycles(self) + + cpdef tuple _merge_cycles(self, list cycle_sets) + + cpdef int get_max_cycle_overlap(self) + cdef atom_id_counter diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 1a32f07add..05b6b889cd 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -768,7 +768,7 @@ def is_specific_case_of(self, other): def get_order_str(self): """ - returns a string representing the bond order + Returns a string representing the bond order. Returns None if bond order does not have a string representation. """ if self.is_single(): return 'S' @@ -787,7 +787,8 @@ def get_order_str(self): elif self.is_reaction_bond(): return 'R' else: - raise ValueError("Bond order {} does not have string representation.".format(self.order)) + logging.warning("Bond order {} does not have string representation.".format(self.order)) + return None def set_order_str(self, new_order): """ @@ -1891,7 +1892,10 @@ def from_adjacency_list(self, adjlist, saturate_h=False, raise_atomtype_exceptio self.vertices, self.multiplicity, self.metal, self.facet = from_adjacency_list(adjlist, group=False, saturate_h=saturate_h, check_consistency=check_consistency) self.update_atomtypes(raise_exception=raise_atomtype_exception) - self.identify_ring_membership() + + # identify ring membership iff it's not a suspicious molecule + if not self.is_electron(): + self.identify_ring_membership() # Check if multiplicity is possible n_rad = self.get_radical_count() @@ -2048,6 +2052,10 @@ def to_rdkit_mol(self, *args, **kwargs): """ Convert a molecular structure to a RDKit rdmol object. """ + # RDKit doesn't support electron + if self.is_electron(): + raise ValueError("Cannot convert electron molecule to RDKit Mol object") + return converter.to_rdkit_mol(self, *args, **kwargs) def to_adjacency_list(self, label='', remove_h=False, remove_lone_pairs=False, old_style=False): @@ -2190,9 +2198,9 @@ def is_aromatic(self): will not fail for fused aromatic rings. """ cython.declare(rc=list, cycle=list, atom=Atom) - rc = self.get_relevant_cycles() - if rc: - for cycle in rc: + rings = self.get_symmetrized_smallest_set_of_smallest_rings() + if rings: + for cycle in rings: if len(cycle) == 6: for atom in cycle: # print atom.atomtype.label @@ -2328,6 +2336,10 @@ def is_aryl_radical(self, aromatic_rings=None, save_order=False): and this process may involve atom order change by default. Set ``save_order`` to ``True`` to force the atom order unchanged. """ + # RDKit does not support electron + if self.is_electron(): + return False + cython.declare(atom=Atom, total=int, aromatic_atoms=set, aryl=int) if aromatic_rings is None: aromatic_rings = self.get_aromatic_rings(save_order=save_order)[0] @@ -2499,17 +2511,9 @@ def identify_ring_membership(self): """ Performs ring perception and saves ring membership information to the Atom.props attribute. """ - cython.declare(rc=list, atom=Atom, ring=list) - - # Get the set of relevant cycles - rc = self.get_relevant_cycles() - # Identify whether each atom is in a ring + cython.declare(atom=Atom) for atom in self.atoms: - atom.props['inRing'] = False - for ring in rc: - if atom in ring: - atom.props['inRing'] = True - break + atom.props["inRing"] = self.is_vertex_in_cycle(atom) def count_aromatic_rings(self): """ @@ -2518,8 +2522,12 @@ def count_aromatic_rings(self): Returns an integer corresponding to the number or aromatic rings. """ + # RDKit does not support electron + if self.is_electron(): + return 0 + cython.declare(rings=list, count=int, ring=list, bonds=list, bond=Bond) - rings = self.get_relevant_cycles() + rings = self.get_symmetrized_smallest_set_of_smallest_rings() count = 0 for ring in rings: if len(ring) != 6: @@ -2536,7 +2544,7 @@ def get_aromatic_rings(self, rings=None, save_order=False): """ Returns all aromatic rings as a list of atoms and a list of bonds. - Identifies rings using `Graph.get_smallest_set_of_smallest_rings()`, then uses RDKit to perceive aromaticity. + Identifies rings, then uses RDKit to perceive aromaticity. RDKit uses an atom-based pi-electron counting algorithm to check aromaticity based on Huckel's Rule. Therefore, this method identifies "true" aromaticity, rather than simply the RMG bond type. @@ -2546,6 +2554,10 @@ def get_aromatic_rings(self, rings=None, save_order=False): By default, the atom order will be sorted to get consistent results from different runs. The atom order can be saved when dealing with problems that are sensitive to the atom map. """ + # RDKit does not support electron + if self.is_electron(): + return [], [] + cython.declare(rd_atom_indices=dict, ob_atom_ids=dict, aromatic_rings=list, aromatic_bonds=list) cython.declare(ring0=list, i=cython.int, atom1=Atom, atom2=Atom) @@ -2553,7 +2565,7 @@ def get_aromatic_rings(self, rings=None, save_order=False): AROMATIC = BondType.AROMATIC if rings is None: - rings = self.get_relevant_cycles() + rings = self.get_symmetrized_smallest_set_of_smallest_rings() # Remove rings that share more than 3 atoms, since they cannot be planar cython.declare(toRemove=set, j=cython.int, toRemoveSorted=list) @@ -2635,18 +2647,17 @@ def get_aromatic_rings(self, rings=None, save_order=False): def get_deterministic_sssr(self): """ - Modified `Graph` method `get_smallest_set_of_smallest_rings` by sorting calculated cycles - by short length and then high atomic number instead of just short length (for cases where - multiple cycles with same length are found, `get_smallest_set_of_smallest_rings` outputs - non-determinstically). - - For instance, molecule with this smiles: C1CC2C3CSC(CO3)C2C1, will have non-deterministic + Sorts calculated cycles by short length and then high atomic number instead of just short length. + Originally created as an alternative to `get_smallest_set_of_smallest_rings` before it was converted + to use only RDKit Functions. + + For instance, previously molecule with this smiles: C1CC2C3CSC(CO3)C2C1, would have non-deterministic output from `get_smallest_set_of_smallest_rings`, which leads to non-deterministic bicyclic decomposition. Using this new method can effectively prevent this situation. Important Note: This method returns an incorrect set of SSSR in certain molecules (such as cubane). - It is recommended to use the main `Graph.get_smallest_set_of_smallest_rings` method in new applications. - Alternatively, consider using `Graph.get_relevant_cycles` for deterministic output. + It is recommended to use the main `Molecule.get_smallest_set_of_smallest_rings` method in new applications. + Alternatively, consider using `Molecule.get_relevant_cycles` for deterministic output. In future development, this method should ideally be replaced by some method to select a deterministic set of SSSR from the set of Relevant Cycles, as that would be a more robust solution. @@ -2750,6 +2761,248 @@ def get_deterministic_sssr(self): return cycle_list + def get_smallest_set_of_smallest_rings(self): + """ + Returned the strictly smallest set of smallest rings (SSSR). + + Removed in favor of RDKit's Symmetrized SSSR perception, which + is less arbitrary, more chemically meaningful, and more consistent. + Use get_symmetrized_smallest_set_of_smallest_rings instead. + """ + raise NotImplementedError("Smallest Set of Smallest Rings is not implemented. " + "Use get_symmetrized_smallest_set_of_smallest_rings instead.") + + def get_symmetrized_smallest_set_of_smallest_rings(self): + """ + Returns the symmetrized smallest set of smallest rings (SSSR) as a list of lists of Atom objects. + + Uses RDKit's built-in ring perception (GetSymmSSSR). + The symmetrized SSSR is at least as large as the SSSR for a molecule. + In certain highly-symmetric cases (e.g. cubane), the symmetrized SSSR + can be a bit larger (i.e. the number of symmetrized rings is >= NumBonds-NumAtoms+1). + It is usually more chemically meaningful, and is less random/arbitrary than the SSSR, + which is non-deterministic in certain cases. + """ + # RDKit does not support electron + if self.is_electron(): + return [] + + from rdkit import Chem + + symm_sssr = [] + # Get the symmetric SSSR using RDKit + rdkit_mol = self.to_rdkit_mol(remove_h=False, + sanitize=False, + return_mapping=False, + save_order=True, + ignore_bond_orders=True) + + ring_info = Chem.GetSymmSSSR(rdkit_mol) + for ring in ring_info: + atom_ring = [self.atoms[idx] for idx in ring] + sorted_ring = self.sort_cyclic_vertices(atom_ring) + symm_sssr.append(sorted_ring) + return symm_sssr + + def get_relevant_cycles(self): + """ + Returned the "relevant cycles" (RC), as implemented in RingDecomposerLib. + + Deprecated when RingDecomposerLib was removed. + Now we are using methods that use RDKit, in the Molecule class. + Namely get_symmetrized_smallest_set_of_smallest_rings. + + References: + Kolodzik, A.; Urbaczek, S.; Rarey, M. + Unique Ring Families: A Chemically Meaningful Description + of Molecular Ring Topologies. + J. Chem. Inf. Model., 2012, 52 (8), pp 2013-2021 + + Flachsenberg, F.; Andresen, N.; Rarey, M. + RingDecomposerLib: An Open-Source Implementation of + Unique Ring Families and Other Cycle Bases. + J. Chem. Inf. Model., 2017, 57 (2), pp 122-126 + """ + raise NotImplementedError("'Relevant Cycles' are not implemented. " + "Use get_symmetrized_smallest_set_of_smallest_rings instead.") + + def get_all_polycyclic_vertices(self): + """ + Return all vertices belonging to two or more cycles, fused or spirocyclic. + """ + sssr = self.get_symmetrized_smallest_set_of_smallest_rings() + # Todo: could get RDKit to do this directly, since we're going via RDKit. + polycyclic_vertices = [] + if sssr: + vertices = [] + for cycle in sssr: + for vertex in cycle: + if vertex not in vertices: + vertices.append(vertex) + else: + if vertex not in polycyclic_vertices: + polycyclic_vertices.append(vertex) + return polycyclic_vertices + + def get_polycycles(self): + """ + Return a list of cycles that are polycyclic. + In other words, merge the cycles which are fused or spirocyclic into + a single polycyclic cycle, and return only those cycles. + Cycles which are not polycyclic are not returned. + """ + # Todo: if we're now using RDKit for ring detection anyway, we might be able to use it to do more of this method. + + # Now using symmetrized SSSR not strictly smallest SSSR. Hopefully this works the same? + sssr = self.get_symmetrized_smallest_set_of_smallest_rings() + if not sssr: + return [] + + polycyclic_vertices = self.get_all_polycyclic_vertices() + + if not polycyclic_vertices: + # no polycyclic vertices detected + return [] + else: + # polycyclic vertices found, merge cycles together + # that have common polycyclic vertices + continuous_cycles = [] + for vertex in polycyclic_vertices: + # First check if it is in any existing continuous cycles + for cycle in continuous_cycles: + if vertex in cycle: + polycyclic_cycle = cycle + break + else: + # Otherwise create a new cycle + polycyclic_cycle = set() + continuous_cycles.append(polycyclic_cycle) + + for cycle in sssr: + if vertex in cycle: + polycyclic_cycle.update(cycle) + + # convert each set to a list + continuous_cycles = [list(cycle) for cycle in continuous_cycles] + return continuous_cycles + + def get_monocycles(self): + """ + Return a list of cycles that are monocyclic. + """ + sssr = self.get_symmetrized_smallest_set_of_smallest_rings() + if not sssr: + return [] + + polycyclic_vertices = self.get_all_polycyclic_vertices() + + if not polycyclic_vertices: + # No polycyclic_vertices detected, all the rings from get_symmetrized_smallest_set_of_smallest_rings + # are monocyclic + return sssr + + polycyclic_sssr = [] + for vertex in polycyclic_vertices: + for cycle in sssr: + if vertex in cycle: + if cycle not in polycyclic_sssr: + polycyclic_sssr.append(cycle) + + # remove the polycyclic cycles from the list of SSSR, leaving behind just the monocyclics + monocyclic_cycles = sssr + for cycle in polycyclic_sssr: + monocyclic_cycles.remove(cycle) + return monocyclic_cycles + + def get_disparate_cycles(self): + """ + Get all disjoint monocyclic and polycyclic cycle clusters in the molecule. + Takes the set of rings and recursively merges all cycles which share vertices. + + Returns: monocyclic_cycles, polycyclic_cycles + """ + rings = self.get_symmetrized_smallest_set_of_smallest_rings() + + if not rings: + return [], [] + + # Convert cycles to sets + cycle_sets = [set(cycle_list) for cycle_list in rings] + + # Merge connected cycles + monocyclic_cycles, polycyclic_cycles = self._merge_cycles(cycle_sets) + + # Convert cycles back to lists + monocyclic_cycles = [list(cycle_set) for cycle_set in monocyclic_cycles] + polycyclic_cycles = [list(cycle_set) for cycle_set in polycyclic_cycles] + + return monocyclic_cycles, polycyclic_cycles + + def _merge_cycles(self, cycle_sets): + """ + Recursively merges cycles that share common atoms. + + Returns one list with unmerged cycles and one list with merged cycles. + """ + unmerged_cycles = [] + merged_cycles = [] + + # Loop through each cycle + for cycle in cycle_sets: + merged = False + new = False + + # Check if it's attached to an existing merged cycle + for m_cycle in merged_cycles: + if not m_cycle.isdisjoint(cycle): + m_cycle.update(cycle) + merged = True + # It should only match one merged cycle, so we can break here + break + else: + # If it doesn't match any existing merged cycles, initiate a new one + m_cycle = cycle.copy() + new = True + + # Check if the new merged cycle is attached to any of the unmerged cycles + matched = [] + for i, u_cycle in enumerate(unmerged_cycles): + if not m_cycle.isdisjoint(u_cycle): + m_cycle.update(u_cycle) + matched.append(i) + merged = True + # Remove matched cycles from list of unmerged cycles + for i in reversed(matched): + del unmerged_cycles[i] + + if merged and new: + merged_cycles.append(m_cycle) + elif not merged: + unmerged_cycles.append(cycle) + + # If any rings were successfully merged, try to merge further + if len(merged_cycles) > 1: + u, m = self._merge_cycles(merged_cycles) + merged_cycles = u + m + + return unmerged_cycles, merged_cycles + + def get_max_cycle_overlap(self): + """ + Return the maximum number of vertices that are shared between + any two cycles in the graph. For example, if there are only + disparate monocycles or no cycles, the maximum overlap is zero; + if there are "spiro" cycles, it is one; if there are "fused" + cycles, it is two; and if there are "bridged" cycles, it is + three. + """ + cycles = self.get_symmetrized_smallest_set_of_smallest_rings() + max_overlap = 0 + for i, j in itertools.combinations(range(len(cycles)), 2): + overlap = len(set(cycles[i]) & set(cycles[j])) + max_overlap = max(overlap, max_overlap) + return max_overlap + def kekulize(self): """ Kekulizes an aromatic molecule. diff --git a/rmgpy/molecule/resonance.py b/rmgpy/molecule/resonance.py index d389143d24..39b2fa1ed0 100644 --- a/rmgpy/molecule/resonance.py +++ b/rmgpy/molecule/resonance.py @@ -777,7 +777,7 @@ def generate_aryne_resonance_structures(mol): i=cython.int, j=cython.int, bond_orders=str, new_orders=str, ind=cython.int, bond=Edge, new_mol=Graph) - rings = mol.get_relevant_cycles() + rings = mol.get_symmetrized_smallest_set_of_smallest_rings() rings = [ring for ring in rings if len(ring) == 6] new_mol_list = [] diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index 45cf457b67..3c6009601a 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -147,7 +147,7 @@ def rd_build(self): """ Import rmg molecule and create rdkit molecule with the same atom labeling. """ - return self.molecule.to_rdkit_mol(remove_h=False, return_mapping=True) + return self.molecule.to_rdkit_mol(remove_h=False, return_mapping=True, sanitize="partial") def rd_embed(self, rdmol, num_conf_attempts): """ diff --git a/rmgpy/species.py b/rmgpy/species.py index c17bdd182a..d89cf8c864 100644 --- a/rmgpy/species.py +++ b/rmgpy/species.py @@ -126,7 +126,7 @@ def __init__(self, index=-1, label='', thermo=None, conformer=None, molecule=Non self._inchi = inchi elif smiles: # check it is fragment or molecule - _ , cutting_label_list = Fragment().detect_cutting_label(smiles) + _ , cutting_label_list = Fragment.detect_cutting_label(smiles) if cutting_label_list != []: # Fragment self.molecule = [Fragment(smiles=smiles)] else: # Molecule @@ -382,7 +382,7 @@ def from_adjacency_list(self, adjlist, raise_atomtype_exception=True, raise_char adjlist_no_label = adjlist # detect if it contains cutting label - _ , cutting_label_list = Fragment().detect_cutting_label(adjlist_no_label) + _ , cutting_label_list = Fragment.detect_cutting_label(adjlist_no_label) if cutting_label_list == []: self.molecule = [Molecule().from_adjacency_list(adjlist, saturate_h=False, diff --git a/rmgpy/statmech/ndTorsions.py b/rmgpy/statmech/ndTorsions.py index 7ee4e31135..c71675a18b 100644 --- a/rmgpy/statmech/ndTorsions.py +++ b/rmgpy/statmech/ndTorsions.py @@ -35,8 +35,9 @@ import numdifftools as nd import numpy as np -from scipy import integrate as inte from scipy import interpolate +from scipy.integrate import simpson + from sklearn import linear_model from sklearn.preprocessing import PolynomialFeatures @@ -614,7 +615,7 @@ def f(*phis): Imat[coords] = f(*rphis[np.array(coords)]) for i in range(len(self.pivots)): - Imat = inte.simps(Imat, rphis) + Imat = simpson(Imat, rphis) intg = Imat diff --git a/setup.py b/setup.py index 78a8e3eeda..ff7c0770ad 100644 --- a/setup.py +++ b/setup.py @@ -145,8 +145,9 @@ description='Reaction Mechanism Generator', author='William H. Green and the RMG Team', author_email='rmg_dev@mit.edu', - url='https://reactionmechanismgenerator.github.io', - python_requires='>=3.9,<3.10', + url='http://reactionmechanismgenerator.github.io', + python_requires='>=3.9,<3.12', + packages=find_packages(where='.', include=["rmgpy*"]) + find_packages(where='.', include=["arkane*"]), scripts=scripts, entry_points={ diff --git a/test/rmgpy/data/thermoTest.py b/test/rmgpy/data/thermoTest.py index 7d5db15a61..0f23079cbc 100644 --- a/test/rmgpy/data/thermoTest.py +++ b/test/rmgpy/data/thermoTest.py @@ -1723,7 +1723,7 @@ def test_add_poly_ring_correction_thermo_data_from_heuristic_using_pyrene(self): spe.generate_resonance_structures() mols = [] for mol in spe.molecule: - sssr0 = mol.get_smallest_set_of_smallest_rings() + sssr0 = mol.get_symmetrized_smallest_set_of_smallest_rings() aromatic_ring_num = 0 for sr0 in sssr0: sr0mol = Molecule(atoms=sr0) @@ -1772,7 +1772,7 @@ def test_add_poly_ring_correction_thermo_data_from_heuristic_using_aromatic_tric spe = Species().from_smiles(smiles) spe.generate_resonance_structures() for mol in spe.molecule: - sssr0 = mol.get_smallest_set_of_smallest_rings() + sssr0 = mol.get_symmetrized_smallest_set_of_smallest_rings() aromatic_ring_num = 0 for sr0 in sssr0: sr0mol = Molecule(atoms=sr0) @@ -2050,7 +2050,7 @@ def test_combine_two_rings_into_sub_molecule(self): mol1 = Molecule().from_smiles(smiles1) # get two SSSRs - sssr = mol1.get_smallest_set_of_smallest_rings() + sssr = mol1.get_symmetrized_smallest_set_of_smallest_rings() ring1 = sssr[0] ring2 = sssr[1] @@ -2127,7 +2127,7 @@ def test_find_aromatic_bonds_from_sub_molecule(self): mol = spe.molecule[0] # get two SSSRs - sssr = mol.get_smallest_set_of_smallest_rings() + sssr = mol.get_symmetrized_smallest_set_of_smallest_rings() ring1 = sssr[0] ring2 = sssr[1] @@ -2151,7 +2151,7 @@ def test_bicyclic_decomposition_for_polyring_using_pyrene(self): spe = Species().from_smiles(smiles) spe.generate_resonance_structures() for mol in spe.molecule: - sssr0 = mol.get_smallest_set_of_smallest_rings() + sssr0 = mol.get_symmetrized_smallest_set_of_smallest_rings() aromatic_ring_num = 0 for sr0 in sssr0: sr0mol = Molecule(atoms=sr0) @@ -2200,7 +2200,7 @@ def test_bicyclic_decomposition_for_polyring_using_aromatic_tricyclic(self): spe = Species().from_smiles(smiles) spe.generate_resonance_structures() for mol in spe.molecule: - sssr0 = mol.get_smallest_set_of_smallest_rings() + sssr0 = mol.get_symmetrized_smallest_set_of_smallest_rings() aromatic_ring_num = 0 for sr0 in sssr0: sr0mol = Molecule(atoms=sr0) diff --git a/test/rmgpy/molecule/atomtypeTest.py b/test/rmgpy/molecule/atomtypeTest.py index fb9ab3c5ed..200922b9a4 100644 --- a/test/rmgpy/molecule/atomtypeTest.py +++ b/test/rmgpy/molecule/atomtypeTest.py @@ -165,7 +165,7 @@ def test_make_sample_molecule(self): except: logging.exception(f"Couldn't make sample molecule for atomType {name}") failed.append(name) - assert not failed, f"Couldn't make sample molecules for types {', '.join(failed)}" + assert len(failed) == 0, f"Couldn't make sample molecules for types {', '.join(failed)}" @pytest.mark.skip(reason="WIP") def test_make_sample_molecule_wip(self): diff --git a/test/rmgpy/molecule/converterTest.py b/test/rmgpy/molecule/converterTest.py index d9395a90a3..142fc0f598 100644 --- a/test/rmgpy/molecule/converterTest.py +++ b/test/rmgpy/molecule/converterTest.py @@ -134,6 +134,20 @@ def test_atom_mapping_3(self): assert [atom.number for atom in mol.atoms] == [1, 6, 7] assert [rdkitmol.GetAtomWithIdx(idx).GetAtomicNum() for idx in range(3)] == [1, 6, 7] + def test_ignoring_bonds(self): + """Test that to_rdkit_mol returns correct indices and atom mappings, when ignoring bond orders.""" + + mol = Molecule().from_smiles("CC1CCC=C1C=O") + rdkitmol, rd_atom_indices = to_rdkit_mol(mol, remove_h=False, + sanitize=False, return_mapping=True, + ignore_bond_orders=True) + for atom in mol.atoms: + # Check that all atoms are found in mapping + assert atom in rd_atom_indices + # Check that all bonds are in rdkitmol with correct mapping and no order + for connected_atom, bond in atom.bonds.items(): + bond_type = str(rdkitmol.GetBondBetweenAtoms(rd_atom_indices[atom], rd_atom_indices[connected_atom]).GetBondType()) + assert bond_type == "UNSPECIFIED" class ConverterTest: def setup_class(self): diff --git a/test/rmgpy/molecule/drawTest.py b/test/rmgpy/molecule/drawTest.py index 6c416f171d..dec288ab79 100644 --- a/test/rmgpy/molecule/drawTest.py +++ b/test/rmgpy/molecule/drawTest.py @@ -297,4 +297,102 @@ def test_draw_bidentate_adsorbate(self): surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(molecule, file_format="png", target=path) assert os.path.exists(path), "File doesn't exist" os.unlink(path) - assert isinstance(surface, ImageSurface) \ No newline at end of file + assert isinstance(surface, ImageSurface) + + def test_draw_bidentate_with_charge_separation(self): + molecule = Molecule().from_adjacency_list( + """ +1 X u0 p0 c0 {3,S} +2 X u0 p0 c0 {4,D} +3 O u0 p2 c0 {1,S} {4,S} +4 N u0 p0 c+1 {3,S} {2,D} {5,S} +5 O u0 p3 c-1 {4,S} + """ + ) + try: + from cairocffi import PDFSurface + except ImportError: + from cairo import PDFSurface + surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf") + assert isinstance(surface, PDFSurface) + + def test_draw_cation(self): + try: + from cairocffi import PDFSurface + except ImportError: + from cairo import PDFSurface + path = "test_molecule.pdf" + if os.path.exists(path): + os.unlink(path) + polycycle = Molecule(smiles="C1=NC2=C(N1)C(=O)[NH2+]C(=N2)N") + surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(polycycle, file_format="pdf", target=path) + assert isinstance(surface, PDFSurface) + assert width > height + os.unlink(path) + + def test_draw_anion(self): + try: + from cairocffi import PDFSurface + except ImportError: + from cairo import PDFSurface + path = "test_molecule.pdf" + if os.path.exists(path): + os.unlink(path) + polycycle = Molecule(smiles="c1ccc2c3ccccc3[CH-]c2c1") + surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(polycycle, file_format="pdf", target=path) + assert isinstance(surface, PDFSurface) + assert width > height + os.unlink(path) + + def test_draw_zwitterion(self): + try: + from cairocffi import PDFSurface + except ImportError: + from cairo import PDFSurface + path = "test_molecule.pdf" + if os.path.exists(path): + os.unlink(path) + polycycle = Molecule(smiles="[NH3+]CC(=O)[O-]") + surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(polycycle, file_format="pdf", target=path) + assert isinstance(surface, PDFSurface) + assert width > height + os.unlink(path) + + def test_draw_cation_on_surface(self): + molecule = Molecule().from_adjacency_list( + """ +1 X u0 p0 c0 {3,S} +2 X u0 p0 c0 {3,S} +3 O u0 p1 c+1 {1,S} {2,S} {4,S} +4 H u0 p0 c0 {3,S} + """ + ) + try: + from cairocffi import PDFSurface + except ImportError: + from cairo import PDFSurface + path = "test_molecule.pdf" + if os.path.exists(path): + os.unlink(path) + surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf", target=path) + assert isinstance(surface, PDFSurface) + os.unlink(path) + + + def test_draw_anion_on_surface(self): + molecule = Molecule().from_adjacency_list( + """ +1 X u0 p0 c0 {2,S} +2 O u0 p3 c-1 {1,S} + """ + ) + try: + from cairocffi import PDFSurface + except ImportError: + from cairo import PDFSurface + path = "test_molecule.pdf" + if os.path.exists(path): + os.unlink(path) + surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf", target=path) + assert isinstance(surface, PDFSurface) + os.unlink(path) diff --git a/test/rmgpy/molecule/fragmentTest.py b/test/rmgpy/molecule/fragmentTest.py index 1975b442f6..5f59efd6a3 100644 --- a/test/rmgpy/molecule/fragmentTest.py +++ b/test/rmgpy/molecule/fragmentTest.py @@ -747,7 +747,7 @@ def test_get_representative_molecule(self): def test_to_rdkit_mol(self): fragment = rmgpy.molecule.fragment.Fragment().from_smiles_like_string("CCR") - rdmol, _ = fragment.to_rdkit_mol() + rdmol, _ = fragment.to_rdkit_mol(remove_h=False, return_mapping=True) assert rdmol.GetNumAtoms() == 8 diff --git a/test/rmgpy/molecule/graphTest.py b/test/rmgpy/molecule/graphTest.py index f7781d3ae7..da4daafefc 100644 --- a/test/rmgpy/molecule/graphTest.py +++ b/test/rmgpy/molecule/graphTest.py @@ -608,25 +608,6 @@ def test_get_all_cyclic_vertices(self): self.graph.add_edge(edge) # To create a cycle assert len(self.graph.get_all_cyclic_vertices()) == 4 - def test_get_all_polycylic_vertices(self): - edge = Edge(self.graph.vertices[0], self.graph.vertices[3]) - self.graph.add_edge(edge) # To create a cycle - assert self.graph.get_all_polycyclic_vertices() == [] - edge2 = Edge(self.graph.vertices[0], self.graph.vertices[5]) - self.graph.add_edge(edge2) # Create another cycle to generate two fused cycles - assert len(self.graph.get_all_polycyclic_vertices()) == 2 - # Add new vertices and edges to generate a spirocyclic cycle - vertices = [Vertex() for _ in range(2)] - for vertex in vertices: - self.graph.add_vertex(vertex) - edges = [ - Edge(self.graph.vertices[5], self.graph.vertices[6]), - Edge(self.graph.vertices[6], self.graph.vertices[7]), - Edge(self.graph.vertices[5], self.graph.vertices[7]), - ] - for edge in edges: - self.graph.add_edge(edge) - assert len(self.graph.get_all_polycyclic_vertices()) == 3 def test_get_all_cycles(self): """ @@ -673,255 +654,6 @@ def test_get_all_simple_cycles_of_size(self): cycle_list = self.graph.get_all_simple_cycles_of_size(6) assert len(cycle_list) == 0 - def test_get_smallest_set_of_smallest_rings(self): - """ - Test the Graph.get_smallest_set_of_smallest_rings() method. - """ - cycle_list = self.graph.get_smallest_set_of_smallest_rings() - assert len(cycle_list) == 0 - edge = Edge(self.graph.vertices[0], self.graph.vertices[3]) - self.graph.add_edge(edge) # To create a cycle - cycle_list = self.graph.get_smallest_set_of_smallest_rings() - assert len(cycle_list) == 1 - assert len(cycle_list[0]) == 4 - - def test_get_relevant_cycles(self): - """ - Test the Graph.get_relevant_cycles() method. - """ - cycle_list = self.graph.get_relevant_cycles() - assert len(cycle_list) == 0 - # Create a cycle of length 4 - edge = Edge(self.graph.vertices[0], self.graph.vertices[3]) - self.graph.add_edge(edge) - # Create a second cycle of length 4 - edge = Edge(self.graph.vertices[0], self.graph.vertices[5]) - self.graph.add_edge(edge) - # Create a bridge forming multiple cycles of length 4 - edge = Edge(self.graph.vertices[1], self.graph.vertices[4]) - self.graph.add_edge(edge) - - # SSSR should be 3 cycles of length 4 - cycle_list = self.graph.get_smallest_set_of_smallest_rings() - assert len(cycle_list) == 3 - size_list = sorted([len(cycle) for cycle in cycle_list]) - assert size_list == [4, 4, 4] - - # RC should be 5 cycles of length 4 - cycle_list = self.graph.get_relevant_cycles() - assert len(cycle_list) == 5 - size_list = sorted([len(cycle) for cycle in cycle_list]) - assert size_list == [4, 4, 4, 4, 4] - - def test_cycle_list_order_sssr(self): - """ - Test that get_smallest_set_of_smallest_rings return vertices in the proper order. - - There are methods such as symmetry and molecule drawing which rely - on the fact that subsequent list entries are connected. - """ - # Create a cycle of length 5 - edge = Edge(self.graph.vertices[0], self.graph.vertices[4]) - self.graph.add_edge(edge) - # Test SSSR - sssr = self.graph.get_smallest_set_of_smallest_rings() - assert len(sssr) == 1 - assert len(sssr[0]) == 5 - for i in range(5): - assert self.graph.has_edge(sssr[0][i], sssr[0][i - 1]) - - def test_cycle_list_order_relevant_cycles(self): - """ - Test that get_relevant_cycles return vertices in the proper order. - - There are methods such as symmetry and molecule drawing which rely - on the fact that subsequent list entries are connected. - """ - # Create a cycle of length 5 - edge = Edge(self.graph.vertices[0], self.graph.vertices[4]) - self.graph.add_edge(edge) - # Test RC - rc = self.graph.get_relevant_cycles() - assert len(rc) == 1 - assert len(rc[0]) == 5 - for i in range(5): - assert self.graph.has_edge(rc[0][i], rc[0][i - 1]) - - def test_get_polycyclic_rings(self): - """ - Test that the Graph.get_polycycles() method returns only polycyclic rings. - """ - vertices = [Vertex() for _ in range(27)] - bonds = [ - (0, 1), - (1, 2), - (2, 3), - (3, 4), - (4, 5), - (5, 6), - (6, 7), - (7, 8), - (8, 9), - (9, 10), - (10, 11), - (11, 12), - (12, 13), - (13, 14), - (14, 15), - (14, 12), - (12, 16), - (16, 10), - (10, 17), - (17, 18), - (18, 19), - (9, 20), - (20, 21), - (21, 7), - (6, 22), - (22, 23), - (22, 4), - (23, 3), - (23, 24), - (24, 25), - (25, 1), - ] - edges = [] - for bond in bonds: - edges.append(Edge(vertices[bond[0]], vertices[bond[1]])) - - graph = Graph() - for vertex in vertices: - graph.add_vertex(vertex) - for edge in edges: - graph.add_edge(edge) - graph.update_connectivity_values() - - sssr = graph.get_smallest_set_of_smallest_rings() - assert len(sssr) == 6 - polycyclic_vertices = set(graph.get_all_polycyclic_vertices()) - expected_polycyclic_vertices = set([vertices[index] for index in [3, 23, 4, 22, 12]]) - - assert polycyclic_vertices == expected_polycyclic_vertices - - continuous_rings = graph.get_polycycles() - expected_continuous_rings = [ - [vertices[index] for index in [1, 2, 3, 4, 5, 6, 22, 23, 24, 25]], - # [vertices[index] for index in [7,8,9,21,20]], # This is a nonpolycyclic ring - [vertices[index] for index in [10, 11, 12, 13, 14, 16]], - ] - - # Convert to sets for comparison purposes - continuous_rings = [set(ring) for ring in continuous_rings] - expected_continuous_rings = [set(ring) for ring in expected_continuous_rings] - for ring in expected_continuous_rings: - assert ring in continuous_rings - - def test_get_max_cycle_overlap(self): - """ - Test that get_max_cycle_overlap returns the correct overlap numbers - for different graphs. - """ - - def make_graph(edge_inds): - nvert = max(max(inds) for inds in edge_inds) + 1 - vertices = [Vertex() for _ in range(nvert)] - graph = Graph(vertices) - for idx1, idx2 in edge_inds: - graph.add_edge(Edge(vertices[idx1], vertices[idx2])) - return graph - - linear = make_graph([(0, 1), (1, 2)]) - mono = make_graph([(0, 1), (0, 2), (1, 2), (2, 3), (3, 4), (3, 5), (4, 5)]) - spiro = make_graph([(0, 1), (0, 2), (1, 2), (2, 3), (2, 4), (3, 4)]) - fused = make_graph([(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)]) - bridged = make_graph([(0, 1), (0, 2), (1, 3), (1, 4), (2, 3), (2, 5), (4, 5)]) - cube = make_graph( - [ - (0, 1), - (0, 2), - (0, 4), - (1, 3), - (1, 5), - (2, 3), - (2, 6), - (3, 7), - (4, 5), - (4, 6), - (5, 7), - (6, 7), - ] - ) - - assert linear.get_max_cycle_overlap() == 0 - assert mono.get_max_cycle_overlap() == 0 - assert spiro.get_max_cycle_overlap() == 1 - assert fused.get_max_cycle_overlap() == 2 - assert bridged.get_max_cycle_overlap() == 3 - # With the current algorithm for maximum overlap determination, a cube - # only has an overlap of 2, because the set of relevant cycles - # contains the six four-membered faces. This could be changed in the - # future. - assert cube.get_max_cycle_overlap() == 2 - - def test_get_largest_ring(self): - """ - Test that the Graph.get_polycycles() method returns only polycyclic rings. - """ - vertices = [Vertex() for _ in range(27)] - bonds = [ - (0, 1), - (1, 2), - (2, 3), - (3, 4), - (4, 5), - (5, 6), - (6, 7), - (9, 10), - (10, 11), - (11, 12), - (12, 13), - (13, 14), - (14, 15), - (12, 16), - (10, 17), - (17, 18), - (18, 19), - (9, 20), - (20, 21), - (6, 22), - (22, 23), - (22, 8), - (8, 4), - (23, 3), - (23, 24), - (24, 25), - (25, 1), - ] - edges = [] - for bond in bonds: - edges.append(Edge(vertices[bond[0]], vertices[bond[1]])) - - graph = Graph() - for vertex in vertices: - graph.add_vertex(vertex) - for edge in edges: - graph.add_edge(edge) - graph.update_connectivity_values() - - rings = graph.get_polycycles() - assert len(rings) == 1 - - # ensure the last ring doesn't include vertex 8, since it isn't in the - # longest ring. Try two different items since one might contain the vertex 8 - long_ring = graph.get_largest_ring(rings[0][0]) - long_ring2 = graph.get_largest_ring(rings[0][1]) - - if len(long_ring) > len(long_ring2): - longest_ring = long_ring - else: - longest_ring = long_ring2 - - assert len(longest_ring) == len(rings[0]) - 1 def test_sort_cyclic_vertices(self): """Test that sort_cyclic_vertices works properly for a valid input.""" diff --git a/test/rmgpy/molecule/moleculeTest.py b/test/rmgpy/molecule/moleculeTest.py index 9621d6fc7d..58e5cc58a5 100644 --- a/test/rmgpy/molecule/moleculeTest.py +++ b/test/rmgpy/molecule/moleculeTest.py @@ -1544,13 +1544,13 @@ def test_remove_h_bonds(self): def test_sssr(self): """ - Test the Molecule.get_smallest_set_of_smallest_rings() method with a complex + Test the Molecule.get_symmetrized_smallest_set_of_smallest_rings() method with a complex polycyclic molecule. """ molecule = Molecule() molecule.from_smiles("C(CC1C(C(CCCCCCCC)C1c1ccccc1)c1ccccc1)CCCCCC") # http://cactus.nci.nih.gov/chemical/structure/C(CC1C(C(CCCCCCCC)C1c1ccccc1)c1ccccc1)CCCCCC/image - sssr = molecule.get_smallest_set_of_smallest_rings() + sssr = molecule.get_symmetrized_smallest_set_of_smallest_rings() assert len(sssr) == 3 def test_is_in_cycle_ethane(self): @@ -2310,7 +2310,7 @@ def test_large_mol_creation(self): def test_get_polycyclic_rings(self): """ Test that polycyclic rings within a molecule are returned properly in the function - `Graph().get_polycycles()` + `Molecule.get_polycycles()` """ # norbornane m1 = Molecule(smiles="C1CC2CCC1C2") @@ -2411,38 +2411,38 @@ def test_get_disparate_rings(self): assert len(polyrings) == 1 assert len(polyrings[0]) == 10 - def test_get_smallest_set_of_smallest_rings(self): + def test_get_symmetrized_smallest_set_of_smallest_rings(self): """ Test that SSSR within a molecule are returned properly in the function - `Graph().get_smallest_set_of_smallest_rings()` + `Molecule().get_symmetrized_smallest_set_of_smallest_rings()` """ m1 = Molecule(smiles="C12CCC1C3CC2CC3") - sssr1 = m1.get_smallest_set_of_smallest_rings() + sssr1 = m1.get_symmetrized_smallest_set_of_smallest_rings() sssr1_sizes = sorted([len(ring) for ring in sssr1]) sssr1_sizes_expected = [4, 5, 5] assert sssr1_sizes == sssr1_sizes_expected m2 = Molecule(smiles="C1(CC2)C(CC3)CC3C2C1") - sssr2 = m2.get_smallest_set_of_smallest_rings() + sssr2 = m2.get_symmetrized_smallest_set_of_smallest_rings() sssr2_sizes = sorted([len(ring) for ring in sssr2]) sssr2_sizes_expected = [5, 5, 6] assert sssr2_sizes == sssr2_sizes_expected m3 = Molecule(smiles="C1(CC2)C2C(CCCC3)C3C1") - sssr3 = m3.get_smallest_set_of_smallest_rings() + sssr3 = m3.get_symmetrized_smallest_set_of_smallest_rings() sssr3_sizes = sorted([len(ring) for ring in sssr3]) sssr3_sizes_expected = [4, 5, 6] assert sssr3_sizes == sssr3_sizes_expected m4 = Molecule(smiles="C12=CC=CC=C1C3=C2C=CC=C3") - sssr4 = m4.get_smallest_set_of_smallest_rings() + sssr4 = m4.get_symmetrized_smallest_set_of_smallest_rings() sssr4_sizes = sorted([len(ring) for ring in sssr4]) sssr4_sizes_expected = [4, 6, 6] assert sssr4_sizes == sssr4_sizes_expected m5 = Molecule(smiles="C12=CC=CC=C1CC3=C(C=CC=C3)C2") - sssr5 = m5.get_smallest_set_of_smallest_rings() + sssr5 = m5.get_symmetrized_smallest_set_of_smallest_rings() sssr5_sizes = sorted([len(ring) for ring in sssr5]) sssr5_sizes_expected = [6, 6, 6] assert sssr5_sizes == sssr5_sizes_expected @@ -2980,10 +2980,23 @@ def test_ring_perception(self): mol = Molecule(smiles="c12ccccc1cccc2") mol.identify_ring_membership() for atom in mol.atoms: - if atom.element == "C": + if atom.is_carbon(): assert atom.props["inRing"] - elif atom.element == "H": + elif atom.is_hydrogen(): assert not atom.props["inRing"] + else: + raise ValueError("Unexpected atom type") + mol = Molecule(smiles="C1CCCC(O)CCCCCCC1") + mol.identify_ring_membership() + for atom in mol.atoms: + if atom.is_carbon(): + assert atom.props["inRing"] + elif atom.is_hydrogen(): + assert not atom.props["inRing"] + elif atom.is_oxygen(): + assert not atom.props["inRing"] + else: + raise ValueError("Unexpected atom type") def test_enumerate_bonds(self): """Test that generating a count of bond labels works properly.""" @@ -3043,3 +3056,136 @@ def test_remove_van_der_waals_bonds(self): assert len(mol.get_all_edges()) == 2 mol.remove_van_der_waals_bonds() assert len(mol.get_all_edges()) == 1 + + def test_get_symmetrized_smallest_set_of_smallest_rings(self): + """ + Test the Molecule.get_symmetrized_smallest_set_of_smallest_rings() method. + """ + mol = Molecule(smiles="CCCC") + cycle_list = mol.get_symmetrized_smallest_set_of_smallest_rings() + assert len(cycle_list) == 0 + + # Create a cycle of length 4 + mol = Molecule(smiles="C1CCC1") + cycle_list = mol.get_symmetrized_smallest_set_of_smallest_rings() + assert len(cycle_list) == 1 + assert len(cycle_list[0]) == 4 + + # Create a bridged tricyclic + mol = Molecule(smiles="C1C(C)CC2CC1C=C3C2CCC3") + cycle_list = mol.get_symmetrized_smallest_set_of_smallest_rings() + assert len(cycle_list) == 3 + assert len(cycle_list[0]) == 5 + + # Test cubane + mol = Molecule(smiles="C12C3C4C1C5C2C3C45") + cycle_list = mol.get_symmetrized_smallest_set_of_smallest_rings() + assert len(cycle_list) == 6 + for cycle in cycle_list: + assert len(cycle) == 4 + + def test_get_relevant_cycles(self): + """ + Test the Molecule.get_relevant_cycles() raises correct error after deprecation. + """ + mol = Molecule(smiles="CCCC") + with pytest.raises(NotImplementedError): + mol.get_relevant_cycles() + + def test_cycle_list_order_sssr(self): + """ + Test that get_symmetrized_smallest_set_of_smallest_rings return vertices in the proper order. + + There are methods such as symmetry and molecule drawing which rely + on the fact that subsequent list entries are connected. + """ + # Create a cycle of length 5 + mol = Molecule(smiles="C1CCCC1") + # Test SSSR + sssr = mol.get_symmetrized_smallest_set_of_smallest_rings() + assert len(sssr) == 1 + assert len(sssr[0]) == 5 + for i in range(5): + assert mol.has_bond(sssr[0][i], sssr[0][i - 1]) + + def test_get_max_cycle_overlap(self): + """ + Test that get_max_cycle_overlap returns the correct overlap numbers + for different molecules. + """ + # Linear molecule + linear = Molecule(smiles="CCC") + assert linear.get_max_cycle_overlap() == 0 + + # Monocyclic molecule + mono = Molecule(smiles="C1CCCC1") + assert mono.get_max_cycle_overlap() == 0 + + # Spirocyclic molecule + spiro = Molecule(smiles="C1CCC2(CC1)CC2") + assert spiro.get_max_cycle_overlap() == 1 + + # Fused bicyclic molecule + fused = Molecule(smiles="C1C2C(CCC1)CCCC2") + assert fused.get_max_cycle_overlap() == 2 + + # Bridged bicyclic molecule + bridged = Molecule(smiles="C1CC2CCC1C2") + assert bridged.get_max_cycle_overlap() == 3 + + # Cube-like molecule (cubane) + cube = Molecule(smiles="C12C3C4C1C5C2C3C45") + # With the current algorithm for maximum overlap determination, a cube + # only has an overlap of 2, because the set of relevant cycles + # contains the six four-membered faces. This could be changed in the + # future. + assert cube.get_max_cycle_overlap() == 2 + + def test_get_all_polycyclic_vertices(self): + """ + Test that get_all_polycyclic_vertices returns the correct vertices. + """ + # Simple linear molecule + mol = Molecule(smiles="CCC") + polycyclic_vertices = mol.get_all_polycyclic_vertices() + assert len(polycyclic_vertices) == 0 + + # Monocyclic molecule + mol = Molecule(smiles="C1CCCC1") + polycyclic_vertices = mol.get_all_polycyclic_vertices() + assert len(polycyclic_vertices) == 0 + + # Fused bicyclic molecule + # TODO: don't just test length, test the actual vertices + mol = Molecule(smiles="C1C2C(CCC1)CCCC2") + polycyclic_vertices = mol.get_all_polycyclic_vertices() + assert len(polycyclic_vertices) > 0 + + # Spirocyclic molecule + # TODO: don't just test length, test the actual vertices + mol = Molecule(smiles="C1CCC2(CC1)CC2") + polycyclic_vertices = mol.get_all_polycyclic_vertices() + assert len(polycyclic_vertices) > 0 + + def test_get_largest_ring(self): + """ + Test that Molecule.get_largest_ring() method returns the largest ring. + """ + # Create a complex polycyclic molecule + mol = Molecule(smiles="C14CCCCCC(C(CCC1CC2CCCCCCC2)CC3CCC3)C4") + + # Get polycyclic rings + rings = mol.get_polycycles() + assert len(rings) == 1 + + long_ring = mol.get_largest_ring(rings[0][0]) + long_ring2 = mol.get_largest_ring(rings[0][1]) + + # get the longer of the two rings + if len(long_ring) > len(long_ring2): + longest_ring = long_ring + else: + longest_ring = long_ring2 + + # longest ring should be one atom shorter than the full polycyclic ring + assert len(longest_ring) == len(rings[0]) - 1 \ No newline at end of file diff --git a/utilities.py b/utilities.py index 10178c10df..1d47b33453 100644 --- a/utilities.py +++ b/utilities.py @@ -50,7 +50,6 @@ def check_dependencies(): missing = { 'openbabel': _check_openbabel(), 'pydqed': _check_pydqed(), - 'pyrdl': _check_pyrdl(), 'rdkit': _check_rdkit(), 'symmetry': _check_symmetry(), } @@ -104,22 +103,6 @@ def _check_pydqed(): return missing -def _check_pyrdl(): - """Check for pyrdl""" - missing = False - - try: - import py_rdl - except ImportError: - print('{0:<30}{1}'.format('pyrdl', 'Not found. Necessary for ring perception algorithms.')) - missing = True - else: - location = py_rdl.__file__ - print('{0:<30}{1}'.format('pyrdl', location)) - - return missing - - def _check_rdkit(): """Check for RDKit""" missing = False