Added method to add a single molecule to the Pickaxe object #44

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

LucaCappelletti94 wants to merge 6 commits into tyo-nu:master from LucaCappelletti94:add_molecule

minedatabase/exceptions.py

-Original file line number
+Diff line change
@@ -0,0 +1,15 @@
+    """Submodule providing exceptions used in the mine database package."""
+    from rdkit.Chem import Mol
+    from rdkit.Chem import MolToSmiles
+    class DisconnectedMoleculeException(ValueError):
+        """Exception raised when a molecule is disconnected."""
+        def __init__(self, molecule: Mol):
+            molecule_smile = MolToSmiles(molecule)
+            super().__init__((
+                f"The provided molecule with SMILES '{molecule_smile}' is "
+                "disconnected. This is most common when compounds are salts. "
+                "If you want to include disconnected molecules, you can set "
+                "the `fragmented_mols` parameter to True."
+            ))

minedatabase/filters/similarity.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -395,7 +395,9 @@ def gen_df_from_similarity( @@
             for mol_chunk in Chunks(mol_info, 10000):
                 # Construct targets to sample df
                 temp_df = pd.DataFrame(mol_chunk, columns=["_id", "SMILES"])
-                df = df.append(_parallelize_dataframe(temp_df, partial_T_calc, processes))
+                df = pd.concat(
+                    [df, _parallelize_dataframe(temp_df, partial_T_calc, processes)]
+                )
             # Reset index for CDF calculation
             df.reset_index(inplace=True, drop=True)
@@ Expand Down Expand Up / @@ -733,6 +735,7 @@ def _compare_target_fps( @@
             Returns cpd_id if a the compound is similar enough to a target.
             """
             # Generate the fingerprint of a compound and compare to the fingerprints
             # of the targets
             def fingerprint(fingerprint_method, keyword_dict, smi):
@@ Expand Down @@

minedatabase/pickaxe.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -3,6 +3,7 @@ @@
     This module generates new compounds from user-specified starting
     compounds using a set of SMARTS-based reaction rules.
     """
     import csv
     import datetime
     import os
@@ Expand All / @@ -12,7 +13,7 @@ @@
     from io import StringIO
     from pathlib import Path, PosixPath, WindowsPath
     from sys import exit
-    from typing import List, Set, Tuple, Union
+    from typing import List, Set, Tuple, Union, Optional
     import libsbml
     import lxml.etree as etree
@@ Expand Down Expand Up / @@ -42,6 +43,7 @@ @@
         write_targets_to_mine,
     )
     from minedatabase.reactions import transform_all_compounds_with_full
+    from minedatabase.exceptions import DisconnectedMoleculeException
     # Default to no errors
@@ Expand Down Expand Up / @@ -189,33 +191,36 @@ def __init__( @@
             # cid options
             self.cid_num_inchi_blocks = inchikey_blocks_for_cid
-            print("----------------------------------------")
-            print("Intializing pickaxe object")
+            if not self.quiet:
+                print("----------------------------------------")
+                print("Intializing pickaxe object")
             if database:
                 # Determine if a specified database is legal
                 db = MINE(database, self.mongo_uri)
                 if database in db.client.list_database_names():
                     if database_overwrite:
                         # If db exists, remove db from all of core compounds
                         # and drop db
-                        print(
-                            (
-                                f"Database {database} already exists. "
-                                "Deleting database and removing from core compound"
-                                " mines."
+                        if not self.quiet:
+                            print(
+                                (
+                                    f"Database {database} already exists. "
+                                    "Deleting database and removing from core compound"
+                                    " mines."
+                                )
                             )
-                        )
                         db.core_compounds.update_many({}, {"$pull": {"MINES": database}})
                         db.client.drop_database(database)
                         self.mine = database
                     else:
-                        print(
-                            (
-                                f"Warning! Database {database} already exists."
-                                "Specify database_overwrite as true to delete "
-                                "old database and write new."
+                        if not self.quiet:
+                            print(
+                                (
+                                    f"Warning! Database {database} already exists."
+                                    "Specify database_overwrite as true to delete "
+                                    "old database and write new."
+                                )
                             )
-                        )
                         exit("Exiting due to database name collision.")
                 else:
                     self.mine = database
@@ Expand All / @@ -238,8 +243,9 @@ def __init__( @@
             if rule_list:
                 self._load_operators(rule_list)
-            print("\nDone intializing pickaxe object")
-            print("----------------------------------------\n")
+            if not self.quiet:
+                print("\nDone intializing pickaxe object")
+                print("----------------------------------------\n")
         def load_targets(
             self,
@@ Expand Down Expand Up / @@ -275,6 +281,76 @@ def load_targets( @@
             print(f"{len(self.target_smiles)} target compounds loaded\n")
+        def add_molecule(self, molecule: Mol, identifier: Optional[str] = None) -> str:
+            """Adds the provided molecule to the pickaxe object after sanitizing it.
+            Parameters
+            ----------
+            molecule : Mol
+                RDKit molecule to add.
+            identifier : Optional[str] = None
+                Identifier for the molecule.
+                If None, the molecule will be assigned an identifier equal to
+                the string version of the number of compounds in the pickaxe object.
+            Returns
+            -------
+            str
+                Identifier of the molecule.
+            Raises
+            ------
+            DisconnectedMoleculeException
+                If the molecule is disconnected and fragmented_mols is False.
+            """
+            assert molecule is not None
+            assert isinstance(molecule, Mol)
+            if identifier is None:
+                identifier = str(len(self.compounds))
+            assert isinstance(identifier, str)
+            assert len(identifier) > 0
+            # If compound is disconnected (determined by GetMolFrags
+            # from rdkit) and loading of these molecules is not
+            # allowed, then don't add to internal dictionary.
+            # This is most common when compounds are salts.
+            if not self.fragmented_mols and len(GetMolFrags(molecule)) > 1:
+                raise DisconnectedMoleculeException(molecule)
+            # If specified remove charges (before applying reaction
+            # rules later on)
+            if self.neutralise:
+                molecule = utils.neutralise_charges(molecule)
+            assert molecule is not None
+            assert isinstance(molecule, Mol)
+            RemoveStereochemistry(molecule)
+            # Add compound to internal dictionary as a starting
+            # compound and store SMILES string to be returned
+            smile = MolToSmiles(molecule, True)
+            # Do not operate on inorganic compounds
+            if "C" in smile or "c" in smile:
+                # resolve potential tautomers and choose first one
+                if "n" in smile:
+                    smile = utils.postsanitize_smiles([smile])[0][0]
+                    molecule = MolFromSmiles(smile)
+                SanitizeMol(molecule)
+                self._add_compound(
+                    cpd_name=identifier,
+                    smi=smile,
+                    cpd_type="Starting Compound",
+                    mol=molecule,
+                )
+            return identifier
         def load_compound_set(self, compound_file: str = None, id_field: str = "id") -> str:
             """Load compounds for expansion into pickaxe.
@@ Expand Down Expand Up / @@ -428,11 +504,12 @@ def _load_operators(self, rule_path: str) -> None: @@
                             rule["Reactants"]
                         ) or rxn.GetNumProductTemplates() != len(rule["Products"]):
                             skipped += 1
-                            print(
-                                "The number of coreactants does not match the "
-                                "number of compounds in the SMARTS for reaction "
-                                "rule: " + rule["Name"]
-                            )
+                            if self.errors:
+                                print(
+                                    "The number of coreactants does not match the "
+                                    "number of compounds in the SMARTS for reaction "
+                                    "rule: " + rule["Name"]
+                                )
                         if rule["Name"] in self.operators:
                             raise ValueError("Duplicate reaction rule name")
                         # Update reaction rules dictionary
@@ Expand Down Expand Up @@
                         # Prune network to only things being expanded
                         self.prune_network(white_list, True)
-                    print("----------------------------------------")
-                    print(f"Expanding Generation {self.generation + 1}\n")
+                    if not self.quiet:
+                        print("----------------------------------------")
+                        print(f"Expanding Generation {self.generation + 1}\n")
                     # Starting time for expansion
                     time_init = time.time()
@@ Expand All @@
                     ]
                     # No compounds found
                     if not compound_smiles:
-                        print(
-                            "No compounds to expand in generation "
-                            f"{self.generation + 1}. Finished expanding."
-                        )
+                        if not self.quiet:
+                            print(
+                                "No compounds to expand in generation "
+                                f"{self.generation + 1}. Finished expanding."
+                            )
                         return None
                     self._transform_helper(compound_smiles, processes)
                     self._remove_cofactor_redundancy()
-                    print(
-                        f"Generation {self.generation + 1} finished in"
-                        f" {time.time()-time_init} s and contains:"
-                    )
-                    print(f"\t\t{len(self.compounds) - n_comps} new compounds")
-                    print(f"\t\t{len(self.reactions) - n_rxns} new reactions")
-                    print(f"\nDone expanding Generation: {self.generation + 1}.")
-                    print("----------------------------------------\n")
+                    if not self.quiet:
+                        print(
+                            f"Generation {self.generation + 1} finished in"
+                            f" {time.time()-time_init} s and contains:"
+                        )
+                        print(f"\t\t{len(self.compounds) - n_comps} new compounds")
+                        print(f"\t\t{len(self.reactions) - n_rxns} new reactions")
+                        print(f"\nDone expanding Generation: {self.generation + 1}.")
+                        print("----------------------------------------\n")
                 self.generation += 1
@@ Expand Down Expand Up / @@ -1363,7 +1443,7 @@ def get_reaction_annotation( @@
                         + reaction_annotation.split("\n")
                         + [
                             "  </rdf:RDF>",
-                            "</sbml:annotation>"
+                            "</sbml:annotation>",
                             # "</annotation>"
                         ]
                     )
@@ Expand Down @@

setup.cfg

-Original file line number
+Diff line change
@@ -1,10 +1,9 @@
     [options]
-    python_requires = >= 3.7, <3.10
     install_requires =
         keras
         python-libsbml
-        lxml==4.9
-        mordred==1.2
+        lxml
+        mordred
         pandas
         pymongo
         rdkit
@@ Expand Down @@

setup.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -5,30 +5,35 @@ @@
     with open("README.md", "r") as fh:
         long_description = fh.read()
-    setup(name='minedatabase',
-          version='2.2.0',
-          description='Metabolic In silico Network Expansions',
-          long_description=long_description,
-          long_description_content_type="text/markdown",
-          url='https://github.com/tyo-nu/MINE-Database',
-          author='Kevin Shebek, Jonathan Strutz',
-          author_email='jonstrutz11@gmail.com',
-          license='MIT',
-          packages=setuptools.find_packages(exclude=["docs", "tests"]),
+    setup(
+        name="minedatabase",
+        version="2.2.0",
+        description="Metabolic In silico Network Expansions",
+        long_description=long_description,
+        long_description_content_type="text/markdown",
+        url="https://github.com/tyo-nu/MINE-Database",
+        author="Kevin Shebek, Jonathan Strutz",
+        author_email="jonstrutz11@gmail.com",
+        license="MIT",
+        packages=setuptools.find_packages(exclude=["docs", "tests"]),
         #   install_requires=['pymongo'],#, 'rdkit', 'scikit-learn'],
-          package_data={'minedatabase': ['data/*'],
-                        'minedatabase.NP_Score': ['*.gz'],
-                        'minedatabase.tests': ['data/*'],
-                        },
-          include_package_data=True,
-          classifiers=[
-              'Development Status :: 5 - Production/Stable',
-              'Intended Audience :: Science/Research',
-              'License :: OSI Approved :: MIT License',
-              'Topic :: Scientific/Engineering :: Bio-Informatics',
-              'Topic :: Scientific/Engineering :: Chemistry',
-              'Programming Language :: Python :: 3.7',
-              'Programming Language :: Python :: 3.9',
-              'Programming Language :: Python :: 3.8',
-          ],
-          )
+        package_data={
+            "minedatabase": [
+                "data/*/*.tsv",
+                "data/feasibility/final_model.h5",
+                "data/feasibility/vae_model.pth",
+                "data/feasibility/final_model.json",
+                "data/*/*.txt",
+            ],
+            "minedatabase.NP_Score": ["*.gz"],
+            "minedatabase.tests": ["data/*"],
+        },
+        include_package_data=True,
+        classifiers=[
+            "Development Status :: 5 - Production/Stable",
+            "Intended Audience :: Science/Research",
+            "License :: OSI Approved :: MIT License",
+            "Topic :: Scientific/Engineering :: Bio-Informatics",
+            "Topic :: Scientific/Engineering :: Chemistry",
+        ],
+    )

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Added method to add a single molecule to the Pickaxe object #44

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Added method to add a single molecule to the Pickaxe object #44

Are you sure you want to change the base?

Uh oh!

Added method to add a single molecule to the Pickaxe object #44

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing