-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFP.py
63 lines (48 loc) · 1.65 KB
/
FP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
import pandas as pd
import numpy as np
from rdkit.Chem import PandasTools
from rdkit import Chem
from rdkit.Chem import MACCSkeys
from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect
import tmap as tm
from map4 import MAP4Calculator
"""
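Encode a molecule from a SMILES string into a fingerprint.

Every ``smiles_to_*`` function below follows this contract.

Parameters
----------
smiles : str
    The SMILES string defining the molecule.
n_bits : int, optional
    The length of the fingerprint (Morgan fingerprint functions only).

Returns
-------
numpy.ndarray
    The fingerprint array.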
"""
def smiles_to_maccs(smiles: str) -> np.ndarray:
    """Encode a SMILES string as a MACCS keys fingerprint.

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.

    Returns
    -------
    numpy.ndarray
        The MACCS keys bit vector converted to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # Convert SMILES to an RDKit mol object. MolFromSmiles signals a parse
    # failure by returning None rather than raising, so check explicitly
    # instead of letting None reach the fingerprint routine.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(MACCSkeys.GenMACCSKeys(mol))
def smiles_to_morgan3(smiles: str, n_bits: int = 1024) -> np.ndarray:
    """Encode a SMILES string as a radius-3 Morgan bit-vector fingerprint.

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.
    n_bits : int, optional
        The length of the fingerprint (default 1024).

    Returns
    -------
    numpy.ndarray
        The Morgan bit vector converted to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # MolFromSmiles returns None (it does not raise) when parsing fails.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(GetMorganFingerprintAsBitVect(mol, 3, nBits=n_bits))
def smiles_to_morgan3_2048(smiles: str, n_bits: int = 2048) -> np.ndarray:
    """Encode a SMILES string as a radius-3 Morgan fingerprint (2048 bits by default).

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.
    n_bits : int, optional
        The length of the fingerprint (default 2048).

    Returns
    -------
    numpy.ndarray
        The Morgan bit vector converted to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # MolFromSmiles returns None (it does not raise) when parsing fails.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(GetMorganFingerprintAsBitVect(mol, 3, nBits=n_bits))
def smiles_to_morganF(smiles: str, n_bits: int = 1024) -> np.ndarray:
    """Encode a SMILES string as a radius-3 Morgan fingerprint with ``useFeatures=True``.

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.
    n_bits : int, optional
        The length of the fingerprint (default 1024).

    Returns
    -------
    numpy.ndarray
        The feature-based Morgan bit vector converted to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # MolFromSmiles returns None (it does not raise) when parsing fails.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(
        GetMorganFingerprintAsBitVect(mol, 3, nBits=n_bits, useFeatures=True)
    )
def smiles_to_morganF_2048(smiles: str, n_bits: int = 2048) -> np.ndarray:
    """Encode a SMILES string as a feature-based radius-3 Morgan fingerprint (2048 bits by default).

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.
    n_bits : int, optional
        The length of the fingerprint (default 2048).

    Returns
    -------
    numpy.ndarray
        The Morgan bit vector (computed with ``useFeatures=True``) converted
        to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # MolFromSmiles returns None (it does not raise) when parsing fails.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(
        GetMorganFingerprintAsBitVect(mol, 3, nBits=n_bits, useFeatures=True)
    )
# Module-level MAP4 calculator configured with 1024 dimensions; it is created
# once at import time and reused by smiles_to_MAP4.
dim = 1024
MAP4 = MAP4Calculator(dimensions=dim)
def smiles_to_MAP4(smiles: str) -> np.ndarray:
    """Encode a SMILES string as a MAP4 fingerprint.

    Uses the module-level ``MAP4`` calculator (constructed with
    ``dimensions=1024``).

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.

    Returns
    -------
    numpy.ndarray
        The calculator's output converted to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # MolFromSmiles returns None (it does not raise) when parsing fails.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(MAP4.calculate(mol))
# Second MAP4 calculator, configured with 2048 dimensions; reused by
# smiles_to_MAP4_2048.
# NOTE: this rebinds the module-level `dim`, so it holds 2048 once the module
# has finished loading.
dim = 2048
MAP4_2 = MAP4Calculator(dimensions=dim)
def smiles_to_MAP4_2048(smiles: str) -> np.ndarray:
    """Encode a SMILES string as a MAP4 fingerprint using the 2048-dimension calculator.

    Uses the module-level ``MAP4_2`` calculator (constructed with
    ``dimensions=2048``).

    Parameters
    ----------
    smiles : str
        The SMILES string defining the molecule.

    Returns
    -------
    numpy.ndarray
        The calculator's output converted to a NumPy array.

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles`` into a molecule.
    """
    # MolFromSmiles returns None (it does not raise) when parsing fails.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return np.array(MAP4_2.calculate(mol))