Skip to content

Commit 9e94d02

Browse files
authored
Merge pull request #216 from datamol-io/new_rdkit
Compat with latest RDKit 2023.09
2 parents 3939c12 + e8533f9 commit 9e94d02

File tree

9 files changed

+31
-29
lines changed

9 files changed

+31
-29
lines changed

.github/workflows/test.yml

+2-8
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,9 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: ["3.9", "3.10"]
19+
python-version: ["3.10", "3.11"]
2020
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
21-
rdkit-version: ["2022.09", "2023.03"]
22-
23-
# just enable python 3.11 on ubuntu to not blow up the CI time.
24-
include:
25-
- os: ubuntu-latest
26-
python-version: "3.11"
27-
rdkit-version: "2023.03"
21+
rdkit-version: ["2023.03", "2023.09"]
2822

2923
runs-on: ${{ matrix.os }}
3024
timeout-minutes: 30

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ See below the associated versions of Python and RDKit, for which a minor version
105105

106106
| `datamol` | `python` | `rdkit` |
107107
| --------- | ------------------- | ----------------------------- |
108+
| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` |
108109
| `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` |
109110
| `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |
110111
| `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |

datamol/cluster.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,9 @@ def assign_to_centroids(
242242
features = dm.parallelized(feature_fn, all_mols, n_jobs=n_jobs)
243243

244244
def distij(i, j, features=features):
245-
return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity(features[int(i)], features[int(j)])
245+
return 1.0 - DataStructs.cDataStructs.TanimotoSimilarity(
246+
features[int(i.item())], features[int(j.item())]
247+
)
246248

247249
if dist_fn is None:
248250
dist_fn = distij

datamol/mol.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from rdkit.Chem.Scaffolds import MurckoScaffold
2626

2727
from rdkit.Chem.MolStandardize import rdMolStandardize
28-
from rdkit.Chem.MolStandardize import canonicalize_tautomer_smiles
2928
from rdkit.Chem.SaltRemover import SaltRemover
3029

3130
import datamol
@@ -395,22 +394,19 @@ def sanitize_first(mols: List[Mol], charge_neutral: bool = False, sanifix: bool
395394
return None
396395

397396

398-
def standardize_smiles(smiles: str, tautomer: bool = False) -> str:
397+
def standardize_smiles(smiles: str) -> str:
399398
r"""
400399
Apply the SMILES standardization procedure. This is a convenience function wrapped around RDKit
401400
smiles standardizer and tautomeric canonicalization.
402401
403402
Args:
404403
smiles: Smiles to standardize
405-
tautomer: Whether to canonicalize tautomers
406404
407405
Returns:
408406
standard_smiles: the standardized smiles
409407
"""
410408

411409
smiles = rdMolStandardize.StandardizeSmiles(smiles)
412-
if tautomer:
413-
smiles = canonicalize_tautomer_smiles(smiles)
414410
return smiles
415411

416412

docs/index.md

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ See below the associated versions of Python and RDKit, for which a minor version
7575

7676
| `datamol` | `python` | `rdkit` |
7777
| --------- | ------------------- | ----------------------------- |
78+
| `0.12.x` | `[3.10, 3.11]` | `[2023.03, 2023.09]` |
7879
| `0.11.x` | `[3.9, 3.10, 3.11]` | `[2022.09, 2023.03]` |
7980
| `0.10.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |
8081
| `0.9.x` | `[3.9, 3.10, 3.11]` | `[2022.03, 2022.09]` |

pyproject.toml

+8-3
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,14 @@ minversion = "6.0"
8383
addopts = "--verbose --cov=datamol --cov-fail-under=85 --cov-report xml --cov-report term --durations=10 -n auto"
8484
testpaths = ["tests"]
8585
filterwarnings = [
86-
"ignore::DeprecationWarning:rdkit.*:",
87-
"ignore::DeprecationWarning:jupyter_client.*:",
88-
"ignore::DeprecationWarning:pkg_resources.*:",
86+
"ignore::DeprecationWarning:rdkit.Chem.MolStandardize",
87+
"ignore::DeprecationWarning:jupyter_client",
88+
"ignore::DeprecationWarning:pkg_resources",
89+
"ignore::DeprecationWarning:joblib.externals.loky.backend",
90+
"ignore::DeprecationWarning:dateutil.tz.tz",
91+
"ignore::DeprecationWarning:joblib._utils",
92+
"ignore::DeprecationWarning:openpyxl.packaging.core",
93+
"ignore::DeprecationWarning:tqdm.std",
8994
]
9095

9196
[tool.coverage.run]

tests/test_mcs.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ def test_find_mcs():
1212
mols = [dm.to_mol(s) for s in smiles_list]
1313
smarts = dm.find_mcs(mols=mols, timeout=2)
1414

15-
# Load/export SMARTS to check RDKit versions compatibility.
16-
excepted_smarts = "[#6&!R]-&!@[#6&!R]-&!@[#8&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]2:&@[#7&R]:&@[#6&R]:&@[#7&R]:&@[#6&R](:&@[#6&R]:&@2:&@[#6&R]:&@[#6&R]:&@1-&!@[#7&!R]-&!@[#6&!R](=&!@[#8&!R])-&!@[#6&!R]=&!@[#6&!R])-&!@[#7&!R]-&!@[#6&R]1:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@[#6&R]:&@1"
17-
excepted_smarts_mol = dm.from_smarts(excepted_smarts)
18-
excepted_smarts = dm.to_smarts(excepted_smarts_mol)
19-
20-
print(smarts)
15+
# NOTE(hadim): the hashes differ depending on the RDKit version
16+
expected_hashes = [
17+
# RDKit >= 2023.09
18+
"762f483ac10cc0f45c5aa2c790f9ef52f8dfb337",
19+
# RDKit <= 2023.03
20+
"49eff32e405d17980fad428cf4063ec52e2c5fda",
21+
]
2122

22-
assert smarts == excepted_smarts
23+
assert dm.hash_mol(dm.from_smarts(smarts)) in expected_hashes

tests/test_mol.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ def test_sanitize_smiles_none():
302302

303303
def test_standardize_smiles_tautomer():
304304
smiles = "C1=CC=CN=C1"
305-
std_smiles = dm.standardize_smiles(smiles, tautomer=True)
305+
std_smiles = dm.standardize_smiles(smiles)
306306
assert "c1ccncc1" == std_smiles
307307

308308

tests/test_utils_fs.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_copy_files(tmp_path):
1717
dm.utils.fs.copy_file(source_path, destination_path)
1818

1919
with open(destination_path) as f:
20-
f.read() == content
20+
assert f.read() == content
2121

2222

2323
def test_copy_dir(tmp_path):
@@ -49,10 +49,10 @@ def test_copy_dir(tmp_path):
4949
assert dm.utils.fs.is_file(file2_path)
5050

5151
with open(file1_path) as f:
52-
f.read() == content
52+
assert f.read() == content
5353

5454
with open(file2_path) as f:
55-
f.read() == content
55+
assert f.read() == content
5656

5757

5858
def test_mkdir(tmp_path):
@@ -91,7 +91,9 @@ def test_cache_dir():
9191

9292
def test_get_mapper(tmp_path):
9393
fsmapper = dm.utils.fs.get_mapper(str(tmp_path / "test.txt"))
94-
assert fsmapper.fs.protocol == "file"
94+
95+
# NOTE(hadim): the protocol value depends on the fsspec version
96+
assert fsmapper.fs.protocol in ["file", ("file", "local")]
9597

9698

9799
@pytest.mark.skip_platform("win")

0 commit comments

Comments
 (0)