Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions nvmolkit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ target_include_directories(_mmffOptimization SYSTEM
PUBLIC ${Boost_INCLUDE_DIRS})
installpythontarget(_mmffOptimization ./)

# Python extension module for UFF optimization, built from uffOptimization.cpp.
# MODULE produces a runtime-loadable plugin library rather than one that other
# targets link against.
add_library(_uffOptimization MODULE uffOptimization.cpp)
# Boost and the Python libraries are linked with PUBLIC visibility.
target_link_libraries(_uffOptimization PUBLIC ${Boost_LIBRARIES}
${PYTHON_LIBRARIES})
# RDKit and the project targets bfgs_uff / ff_utils are implementation details
# of the binding, hence PRIVATE.
target_link_libraries(_uffOptimization PRIVATE ${RDKit_LIBS} bfgs_uff ff_utils)
# NOTE(review): this uses ${Python_INCLUDE_DIRS} while the link line above uses
# ${PYTHON_LIBRARIES}; these prefixes usually come from different find modules.
# Confirm both variables are actually populated by this project's configuration.
target_include_directories(
_uffOptimization PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/utils
${Python_INCLUDE_DIRS})
# Boost headers are registered as SYSTEM include directories.
target_include_directories(_uffOptimization SYSTEM PUBLIC ${Boost_INCLUDE_DIRS})
# installpythontarget is a project-defined helper not visible here; presumably it
# installs the built module into the Python package directory given as "./".
installpythontarget(_uffOptimization ./)

add_library(_batchedForcefield MODULE batchedForcefield.cpp)
target_link_libraries(_batchedForcefield PUBLIC ${Boost_LIBRARIES}
${PYTHON_LIBRARIES})
Expand Down
1 change: 1 addition & 0 deletions nvmolkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- Bulk tanimoto/cosine similarity calculations between fingerprints
- ETKDG conformer generation for multiple molecules
- MMFF optimization for multiple molecules and conformers
- UFF optimization for multiple molecules and conformers
"""

VERSION = "0.4.0"
Expand Down
10 changes: 10 additions & 0 deletions nvmolkit/_uffOptimization.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import Any, List
from rdkit.Chem import Mol

# Stub for the Boost.Python binding registered in uffOptimization.cpp.
#
# molecules: RDKit molecules whose conformers are optimized.
# maxIters: maximum number of optimization iterations (binding default: 1000).
# vdwThresholds: one van der Waals threshold per molecule; the binding rejects a
#     list whose length differs from len(molecules).
# ignoreInterfragInteractions: one flag per molecule, with the same length rule.
# hardwareOptions: the binding's BatchHardwareOptions object; left as Any because
#     that type is not importable from this stub.
# Returns one inner list of energies per molecule, one entry per conformer.
def UFFOptimizeMoleculesConfs(
    molecules: List[Mol],
    maxIters: int = 1000,
    vdwThresholds: List[float] = ...,
    ignoreInterfragInteractions: List[bool] = ...,
    hardwareOptions: Any = ...,
) -> List[List[float]]: ...
198 changes: 198 additions & 0 deletions nvmolkit/tests/test_uff_optimization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import pytest
from rdkit import Chem
from rdkit.Chem import rdDistGeom, rdForceFieldHelpers
from rdkit.ForceField import rdForceField as _rdForceField # noqa: F401
from rdkit.Geometry import Point3D

import nvmolkit.uffOptimization as nvmolkit_uff
from nvmolkit.types import HardwareOptions


@pytest.fixture
def uff_test_mols(num_mols=5):
    """Provide the first ``num_mols`` fully UFF-parameterized molecules from the shared SDF file.

    The requesting test is skipped when the SDF file is missing or when it
    yields fewer than ``num_mols`` usable molecules.
    """
    tests_root = os.path.join(os.path.dirname(__file__), "..", "..", "tests")
    sdf_path = os.path.join(tests_root, "test_data", "MMFF94_dative.sdf")

    if not os.path.exists(sdf_path):
        pytest.skip(f"Test data file not found: {sdf_path}")

    # Lazily drop unreadable records and molecules lacking UFF parameters.
    usable = (
        candidate
        for candidate in Chem.SDMolSupplier(sdf_path, removeHs=False, sanitize=True)
        if candidate is not None and rdForceFieldHelpers.UFFHasAllMoleculeParams(candidate)
    )

    molecules = []
    for candidate in usable:
        molecules.append(candidate)
        if len(molecules) >= num_mols:
            # Enough molecules collected; stop reading the file.
            break

    if len(molecules) < num_mols:
        pytest.skip(f"Expected {num_mols} UFF-valid molecules, found {len(molecules)}")

    return molecules


def create_hard_copy_mols(molecules):
    """Return a new list with a ``Chem.Mol(mol)`` copy of each input molecule, in order."""
    copies = []
    for original in molecules:
        copies.append(Chem.Mol(original))
    return copies


def make_fragmented_mol():
    """Build an embedded two-fragment molecule (``CC.CC`` with hydrogens) with the fragments close together.

    After embedding one conformer, the second fragment is rigidly translated so
    that its first atom sits 2.0 coordinate units along +x from the first atom
    of the first fragment.

    Returns:
        The RDKit molecule carrying the single, adjusted conformer.

    Raises:
        RuntimeError: if conformer embedding produced no conformer.
        AssertionError: if the molecule does not consist of exactly two fragments.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles("CC.CC"))
    params = rdDistGeom.ETKDGv3()
    params.useRandomCoords = True
    params.randomSeed = 42

    # Fail loudly here instead of letting GetConformer() raise an opaque error.
    conf_ids = rdDistGeom.EmbedMultipleConfs(mol, numConfs=1, params=params)
    if len(conf_ids) == 0:
        raise RuntimeError("EmbedMultipleConfs failed for make_fragmented_mol(); check SMILES or random seed")
    conf = mol.GetConformer()

    fragments = Chem.GetMolFrags(mol)
    if len(fragments) != 2:
        raise AssertionError("Expected two fragments for interfragment interaction test")

    anchor = conf.GetAtomPosition(fragments[0][0])
    moved = conf.GetAtomPosition(fragments[1][0])
    # Translation that maps `moved` onto `anchor` plus a 2.0 offset along x.
    shift = Point3D(anchor.x - moved.x + 2.0, anchor.y - moved.y, anchor.z - moved.z)
    for atom_idx in fragments[1]:
        pos = conf.GetAtomPosition(atom_idx)
        conf.SetAtomPosition(atom_idx, Point3D(pos.x + shift.x, pos.y + shift.y, pos.z + shift.z))
    return mol


def calculate_rdkit_uff_energies(
    molecules,
    maxIters=1000,
    vdwThreshold: float = 10.0,
    ignoreInterfragInteractions: bool = True,
):
    """Minimize every conformer of every molecule with RDKit's UFF and collect the final energies.

    Args:
        molecules: Iterable of RDKit molecules; their conformers are minimized in place.
        maxIters: Iteration cap passed to ``Minimize`` as ``maxIts``.
        vdwThreshold: Passed to ``UFFGetMoleculeForceField`` as ``vdwThresh``.
        ignoreInterfragInteractions: Passed through to ``UFFGetMoleculeForceField``.

    Returns:
        A list with one inner list per molecule, holding one energy per
        conformer in the molecule's conformer order.
    """
    all_energies = []
    for mol in molecules:
        # Use the real conformer IDs rather than 0..N-1: IDs need not be
        # contiguous, and ``confId`` is an ID, not a position. They are read up
        # front so the list is fixed before minimization updates coordinates.
        conf_ids = [conf.GetId() for conf in mol.GetConformers()]
        mol_energies = []
        for conf_id in conf_ids:
            ff = rdForceFieldHelpers.UFFGetMoleculeForceField(
                mol,
                vdwThresh=vdwThreshold,
                confId=conf_id,
                ignoreInterfragInteractions=ignoreInterfragInteractions,
            )
            ff.Initialize()
            ff.Minimize(maxIts=maxIters)
            mol_energies.append(ff.CalcEnergy())
        all_energies.append(mol_energies)
    return all_energies


def test_uff_optimization_serial_vs_rdkit(uff_test_mols):
    """Optimizing one molecule per call must match RDKit's UFF energies within 1e-3 relative error."""
    reference_mols = create_hard_copy_mols(uff_test_mols)
    candidate_mols = create_hard_copy_mols(uff_test_mols)

    rdkit_energies = calculate_rdkit_uff_energies(reference_mols, maxIters=200)

    # One call per molecule; the per-call results are flattened in input order.
    nvmolkit_energies = [
        per_mol
        for mol in candidate_mols
        for per_mol in nvmolkit_uff.UFFOptimizeMoleculesConfs([mol], maxIters=200)
    ]

    assert len(rdkit_energies) == len(nvmolkit_energies)
    paired_mols = zip(rdkit_energies, nvmolkit_energies)
    for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(paired_mols):
        assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies)
        paired_confs = zip(rdkit_mol_energies, nvmolkit_mol_energies)
        for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(paired_confs):
            diff = abs(rdkit_energy - nvmolkit_energy)
            # Fall back to the absolute difference when the reference is ~0.
            if abs(rdkit_energy) > 1e-10:
                rel = diff / abs(rdkit_energy)
            else:
                rel = diff
            assert rel < 1e-3, (
                f"Molecule {mol_idx}, conformer {conf_idx}: "
                f"RDKit={rdkit_energy:.6f} nvMolKit={nvmolkit_energy:.6f} rel={rel:.6f}"
            )


def test_uff_optimization_batch_vs_rdkit(uff_test_mols):
    """A single batched call (batchSize=2, batchesPerGpu=1) must match RDKit's UFF energies."""
    reference_mols = create_hard_copy_mols(uff_test_mols)
    candidate_mols = create_hard_copy_mols(uff_test_mols)

    rdkit_energies = calculate_rdkit_uff_energies(reference_mols, maxIters=200)
    nvmolkit_energies = nvmolkit_uff.UFFOptimizeMoleculesConfs(
        candidate_mols,
        maxIters=200,
        hardwareOptions=HardwareOptions(batchSize=2, batchesPerGpu=1),
    )

    assert len(rdkit_energies) == len(nvmolkit_energies)
    paired_mols = zip(rdkit_energies, nvmolkit_energies)
    for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(paired_mols):
        assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies)
        paired_confs = zip(rdkit_mol_energies, nvmolkit_mol_energies)
        for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(paired_confs):
            diff = abs(rdkit_energy - nvmolkit_energy)
            # Fall back to the absolute difference when the reference is ~0.
            if abs(rdkit_energy) > 1e-10:
                rel = diff / abs(rdkit_energy)
            else:
                rel = diff
            assert rel < 1e-3, (
                f"Molecule {mol_idx}, conformer {conf_idx}: "
                f"RDKit={rdkit_energy:.6f} nvMolKit={nvmolkit_energy:.6f} rel={rel:.6f}"
            )


def test_uff_optimization_empty_input():
    """An empty molecule list must produce an empty result."""
    energies = nvmolkit_uff.UFFOptimizeMoleculesConfs([])
    assert energies == []


def test_uff_optimization_invalid_input():
    """``None`` and unparameterizable entries must raise ``ValueError`` reporting their indices.

    The second exception argument is expected to map "none" to the indices of
    ``None`` entries and "no_params" to the indices of the other rejected molecules.
    """
    bad_inputs = [None, Chem.MolFromSmiles("*")]
    with pytest.raises(ValueError) as exc_info:
        nvmolkit_uff.UFFOptimizeMoleculesConfs(bad_inputs)

    # Checked after the context manager exits so the assertions actually run.
    failure_report = exc_info.value.args[1]
    assert failure_report["none"] == [0]
    assert failure_report["no_params"] == [1]


def test_uff_optimization_threshold_and_interfrag_vs_rdkit():
    """Per-molecule vdW thresholds and interfragment flags must match RDKit run with the same settings."""
    mols = [make_fragmented_mol(), make_fragmented_mol()]
    rdkit_mols = create_hard_copy_mols(mols)
    nvmolkit_mols = create_hard_copy_mols(mols)

    thresholds = [25.0, 100.0]
    ignore_interfrag = [False, True]

    # Reference: each molecule is minimized by RDKit with its own settings.
    rdkit_energies = []
    for mol, threshold, ignore in zip(rdkit_mols, thresholds, ignore_interfrag):
        single_result = calculate_rdkit_uff_energies(
            [mol],
            maxIters=1000,
            vdwThreshold=threshold,
            ignoreInterfragInteractions=ignore,
        )
        rdkit_energies.append(single_result[0])

    nvmolkit_energies = nvmolkit_uff.UFFOptimizeMoleculesConfs(
        nvmolkit_mols,
        maxIters=1000,
        vdwThreshold=thresholds,
        ignoreInterfragInteractions=ignore_interfrag,
    )

    assert len(rdkit_energies) == len(nvmolkit_energies)
    paired_mols = zip(rdkit_energies, nvmolkit_energies)
    for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(paired_mols):
        assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies)
        paired_confs = zip(rdkit_mol_energies, nvmolkit_mol_energies)
        for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(paired_confs):
            diff = abs(rdkit_energy - nvmolkit_energy)
            # Fall back to the absolute difference when the reference is ~0.
            if abs(rdkit_energy) > 1e-10:
                rel = diff / abs(rdkit_energy)
            else:
                rel = diff
            assert rel < 1e-3, (
                f"Molecule {mol_idx}, conformer {conf_idx}: "
                f"RDKit={rdkit_energy:.6f} nvMolKit={nvmolkit_energy:.6f} rel={rel:.6f}"
            )
114 changes: 114 additions & 0 deletions nvmolkit/uffOptimization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <GraphMol/ROMol.h>

#include <boost/python.hpp>
#include <stdexcept>
#include <string>
#include <vector>

#include "bfgs_uff.h"

namespace bp = boost::python;

//! Builds a Python list holding the elements of \p vec in their original order.
template <typename T> bp::list vectorToList(const std::vector<T>& vec) {
  bp::list pyList;
  for (auto it = vec.begin(); it != vec.end(); ++it) {
    pyList.append(*it);
  }
  return pyList;
}

//! Builds a Python list of lists: one inner list per inner vector, in order.
template <typename T> bp::list vectorOfVectorsToList(const std::vector<std::vector<T>>& vecOfVecs) {
  bp::list rows;
  for (auto rowIt = vecOfVecs.begin(); rowIt != vecOfVecs.end(); ++rowIt) {
    rows.append(vectorToList(*rowIt));
  }
  return rows;
}

//! Converts a Python list of molecules into non-owning RDKit::ROMol pointers.
//!
//! \param molecules Python list whose items are extracted as RDKit::ROMol*.
//! \return The extracted pointers, in list order.
//! \throws std::invalid_argument if an item extracts to a null pointer.
static std::vector<RDKit::ROMol*> extractMolecules(const bp::list& molecules) {
  const int count = bp::len(molecules);
  std::vector<RDKit::ROMol*> extracted;
  extracted.reserve(count);
  for (int idx = 0; idx < count; ++idx) {
    const bp::object item(molecules[idx]);
    RDKit::ROMol* const molPtr = bp::extract<RDKit::ROMol*>(item)();
    if (molPtr == nullptr) {
      throw std::invalid_argument("Invalid molecule at index " + std::to_string(idx));
    }
    extracted.push_back(molPtr);
  }
  return extracted;
}

//! Converts a Python list into a vector of doubles after validating its length.
//!
//! \param values       Python list whose items are extracted as double.
//! \param expectedSize Required number of items.
//! \param name         Argument name used in the error message.
//! \return The extracted values, in list order.
//! \throws std::invalid_argument if the list length differs from \p expectedSize.
static std::vector<double> extractDoubleList(const bp::list& values, const int expectedSize, const std::string& name) {
  const auto actualSize = bp::len(values);
  if (actualSize != expectedSize) {
    throw std::invalid_argument("Expected " + std::to_string(expectedSize) + " values for " + name + ", got " +
                                std::to_string(actualSize));
  }
  std::vector<double> parsed;
  parsed.reserve(expectedSize);
  for (int idx = 0; idx < expectedSize; ++idx) {
    const double item = bp::extract<double>(values[idx]);
    parsed.push_back(item);
  }
  return parsed;
}

//! Converts a Python list into a vector of bools after validating its length.
//!
//! \param values       Python list whose items are extracted as bool.
//! \param expectedSize Required number of items.
//! \param name         Argument name used in the error message.
//! \return The extracted flags, in list order.
//! \throws std::invalid_argument if the list length differs from \p expectedSize.
static std::vector<bool> extractBoolList(const bp::list& values, const int expectedSize, const std::string& name) {
  const auto actualSize = bp::len(values);
  if (actualSize != expectedSize) {
    throw std::invalid_argument("Expected " + std::to_string(expectedSize) + " values for " + name + ", got " +
                                std::to_string(actualSize));
  }
  std::vector<bool> parsed;
  parsed.reserve(expectedSize);
  for (int idx = 0; idx < expectedSize; ++idx) {
    const bool item = bp::extract<bool>(values[idx]);
    parsed.push_back(item);
  }
  return parsed;
}

BOOST_PYTHON_MODULE(_uffOptimization) {
bp::def(
"UFFOptimizeMoleculesConfs",
+[](const bp::list& molecules,
int maxIters,
const bp::list& vdwThresholds,
const bp::list& ignoreInterfragInteractions,
const nvMolKit::BatchHardwareOptions& hardwareOptions) -> bp::list {
auto molsVec = extractMolecules(molecules);
const int numMols = static_cast<int>(molsVec.size());
const auto thresholdVec = extractDoubleList(vdwThresholds, numMols, "vdwThreshold");
const auto ignoreVec = extractBoolList(ignoreInterfragInteractions, numMols, "ignoreInterfragInteractions");
const auto result =
nvMolKit::UFF::UFFOptimizeMoleculesConfsBfgs(molsVec, maxIters, thresholdVec, ignoreVec, hardwareOptions);
return vectorOfVectorsToList(result);
},
(bp::arg("molecules"),
bp::arg("maxIters") = 1000,
bp::arg("vdwThresholds"),
bp::arg("ignoreInterfragInteractions"),
bp::arg("hardwareOptions") = nvMolKit::BatchHardwareOptions()),
"Optimize conformers for multiple molecules using UFF force field.\n"
"\n"
"Args:\n"
" molecules: List of RDKit molecules to optimize\n"
" maxIters: Maximum number of optimization iterations (default: 1000)\n"
" vdwThresholds: Per-molecule van der Waals thresholds\n"
" ignoreInterfragInteractions: Per-molecule interfragment interaction flags\n"
" hardwareOptions: BatchHardwareOptions object with hardware settings\n"
"\n"
"Returns:\n"
" List of lists of energies, where each inner list contains energies for conformers of one molecule");
}
Loading
Loading