diff --git a/nvmolkit/CMakeLists.txt b/nvmolkit/CMakeLists.txt index be4069b..1e6aa04 100644 --- a/nvmolkit/CMakeLists.txt +++ b/nvmolkit/CMakeLists.txt @@ -55,6 +55,16 @@ target_include_directories(_mmffOptimization SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) installpythontarget(_mmffOptimization ./) +add_library(_uffOptimization MODULE uffOptimization.cpp) +target_link_libraries(_uffOptimization PUBLIC ${Boost_LIBRARIES} + ${PYTHON_LIBRARIES}) +target_link_libraries(_uffOptimization PRIVATE ${RDKit_LIBS} bfgs_uff ff_utils) +target_include_directories( + _uffOptimization PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/utils + ${Python_INCLUDE_DIRS}) +target_include_directories(_uffOptimization SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) +installpythontarget(_uffOptimization ./) + add_library(_batchedForcefield MODULE batchedForcefield.cpp) target_link_libraries(_batchedForcefield PUBLIC ${Boost_LIBRARIES} ${PYTHON_LIBRARIES}) diff --git a/nvmolkit/__init__.py b/nvmolkit/__init__.py index 760cc84..96f694c 100644 --- a/nvmolkit/__init__.py +++ b/nvmolkit/__init__.py @@ -25,6 +25,7 @@ - Bulk tanimoto/cosine similarity calculations between fingerprints - ETKDG conformer generation for multiple molecules - MMFF optimization for multiple molecules and conformers +- UFF optimization for multiple molecules and conformers """ VERSION = "0.4.0" diff --git a/nvmolkit/_uffOptimization.pyi b/nvmolkit/_uffOptimization.pyi new file mode 100644 index 0000000..6e660fa --- /dev/null +++ b/nvmolkit/_uffOptimization.pyi @@ -0,0 +1,10 @@ +from typing import Any, List +from rdkit.Chem import Mol + +def UFFOptimizeMoleculesConfs( + molecules: List[Mol], + maxIters: int = 1000, + vdwThresholds: Any = ..., + ignoreInterfragInteractions: Any = ..., + hardwareOptions: Any = ..., +) -> List[List[float]]: ... diff --git a/nvmolkit/tests/test_uff_optimization.py b/nvmolkit/tests/test_uff_optimization.py new file mode 100644 index 0000000..6fabdb7 --- /dev/null +++ b/nvmolkit/tests/test_uff_optimization.py @@ -0,0 +1,198 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +from rdkit import Chem +from rdkit.Chem import rdDistGeom, rdForceFieldHelpers +from rdkit.ForceField import rdForceField as _rdForceField # noqa: F401 +from rdkit.Geometry import Point3D + +import nvmolkit.uffOptimization as nvmolkit_uff +from nvmolkit.types import HardwareOptions + + +@pytest.fixture +def uff_test_mols(num_mols=5): + """Load a handful of UFF-valid molecules from the shared validation set.""" + sdf_path = os.path.join( + os.path.dirname(__file__), + "..", + "..", + "tests", + "test_data", + "MMFF94_dative.sdf", + ) + + if not os.path.exists(sdf_path): + pytest.skip(f"Test data file not found: {sdf_path}") + + supplier = Chem.SDMolSupplier(sdf_path, removeHs=False, sanitize=True) + molecules = [] + for mol in supplier: + if mol is None: + continue + if not rdForceFieldHelpers.UFFHasAllMoleculeParams(mol): + continue + molecules.append(mol) + if len(molecules) >= num_mols: + break + + if len(molecules) < num_mols: + pytest.skip(f"Expected {num_mols} UFF-valid molecules, found {len(molecules)}") + + return molecules + + +def create_hard_copy_mols(molecules): + return [Chem.Mol(mol) for mol in molecules] + + +def make_fragmented_mol(): + mol = Chem.AddHs(Chem.MolFromSmiles("CC.CC")) + params = rdDistGeom.ETKDGv3() + params.useRandomCoords = True + params.randomSeed = 42 + rdDistGeom.EmbedMultipleConfs(mol, numConfs=1, params=params) + conf = mol.GetConformer() + fragments = Chem.GetMolFrags(mol) + if len(fragments) != 2: + raise AssertionError("Expected two fragments for interfragment interaction test") + anchor = conf.GetAtomPosition(fragments[0][0]) + moved = conf.GetAtomPosition(fragments[1][0]) + shift = Point3D(anchor.x - moved.x + 2.0, anchor.y - moved.y, anchor.z - moved.z) + for atom_idx in fragments[1]: + pos = conf.GetAtomPosition(atom_idx) + conf.SetAtomPosition(atom_idx, Point3D(pos.x + shift.x, pos.y + shift.y, pos.z + shift.z)) + return mol + + +def calculate_rdkit_uff_energies( + molecules, + maxIters=1000, + vdwThreshold: float = 10.0, + ignoreInterfragInteractions: bool = True, +): + all_energies = [] + for mol in molecules: + mol_energies = [] + for conf_id in range(mol.GetNumConformers()): + ff = rdForceFieldHelpers.UFFGetMoleculeForceField( + mol, + vdwThresh=vdwThreshold, + confId=conf_id, + ignoreInterfragInteractions=ignoreInterfragInteractions, + ) + ff.Initialize() + ff.Minimize(maxIts=maxIters) + mol_energies.append(ff.CalcEnergy()) + all_energies.append(mol_energies) + return all_energies + + +def test_uff_optimization_serial_vs_rdkit(uff_test_mols): + rdkit_mols = create_hard_copy_mols(uff_test_mols) + nvmolkit_mols = create_hard_copy_mols(uff_test_mols) + + rdkit_energies = calculate_rdkit_uff_energies(rdkit_mols, maxIters=200) + + nvmolkit_energies = [] + for mol in nvmolkit_mols: + mol_energies = nvmolkit_uff.UFFOptimizeMoleculesConfs([mol], maxIters=200) + nvmolkit_energies.extend(mol_energies) + + assert len(rdkit_energies) == len(nvmolkit_energies) + for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, nvmolkit_energies)): + assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies) + for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(zip(rdkit_mol_energies, nvmolkit_mol_energies)): + diff = abs(rdkit_energy - nvmolkit_energy) + rel = diff / abs(rdkit_energy) if abs(rdkit_energy) > 1e-10 else diff + assert rel < 1e-3, ( + f"Molecule {mol_idx}, conformer {conf_idx}: " + f"RDKit={rdkit_energy:.6f} nvMolKit={nvmolkit_energy:.6f} rel={rel:.6f}" + ) + + +def test_uff_optimization_batch_vs_rdkit(uff_test_mols): + rdkit_mols = create_hard_copy_mols(uff_test_mols) + nvmolkit_mols = create_hard_copy_mols(uff_test_mols) + + rdkit_energies = calculate_rdkit_uff_energies(rdkit_mols, maxIters=200) + hardware_options = HardwareOptions(batchSize=2, batchesPerGpu=1) + nvmolkit_energies = nvmolkit_uff.UFFOptimizeMoleculesConfs( + nvmolkit_mols, + maxIters=200, + hardwareOptions=hardware_options, + ) + + assert len(rdkit_energies) == len(nvmolkit_energies) + for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, nvmolkit_energies)): + assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies) + for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(zip(rdkit_mol_energies, nvmolkit_mol_energies)): + diff = abs(rdkit_energy - nvmolkit_energy) + rel = diff / abs(rdkit_energy) if abs(rdkit_energy) > 1e-10 else diff + assert rel < 1e-3, ( + f"Molecule {mol_idx}, conformer {conf_idx}: " + f"RDKit={rdkit_energy:.6f} nvMolKit={nvmolkit_energy:.6f} rel={rel:.6f}" + ) + + +def test_uff_optimization_empty_input(): + assert nvmolkit_uff.UFFOptimizeMoleculesConfs([]) == [] + + +def test_uff_optimization_invalid_input(): + unsupported = Chem.MolFromSmiles("*") + with pytest.raises(ValueError) as exc_info: + nvmolkit_uff.UFFOptimizeMoleculesConfs([None, unsupported]) + assert exc_info.value.args[1]["none"] == [0] + assert exc_info.value.args[1]["no_params"] == [1] + + +def test_uff_optimization_threshold_and_interfrag_vs_rdkit(): + mols = [make_fragmented_mol(), make_fragmented_mol()] + rdkit_mols = create_hard_copy_mols(mols) + nvmolkit_mols = create_hard_copy_mols(mols) + + thresholds = [25.0, 100.0] + ignore_interfrag = [False, True] + + rdkit_energies = [ + calculate_rdkit_uff_energies( + [mol], + maxIters=1000, + vdwThreshold=threshold, + ignoreInterfragInteractions=ignore, + )[0] + for mol, threshold, ignore in zip(rdkit_mols, thresholds, ignore_interfrag) + ] + nvmolkit_energies = nvmolkit_uff.UFFOptimizeMoleculesConfs( + nvmolkit_mols, + maxIters=1000, + vdwThreshold=thresholds, + ignoreInterfragInteractions=ignore_interfrag, + ) + + assert len(rdkit_energies) == len(nvmolkit_energies) + for mol_idx, (rdkit_mol_energies, nvmolkit_mol_energies) in enumerate(zip(rdkit_energies, nvmolkit_energies)): + assert len(rdkit_mol_energies) == len(nvmolkit_mol_energies) + for conf_idx, (rdkit_energy, nvmolkit_energy) in enumerate(zip(rdkit_mol_energies, nvmolkit_mol_energies)): + diff = abs(rdkit_energy - nvmolkit_energy) + rel = diff / abs(rdkit_energy) if abs(rdkit_energy) > 1e-10 else diff + assert rel < 1e-3, ( + f"Molecule {mol_idx}, conformer {conf_idx}: " + f"RDKit={rdkit_energy:.6f} nvMolKit={nvmolkit_energy:.6f} rel={rel:.6f}" + ) diff --git a/nvmolkit/uffOptimization.cpp b/nvmolkit/uffOptimization.cpp new file mode 100644 index 0000000..bbb79c8 --- /dev/null +++ b/nvmolkit/uffOptimization.cpp @@ -0,0 +1,114 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include + +#include "bfgs_uff.h" + +namespace bp = boost::python; + +template bp::list vectorToList(const std::vector& vec) { + bp::list list; + for (const auto& value : vec) { + list.append(value); + } + return list; +} + +template bp::list vectorOfVectorsToList(const std::vector>& vecOfVecs) { + bp::list outerList; + for (const auto& innerVec : vecOfVecs) { + outerList.append(vectorToList(innerVec)); + } + return outerList; +} + +static std::vector extractMolecules(const bp::list& molecules) { + const int n = bp::len(molecules); + std::vector mols; + mols.reserve(n); + for (int i = 0; i < n; ++i) { + auto* mol = bp::extract(bp::object(molecules[i]))(); + if (mol == nullptr) { + throw std::invalid_argument("Invalid molecule at index " + std::to_string(i)); + } + mols.push_back(mol); + } + return mols; +} + +static std::vector extractDoubleList(const bp::list& values, const int expectedSize, const std::string& name) { + if (bp::len(values) != expectedSize) { + throw std::invalid_argument("Expected " + std::to_string(expectedSize) + " values for " + name + ", got " + + std::to_string(bp::len(values))); + } + std::vector result; + result.reserve(expectedSize); + for (int i = 0; i < expectedSize; ++i) { + result.push_back(bp::extract(values[i])); + } + return result; +} + +static std::vector extractBoolList(const bp::list& values, const int expectedSize, const std::string& name) { + if (bp::len(values) != expectedSize) { + throw std::invalid_argument("Expected " + std::to_string(expectedSize) + " values for " + name + ", got " + + std::to_string(bp::len(values))); + } + std::vector result; + result.reserve(expectedSize); + for (int i = 0; i < expectedSize; ++i) { + result.push_back(bp::extract(values[i])); + } + return result; +} + +BOOST_PYTHON_MODULE(_uffOptimization) { + bp::def( + "UFFOptimizeMoleculesConfs", + +[](const bp::list& molecules, + int maxIters, + const bp::list& vdwThresholds, + const bp::list& ignoreInterfragInteractions, + const nvMolKit::BatchHardwareOptions& hardwareOptions) -> bp::list { + auto molsVec = extractMolecules(molecules); + const int numMols = static_cast(molsVec.size()); + const auto thresholdVec = extractDoubleList(vdwThresholds, numMols, "vdwThreshold"); + const auto ignoreVec = extractBoolList(ignoreInterfragInteractions, numMols, "ignoreInterfragInteractions"); + const auto result = + nvMolKit::UFF::UFFOptimizeMoleculesConfsBfgs(molsVec, maxIters, thresholdVec, ignoreVec, hardwareOptions); + return vectorOfVectorsToList(result); + }, + (bp::arg("molecules"), + bp::arg("maxIters") = 1000, + bp::arg("vdwThresholds"), + bp::arg("ignoreInterfragInteractions"), + bp::arg("hardwareOptions") = nvMolKit::BatchHardwareOptions()), + "Optimize conformers for multiple molecules using UFF force field.\n" + "\n" + "Args:\n" + " molecules: List of RDKit molecules to optimize\n" + " maxIters: Maximum number of optimization iterations (default: 1000)\n" + " vdwThresholds: Per-molecule van der Waals thresholds\n" + " ignoreInterfragInteractions: Per-molecule interfragment interaction flags\n" + " hardwareOptions: BatchHardwareOptions object with hardware settings\n" + "\n" + "Returns:\n" + " List of lists of energies, where each inner list contains energies for conformers of one molecule"); +} diff --git a/nvmolkit/uffOptimization.py b/nvmolkit/uffOptimization.py new file mode 100644 index 0000000..791bf57 --- /dev/null +++ b/nvmolkit/uffOptimization.py @@ -0,0 +1,100 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +"""GPU-accelerated UFF optimization for molecular conformers.""" + +from collections.abc import Sequence +from typing import TYPE_CHECKING + +from rdkit.Chem import rdForceFieldHelpers + +if TYPE_CHECKING: + from rdkit.Chem import Mol + +from nvmolkit import _uffOptimization +from nvmolkit.types import HardwareOptions + + +def UFFOptimizeMoleculesConfs( + molecules: list["Mol"], + maxIters: int = 1000, + vdwThreshold: float | Sequence[float] = 10.0, + ignoreInterfragInteractions: bool | Sequence[bool] = True, + hardwareOptions: HardwareOptions | None = None, +) -> list[list[float]]: + """Optimize conformers for multiple molecules using the UFF force field. + + Args: + molecules: List of RDKit molecules to optimize. Each molecule should already + contain conformers. + maxIters: Maximum number of UFF minimization iterations. + vdwThreshold: Van der Waals threshold used when constructing the UFF force + field. May be provided as a scalar or per-molecule sequence. + ignoreInterfragInteractions: If ``True``, omit non-bonded terms between + fragments. May be provided as a scalar or per-molecule sequence. + hardwareOptions: Configures CPU and GPU batching, threading, and device + selection. Defaults are chosen automatically when omitted. + + Returns: + List of lists of energies, where each inner list contains optimized + conformer energies for the corresponding molecule. + + Raises: + ValueError: If molecules contains ``None`` entries or molecules lacking UFF + atom types. ``e.args[1]`` contains keys ``"none"`` and ``"no_params"``. + """ + if not molecules: + return [] + + none_indices = [] + no_params_indices = [] + for i, mol in enumerate(molecules): + if mol is None: + none_indices.append(i) + elif not rdForceFieldHelpers.UFFHasAllMoleculeParams(mol): + no_params_indices.append(i) + + if none_indices or no_params_indices: + parts = [] + if none_indices: + parts.append(f"None at indices {none_indices}") + if no_params_indices: + parts.append(f"lacking UFF atom types at indices {no_params_indices}") + raise ValueError( + "; ".join(parts), + {"none": none_indices, "no_params": no_params_indices}, + ) + + def _normalize_scalar_or_list(value, name: str): + if isinstance(value, Sequence) and not isinstance(value, (str, bytes)): + if len(value) != len(molecules): + raise ValueError(f"Expected {len(molecules)} values for {name}, got {len(value)}") + return list(value) + return [value for _ in molecules] + + thresholds = [float(v) for v in _normalize_scalar_or_list(vdwThreshold, "vdwThreshold")] + interfrag_flags = [ + bool(v) for v in _normalize_scalar_or_list(ignoreInterfragInteractions, "ignoreInterfragInteractions") + ] + + if hardwareOptions is None: + hardwareOptions = HardwareOptions() + return _uffOptimization.UFFOptimizeMoleculesConfs( + molecules, + int(maxIters), + thresholds, + interfrag_flags, + hardwareOptions._as_native(), + ) diff --git a/src/minimizer/CMakeLists.txt b/src/minimizer/CMakeLists.txt index d301269..e51e7b8 100644 --- a/src/minimizer/CMakeLists.txt +++ b/src/minimizer/CMakeLists.txt @@ -22,17 +22,39 @@ target_link_libraries( cub_helpers cccl_interface) target_include_directories(bfgs PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +add_library(bfgs_common bfgs_common.cpp) +target_link_libraries( + bfgs_common + PUBLIC host_vector device + PRIVATE ${RDKit_LIBS} OpenMP::OpenMP_CXX) +target_include_directories(bfgs_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + add_library(bfgs_mmff bfgs_mmff.cpp) target_link_libraries( bfgs_mmff PUBLIC bfgs PRIVATE ${RDKit_LIBS} + bfgs_common mmff_batched_forcefield rdkit_mmff_flattened - device device_vector OpenMP::OpenMP_CXX openmp_helpers cccl_interface) target_include_directories(bfgs_mmff PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/src/forcefields) + +add_library(bfgs_uff bfgs_uff.cpp) +target_link_libraries( + bfgs_uff + PUBLIC bfgs + PRIVATE ${RDKit_LIBS} + bfgs_common + uff_batched_forcefield + rdkit_uff_flattened + device_vector + OpenMP::OpenMP_CXX + openmp_helpers + cccl_interface) +target_include_directories(bfgs_uff PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/src/forcefields) diff --git a/src/minimizer/bfgs_common.cpp b/src/minimizer/bfgs_common.cpp new file mode 100644 index 0000000..718b6cc --- /dev/null +++ b/src/minimizer/bfgs_common.cpp @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "bfgs_common.h" + +#include +#include +#include + +#include +#include +#include + +namespace nvMolKit { + +void ThreadLocalBuffers::ensureCapacity(const size_t positionsSize, const size_t energiesSize) { + constexpr double extraCapacityFactor = 1.3; + const auto newSize = static_cast(static_cast(positionsSize) * extraCapacityFactor); + if (positions.size() < positionsSize) { + positions.resize(newSize); + } + if (energies.size() < energiesSize) { + energies.resize(static_cast(static_cast(energiesSize) * extraCapacityFactor)); + } + if (initialPositions.size() < positionsSize) { + initialPositions.resize(newSize); + } +} + +BatchExecutionContext setupBatchExecution(const BatchHardwareOptions& perfOptions) { + BatchExecutionContext ctx; + ctx.batchSize = perfOptions.batchSize == -1 ? 500 : static_cast(perfOptions.batchSize); + + std::vector gpuIds = perfOptions.gpuIds; + if (gpuIds.empty()) { + const int numDevices = countCudaDevices(); + if (numDevices == 0) { + throw std::runtime_error("No CUDA devices found"); + } + gpuIds.resize(numDevices); + std::iota(gpuIds.begin(), gpuIds.end(), 0); + } + const int batchesPerGpu = perfOptions.batchesPerGpu == -1 ? 4 : perfOptions.batchesPerGpu; + ctx.numThreads = + perfOptions.batchesPerGpu > 0 ? batchesPerGpu * static_cast(gpuIds.size()) : omp_get_max_threads(); + + ctx.streamPool.reserve(ctx.numThreads); + ctx.devicesPerThread.resize(ctx.numThreads); + for (int i = 0; i < ctx.numThreads; ++i) { + const int gpuId = gpuIds[i % gpuIds.size()]; + const WithDevice dev(gpuId); + ctx.streamPool.emplace_back(); + ctx.devicesPerThread[i] = gpuId; + } + + return ctx; +} + +std::vector flattenConformers(const std::vector& mols, + std::vector>& moleculeEnergies) { + moleculeEnergies.resize(mols.size()); + std::vector allConformers; + for (size_t molIdx = 0; molIdx < mols.size(); ++molIdx) { + auto* mol = mols[molIdx]; + if (mol == nullptr) { + throw std::invalid_argument("Invalid molecule pointer at index " + std::to_string(molIdx)); + } + moleculeEnergies[molIdx].resize(mol->getNumConformers()); + size_t confIdx = 0; + for (auto confIter = mol->beginConformers(); confIter != mol->endConformers(); ++confIter, ++confIdx) { + allConformers.push_back({mol, molIdx, &(**confIter), static_cast((*confIter)->getId()), confIdx}); + } + } + return allConformers; +} + +void writeBackResults(const std::vector& batchConformers, + const std::vector& conformerAtomStarts, + const ThreadLocalBuffers& buffers, + std::vector>& moleculeEnergies) { + for (size_t i = 0; i < batchConformers.size(); ++i) { + const auto& confInfo = batchConformers[i]; + const uint32_t numAtoms = confInfo.mol->getNumAtoms(); + const uint32_t atomStartIdx = conformerAtomStarts[i]; + for (uint32_t j = 0; j < numAtoms; ++j) { + confInfo.conformer->setAtomPos(j, + RDGeom::Point3D(buffers.positions[3 * (atomStartIdx + j) + 0], + buffers.positions[3 * (atomStartIdx + j) + 1], + buffers.positions[3 * (atomStartIdx + j) + 2])); + } + moleculeEnergies[confInfo.molIdx][confInfo.confIdx] = buffers.energies[i]; + } +} + +} // namespace nvMolKit diff --git a/src/minimizer/bfgs_common.h b/src/minimizer/bfgs_common.h new file mode 100644 index 0000000..61b15bd --- /dev/null +++ b/src/minimizer/bfgs_common.h @@ -0,0 +1,73 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_BFGS_COMMON_H +#define NVMOLKIT_BFGS_COMMON_H + +#include +#include + +#include "../hardware_options.h" +#include "device.h" +#include "host_vector.h" + +namespace RDKit { +class ROMol; +class Conformer; +} // namespace RDKit + +namespace nvMolKit { + +//! Thread-local pinned memory buffers for async host-device transfers during BFGS minimization. +struct ThreadLocalBuffers { + PinnedHostVector positions; + PinnedHostVector energies; + PinnedHostVector initialPositions; + + void ensureCapacity(size_t positionsSize, size_t energiesSize); +}; + +struct ConformerInfo { + RDKit::ROMol* mol; + size_t molIdx; + RDKit::Conformer* conformer; + int conformerId; + size_t confIdx; +}; + +struct BatchExecutionContext { + size_t batchSize; + int numThreads; + std::vector streamPool; + std::vector devicesPerThread; +}; + +/// Set up GPU streams, thread counts, and batch sizing from hardware options. +BatchExecutionContext setupBatchExecution(const BatchHardwareOptions& perfOptions); + +/// Flatten all conformers from all molecules into a single list, validating molecule pointers +/// and initializing the per-molecule energy output vectors. +std::vector flattenConformers(const std::vector& mols, + std::vector>& moleculeEnergies); + +/// Write optimized positions and energies back from host buffers into the RDKit conformers. +void writeBackResults(const std::vector& batchConformers, + const std::vector& conformerAtomStarts, + const ThreadLocalBuffers& buffers, + std::vector>& moleculeEnergies); + +} // namespace nvMolKit + +#endif // NVMOLKIT_BFGS_COMMON_H diff --git a/src/minimizer/bfgs_mmff.cpp b/src/minimizer/bfgs_mmff.cpp index 7e167f7..bf5da84 100644 --- a/src/minimizer/bfgs_mmff.cpp +++ b/src/minimizer/bfgs_mmff.cpp @@ -20,10 +20,9 @@ #include +#include "bfgs_common.h" #include "bfgs_minimize.h" -#include "device.h" #include "ff_utils.h" -#include "host_vector.h" #include "mmff_batched_forcefield.h" #include "mmff_flattened_builder.h" #include "nvtx.h" @@ -36,27 +35,6 @@ struct CachedMoleculeData { EnergyForceContribsHost ffParams; }; -//! Thread-local pinned memory buffers for async transfers -struct ThreadLocalBuffers { - PinnedHostVector positions; - PinnedHostVector energies; - PinnedHostVector initialPositions; - - void ensureCapacity(const size_t positionsSize, const size_t energiesSize) { - constexpr double extraCapacityFactor = 1.3; - const auto newSize = static_cast(static_cast(positionsSize) * extraCapacityFactor); - if (positions.size() < positionsSize) { - positions.resize(newSize); - } - if (energies.size() < energiesSize) { - energies.resize(newSize); - } - if (initialPositions.size() < positionsSize) { - initialPositions.resize(newSize); - } - } -}; - std::vector> MMFFOptimizeMoleculesConfsBfgs(std::vector& mols, const int maxIters, const MMFFProperties& properties, @@ -77,103 +55,47 @@ std::vector> MMFFOptimizeMoleculesConfsBfgs(std::vector gpuIds = perfOptions.gpuIds; - if (gpuIds.empty()) { - const int numDevices = countCudaDevices(); - if (numDevices == 0) { - throw std::runtime_error("No CUDA devices found for MMFF relaxation"); - } - gpuIds.resize(numDevices); - std::iota(gpuIds.begin(), gpuIds.end(), 0); // Fill with device IDs 0, 1, ..., numDevices-1 - } - const int batchesPerGpu = perfOptions.batchesPerGpu == -1 ? 4 : perfOptions.batchesPerGpu; - const int numThreads = - perfOptions.batchesPerGpu > 0 ? batchesPerGpu * static_cast(gpuIds.size()) : omp_get_max_threads(); + auto ctx = setupBatchExecution(perfOptions); + std::vector> moleculeEnergies; + const auto allConformers = flattenConformers(mols, moleculeEnergies); - // Initialize result structure - std::vector> moleculeEnergies(mols.size()); - - // Flatten all conformers from all molecules for better load balancing - struct ConformerInfo { - RDKit::ROMol* mol; - size_t molIdx; - RDKit::Conformer* conformer; - size_t confIdx; - }; - - std::vector allConformers; - for (size_t molIdx = 0; molIdx < mols.size(); ++molIdx) { - auto* mol = mols[molIdx]; - moleculeEnergies[molIdx].resize(mol->getNumConformers()); - - size_t confIdx = 0; - for (auto confIter = mol->beginConformers(); confIter != mol->endConformers(); ++confIter, ++confIdx) { - allConformers.push_back({mol, molIdx, &(**confIter), confIdx}); - } - } - - // Calculate batch parameters for conformers const size_t totalConformers = allConformers.size(); - const size_t effectiveBatchSize = (batchSize == 0) ? totalConformers : batchSize; + const size_t effectiveBatchSize = (ctx.batchSize == 0) ? totalConformers : ctx.batchSize; if (totalConformers == 0) { - return moleculeEnergies; // Early return for empty input + return moleculeEnergies; } - // Create stream pool for better performance and profiling - std::vector streamPool; - streamPool.reserve(numThreads); - std::vector devicesPerThread(numThreads); - for (int i = 0; i < numThreads; ++i) { - const int gpuId = gpuIds[i % gpuIds.size()]; - const WithDevice dev(gpuId); - streamPool.emplace_back(); - devicesPerThread[i] = gpuId; // Round-robin assignment of devices - } - - // Create thread-local pinned memory buffers for async transfers - std::vector threadBuffers(numThreads); + std::vector threadBuffers(ctx.numThreads); detail::OpenMPExceptionRegistry exceptionHandler; setupRange.pop(); -#pragma omp parallel for num_threads(numThreads) schedule(dynamic) default(none) shared(allConformers, \ - moleculeEnergies, \ - totalConformers, \ - effectiveBatchSize, \ - maxIters, \ - properties, \ - streamPool, \ - devicesPerThread, \ - threadBuffers, \ - backend, \ - exceptionHandler) +#pragma omp parallel for num_threads(ctx.numThreads) schedule(dynamic) default(none) shared(allConformers, \ + moleculeEnergies, \ + totalConformers, \ + effectiveBatchSize, \ + maxIters, \ + properties, \ + ctx, \ + threadBuffers, \ + backend, \ + exceptionHandler) for (size_t batchStart = 0; batchStart < totalConformers; batchStart += effectiveBatchSize) { try { std::unordered_map moleculeCache; ScopedNvtxRange singleBatchRange("OpenMP loop thread"); ScopedNvtxRange setupBatchRange("OpenMP loop preprocessing"); const int threadId = omp_get_thread_num(); - const WithDevice dev(devicesPerThread[threadId]); + const WithDevice dev(ctx.devicesPerThread[threadId]); const size_t batchEnd = std::min(batchStart + effectiveBatchSize, totalConformers); - // Create batch subset of conformers - std::vector batchConformers(allConformers.begin() + batchStart, allConformers.begin() + batchEnd); + std::vector batchConformers(allConformers.begin() + batchStart, + allConformers.begin() + batchEnd); - // Get thread-local stream from pool - cudaStream_t streamPtr = streamPool[threadId].stream(); + cudaStream_t streamPtr = ctx.streamPool[threadId].stream(); // Process this batch BatchedMolecularSystemHost systemHost; @@ -258,23 +180,7 @@ std::vector> MMFFOptimizeMoleculesConfsBfgs(std::vectorgetNumAtoms(); - const uint32_t atomStartIdx = conformerAtomStarts[i]; - - // Update conformer positions - for (uint32_t j = 0; j < numAtoms; ++j) { - confInfo.conformer->setAtomPos(j, - RDGeom::Point3D(buffers.positions[3 * (atomStartIdx + j) + 0], - buffers.positions[3 * (atomStartIdx + j) + 1], - buffers.positions[3 * (atomStartIdx + j) + 2])); - } - - // Store energy result - thread-safe since each thread writes to different indices - moleculeEnergies[confInfo.molIdx][confInfo.confIdx] = buffers.energies[i]; - } + writeBackResults(batchConformers, conformerAtomStarts, buffers, moleculeEnergies); } catch (...) { exceptionHandler.store(std::current_exception()); } diff --git a/src/minimizer/bfgs_uff.cpp b/src/minimizer/bfgs_uff.cpp new file mode 100644 index 0000000..6982e6b --- /dev/null +++ b/src/minimizer/bfgs_uff.cpp @@ -0,0 +1,146 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "bfgs_uff.h" + +#include +#include + +#include + +#include "bfgs_common.h" +#include "bfgs_minimize.h" +#include "ff_utils.h" +#include "nvtx.h" +#include "openmp_helpers.h" +#include "uff_batched_forcefield.h" +#include "uff_flattened_builder.h" + +namespace nvMolKit::UFF { + +std::vector> UFFOptimizeMoleculesConfsBfgs(std::vector& mols, + const int maxIters, + const std::vector& vdwThresholds, + const std::vector& ignoreInterfragInteractions, + const BatchHardwareOptions& perfOptions) { + ScopedNvtxRange fullMinimizeRange("BFGS UFF Optimize Molecules Confs"); + ScopedNvtxRange setupRange("BFGS UFF Optimize Molecules Confs"); + + if (vdwThresholds.size() != mols.size()) { + throw std::invalid_argument("Expected one vdw threshold per molecule"); + } + if (ignoreInterfragInteractions.size() != mols.size()) { + throw std::invalid_argument("Expected one interfragment interaction flag per molecule"); + } + + auto ctx = setupBatchExecution(perfOptions); + std::vector> moleculeEnergies; + const auto allConformers = flattenConformers(mols, moleculeEnergies); + + const size_t totalConformers = allConformers.size(); + const size_t effectiveBatchSize = ctx.batchSize == 0 ? totalConformers : ctx.batchSize; + if (totalConformers == 0) { + return moleculeEnergies; + } + + std::vector threadBuffers(ctx.numThreads); + detail::OpenMPExceptionRegistry exceptionHandler; + setupRange.pop(); +#pragma omp parallel for num_threads(ctx.numThreads) schedule(dynamic) default(none) \ + shared(allConformers, \ + moleculeEnergies, \ + totalConformers, \ + effectiveBatchSize, \ + maxIters, \ + vdwThresholds, \ + ignoreInterfragInteractions, \ + ctx, \ + threadBuffers, \ + exceptionHandler) + for (size_t batchStart = 0; batchStart < totalConformers; batchStart += effectiveBatchSize) { + try { + ScopedNvtxRange singleBatchRange("OpenMP loop thread"); + ScopedNvtxRange setupBatchRange("OpenMP loop preprocessing"); + + const int threadId = omp_get_thread_num(); + const WithDevice dev(ctx.devicesPerThread[threadId]); + const size_t batchEnd = std::min(batchStart + effectiveBatchSize, totalConformers); + std::vector batchConformers(allConformers.begin() + batchStart, + allConformers.begin() + batchEnd); + + cudaStream_t streamPtr = ctx.streamPool[threadId].stream(); + + BatchedMolecularSystemHost systemHost; + BatchedForcefieldMetadata metadata; + std::vector conformerAtomStarts; + uint32_t currentAtomOffset = 0; + std::vector pos; + + for (const auto& confInfo : batchConformers) { + const uint32_t numAtoms = confInfo.mol->getNumAtoms(); + conformerAtomStarts.push_back(currentAtomOffset); + currentAtomOffset += numAtoms; + + nvMolKit::confPosToVect(*confInfo.conformer, pos); + auto ffParams = constructForcefieldContribs(*confInfo.mol, + vdwThresholds[confInfo.molIdx], + confInfo.conformerId, + ignoreInterfragInteractions[confInfo.molIdx]); + addMoleculeToBatch(ffParams, pos, systemHost, metadata, confInfo.molIdx, static_cast(confInfo.confIdx)); + } + + auto& buffers = threadBuffers[threadId]; + buffers.ensureCapacity(systemHost.positions.size(), batchConformers.size()); + std::copy(systemHost.positions.begin(), systemHost.positions.end(), buffers.initialPositions.begin()); + + UFFBatchedForcefield forcefield(systemHost, metadata, streamPtr); + AsyncDeviceVector positionsDevice; + AsyncDeviceVector gradDevice; + AsyncDeviceVector energyOutsDevice; + positionsDevice.setStream(streamPtr); + gradDevice.setStream(streamPtr); + energyOutsDevice.setStream(streamPtr); + positionsDevice.resize(systemHost.positions.size()); + positionsDevice.copyFromHost(buffers.initialPositions.data(), systemHost.positions.size()); + gradDevice.resize(systemHost.positions.size()); + gradDevice.zero(); + energyOutsDevice.resize(batchConformers.size()); + energyOutsDevice.zero(); + + nvMolKit::BfgsBatchMinimizer bfgsMinimizer( + /*dataDim=*/3, + nvMolKit::DebugLevel::NONE, + true, + streamPtr, + nvMolKit::BfgsBackend::BATCHED); + constexpr double gradTol = 1e-4; + setupBatchRange.pop(); + bfgsMinimizer.minimize(maxIters, gradTol, forcefield, positionsDevice, gradDevice, energyOutsDevice); + + ScopedNvtxRange finalizeBatchRange("OpenMP loop finalizing batch"); + positionsDevice.copyToHost(buffers.positions.data(), positionsDevice.size()); + energyOutsDevice.copyToHost(buffers.energies.data(), energyOutsDevice.size()); + cudaStreamSynchronize(streamPtr); + + writeBackResults(batchConformers, conformerAtomStarts, buffers, moleculeEnergies); + } catch (...) { + exceptionHandler.store(std::current_exception()); + } + } + exceptionHandler.rethrow(); + return moleculeEnergies; +} + +} // namespace nvMolKit::UFF diff --git a/src/minimizer/bfgs_uff.h b/src/minimizer/bfgs_uff.h new file mode 100644 index 0000000..1511e1f --- /dev/null +++ b/src/minimizer/bfgs_uff.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_BFGS_UFF_H +#define NVMOLKIT_BFGS_UFF_H + +#include + +#include "../hardware_options.h" + +namespace RDKit { +class ROMol; +} + +namespace nvMolKit::UFF { + +std::vector> UFFOptimizeMoleculesConfsBfgs(std::vector& mols, + int maxIters, + const std::vector& vdwThresholds, + const std::vector& ignoreInterfragInteractions, + const BatchHardwareOptions& perfOptions = {}); + +} // namespace nvMolKit::UFF + +#endif // NVMOLKIT_BFGS_UFF_H