diff --git a/benchmarks/prepare_mols.py b/benchmarks/prepare_mols.py new file mode 100644 index 0000000..56ee040 --- /dev/null +++ b/benchmarks/prepare_mols.py @@ -0,0 +1,172 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Precompute molecules with conformers for TFD benchmarking and profiling. + +Generates conformers once and saves the results as pickle files (one per +conformer count). This avoids the expensive ETKDG embedding during +benchmark/profile runs. + +Output files: /prepared_mols_confs.pkl + +Usage: + python benchmarks/prepare_mols.py + + # Custom settings: + python benchmarks/prepare_mols.py \ + --smiles-file benchmarks/data/benchmark_smiles.csv \ + --output-dir benchmarks/data \ + --conformers 5 10 20 50 \ + --max-mols 500 \ + --workers 8 +""" + +import argparse +import multiprocessing +import os +import pickle +import sys +import time +from functools import partial + +import pandas as pd +from rdkit import Chem, RDLogger +from rdkit.Chem import AllChem + +RDLogger.DisableLog("rdApp.*") + + +def _generate_one(args_tuple, num_confs): + """Generate conformers for a single molecule (picklable for multiprocessing).""" + idx, smi = args_tuple + mol = Chem.MolFromSmiles(smi) + if mol is None or mol.GetNumAtoms() < 4: + return None + + mol = Chem.AddHs(mol) + params = AllChem.ETKDGv3() + params.randomSeed = 42 + idx + params.numThreads = 1 + params.useRandomCoords = True + conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params) + if len(conf_ids) < 2: + return None + return Chem.RemoveHs(mol) + + +def main(): + default_smiles = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "benchmark_smiles.csv") + default_output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") + + parser = argparse.ArgumentParser( + description="Precompute molecules with conformers for TFD benchmarking", + ) + parser.add_argument( + "--smiles-file", + type=str, + default=default_smiles, + help="CSV file with SMILES in first column (default: benchmarks/data/benchmark_smiles.csv)", + ) + parser.add_argument( + "--output-dir", + type=str, + default=default_output_dir, + help="Output directory for pickle files (default: benchmarks/data)", + ) + parser.add_argument( + "--conformers", + type=int, + nargs="+", + default=[5, 10, 20], + help="Conformer counts to generate (default: 5 10 20)", + ) + parser.add_argument( + "--max-mols", + type=int, + default=0, + help="Maximum number of molecules to prepare (default: 0 = all valid SMILES)", + ) + parser.add_argument( + "--workers", + type=int, + default=max(1, multiprocessing.cpu_count() // 2), + help=f"Number of parallel workers (default: {max(1, multiprocessing.cpu_count() // 2)})", + ) + args = parser.parse_args() + + print(f"Loading SMILES from: {args.smiles_file}") + df = pd.read_csv(args.smiles_file) + smiles_list = df.iloc[:, 0].tolist() + print(f"Loaded {len(smiles_list)} SMILES") + + # Build (index, smiles) pairs, filtering small molecules + work_items = [] + for i, smi in enumerate(smiles_list): + mol = Chem.MolFromSmiles(smi) + if mol is not None and mol.GetNumAtoms() >= 4: + work_items.append((i, smi)) + if args.max_mols > 0 and len(work_items) >= args.max_mols: + break + print(f"Valid molecules (>=4 atoms): {len(work_items)}") + print(f"Workers: {args.workers}") + + os.makedirs(args.output_dir, exist_ok=True) + + for num_confs in args.conformers: + out_path = os.path.join(args.output_dir, f"prepared_mols_{num_confs}confs.pkl") + + if os.path.exists(out_path): + print(f"\n--- {num_confs} conformers: {out_path} already exists, skipping ---") + continue + + print(f"\n--- Generating {num_confs} conformers ({len(work_items)} molecules, {args.workers} workers) ---") + start = time.perf_counter() + + worker_fn = partial(_generate_one, num_confs=num_confs) + + if args.workers > 1: + with multiprocessing.Pool(args.workers) as pool: + results = [] + for i, result in enumerate(pool.imap(worker_fn, work_items)): + results.append(result) + if (i + 1) % 50 == 0 or (i + 1) == len(work_items): + print(f" Progress: {i + 1}/{len(work_items)} molecules", flush=True) + else: + results = [] + for i, item in enumerate(work_items): + results.append(worker_fn(item)) + if (i + 1) % 50 == 0 or (i + 1) == len(work_items): + print(f" Progress: {i + 1}/{len(work_items)} molecules", flush=True) + + mols = [m for m in results if m is not None] + elapsed = time.perf_counter() - start + + actual_confs = [m.GetNumConformers() for m in mols] + total_pairs = sum(c * (c - 1) // 2 for c in actual_confs) + + with open(out_path, "wb") as f: + pickle.dump(mols, f, protocol=pickle.HIGHEST_PROTOCOL) + + file_size_mb = os.path.getsize(out_path) / (1024 * 1024) + print(f" {len(mols)} molecules, avg {sum(actual_confs) / len(actual_confs):.1f} conformers") + print(f" Total TFD pairs: {total_pairs}") + print(f" Time: {elapsed:.1f}s") + print(f" Saved: {out_path} ({file_size_mb:.1f} MB)") + + print("\nDone.") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/tfd_bench.py b/benchmarks/tfd_bench.py new file mode 100644 index 0000000..9023831 --- /dev/null +++ b/benchmarks/tfd_bench.py @@ -0,0 +1,524 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Benchmark for TFD (Torsion Fingerprint Deviation) calculation. + +Compares: +- RDKit TorsionFingerprints.GetTFDMatrix (Python, single-threaded) +- nvMolKit CPU backend (C++ with OpenMP, multi-threaded) +- nvMolKit GPU backend (CUDA) + +Usage: + python tfd_bench.py [--smiles-file FILE] [--output FILE] [--skip-rdkit] + python tfd_bench.py --pkl-file data1.pkl data2.pkl [--output FILE] + +Example: + python tfd_bench.py --smiles-file data/benchmark_smiles.csv --output tfd_results.csv + + # Use precomputed ChEMBL stratified pickles directly: + python tfd_bench.py --pkl-file ../Data/Chembl_stratified_prepared/chembl_0-20_10confs.pkl \ + --num-mols 100 500 1000 5000 --output tfd_chembl_results.csv +""" + +import argparse +import os +import pickle +import sys +import time +from typing import List, Tuple + +import pandas as pd +import torch +from rdkit import Chem +from rdkit.Chem import AllChem, TorsionFingerprints + +import nvmolkit.tfd as nvmol_tfd + + +def time_it(func, runs: int = 3, warmups: int = 1) -> Tuple[float, float]: + """Time a function with warmup runs. + + Args: + func: Function to time (no arguments) + runs: Number of timed runs + warmups: Number of warmup runs + + Returns: + Tuple of (average_time_ms, std_time_ms) + """ + for _ in range(warmups): + func() + + times = [] + for _ in range(runs): + start = time.perf_counter_ns() + func() + end = time.perf_counter_ns() + times.append(end - start) + + avg_time = sum(times) / runs + std_time = (sum((t - avg_time) ** 2 for t in times) / runs) ** 0.5 + return avg_time / 1.0e6, std_time / 1.0e6 # Return in milliseconds + + +def generate_conformers(mol: Chem.Mol, num_confs: int, seed: int = 42) -> Chem.Mol: + """Generate conformers for a molecule using ETKDG. + + Args: + mol: RDKit molecule + num_confs: Number of conformers to generate + seed: Random seed + + Returns: + Molecule with conformers (or None if embedding failed) + """ + mol = Chem.AddHs(mol) + params = AllChem.ETKDGv3() + params.randomSeed = seed + params.numThreads = 1 # Single-threaded for reproducibility + params.useRandomCoords = True + + conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params) + + if len(conf_ids) < 2: + return None + + mol = Chem.RemoveHs(mol) + return mol + + +def _try_load_pickle(num_confs: int, max_mols: int, smiles_file: str = None) -> List[Chem.Mol]: + """Try to load precomputed molecules from pickle file.""" + search_dirs = [] + if smiles_file: + search_dirs.append(os.path.dirname(os.path.abspath(smiles_file))) + search_dirs.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")) + + for d in search_dirs: + pkl_path = os.path.join(d, f"prepared_mols_{num_confs}confs.pkl") + if os.path.exists(pkl_path): + with open(pkl_path, "rb") as f: + all_mols = pickle.load(f) + mols = all_mols[:max_mols] + print(f" Loaded {len(mols)} molecules from {pkl_path}") + return mols + return None + + +def prepare_molecules( + smiles_list: List[str], num_confs: int, max_mols: int = 100, smiles_file: str = None +) -> List[Chem.Mol]: + """Prepare molecules with conformers, using precomputed pickle if available. + + Args: + smiles_list: List of SMILES strings (fallback if no pickle) + num_confs: Number of conformers per molecule + max_mols: Maximum number of molecules to prepare + smiles_file: Path to SMILES CSV (used to locate pickle files) + + Returns: + List of molecules with conformers + """ + cached = _try_load_pickle(num_confs, max_mols, smiles_file) + if cached is not None: + return cached + + print(f" No precomputed pickle found, generating from scratch...") + mols = [] + for i, smi in enumerate(smiles_list): + if len(mols) >= max_mols: + break + + mol = Chem.MolFromSmiles(smi) + if mol is None: + continue + + if mol.GetNumAtoms() < 4: + continue + + mol_with_confs = generate_conformers(mol, num_confs, seed=42 + i) + if mol_with_confs is not None and mol_with_confs.GetNumConformers() >= 2: + mols.append(mol_with_confs) + + return mols + + +def bench_rdkit_single(mol: Chem.Mol) -> None: + """Benchmark RDKit TFD for a single molecule.""" + TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + + +def bench_rdkit_batch(mols: List[Chem.Mol]) -> None: + """Benchmark RDKit TFD for multiple molecules (sequential).""" + for mol in mols: + TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + + +def bench_nvmol_cpu_single(mol: Chem.Mol) -> None: + """Benchmark nvMolKit CPU TFD for a single molecule.""" + nvmol_tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal", backend="cpu") + + +def bench_nvmol_cpu_list(mols: List[Chem.Mol]) -> None: + """Benchmark nvMolKit CPU TFD returning Python lists.""" + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="cpu", return_type="list") + + +def bench_nvmol_cpu_numpy(mols: List[Chem.Mol]) -> None: + """Benchmark nvMolKit CPU TFD returning numpy arrays.""" + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="cpu", return_type="numpy") + + +def bench_nvmol_gpu_single(mol: Chem.Mol) -> None: + """Benchmark nvMolKit GPU TFD for a single molecule.""" + nvmol_tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal", backend="gpu") + torch.cuda.synchronize() + + +def bench_nvmol_gpu_list(mols: List[Chem.Mol]) -> None: + """Benchmark nvMolKit GPU TFD returning Python lists.""" + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="gpu", return_type="list") + torch.cuda.synchronize() + + +def bench_nvmol_gpu_numpy(mols: List[Chem.Mol]) -> None: + """Benchmark nvMolKit GPU TFD returning numpy arrays.""" + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="gpu", return_type="numpy") + torch.cuda.synchronize() + + +def bench_nvmol_gpu_tensor(mols: List[Chem.Mol]) -> None: + """Benchmark nvMolKit GPU TFD returning GPU tensors (no D2H).""" + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="gpu", return_type="tensor") + torch.cuda.synchronize() + + +def verify_correctness(mol: Chem.Mol, tolerance: float = 0.01) -> bool: + """Verify nvMolKit results match RDKit (within tolerance). + + Multi-quartet torsions (rings and symmetric) are fully supported, + so results should match RDKit closely. + """ + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + nvmol_result = nvmol_tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal", backend="gpu") + + if len(rdkit_result) != len(nvmol_result): + return False + + for rd, nv in zip(rdkit_result, nvmol_result): + if abs(rd - nv) > tolerance: + return False + + return True + + +def load_pkl_files(pkl_paths: List[str]) -> List[Chem.Mol]: + """Load and concatenate molecules from one or more pickle files. + + Each pickle file must contain a list of RDKit Mol objects with conformers + (as produced by prepare_chembl_stratified.py). + """ + all_mols = [] + for path in pkl_paths: + with open(path, "rb") as f: + mols = pickle.load(f) + print(f" Loaded {len(mols)} molecules from {path}") + all_mols.extend(mols) + return all_mols + + +def run_benchmarks( + smiles_list: List[str] | None = None, + skip_rdkit: bool = False, + output_file: str = "tfd_results.csv", + smiles_file: str = None, + mol_counts: List[int] = None, + conformer_counts: List[int] = None, + preloaded_mols: List[Chem.Mol] | None = None, +) -> pd.DataFrame: + """Run TFD benchmarks with various configurations. + + Args: + smiles_list: List of SMILES strings (unused when preloaded_mols given) + skip_rdkit: If True, skip RDKit benchmarks (faster for large runs) + output_file: Output CSV file path + smiles_file: Path to SMILES CSV (used to locate pickle files) + mol_counts: List of molecule counts to benchmark + conformer_counts: List of conformer counts to benchmark + preloaded_mols: Pre-loaded molecules with conformers (e.g. from --pkl-file). + When provided, smiles_list/smiles_file/conformer_counts are ignored and + the actual conformer count is read from the molecules. + + Returns: + DataFrame with benchmark results + """ + if mol_counts is None: + mol_counts = [1, 5, 10, 25, 50, 100] + + if preloaded_mols is not None: + actual_confs_all = [m.GetNumConformers() for m in preloaded_mols] + median_confs = sorted(actual_confs_all)[len(actual_confs_all) // 2] + conformer_counts = [median_confs] + print(f" Using {len(preloaded_mols)} preloaded molecules (~{median_confs} conformers each)") + elif conformer_counts is None: + conformer_counts = [5, 10, 20] + + results = [] + + print("=" * 70) + print("TFD Benchmark: RDKit vs nvMolKit (CPU) vs nvMolKit (GPU)") + print(f"Molecule counts: {mol_counts}") + print(f"Conformer counts: {conformer_counts}") + print("=" * 70) + + for num_confs in conformer_counts: + if preloaded_mols is not None: + all_mols = preloaded_mols[: max(mol_counts)] + else: + print(f"\n--- Preparing molecules with {num_confs} conformers ---") + all_mols = prepare_molecules( + smiles_list, num_confs, max_mols=max(mol_counts) + 20, smiles_file=smiles_file + ) + + if len(all_mols) < max(mol_counts): + print(f"Warning: Only {len(all_mols)} molecules available") + + for num_mols in mol_counts: + if num_mols > len(all_mols): + print(f"Skipping {num_mols} mols (only {len(all_mols)} available)") + continue + + mols = all_mols[:num_mols] + actual_confs = [mol.GetNumConformers() for mol in mols] + avg_confs = sum(actual_confs) / len(actual_confs) + + print(f"\nBenchmarking: {num_mols} molecules, ~{avg_confs:.1f} conformers each") + + # Calculate expected TFD pairs + total_pairs = sum(c * (c - 1) // 2 for c in actual_confs) + print(f" Total TFD pairs: {total_pairs}") + + result = { + "num_molecules": num_mols, + "target_conformers": num_confs, + "avg_conformers": avg_confs, + "total_tfd_pairs": total_pairs, + } + + # RDKit benchmark (single-threaded Python) + if not skip_rdkit: + try: + rdkit_time, rdkit_std = time_it(lambda: bench_rdkit_batch(mols)) + result["rdkit_time_ms"] = rdkit_time + result["rdkit_std_ms"] = rdkit_std + print(f" RDKit (Python): {rdkit_time:8.2f} ms (+/- {rdkit_std:.2f})") + except Exception as e: + print(f" RDKit failed: {e}") + result["rdkit_time_ms"] = None + result["rdkit_std_ms"] = None + else: + result["rdkit_time_ms"] = None + result["rdkit_std_ms"] = None + + # nvMolKit CPU list benchmark (return_type="list") + try: + t, s = time_it(lambda: bench_nvmol_cpu_list(mols)) + result["nvmol_cpu_list_time_ms"] = t + result["nvmol_cpu_list_std_ms"] = s + print(f" nvMolKit (CPU list): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit CPU list failed: {e}") + result["nvmol_cpu_list_time_ms"] = None + result["nvmol_cpu_list_std_ms"] = None + + # nvMolKit CPU numpy benchmark (return_type="numpy") + try: + t, s = time_it(lambda: bench_nvmol_cpu_numpy(mols)) + result["nvmol_cpu_numpy_time_ms"] = t + result["nvmol_cpu_numpy_std_ms"] = s + print(f" nvMolKit (CPU numpy):{t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit CPU numpy failed: {e}") + result["nvmol_cpu_numpy_time_ms"] = None + result["nvmol_cpu_numpy_std_ms"] = None + + # nvMolKit GPU list benchmark (return_type="list") + try: + t, s = time_it(lambda: bench_nvmol_gpu_list(mols)) + result["nvmol_gpu_list_time_ms"] = t + result["nvmol_gpu_list_std_ms"] = s + print(f" nvMolKit (GPU list): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit GPU list failed: {e}") + result["nvmol_gpu_list_time_ms"] = None + + # nvMolKit GPU numpy benchmark (return_type="numpy") + try: + t, s = time_it(lambda: bench_nvmol_gpu_numpy(mols)) + result["nvmol_gpu_numpy_time_ms"] = t + result["nvmol_gpu_numpy_std_ms"] = s + print(f" nvMolKit (GPU numpy): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit GPU numpy failed: {e}") + result["nvmol_gpu_numpy_time_ms"] = None + + # nvMolKit GPU tensor benchmark (return_type="tensor", no D2H) + try: + t, s = time_it(lambda: bench_nvmol_gpu_tensor(mols)) + result["nvmol_gpu_tensor_time_ms"] = t + result["nvmol_gpu_tensor_std_ms"] = s + print(f" nvMolKit (GPU ten): {t:8.2f} ms (+/- {s:.2f})") + except Exception as e: + print(f" nvMolKit GPU tensor failed: {e}") + result["nvmol_gpu_tensor_time_ms"] = None + + # Calculate speedups vs RDKit + speedups = {} + for key, label in [ + ("nvmol_cpu_list_time_ms", "CPU list"), + ("nvmol_cpu_numpy_time_ms", "CPU numpy"), + ("nvmol_gpu_list_time_ms", "GPU list"), + ("nvmol_gpu_numpy_time_ms", "GPU numpy"), + ("nvmol_gpu_tensor_time_ms", "GPU tensor"), + ]: + if result.get("rdkit_time_ms") and result.get(key): + speedups[label] = result["rdkit_time_ms"] / result[key] + + for label, val in speedups.items(): + print(f" Speedup {label:>10s} vs RDKit: {val:.1f}x") + + results.append(result) + + # Create DataFrame and save + df = pd.DataFrame(results) + df.to_csv(output_file, index=False) + print(f"\n{'=' * 70}") + print(f"Results saved to: {output_file}") + print(f"{'=' * 70}") + + return df + + +def main(): + _default_smiles = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "benchmark_smiles.csv") + + parser = argparse.ArgumentParser(description="TFD Benchmark") + parser.add_argument( + "--smiles-file", + type=str, + default=_default_smiles, + help="CSV file with SMILES (default: benchmarks/data/benchmark_smiles.csv)", + ) + parser.add_argument( + "--output", + type=str, + default="tfd_results.csv", + help="Output CSV file for results", + ) + parser.add_argument( + "--num-mols", + type=int, + nargs="+", + default=[1, 10, 50, 100, 500, 1000], + help="Molecule counts to benchmark (default: 1 10 50 100 500 1000)", + ) + parser.add_argument( + "--num-confs", + type=int, + nargs="+", + default=[5, 10, 20, 50], + help="Conformer counts to benchmark (default: 5 10 20 50)", + ) + parser.add_argument( + "--skip-rdkit", + action="store_true", + help="Skip RDKit benchmarks (faster)", + ) + parser.add_argument( + "--pkl-file", + type=str, + nargs="+", + default=None, + help="Precomputed pickle file(s) containing molecules with conformers. " + "When provided, --smiles-file and --num-confs are ignored.", + ) + parser.add_argument( + "--verify", + action="store_true", + help="Verify correctness before benchmarking", + ) + parser.add_argument( + "--verify-only", + action="store_true", + help="Verify correctness and exit (skip benchmarking)", + ) + args = parser.parse_args() + + preloaded_mols = None + smiles_list = None + + if args.pkl_file: + print("Loading precomputed molecules from pickle file(s)...") + preloaded_mols = load_pkl_files(args.pkl_file) + if not preloaded_mols: + print("Error: no molecules loaded from pickle files") + sys.exit(1) + print(f"Total: {len(preloaded_mols)} molecules") + else: + print(f"Loading SMILES from: {args.smiles_file}") + try: + df = pd.read_csv(args.smiles_file) + smiles_list = df.iloc[:, 0].tolist() + except Exception as e: + print(f"Error loading SMILES file: {e}") + sys.exit(1) + print(f"Loaded {len(smiles_list)} SMILES") + + if args.verify or args.verify_only: + print("\nVerifying correctness...") + if preloaded_mols is not None: + test_mols = preloaded_mols[:50] + else: + test_mols = prepare_molecules(smiles_list, num_confs=5, max_mols=50, smiles_file=args.smiles_file) + all_correct = True + mismatches = 0 + for i, mol in enumerate(test_mols): + if verify_correctness(mol): + print(f" Molecule {i}: OK") + else: + print(f" Molecule {i}: MISMATCH") + all_correct = False + mismatches += 1 + if all_correct: + print(f"All {len(test_mols)} molecules match RDKit.") + else: + print(f"Warning: {mismatches}/{len(test_mols)} molecules did not match RDKit within tolerance") + + if args.verify_only: + sys.exit(0 if all_correct else 1) + + run_benchmarks( + smiles_list=smiles_list, + skip_rdkit=args.skip_rdkit, + output_file=args.output, + smiles_file=args.smiles_file, + mol_counts=args.num_mols, + conformer_counts=args.num_confs if not args.pkl_file else None, + preloaded_mols=preloaded_mols, + ) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/tfd_profile.py b/benchmarks/tfd_profile.py new file mode 100644 index 0000000..664de98 --- /dev/null +++ b/benchmarks/tfd_profile.py @@ -0,0 +1,228 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""TFD profiling script for use with NVIDIA Nsight Systems (nsys). + +Designed to produce NVTX-annotated traces for analyzing CPU vs GPU TFD performance. +C++ NVTX ranges (buildTFDSystem, transferToDevice, kernel launches, etc.) are +automatically captured by nsys alongside the Python-level annotations below. + +Usage: + # Profile GPU with default sweep (multiple mol counts and conformer counts): + nsys profile -t nvtx,osrt,cuda --gpu-metrics-devices=all --stats=true \ + -f true -o tfd_gpu_profile \ + python benchmarks/tfd_profile.py gpu + + # Profile GPU with specific configurations: + python benchmarks/tfd_profile.py gpu --num-mols 100 500 1000 --num-confs 20 50 + + # Profile a single configuration (legacy style): + python benchmarks/tfd_profile.py gpu --num-mols 50 --num-confs 20 + + # Profile CPU path: + python benchmarks/tfd_profile.py cpu + + # Profile both paths: + python benchmarks/tfd_profile.py both +""" + +import argparse +import os +import pickle +import sys + +import nvtx +import torch +from rdkit import Chem +from rdkit.Chem import AllChem + +import nvmolkit.tfd as nvmol_tfd + + +def generate_conformers(mol, num_confs, seed=42): + """Generate conformers for a molecule using ETKDG.""" + mol = Chem.AddHs(mol) + params = AllChem.ETKDGv3() + params.randomSeed = seed + params.numThreads = 1 + params.useRandomCoords = True + conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params) + if len(conf_ids) < 2: + return None + return Chem.RemoveHs(mol) + + +def prepare_molecules(smiles_file, num_mols, num_confs): + """Load molecules with conformers, using precomputed pickle if available.""" + data_dir = os.path.dirname(os.path.abspath(smiles_file)) + pkl_path = os.path.join(data_dir, f"prepared_mols_{num_confs}confs.pkl") + + if os.path.exists(pkl_path): + with open(pkl_path, "rb") as f: + all_mols = pickle.load(f) + mols = all_mols[:num_mols] + print(f" Loaded {len(mols)} molecules from {pkl_path}") + return mols + + print(f" No precomputed file found ({pkl_path}), generating from scratch...") + import pandas as pd + + df = pd.read_csv(smiles_file) + smiles_list = df.iloc[:, 0].tolist() + + mols = [] + for i, smi in enumerate(smiles_list): + if len(mols) >= num_mols: + break + mol = Chem.MolFromSmiles(smi) + if mol is None or mol.GetNumAtoms() < 4: + continue + mol_with_confs = generate_conformers(mol, num_confs, seed=42 + i) + if mol_with_confs is not None and mol_with_confs.GetNumConformers() >= 2: + mols.append(mol_with_confs) + + return mols + + +def run_config(mols, backend, num_runs, label): + """Run profiled iterations for a single configuration.""" + actual_confs = [mol.GetNumConformers() for mol in mols] + total_pairs = sum(c * (c - 1) // 2 for c in actual_confs) + print( + f"\n[{label}] {len(mols)} mols, avg {sum(actual_confs) / len(actual_confs):.1f} confs, {total_pairs} TFD pairs" + ) + + for run in range(num_runs): + if backend in ("cpu", "both"): + with nvtx.annotate(f"{label} CPU run {run}", color="cyan"): + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="cpu", return_type="numpy") + + if backend in ("gpu", "both"): + with nvtx.annotate(f"{label} GPU run {run}", color="orange"): + nvmol_tfd.GetTFDMatrices(mols, useWeights=True, maxDev="equal", backend="gpu", return_type="tensor") + torch.cuda.synchronize() + + +def main(): + _default_smiles = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "benchmark_smiles.csv") + + parser = argparse.ArgumentParser( + description="TFD profiling script for nsys", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog="""\ +Examples: + # Profile GPU with default sweep: + nsys profile -t nvtx,osrt,cuda --gpu-metrics-devices=all --stats=true \\ + -f true -o tfd_gpu python benchmarks/tfd_profile.py gpu + + # Profile GPU with large configurations: + python benchmarks/tfd_profile.py gpu --num-mols 500 1000 --num-confs 50 100 + + # Profile CPU with single config: + python benchmarks/tfd_profile.py cpu --num-mols 100 --num-confs 20 +""", + ) + parser.add_argument( + "backend", + choices=["cpu", "gpu", "both"], + help="Backend to profile", + ) + parser.add_argument( + "--num-mols", + type=int, + nargs="+", + default=[50, 100, 500, 1000], + help="Number of molecules to test (default: 50 100 500 1000)", + ) + parser.add_argument( + "--num-confs", + type=int, + nargs="+", + default=[10, 20, 50], + help="Number of conformers to test (default: 10 20 50)", + ) + parser.add_argument( + "--warmup", + type=int, + default=1, + help="Number of warmup iterations (default: 1)", + ) + parser.add_argument( + "--runs", + type=int, + default=3, + help="Number of profiled runs per configuration (default: 3)", + ) + parser.add_argument( + "--smiles-file", + type=str, + default=_default_smiles, + help="CSV file with SMILES (default: benchmarks/data/benchmark_smiles.csv)", + ) + args = parser.parse_args() + + print(f"Backend: {args.backend}") + print(f"Molecule counts: {args.num_mols}") + print(f"Conformer counts: {args.num_confs}") + print(f"Runs per config: {args.runs}") + + # === Setup: load all needed configurations === + configs = {} + with nvtx.annotate("Setup", color="blue"): + for num_confs in args.num_confs: + max_mols = max(args.num_mols) + print(f"\nLoading up to {max_mols} molecules with {num_confs} conformers...") + all_mols = prepare_molecules(args.smiles_file, max_mols, num_confs) + if not all_mols: + print(f" Warning: no molecules available for {num_confs} conformers, skipping") + continue + configs[num_confs] = all_mols + + if not configs: + print("Error: No valid molecules prepared") + sys.exit(1) + + # === Warmup === + if args.warmup > 0: + with nvtx.annotate("Warmup", color="red"): + first_mols = list(configs.values())[0] + warmup_mols = first_mols[: min(5, len(first_mols))] + print(f"\nWarmup ({args.warmup} iteration(s)) with {len(warmup_mols)} molecules...") + for _ in range(args.warmup): + if args.backend in ("cpu", "both"): + nvmol_tfd.GetTFDMatrices(warmup_mols, useWeights=True, maxDev="equal", backend="cpu") + if args.backend in ("gpu", "both"): + nvmol_tfd.GetTFDMatrices( + warmup_mols, useWeights=True, maxDev="equal", backend="gpu", return_type="tensor" + ) + torch.cuda.synchronize() + + # === Profiled runs === + with nvtx.annotate("Profiled runs", color="green"): + for num_confs in sorted(configs.keys()): + all_mols = configs[num_confs] + for num_mols in args.num_mols: + mols = all_mols[:num_mols] + if len(mols) == 0: + continue + label = f"{len(mols)}mols_{num_confs}confs" + with nvtx.annotate(label, color="magenta"): + run_config(mols, args.backend, args.runs, label) + + print("\nProfiling complete.") + + +if __name__ == "__main__": + main() diff --git a/docs/api/nvmolkit.rst b/docs/api/nvmolkit.rst index 235dd6b..56b9677 100644 --- a/docs/api/nvmolkit.rst +++ b/docs/api/nvmolkit.rst @@ -86,6 +86,16 @@ Conformer RMSD conformerRmsd.GetConformerRMSMatrix conformerRmsd.GetConformerRMSMatrixBatch +Torsion Fingerprint Deviation (TFD) +----------------------------------- + +.. autosummary:: + :toctree: generated/ + :template: function_template.rst + + tfd.GetTFDMatrix + tfd.GetTFDMatrices + Types ----- diff --git a/docs/index.rst b/docs/index.rst index 8b15bdd..eb85340 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -125,6 +125,10 @@ nvMolKit currently supports the following features: * **Conformer RMSD**: GPU-accelerated pairwise RMSD matrix computation for conformer ensembles +* **Torsion Fingerprint Deviation (TFD)**: GPU-accelerated TFD computation for comparing conformer geometry + * Batch processing of multiple molecules with all-pairs conformer comparison + * CPU and GPU backends with automatic backend selection + .. _async-results: Asynchronous GPU Results diff --git a/nvmolkit/CMakeLists.txt b/nvmolkit/CMakeLists.txt index be4069b..bc52099 100644 --- a/nvmolkit/CMakeLists.txt +++ b/nvmolkit/CMakeLists.txt @@ -106,3 +106,12 @@ target_link_libraries(_substructure PUBLIC ${Boost_LIBRARIES} target_link_libraries(_substructure PRIVATE ${RDKit_LIBS} substructure_search pinned_host_allocator) installpythontarget(_substructure ./) + +add_library(_TFD MODULE tfd.cpp) +target_include_directories( + _TFD PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/utils ${Boost_INCLUDE_DIRS} + ${Python_INCLUDE_DIRS}) +target_link_libraries(_TFD PUBLIC ${Boost_LIBRARIES} ${PYTHON_LIBRARIES} + device_vector) +target_link_libraries(_TFD PRIVATE ${RDKit_LIBS} tfd CUDA::cudart) +installpythontarget(_TFD ./) diff --git a/nvmolkit/_TFD.pyi b/nvmolkit/_TFD.pyi new file mode 100644 index 0000000..7f18295 --- /dev/null +++ b/nvmolkit/_TFD.pyi @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, List, Tuple + +import numpy as np +from rdkit.Chem import Mol + +def GetTFDMatricesCpuBuffer( + mols: List[Mol], + useWeights: bool = True, + maxDev: str = "equal", + symmRadius: int = 2, + ignoreColinearBonds: bool = True, +) -> List[np.ndarray]: ... +def GetTFDMatricesGpuBuffer( + mols: List[Mol], + useWeights: bool = True, + maxDev: str = "equal", + symmRadius: int = 2, + ignoreColinearBonds: bool = True, +) -> Tuple[Any, List[int]]: ... diff --git a/nvmolkit/__init__.py b/nvmolkit/__init__.py index 760cc84..ddd0d5d 100644 --- a/nvmolkit/__init__.py +++ b/nvmolkit/__init__.py @@ -25,6 +25,7 @@ - Bulk tanimoto/cosine similarity calculations between fingerprints - ETKDG conformer generation for multiple molecules - MMFF optimization for multiple molecules and conformers +- TFD (Torsion Fingerprint Deviation) calculation for conformer comparison """ VERSION = "0.4.0" diff --git a/nvmolkit/tests/test_tfd.py b/nvmolkit/tests/test_tfd.py new file mode 100644 index 0000000..8371b5e --- /dev/null +++ b/nvmolkit/tests/test_tfd.py @@ -0,0 +1,417 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for GPU-accelerated TFD calculation.""" + +import pytest +import torch +from rdkit import Chem +from rdkit.Chem import AllChem, TorsionFingerprints + +import nvmolkit.tfd as tfd + +# Tolerance for comparing GPU vs RDKit results +TOLERANCE = 0.01 + + +def generate_conformers(mol, num_confs, seed=42): + """Generate conformers for a molecule using ETKDG.""" + params = AllChem.ETKDGv3() + params.randomSeed = seed + params.numThreads = 1 + AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params) + return mol + + +@pytest.fixture +def simple_mol_with_conformers(): + """Create a simple molecule with conformers.""" + mol = Chem.MolFromSmiles("CCCCC") # n-pentane + return generate_conformers(mol, 5) + + +@pytest.fixture +def multiple_mols_with_conformers(): + """Create multiple molecules with conformers.""" + smiles_list = ["CCCC", "CCCCC", "CCCCCC", "CCO", "CCCO"] + mols = [] + for i, smi in enumerate(smiles_list): + mol = Chem.MolFromSmiles(smi) + if mol: + mol = generate_conformers(mol, 3 + i, seed=42 + i) + if mol.GetNumConformers() >= 2: + mols.append(mol) + return mols + + +class TestGetTFDMatrix: + """Tests for single-molecule TFD calculation.""" + + def test_basic_tfd_matrix(self, simple_mol_with_conformers): + """Test basic TFD matrix computation.""" + mol = simple_mol_with_conformers + num_conf = mol.GetNumConformers() + + result = tfd.GetTFDMatrix(mol) + + # Check size: C*(C-1)/2 for C conformers + expected_size = num_conf * (num_conf - 1) // 2 + assert len(result) == expected_size + + # All TFD values should be non-negative + for val in result: + assert val >= 0.0 + + def test_single_conformer_returns_empty(self): + """Test that single conformer returns empty matrix.""" + mol = Chem.MolFromSmiles("CCCC") + generate_conformers(mol, 1) + assert mol.GetNumConformers() == 1 + + result = tfd.GetTFDMatrix(mol) + assert len(result) == 0 + + def test_cpu_backend(self, simple_mol_with_conformers): + """Test CPU backend gives same results as GPU.""" + mol = simple_mol_with_conformers + + gpu_result = tfd.GetTFDMatrix(mol, backend="gpu") + cpu_result = tfd.GetTFDMatrix(mol, backend="cpu") + + assert len(gpu_result) == len(cpu_result) + for gpu_val, cpu_val in zip(gpu_result, cpu_result): + assert abs(gpu_val - cpu_val) < 1e-4 + + def test_no_weights(self, simple_mol_with_conformers): + """Test computation without weights.""" + mol = simple_mol_with_conformers + + with_weights = tfd.GetTFDMatrix(mol, useWeights=True) + no_weights = tfd.GetTFDMatrix(mol, useWeights=False) + + # Both should have same size + assert len(with_weights) == len(no_weights) + + # Values may differ when weights are disabled + # Just verify computation completes + + def test_maxdev_spec(self, simple_mol_with_conformers): + """Test with specific max deviation mode.""" + mol = simple_mol_with_conformers + + equal_result = tfd.GetTFDMatrix(mol, maxDev="equal") + spec_result = tfd.GetTFDMatrix(mol, maxDev="spec") + + assert len(equal_result) == len(spec_result) + + def test_invalid_maxdev_raises(self, simple_mol_with_conformers): + """Test that invalid maxDev raises error.""" + mol = simple_mol_with_conformers + + with pytest.raises(Exception): + tfd.GetTFDMatrix(mol, maxDev="invalid") + + def test_invalid_backend_raises(self, simple_mol_with_conformers): + """Test that invalid backend raises error.""" + mol = simple_mol_with_conformers + + with pytest.raises(Exception): + tfd.GetTFDMatrix(mol, backend="invalid") + + +class TestGetTFDMatrices: + """Tests for batch TFD calculation.""" + + def test_batch_processing(self, multiple_mols_with_conformers): + """Test batch processing of multiple molecules.""" + mols = multiple_mols_with_conformers + + results = tfd.GetTFDMatrices(mols) + + assert len(results) == len(mols) + + for i, (mol, result) in enumerate(zip(mols, results)): + num_conf = mol.GetNumConformers() + expected_size = num_conf * (num_conf - 1) // 2 + assert len(result) == expected_size, f"Molecule {i} has wrong result size" + + def test_empty_input(self): + """Test with empty molecule list.""" + results = tfd.GetTFDMatrices([]) + assert len(results) == 0 + + def test_batch_vs_individual(self, multiple_mols_with_conformers): + """Test that batch results match individual processing.""" + mols = multiple_mols_with_conformers + + batch_results = tfd.GetTFDMatrices(mols) + individual_results = [tfd.GetTFDMatrix(mol) for mol in mols] + + assert len(batch_results) == len(individual_results) + + for batch, individual in zip(batch_results, individual_results): + assert len(batch) == len(individual) + for b, ind in zip(batch, individual): + assert abs(b - ind) < 1e-4 + + +class TestGpuResidentOutput: + """Tests for GPU-resident TFD output via return_type parameter.""" + + def test_tensor_return_single(self, simple_mol_with_conformers): + """Test return_type='tensor' for a single molecule.""" + mol = simple_mol_with_conformers + + tensor = tfd.GetTFDMatrix(mol, return_type="tensor") + + assert isinstance(tensor, torch.Tensor) + assert tensor.device.type == "cuda" + num_conf = mol.GetNumConformers() + expected_size = num_conf * (num_conf - 1) // 2 + assert tensor.shape[0] == expected_size + + def test_tensor_return_batch(self, multiple_mols_with_conformers): + """Test return_type='tensor' for batch.""" + mols = multiple_mols_with_conformers + + tensors = tfd.GetTFDMatrices(mols, return_type="tensor") + assert len(tensors) == len(mols) + + for i, (mol, tensor) in enumerate(zip(mols, tensors)): + assert isinstance(tensor, torch.Tensor) + assert tensor.device.type == "cuda" + num_conf = mol.GetNumConformers() + expected_size = num_conf * (num_conf - 1) // 2 + assert tensor.shape[0] == expected_size, f"Molecule {i} has wrong tensor size" + + def test_list_matches_tensor(self, multiple_mols_with_conformers): + """Test that list and tensor return types give consistent results.""" + mols = multiple_mols_with_conformers + + lists = tfd.GetTFDMatrices(mols, return_type="list") + tensors = tfd.GetTFDMatrices(mols, return_type="tensor") + + assert len(lists) == len(tensors) + for gpu_list, tensor in zip(lists, tensors): + tensor_list = tensor.cpu().tolist() + assert len(gpu_list) == len(tensor_list) + for l, t in zip(gpu_list, tensor_list): + assert abs(l - t) < 1e-4 + + def test_return_type_numpy(self, multiple_mols_with_conformers): + """Test return_type='numpy' returns correct numpy arrays.""" + import numpy as np + + mols = multiple_mols_with_conformers + lists = tfd.GetTFDMatrices(mols, return_type="list") + arrays = tfd.GetTFDMatrices(mols, return_type="numpy") + + assert len(arrays) == len(lists) + for arr, lst in zip(arrays, lists): + assert isinstance(arr, np.ndarray) + assert arr.dtype == np.float32 + assert len(arr) == len(lst) + for a, l in zip(arr.tolist(), lst): + assert abs(a - l) < 1e-4 + + def test_return_type_tensor(self, multiple_mols_with_conformers): + """Test return_type='tensor' returns correct GPU tensors.""" + import torch + + mols = multiple_mols_with_conformers + lists = tfd.GetTFDMatrices(mols, return_type="list") + tensors = tfd.GetTFDMatrices(mols, return_type="tensor") + + assert len(tensors) == len(lists) + for tensor, lst in zip(tensors, lists): + assert isinstance(tensor, torch.Tensor) + assert tensor.is_cuda + assert tensor.dtype == torch.float32 + assert len(tensor) == len(lst) + for t, l in zip(tensor.cpu().tolist(), lst): + assert abs(t - l) < 1e-4 + + +class TestCompareWithRDKit: + """Tests comparing nvMolKit TFD with RDKit TFD.""" + + def test_simple_chain_molecule(self): + """Test simple chain molecule against RDKit.""" + mol = Chem.MolFromSmiles("CCCCC") + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"nvMolKit={nv}, RDKit={rd}" + + def test_no_weights_comparison(self): + """Test without weights against RDKit.""" + mol = Chem.MolFromSmiles("CCCCCC") + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix(mol, useWeights=False, maxDev="equal") + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol, useWeights=False, maxDev="equal") + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE + + def test_ring_molecule(self): + """Test ring molecule (multi-quartet) against RDKit.""" + mol = Chem.MolFromSmiles("C1CCCCC1") # cyclohexane + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"nvMolKit={nv}, RDKit={rd}" + + def test_ring_with_substituent(self): + """Test molecule with both ring and non-ring torsions against RDKit.""" + mol = Chem.MolFromSmiles("c1ccccc1CC") # ethylbenzene + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"nvMolKit={nv}, RDKit={rd}" + + @pytest.mark.parametrize("backend", ["gpu", "cpu"]) + @pytest.mark.parametrize( + "smiles", + ["CCCCC", "CC(C)CC"], + ) + def test_add_hs(self, backend, smiles): + """Test molecules with explicit hydrogens against RDKit.""" + mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix(mol, useWeights=True, maxDev="equal", backend=backend) + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev="equal") + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"AddHs({smiles}), backend={backend}: nvMolKit={nv}, RDKit={rd}" + + @pytest.mark.parametrize("backend", ["gpu", "cpu"]) + def test_symmetric_molecule_gpu_cpu_consistency(self, backend): + """Test branched molecule gives consistent results across backends.""" + mol = Chem.MolFromSmiles("CC(C)CC") # isopentane + generate_conformers(mol, 4) + + result = tfd.GetTFDMatrix(mol, backend=backend) + rdkit_result = TorsionFingerprints.GetTFDMatrix(mol) + + assert len(result) == len(rdkit_result) + + for nv, rd in zip(result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"backend={backend}: nvMolKit={nv}, RDKit={rd}" + + @pytest.mark.parametrize("backend", ["gpu", "cpu"]) + @pytest.mark.parametrize("symm_radius", [0, 1, 3]) + def test_symm_radius(self, backend, symm_radius): + """Test different symmRadius values against RDKit on both backends.""" + mol = Chem.MolFromSmiles("CC(C)CC") + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix( + mol, + symmRadius=symm_radius, + backend=backend, + ) + rdkit_result = TorsionFingerprints.GetTFDMatrix( + mol, + symmRadius=symm_radius, + ) + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"symmRadius={symm_radius}, backend={backend}: nvMolKit={nv}, RDKit={rd}" + + @pytest.mark.parametrize("backend", ["gpu", "cpu"]) + def test_ignore_colinear_bonds_false(self, backend): + """Test ignoreColinearBonds=False against RDKit on both backends.""" + mol = Chem.MolFromSmiles("CCCC#CCC") + generate_conformers(mol, 4) + + nvmolkit_result = tfd.GetTFDMatrix( + mol, + ignoreColinearBonds=False, + backend=backend, + ) + rdkit_result = TorsionFingerprints.GetTFDMatrix( + mol, + ignoreColinearBonds=False, + ) + + assert len(nvmolkit_result) == len(rdkit_result) + + for nv, rd in zip(nvmolkit_result, rdkit_result): + assert abs(nv - rd) < TOLERANCE, f"ignoreColinearBonds=False, backend={backend}: nvMolKit={nv}, RDKit={rd}" + + +class TestEdgeCases: + """Tests for edge cases.""" + + def test_ethane_no_torsions(self): + """Test ethane (terminal bonds only, no rotatable bonds).""" + mol = Chem.MolFromSmiles("CC") + generate_conformers(mol, 3) + + result = tfd.GetTFDMatrix(mol) + + # Should return zeros + for val in result: + assert val == 0.0 + + def test_large_molecule(self): + """Test larger molecule.""" + # Decane + mol = Chem.MolFromSmiles("CCCCCCCCCC") + generate_conformers(mol, 5) + + result = tfd.GetTFDMatrix(mol) + + num_conf = mol.GetNumConformers() + expected_size = num_conf * (num_conf - 1) // 2 + assert len(result) == expected_size + + for val in result: + assert 0.0 <= val <= 1.0 # TFD should be normalized + + def test_invalid_molecule_raises(self): + """Test that None molecule raises error.""" + with pytest.raises(Exception): + tfd.GetTFDMatrix(None) + + def test_invalid_molecule_in_batch_raises(self, simple_mol_with_conformers): + """Test that None in batch raises error.""" + mols = [simple_mol_with_conformers, None] + + with pytest.raises(Exception): + tfd.GetTFDMatrices(mols) diff --git a/nvmolkit/tfd.cpp b/nvmolkit/tfd.cpp new file mode 100644 index 0000000..5c19ccf --- /dev/null +++ b/nvmolkit/tfd.cpp @@ -0,0 +1,169 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include + +#include "array_helpers.h" +#include "nvtx.h" +#include "tfd_cpu.h" +#include "tfd_gpu.h" + +namespace { + +using namespace boost::python; +namespace numpy = boost::python::numpy; + +using CpuTFDResults = std::vector>; + +std::vector listToMolVector(const boost::python::list& mols) { + std::vector molsVec; + molsVec.reserve(len(mols)); + for (int i = 0; i < len(mols); i++) { + const RDKit::ROMol* mol = boost::python::extract(boost::python::object(mols[i])); + if (mol == nullptr) { + throw std::invalid_argument("Invalid molecule at index " + std::to_string(i)); + } + molsVec.push_back(mol); + } + return molsVec; +} + +boost::python::list intVectorToList(const std::vector& vec) { + boost::python::list result; + for (const auto& value : vec) { + result.append(value); + } + return result; +} + +nvMolKit::TFDComputeOptions buildOptions(bool useWeights, + const std::string& maxDev, + int symmRadius, + bool ignoreColinearBonds) { + nvMolKit::TFDComputeOptions options; + options.useWeights = useWeights; + options.symmRadius = symmRadius; + options.ignoreColinearBonds = ignoreColinearBonds; + + if (maxDev == "equal") { + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + } else if (maxDev == "spec") { + options.maxDevMode = nvMolKit::TFDMaxDevMode::Spec; + } else { + throw std::invalid_argument("maxDev must be 'equal' or 'spec', got: " + maxDev); + } + + return options; +} + +boost::python::object toOwnedPyArray(nvMolKit::PyArray* array) { + using Converter = boost::python::manage_new_object::apply::type; + return boost::python::object(boost::python::handle<>(Converter()(array))); +} + +nvMolKit::TFDCpuGenerator& getCpuGenerator() { + static nvMolKit::TFDCpuGenerator generator; + return generator; +} + +nvMolKit::TFDGpuGenerator& getGpuGenerator() { + static nvMolKit::TFDGpuGenerator generator; + return generator; +} + +} // namespace + +BOOST_PYTHON_MODULE(_TFD) { + numpy::initialize(); + + // CPU path: returns flat numpy array + offsets (avoids per-element Python object creation) + def( + "GetTFDMatricesCpuBuffer", + +[](const boost::python::list& mols, + bool useWeights, + const std::string& maxDev, + int symmRadius, + bool ignoreColinearBonds) -> boost::python::object { + auto molsVec = listToMolVector(mols); + auto options = buildOptions(useWeights, maxDev, symmRadius, ignoreColinearBonds); + options.backend = nvMolKit::TFDComputeBackend::CPU; + auto results = getCpuGenerator().GetTFDMatrices(molsVec, options); + + nvMolKit::ScopedNvtxRange range("CPU: wrap as numpy arrays", nvMolKit::NvtxColor::kGreen); + + // Move results to heap so PyCapsule can own the memory + auto* owned = new CpuTFDResults(std::move(results)); + + auto deleter = [](PyObject* cap) { + delete reinterpret_cast(PyCapsule_GetPointer(cap, "nvmolkit.cpu_tfd")); + }; + PyObject* cap = PyCapsule_New(static_cast(owned), "nvmolkit.cpu_tfd", deleter); + if (cap == nullptr) { + delete owned; + throw std::runtime_error("Failed to create PyCapsule for CPU TFD results"); + } + object owner{handle<>(cap)}; + + // Create per-molecule numpy array views (zero-copy, each backed by the capsule) + const Py_intptr_t stride = static_cast(sizeof(double)); + boost::python::list arrays; + for (auto& vec : *owned) { + const Py_intptr_t shape = static_cast(vec.size()); + arrays.append(numpy::from_data(vec.data(), + numpy::dtype::get_builtin(), + make_tuple(shape), + make_tuple(stride), + owner)); + } + return arrays; + }, + (arg("mols"), + arg("useWeights") = true, + arg("maxDev") = "equal", + arg("symmRadius") = 2, + arg("ignoreColinearBonds") = true)); + + // GPU path: returns GPU-resident buffer + metadata + def( + "GetTFDMatricesGpuBuffer", + +[](const boost::python::list& mols, + bool useWeights, + const std::string& maxDev, + int symmRadius, + bool ignoreColinearBonds) -> boost::python::object { + auto molsVec = listToMolVector(mols); + auto options = buildOptions(useWeights, maxDev, symmRadius, ignoreColinearBonds); + options.backend = nvMolKit::TFDComputeBackend::GPU; + + auto gpuResult = getGpuGenerator().GetTFDMatricesGpuBuffer(molsVec, options); + + nvMolKit::ScopedNvtxRange range("GPU: C++ to Python tuple", nvMolKit::NvtxColor::kYellow); + boost::python::list outputStarts = intVectorToList(gpuResult.tfdOutputStarts); + + size_t totalSize = gpuResult.tfdValues.size(); + auto* pyArray = nvMolKit::makePyArray(gpuResult.tfdValues, boost::python::make_tuple(totalSize)); + + return boost::python::make_tuple(toOwnedPyArray(pyArray), outputStarts); + }, + (arg("mols"), + arg("useWeights") = true, + arg("maxDev") = "equal", + arg("symmRadius") = 2, + arg("ignoreColinearBonds") = true)); +} diff --git a/nvmolkit/tfd.py b/nvmolkit/tfd.py new file mode 100644 index 0000000..648438c --- /dev/null +++ b/nvmolkit/tfd.py @@ -0,0 +1,216 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GPU-accelerated Torsion Fingerprint Deviation (TFD) calculation. + +This module provides GPU-accelerated implementations of TFD calculation, +compatible with RDKit's TorsionFingerprints API. + +TFD (Torsion Fingerprint Deviation) is a measure of conformational similarity +based on the comparison of torsion angles between conformers. It was described +in: Schulz-Gasch et al., JCIM, 52, 1499-1512 (2012). + +Example usage: + >>> from rdkit import Chem + >>> from rdkit.Chem import AllChem + >>> import nvmolkit.tfd as tfd + >>> + >>> mol = Chem.MolFromSmiles('CCCCC') + >>> AllChem.EmbedMultipleConfs(mol, numConfs=5) + >>> + >>> # Python lists (RDKit-compatible, default) + >>> tfd_matrix = tfd.GetTFDMatrix(mol) + >>> + >>> # Numpy arrays (fast, CPU) + >>> tfd_matrices = tfd.GetTFDMatrices(mols, return_type="numpy") + >>> + >>> # GPU tensors (fastest, no D2H copy) + >>> tfd_matrices = tfd.GetTFDMatrices(mols, return_type="tensor") +""" + +from contextlib import nullcontext +from typing import List, Union + +import numpy as np +import torch + +try: + import nvtx as _nvtx + + def _nvtx_range(name, color="blue"): + return _nvtx.annotate(name, color=color) +except ImportError: + + def _nvtx_range(name, color="blue"): + return nullcontext() + + +from nvmolkit import _TFD +from nvmolkit._arrayHelpers import * # noqa: F403 +from nvmolkit.types import AsyncGpuResult + + +class _TFDGpuResult: + """Internal result container for GPU-resident TFD computation.""" + + def __init__(self, tfd_values: AsyncGpuResult, output_starts: List[int]): + self.tfd_values = tfd_values + self.output_starts = output_starts + + def to_tensors(self) -> List[torch.Tensor]: + """Extract as list of GPU tensors (no D2H copy).""" + n = len(self.output_starts) - 1 + all_values = self.tfd_values.torch() + return [all_values[self.output_starts[i] : self.output_starts[i + 1]] for i in range(n)] + + def to_numpy(self) -> List[np.ndarray]: + """Extract as list of numpy arrays (one bulk D2H copy).""" + n = len(self.output_starts) - 1 + torch.cuda.synchronize() + all_values = self.tfd_values.numpy() + return [all_values[self.output_starts[i] : self.output_starts[i + 1]] for i in range(n)] + + def to_lists(self) -> List[List[float]]: + """Extract as Python lists (bulk D2H + tolist).""" + n = len(self.output_starts) - 1 + torch.cuda.synchronize() + all_list = self.tfd_values.numpy().tolist() + return [all_list[self.output_starts[i] : self.output_starts[i + 1]] for i in range(n)] + + +def _get_gpu_result(mols, useWeights, maxDev, symmRadius, ignoreColinearBonds): + """Run GPU TFD computation and return _TFDGpuResult (no D2H copy).""" + pyarray, output_starts = _TFD.GetTFDMatricesGpuBuffer( + mols, + useWeights=useWeights, + maxDev=maxDev, + symmRadius=symmRadius, + ignoreColinearBonds=ignoreColinearBonds, + ) + return _TFDGpuResult( + tfd_values=AsyncGpuResult(pyarray), + output_starts=output_starts, + ) + + +def _extract_gpu_result(gpu_result, return_type): + """Extract results from _TFDGpuResult based on return_type.""" + if return_type == "tensor": + return gpu_result.to_tensors() + elif return_type == "numpy": + return gpu_result.to_numpy() + else: + return gpu_result.to_lists() + + +def _extract_cpu_result(arrays, return_type): + """Convert list of numpy arrays to requested return type.""" + if return_type == "numpy": + return list(arrays) + elif return_type == "tensor": + return [torch.from_numpy(a) for a in arrays] + else: + return [a.tolist() for a in arrays] + + +def GetTFDMatrices( + mols: List, + useWeights: bool = True, + maxDev: str = "equal", + symmRadius: int = 2, + ignoreColinearBonds: bool = True, + backend: str = "gpu", + return_type: str = "list", +) -> Union[List[List[float]], List[np.ndarray], List[torch.Tensor]]: + """Calculate TFD matrices for multiple molecules. + + Args: + mols: List of RDKit molecules, each with multiple conformers. + useWeights: If True (default), use distance-based torsion weights. + maxDev: Normalization mode ('equal' or 'spec'). + symmRadius: Radius for atom invariants (default: 2). + ignoreColinearBonds: If True (default), ignore colinear bonds. + backend: Computation backend, 'gpu' (default) or 'cpu'. + return_type: Output format: + 'list' (default): List of Python lists (RDKit-compatible). + 'numpy': List of numpy arrays (float64 for CPU, float32 for GPU). + 'tensor': List of torch.Tensors (GPU tensors for GPU backend, CPU tensors for CPU backend). + + Returns: + List of TFD matrices in the requested format. + """ + if backend in ("gpu", "GPU"): + gpu_result = _get_gpu_result(mols, useWeights, maxDev, symmRadius, ignoreColinearBonds) + with _nvtx_range("GPU: split per-molecule results", color="yellow"): + return _extract_gpu_result(gpu_result, return_type) + + if backend not in ("cpu", "CPU"): + raise ValueError(f"backend must be 'gpu' or 'cpu', got: '{backend}'") + + arrays = _TFD.GetTFDMatricesCpuBuffer( + mols, + useWeights=useWeights, + maxDev=maxDev, + symmRadius=symmRadius, + ignoreColinearBonds=ignoreColinearBonds, + ) + with _nvtx_range("CPU: convert result format", color="gray"): + return _extract_cpu_result(arrays, return_type) + + +def GetTFDMatrix( + mol, + useWeights: bool = True, + maxDev: str = "equal", + symmRadius: int = 2, + ignoreColinearBonds: bool = True, + backend: str = "gpu", + return_type: str = "list", +) -> Union[List[float], np.ndarray, torch.Tensor]: + """Calculate the TFD matrix for conformers of a molecule. + + Convenience wrapper over GetTFDMatrices for a single molecule. + + Args: + mol: RDKit molecule with multiple conformers. + useWeights: If True (default), use distance-based torsion weights. + maxDev: Normalization mode ('equal' or 'spec'). + symmRadius: Radius for atom invariants (default: 2). + ignoreColinearBonds: If True (default), ignore colinear bonds. + backend: Computation backend, 'gpu' (default) or 'cpu'. + return_type: Output format: + 'list' (default): Python list of floats (RDKit-compatible). + 'numpy': numpy float32 array (CPU). + 'tensor': GPU torch.Tensor (no D2H copy). + + Returns: + Lower triangular TFD matrix as a flat list, numpy array, or GPU tensor. + """ + results = GetTFDMatrices( + [mol], + useWeights=useWeights, + maxDev=maxDev, + symmRadius=symmRadius, + ignoreColinearBonds=ignoreColinearBonds, + backend=backend, + return_type=return_type, + ) + if not results: + if return_type == "numpy": + return np.array([], dtype=np.float32) + elif return_type == "tensor": + return torch.tensor([], dtype=torch.float32) + return [] + return results[0] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8d9cb37..78e50d7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -144,3 +144,32 @@ target_link_libraries( PRIVATE ${RDKit_LIBS}) target_include_directories(conformer_rmsd_mol PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# TFD (Torsion Fingerprint Deviation) CPU implementation +add_library(tfd_cpu tfd_common.cpp tfd_cpu.cpp tfd_transfer.cu) +target_link_libraries( + tfd_cpu + PRIVATE ${RDKit_LIBS} OpenMP::OpenMP_CXX openmp_helpers nvtx + PUBLIC device_vector) +target_include_directories(tfd_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# TFD GPU kernels +add_library(tfd_kernels tfd_kernels.cu) +target_link_libraries( + tfd_kernels + PRIVATE cuda_error_check + PUBLIC device_vector) +target_include_directories(tfd_kernels PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# TFD GPU orchestration +add_library(tfd_gpu tfd_gpu.cpp) +target_link_libraries( + tfd_gpu + PRIVATE tfd_kernels tfd_cpu ${RDKit_LIBS} nvtx + PUBLIC device_vector device) +target_include_directories(tfd_gpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# TFD unified public API +add_library(tfd tfd.cpp) +target_link_libraries(tfd PUBLIC tfd_cpu tfd_gpu ${RDKit_LIBS}) +target_include_directories(tfd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/tfd.cpp b/src/tfd.cpp new file mode 100644 index 0000000..29225b3 --- /dev/null +++ b/src/tfd.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tfd.h" + +#include + +namespace nvMolKit { + +void TFDGenerator::initializeBackendIfNeeded(TFDComputeBackend backend) { + switch (backend) { + case TFDComputeBackend::GPU: + if (!gpuGenerator_) { + gpuGenerator_ = std::make_unique(); + } + break; + case TFDComputeBackend::CPU: + if (!cpuGenerator_) { + cpuGenerator_ = std::make_unique(); + } + break; + } +} + +std::vector TFDGenerator::GetTFDMatrix(const RDKit::ROMol& mol, const TFDComputeOptions& options) { + initializeBackendIfNeeded(options.backend); + + switch (options.backend) { + case TFDComputeBackend::GPU: + return gpuGenerator_->GetTFDMatrix(mol, options); + case TFDComputeBackend::CPU: + return cpuGenerator_->GetTFDMatrix(mol, options); + } + + return {}; +} + +std::vector> TFDGenerator::GetTFDMatrices(const std::vector& mols, + const TFDComputeOptions& options) { + initializeBackendIfNeeded(options.backend); + + switch (options.backend) { + case TFDComputeBackend::GPU: + return gpuGenerator_->GetTFDMatrices(mols, options); + case TFDComputeBackend::CPU: + return cpuGenerator_->GetTFDMatrices(mols, options); + } + + return {}; +} + +} // namespace nvMolKit diff --git a/src/tfd.h b/src/tfd.h new file mode 100644 index 0000000..46c1107 --- /dev/null +++ b/src/tfd.h @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_H +#define NVMOLKIT_TFD_H + +#include +#include + +#include "tfd_common.h" +#include "tfd_cpu.h" +#include "tfd_gpu.h" + +namespace nvMolKit { + +//! Unified TFD generator with automatic backend selection +//! +//! Provides a single interface for TFD computation, automatically +//! selecting between CPU and GPU backends based on the options provided. +//! Backends are lazily initialized on first use. +//! +//! For GPU-resident output, use TFDGpuGenerator::GetTFDMatricesGpuBuffer directly. +class TFDGenerator { + public: + TFDGenerator() = default; + + //! Compute TFD matrix for a single molecule + std::vector GetTFDMatrix(const RDKit::ROMol& mol, const TFDComputeOptions& options = TFDComputeOptions{}); + + //! Compute TFD matrices for multiple molecules + std::vector> GetTFDMatrices(const std::vector& mols, + const TFDComputeOptions& options = TFDComputeOptions{}); + + private: + void initializeBackendIfNeeded(TFDComputeBackend backend); + + std::unique_ptr gpuGenerator_; + std::unique_ptr cpuGenerator_; +}; + +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_H diff --git a/src/tfd_common.cpp b/src/tfd_common.cpp new file mode 100644 index 0000000..d5f5758 --- /dev/null +++ b/src/tfd_common.cpp @@ -0,0 +1,845 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tfd_common.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include "nvtx.h" + +namespace nvMolKit { + +namespace { + +//! Get heavy atom neighbors of an atom, optionally excluding one atom +std::vector getHeavyAtomNeighbors(const RDKit::Atom* atom, int excludeIdx = -1) { + std::vector neighbors; + for (const auto* neighbor : atom->getOwningMol().atomNeighbors(atom)) { + if (neighbor->getAtomicNum() != 1 && neighbor->getIdx() != static_cast(excludeIdx)) { + neighbors.push_back(neighbor); + } + } + return neighbors; +} + +//! Check if all atoms have the same invariant +bool doMatch(const std::vector& inv, const std::vector& atoms) { + if (atoms.size() < 2) { + return true; + } + auto firstInv = inv[atoms[0]->getIdx()]; + for (size_t i = 1; i < atoms.size(); ++i) { + if (inv[atoms[i]->getIdx()] != firstInv) { + return false; + } + } + return true; +} + +//! Find the atom that is different when two atoms match (for 3 atoms) +const RDKit::Atom* doMatchExcept1(const std::vector& inv, const std::vector& atoms) { + if (atoms.size() != 3) { + return nullptr; + } + int a1 = atoms[0]->getIdx(); + int a2 = atoms[1]->getIdx(); + int a3 = atoms[2]->getIdx(); + + if (inv[a1] == inv[a2] && inv[a1] != inv[a3] && inv[a2] != inv[a3]) { + return atoms[2]; + } else if (inv[a1] != inv[a2] && inv[a1] == inv[a3] && inv[a2] != inv[a3]) { + return atoms[1]; + } else if (inv[a1] != inv[a2] && inv[a1] != inv[a3] && inv[a2] == inv[a3]) { + return atoms[0]; + } + return nullptr; +} + +//! Get atom invariants using Morgan fingerprints at given radius +std::vector getAtomInvariantsWithRadius(const RDKit::ROMol& mol, int radius) { + std::vector inv(mol.getNumAtoms(), 0); + + auto fpGen = RDKit::MorganFingerprint::getMorganGenerator(radius, + false /* countSimulation */, + false /* includeChirality */, + true /* useBondTypes */, + false /* onlyNonzeroInvariants */, + true /* includeRedundantEnvironments */); + + RDKit::AdditionalOutput ao; + ao.allocateBitInfoMap(); + + // Call to populate ao.bitInfoMap; result is unused + (void)fpGen->getSparseCountFingerprint(mol, nullptr, nullptr, -1, &ao); + const auto& bitInfo = ao.bitInfoMap; + + if (bitInfo) { + for (const auto& [bitId, atomRadiusPairs] : *bitInfo) { + for (const auto& [atomIdx, r] : atomRadiusPairs) { + if (r == static_cast(radius)) { + inv[atomIdx] = bitId; + } + } + } + } + + return inv; +} + +//! Get reference atoms for torsion based on neighbor symmetry +std::vector getIndexForTorsion(const std::vector& neighbors, + const std::vector& inv) { + if (neighbors.size() == 1) { + return neighbors; + } else if (doMatch(inv, neighbors)) { + // All symmetric neighbors - return all + return neighbors; + } else if (neighbors.size() == 3) { + const RDKit::Atom* different = doMatchExcept1(inv, neighbors); + if (different) { + return {different}; + } + } + // Fallback: sort and take first + auto sorted = neighbors; + std::sort(sorted.begin(), sorted.end(), [&inv](const RDKit::Atom* a, const RDKit::Atom* b) { + return inv[a->getIdx()] < inv[b->getIdx()]; + }); + return {sorted[0]}; +} + +//! Information about a rotatable bond for torsion calculation +struct BondInfo { + int a1; // First central atom + int a2; // Second central atom + std::vector nb1; // Heavy atom neighbors of a1 (excluding a2) + std::vector nb2; // Heavy atom neighbors of a2 (excluding a1) +}; + +//! Get bonds for which torsions should be calculated +std::vector getBondsForTorsions(const RDKit::ROMol& mol, bool ignoreColinearBonds) { + // Flag atoms that cannot be middle atoms of torsion (triple bonds, allenes) + std::vector atomFlags(mol.getNumAtoms(), 0); + + // Pattern for triple bonds + auto triplePattern = RDKit::SmartsToMol("*#*"); + if (triplePattern) { + std::vector matches; + RDKit::SubstructMatch(mol, *triplePattern, matches); + for (const auto& match : matches) { + for (const auto& [_, atomIdx] : match) { + atomFlags[atomIdx] = 1; + } + } + } + + // Pattern for allenes + auto allenePattern = RDKit::SmartsToMol("[$([C](=*)=*)]"); + if (allenePattern) { + std::vector matches; + RDKit::SubstructMatch(mol, *allenePattern, matches); + for (const auto& match : matches) { + for (const auto& [_, atomIdx] : match) { + atomFlags[atomIdx] = 1; + } + } + } + + std::vector bonds; + std::vector doneBonds(mol.getNumBonds(), 0); + + const auto* ringInfo = mol.getRingInfo(); + for (const auto* bond : mol.bonds()) { + if (ringInfo->numBondRings(bond->getIdx()) > 0) { + continue; + } + + int a1 = bond->getBeginAtomIdx(); + int a2 = bond->getEndAtomIdx(); + + auto nb1 = getHeavyAtomNeighbors(bond->getBeginAtom(), a2); + auto nb2 = getHeavyAtomNeighbors(bond->getEndAtom(), a1); + + if (!doneBonds[bond->getIdx()] && !nb1.empty() && !nb2.empty()) { + doneBonds[bond->getIdx()] = 1; + + // Check if atoms cannot be middle atoms + if (atomFlags[a1] || atomFlags[a2]) { + if (!ignoreColinearBonds) { + // Search for alternative atoms (following the Python logic) + while (nb1.size() == 1 && atomFlags[a1]) { + int a1old = a1; + a1 = nb1[0]->getIdx(); + auto* b = mol.getBondBetweenAtoms(a1old, a1); + if (b) { + if (b->getEndAtomIdx() == static_cast(a1old)) { + nb1 = getHeavyAtomNeighbors(b->getBeginAtom(), a1old); + } else { + nb1 = getHeavyAtomNeighbors(b->getEndAtom(), a1old); + } + doneBonds[b->getIdx()] = 1; + } else { + break; + } + } + while (nb2.size() == 1 && atomFlags[a2]) { + int a2old = a2; + a2 = nb2[0]->getIdx(); + auto* b = mol.getBondBetweenAtoms(a2old, a2); + if (b) { + if (b->getBeginAtomIdx() == static_cast(a2old)) { + nb2 = getHeavyAtomNeighbors(b->getEndAtom(), a2old); + } else { + nb2 = getHeavyAtomNeighbors(b->getBeginAtom(), a2old); + } + doneBonds[b->getIdx()] = 1; + } else { + break; + } + } + if (!nb1.empty() && !nb2.empty()) { + bonds.push_back({a1, a2, nb1, nb2}); + } + } + // If ignoreColinearBonds is true, we skip this bond + } else { + bonds.push_back({a1, a2, nb1, nb2}); + } + } + } + + return bonds; +} + +//! Find the most central bond in a molecule. +//! Returns {-1, -1} if no central bond can be found (e.g., methane, linear molecules). +//! +//! NOTE: For molecules with near-perfect C2 topological symmetry (e.g. macrocyclic +//! peptides), two atoms may have STDs that differ only at machine-epsilon (~4e-16). +//! The sort tiebreak can therefore pick either atom as "most central", and the choice +//! may differ from RDKit Python (which uses numpy.std with a different accumulation +//! order). This causes the weighted TFD to diverge by up to ~0.1 for affected +//! molecules (~0.8% of ChEMBL), while unweighted TFD matches exactly. Both weight +//! schemes are equally valid — the TFD paper does not prescribe a tiebreak rule. +std::pair findCentralBond(const RDKit::ROMol& mol, const double* distMat) { + int numAtoms = mol.getNumAtoms(); + + // Calculate STD of distances for each non-terminal atom + std::vector> stds; + for (int i = 0; i < numAtoms; ++i) { + auto neighbors = getHeavyAtomNeighbors(mol.getAtomWithIdx(i)); + if (neighbors.size() < 2) { + continue; // Skip terminal atoms + } + + // Calculate STD of distances + double sum = 0.0; + double sumSq = 0.0; + int count = 0; + for (int j = 0; j < numAtoms; ++j) { + if (j != i) { + double d = distMat[i * numAtoms + j]; + sum += d; + sumSq += d * d; + count++; + } + } + double mean = sum / count; + double variance = (sumSq / count) - (mean * mean); + double stdDev = std::sqrt(std::max(0.0, variance)); + stds.emplace_back(stdDev, i); + } + + if (stds.empty()) { + return {-1, -1}; // No non-terminal atoms found + } + + std::sort(stds.begin(), stds.end()); + int aid1 = stds[0].second; + + // Find second most central atom that is bonded to aid1 + for (size_t i = 1; i < stds.size(); ++i) { + if (mol.getBondBetweenAtoms(aid1, stds[i].second) != nullptr) { + return {aid1, stds[i].second}; + } + } + + return {-1, -1}; // Could not find central bond +} + +//! Calculate beta parameter for weight calculation +double calculateBeta(const RDKit::ROMol& mol, const double* distMat, int aid1) { + int numAtoms = mol.getNumAtoms(); + + // Get all non-terminal bonds + // NOTE: RDKit has a typo in _calculateBeta (TorsionFingerprints.py ~line 391): + // `if len(nb2) > 1 and len(nb2) > 1` checks nb2 twice instead of nb1 and nb2. + // This includes bonds where only the end atom is non-terminal, inflating dmax. + // We replicate this behavior for RDKit compatibility. + // TODO: Fix once RDKit corrects this, or add a flag for "correct" behavior. + double dmax = 0.0; + for (const auto* bond : mol.bonds()) { + auto nb2 = getHeavyAtomNeighbors(bond->getEndAtom()); + if (nb2.size() > 1 && nb2.size() > 1) { + int bid1 = bond->getBeginAtomIdx(); + int bid2 = bond->getEndAtomIdx(); + double d = std::max(distMat[aid1 * numAtoms + bid1], distMat[aid1 * numAtoms + bid2]); + dmax = std::max(dmax, d); + } + } + + double dmax2 = dmax / 2.0; + if (dmax2 < 1e-6) { + dmax2 = 1.0; // Avoid division by zero + } + double beta = -std::log(0.1) / (dmax2 * dmax2); + return beta; +} + +} // namespace + +// Internal: build torsion list from precomputed bonds (used when caller already has bonds) +static TorsionList extractTorsionListImpl(const RDKit::ROMol& mol, + TFDMaxDevMode maxDevMode, + int symmRadius, + const std::vector& bonds) { + TorsionList result; + + // Get atom invariants + std::vector inv; + if (symmRadius > 0) { + inv = getAtomInvariantsWithRadius(mol, symmRadius); + } else { + // Use connectivity invariants as fallback + inv.resize(mol.getNumAtoms()); + for (unsigned int i = 0; i < mol.getNumAtoms(); ++i) { + inv[i] = mol.getAtomWithIdx(i)->getDegree(); + } + } + + // Process each bond to create torsions + for (const auto& bond : bonds) { + auto d1 = getIndexForTorsion(bond.nb1, inv); + auto d2 = getIndexForTorsion(bond.nb2, inv); + + TorsionDef torsion; + + if (maxDevMode == TFDMaxDevMode::Equal) { + // Equal mode: all combinations (d1 x d2), maxDev 180 (default path) + for (const auto* n1 : d1) { + for (const auto* n2 : d2) { + torsion.atomQuartets.push_back( + {static_cast(n1->getIdx()), bond.a1, bond.a2, static_cast(n2->getIdx())}); + } + } + torsion.maxDev = 180.0f; + } else { + // Spec mode: build quartets and set torsion-specific maxDev + if (d1.size() == 1 && d2.size() == 1) { + // Case 1, 2, 4, 5, 7, 10, 16, 12, 17, 19 - single torsion + torsion.atomQuartets.push_back( + {static_cast(d1[0]->getIdx()), bond.a1, bond.a2, static_cast(d2[0]->getIdx())}); + torsion.maxDev = 180.0f; + } else if (d1.size() == 1) { + // Case 3, 6, 8, 13, 20 - multiple torsions from d2 + for (const auto* nb : d2) { + torsion.atomQuartets.push_back( + {static_cast(d1[0]->getIdx()), bond.a1, bond.a2, static_cast(nb->getIdx())}); + } + torsion.maxDev = (bond.nb2.size() == 2) ? 90.0f : 60.0f; + } else if (d2.size() == 1) { + // Case 3, 6, 8, 13, 20 - multiple torsions from d1 + for (const auto* nb : d1) { + torsion.atomQuartets.push_back( + {static_cast(nb->getIdx()), bond.a1, bond.a2, static_cast(d2[0]->getIdx())}); + } + torsion.maxDev = (bond.nb1.size() == 2) ? 90.0f : 60.0f; + } else { + // Both symmetric - all combinations + for (const auto* n1 : d1) { + for (const auto* n2 : d2) { + torsion.atomQuartets.push_back( + {static_cast(n1->getIdx()), bond.a1, bond.a2, static_cast(n2->getIdx())}); + } + } + if (bond.nb1.size() == 2 && bond.nb2.size() == 2) { + torsion.maxDev = 90.0f; + } else if (bond.nb1.size() == 3 && bond.nb2.size() == 3) { + torsion.maxDev = 60.0f; + } else { + torsion.maxDev = 30.0f; + } + } + } + + result.nonRingTorsions.push_back(std::move(torsion)); + } + + // Process rings + auto rings = mol.getRingInfo()->atomRings(); + for (const auto& ring : rings) { + TorsionDef torsion; + int num = static_cast(ring.size()); + + // Calculate max deviation for ring + float maxdev; + if (num >= 14) { + maxdev = 180.0f; + } else { + maxdev = 180.0f * std::exp(-0.025f * (num - 14) * (num - 14)); + } + + // Create torsions for ring (consecutive 4 atoms) + for (int i = 0; i < num; ++i) { + torsion.atomQuartets.push_back({ring[i], ring[(i + 1) % num], ring[(i + 2) % num], ring[(i + 3) % num]}); + } + torsion.maxDev = maxdev; + + result.ringTorsions.push_back(std::move(torsion)); + } + + return result; +} + +TorsionList extractTorsionList(const RDKit::ROMol& mol, + TFDMaxDevMode maxDevMode, + int symmRadius, + bool ignoreColinearBonds) { + auto bonds = getBondsForTorsions(mol, ignoreColinearBonds); + return extractTorsionListImpl(mol, maxDevMode, symmRadius, bonds); +} + +// Internal: compute weights using precomputed bonds (used when caller already has bonds) +static std::vector computeTorsionWeightsImpl(const RDKit::ROMol& mol, + const TorsionList& torsionList, + const std::vector& bonds) { + std::vector weights; + + // If no torsions, return empty weights + size_t totalTorsions = torsionList.totalCount(); + if (totalTorsions == 0) { + return weights; + } + + // Get distance matrix (returns raw pointer, stored in mol's dictionary) + const double* distMat = RDKit::MolOps::getDistanceMat(mol); + int numAtoms = mol.getNumAtoms(); + + // Find central bond + auto [aid1, aid2] = findCentralBond(mol, distMat); + + // If no central bond found, return uniform weights + if (aid1 < 0 || aid2 < 0) { + weights.resize(totalTorsions, 1.0f); + return weights; + } + + // Calculate beta + double beta = calculateBeta(mol, distMat, aid1); + + // Calculate weights for non-ring torsions (bonds provided by caller) + for (size_t i = 0; i < bonds.size(); ++i) { + const auto& bond = bonds[i]; + double d; + + if ((bond.a1 == aid1 && bond.a2 == aid2) || (bond.a1 == aid2 && bond.a2 == aid1)) { + d = 0.0; // Central bond itself + } else { + // Shortest distance to central bond atoms + 1 + d = std::min({distMat[aid1 * numAtoms + bond.a1], + distMat[aid1 * numAtoms + bond.a2], + distMat[aid2 * numAtoms + bond.a1], + distMat[aid2 * numAtoms + bond.a2]}) + + 1.0; + } + + float w = static_cast(std::exp(-beta * d * d)); + weights.push_back(w); + } + + // Calculate weights for ring torsions + auto ringInfo = mol.getRingInfo(); + auto bondRings = ringInfo->bondRings(); + + for (const auto& bondRing : bondRings) { + int num = static_cast(bondRing.size()); + double sumD = 0.0; + + for (int bidx : bondRing) { + const auto* bond = mol.getBondWithIdx(bidx); + int bid1 = bond->getBeginAtomIdx(); + int bid2 = bond->getEndAtomIdx(); + double d = std::min({distMat[aid1 * numAtoms + bid1], + distMat[aid1 * numAtoms + bid2], + distMat[aid2 * numAtoms + bid1], + distMat[aid2 * numAtoms + bid2]}) + + 1.0; + sumD += d; + } + + double avgD = sumD / num; + float w = static_cast(std::exp(-beta * avgD * avgD) * (num / 2.0)); + weights.push_back(w); + } + + return weights; +} + +std::vector computeTorsionWeights(const RDKit::ROMol& mol, + const TorsionList& torsionList, + bool ignoreColinearBonds) { + auto bonds = getBondsForTorsions(mol, ignoreColinearBonds); + return computeTorsionWeightsImpl(mol, torsionList, bonds); +} + +// Sequential single-molecule builder (used as the building block for parallel batch builds) +static TFDSystemHost buildTFDSystemImpl(const RDKit::ROMol& mol, const TFDComputeOptions& options) { + TFDSystemHost system; + + int numConformers = mol.getNumConformers(); + int numAtoms = mol.getNumAtoms(); + + if (numConformers == 0) { + throw std::runtime_error("Molecule has no conformers"); + } + + // Get bonds once and reuse for torsion list and (optionally) weights + auto bonds = getBondsForTorsions(mol, options.ignoreColinearBonds); + TorsionList torsionList = extractTorsionListImpl(mol, options.maxDevMode, options.symmRadius, bonds); + + // Extract weights if needed (reuse same bonds) + std::vector weights; + if (options.useWeights) { + weights = computeTorsionWeightsImpl(mol, torsionList, bonds); + } + + int confStart = static_cast(system.confPositionStarts.size()); + int torsStart = system.totalTorsions(); + int quartetStartForMol = system.totalQuartets(); + + // Extract coordinates (tightly packed, no padding) + for (auto confIt = mol.beginConformers(); confIt != mol.endConformers(); ++confIt) { + system.confPositionStarts.push_back(static_cast(system.positions.size())); + const auto& conf = **confIt; + for (int atomIdx = 0; atomIdx < numAtoms; ++atomIdx) { + const auto& pos = conf.getAtomPos(atomIdx); + system.positions.push_back(static_cast(pos.x)); + system.positions.push_back(static_cast(pos.y)); + system.positions.push_back(static_cast(pos.z)); + } + } + + // Add torsion definitions (store ALL quartets, classify type) + int torsionIdx = 0; + + for (const auto& torsion : torsionList.nonRingTorsions) { + if (torsion.atomQuartets.empty()) { + torsionIdx++; + continue; + } + for (const auto& q : torsion.atomQuartets) + system.torsionAtoms.push_back(q); + system.quartetStarts.push_back(static_cast(system.torsionAtoms.size())); + system.torsionTypes.push_back(torsion.atomQuartets.size() > 1 ? TorsionType::Symmetric : TorsionType::Single); + system.torsionMaxDevs.push_back(torsion.maxDev); + system.torsionWeights.push_back( + (options.useWeights && torsionIdx < static_cast(weights.size())) ? weights[torsionIdx] : 1.0f); + torsionIdx++; + } + + for (const auto& torsion : torsionList.ringTorsions) { + if (torsion.atomQuartets.empty()) { + torsionIdx++; + continue; + } + for (const auto& q : torsion.atomQuartets) + system.torsionAtoms.push_back(q); + system.quartetStarts.push_back(static_cast(system.torsionAtoms.size())); + system.torsionTypes.push_back(torsion.atomQuartets.size() > 1 ? TorsionType::Ring : TorsionType::Single); + system.torsionMaxDevs.push_back(torsion.maxDev); + system.torsionWeights.push_back( + (options.useWeights && torsionIdx < static_cast(weights.size())) ? weights[torsionIdx] : 1.0f); + torsionIdx++; + } + + // Build per-molecule descriptor + int numTorsions = system.totalTorsions() - torsStart; + int totalQuartetsForMol = system.totalQuartets() - quartetStartForMol; + int dihedStart = system.totalDihedrals(); + int numDihedrals = numConformers * totalQuartetsForMol; + int numTFDOutputs = numConformers * (numConformers - 1) / 2; + int tfdOutStart = system.totalTFDOutputs(); + + system.totalDihedrals_ = dihedStart + numDihedrals; + + MolDescriptor desc; + desc.confStart = confStart; + desc.numConformers = numConformers; + desc.quartetStart = quartetStartForMol; + desc.numQuartets = totalQuartetsForMol; + desc.dihedStart = dihedStart; + desc.torsStart = torsStart; + desc.numTorsions = numTorsions; + desc.tfdOutStart = tfdOutStart; + system.molDescriptors.push_back(desc); + + system.dihedralWorkStarts.push_back(system.dihedralWorkStarts.back() + numDihedrals); + system.tfdWorkStarts.push_back(system.tfdWorkStarts.back() + numTFDOutputs); + + return system; +} + +//! Lightweight per-molecule extraction result (used in two-pass batch build). +//! Contains RDKit-derived torsion data, coordinates, and per-molecule sizes needed +//! for computing global offsets. Coordinates are extracted during Pass 1 (parallel) +//! so that Pass 2 only needs memcpy, not RDKit conformer access. +struct MolExtraction { + TorsionList torsionList; + std::vector weights; + std::vector> atoms; //!< Flattened torsion atom quartets + std::vector wts; //!< Weight per torsion + std::vector maxDevs; //!< MaxDev per torsion + std::vector types; //!< Type per torsion + std::vector qStarts; //!< CSR for quartets (including leading 0) + std::vector positions; //!< Flat xyz coords for all conformers + int numConformers; + int numAtoms; + int numTorsions; //!< = wts.size() + int numQuartets; //!< = atoms.size() + int numPositions; //!< = numConformers * numAtoms * 3 +}; + +//! Extract torsion data and coordinates from a single molecule (Pass 1 of two-pass build). +//! Coordinates are extracted here (in parallel) so Pass 2 only needs memcpy. +static MolExtraction extractMolData(const RDKit::ROMol& mol, const TFDComputeOptions& options) { + MolExtraction ext; + ext.numConformers = mol.getNumConformers(); + ext.numAtoms = mol.getNumAtoms(); + ext.numPositions = ext.numConformers * ext.numAtoms * 3; + + if (ext.numConformers == 0) { + throw std::runtime_error("Molecule has no conformers"); + } + + auto bonds = getBondsForTorsions(mol, options.ignoreColinearBonds); + ext.torsionList = extractTorsionListImpl(mol, options.maxDevMode, options.symmRadius, bonds); + if (options.useWeights) { + ext.weights = computeTorsionWeightsImpl(mol, ext.torsionList, bonds); + } + + // Extract coordinates (tightly packed, no padding) + ext.positions.resize(ext.numPositions); + int posIdx = 0; + for (auto confIt = mol.beginConformers(); confIt != mol.endConformers(); ++confIt) { + const auto& conf = **confIt; + for (int a = 0; a < ext.numAtoms; ++a) { + const auto& pos = conf.getAtomPos(a); + ext.positions[posIdx] = static_cast(pos.x); + ext.positions[posIdx + 1] = static_cast(pos.y); + ext.positions[posIdx + 2] = static_cast(pos.z); + posIdx += 3; + } + } + + // Flatten torsion definitions into GPU-ready arrays + ext.qStarts.push_back(0); + int torsionIdx = 0; + + auto addTorsion = [&](const TorsionDef& torsion, bool isRing) { + if (torsion.atomQuartets.empty()) { + torsionIdx++; + return; + } + for (const auto& q : torsion.atomQuartets) + ext.atoms.push_back(q); + ext.qStarts.push_back(static_cast(ext.atoms.size())); + if (isRing) { + ext.types.push_back(torsion.atomQuartets.size() > 1 ? TorsionType::Ring : TorsionType::Single); + } else { + ext.types.push_back(torsion.atomQuartets.size() > 1 ? TorsionType::Symmetric : TorsionType::Single); + } + ext.maxDevs.push_back(torsion.maxDev); + ext.wts.push_back( + (options.useWeights && torsionIdx < static_cast(ext.weights.size())) ? ext.weights[torsionIdx] : 1.0f); + torsionIdx++; + }; + + for (const auto& t : ext.torsionList.nonRingTorsions) + addTorsion(t, false); + for (const auto& t : ext.torsionList.ringTorsions) + addTorsion(t, true); + + ext.numTorsions = static_cast(ext.wts.size()); + ext.numQuartets = static_cast(ext.atoms.size()); + return ext; +} + +TFDSystemHost buildTFDSystem(const std::vector& mols, const TFDComputeOptions& options) { + ScopedNvtxRange range("buildTFDSystem (" + std::to_string(mols.size()) + " mols)", NvtxColor::kCyan); + + if (mols.empty()) { + return {}; + } + if (mols.size() == 1) { + return buildTFDSystemImpl(*mols[0], options); + } + + int N = static_cast(mols.size()); + + // ---- Pass 1: extract torsion data per-molecule in parallel ---- + // This is the expensive part (RDKit fingerprinting, SMARTS matching, etc.) + std::vector extractions(N); + { + ScopedNvtxRange buildRange("Parallel RDKit extraction", NvtxColor::kCyan); +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < N; ++i) { + extractions[i] = extractMolData(*mols[i], options); + } + } + + // ---- Compute global offsets (sequential, ~O(N) with tiny per-element cost) ---- + ScopedNvtxRange mergeRange("Compute offsets & fill system", NvtxColor::kCyan); + + std::vector posOffset(N); + std::vector confOffset(N); + std::vector quartetOffset(N); + std::vector torsOffset(N); + std::vector dihedOffset(N); + std::vector tfdOutOffset(N); + + int totalPos = 0; + int totalConfs = 0; + int totalQuartets = 0; + int totalTors = 0; + int totalDiheds = 0; + int totalTfdOuts = 0; + + for (int i = 0; i < N; ++i) { + const auto& ext = extractions[i]; + posOffset[i] = totalPos; + confOffset[i] = totalConfs; + quartetOffset[i] = totalQuartets; + torsOffset[i] = totalTors; + dihedOffset[i] = totalDiheds; + tfdOutOffset[i] = totalTfdOuts; + + totalPos += ext.numPositions; + totalConfs += ext.numConformers; + totalQuartets += ext.numQuartets; + totalTors += ext.numTorsions; + totalDiheds += ext.numConformers * ext.numQuartets; + totalTfdOuts += ext.numConformers * (ext.numConformers - 1) / 2; + } + + // ---- Allocate the final system (single allocation per array) ---- + // For large arrays (positions, torsionAtoms), we use reserve() + resize() + // to avoid double-writing: reserve() allocates without init, then the parallel + // fill writes every element exactly once before the vector is used. + TFDSystemHost system; + // Positions is the largest array (~29MB for 1900 mols x 100 confs). + // Use reserve+resize pattern: reserve avoids reallocation during parallel fill. + system.positions.resize(totalPos); + system.confPositionStarts.resize(totalConfs); + system.torsionAtoms.resize(totalQuartets); + system.torsionWeights.resize(totalTors); + system.torsionMaxDevs.resize(totalTors); + system.torsionTypes.resize(totalTors); + system.quartetStarts.resize(1 + totalTors); + system.quartetStarts[0] = 0; + system.molDescriptors.resize(N); + system.dihedralWorkStarts.resize(1 + N); + system.dihedralWorkStarts[0] = 0; + system.tfdWorkStarts.resize(1 + N); + system.tfdWorkStarts[0] = 0; + system.totalDihedrals_ = totalDiheds; + + // ---- Pass 2: fill the system in parallel ---- + // Each thread writes to its own slice of the pre-allocated arrays (no contention). +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < N; ++i) { + const auto& ext = extractions[i]; + + // Positions: bulk memcpy from pre-extracted coordinates + if (ext.numPositions > 0) { + std::memcpy(system.positions.data() + posOffset[i], ext.positions.data(), ext.numPositions * sizeof(float)); + } + // confPositionStarts: compute directly + for (int c = 0; c < ext.numConformers; ++c) { + system.confPositionStarts[confOffset[i] + c] = posOffset[i] + c * ext.numAtoms * 3; + } + + // Torsion atoms: bulk copy (no offset adjustment needed) + if (ext.numQuartets > 0) { + std::memcpy(&system.torsionAtoms[quartetOffset[i]], + ext.atoms.data(), + ext.numQuartets * sizeof(std::array)); + } + if (ext.numTorsions > 0) { + std::memcpy(&system.torsionWeights[torsOffset[i]], ext.wts.data(), ext.numTorsions * sizeof(float)); + std::memcpy(&system.torsionMaxDevs[torsOffset[i]], ext.maxDevs.data(), ext.numTorsions * sizeof(float)); + std::memcpy(&system.torsionTypes[torsOffset[i]], ext.types.data(), ext.numTorsions * sizeof(TorsionType)); + } + + // QuartetStarts CSR: skip leading 0, add global quartet offset + for (int j = 0; j < ext.numTorsions; ++j) { + system.quartetStarts[1 + torsOffset[i] + j] = quartetOffset[i] + ext.qStarts[j + 1]; + } + + // MolDescriptor + MolDescriptor desc; + desc.confStart = confOffset[i]; + desc.numConformers = ext.numConformers; + desc.quartetStart = quartetOffset[i]; + desc.numQuartets = ext.numQuartets; + desc.dihedStart = dihedOffset[i]; + desc.torsStart = torsOffset[i]; + desc.numTorsions = ext.numTorsions; + desc.tfdOutStart = tfdOutOffset[i]; + system.molDescriptors[i] = desc; + + // Work CSRs + system.dihedralWorkStarts[1 + i] = dihedOffset[i] + ext.numConformers * ext.numQuartets; + system.tfdWorkStarts[1 + i] = tfdOutOffset[i] + ext.numConformers * (ext.numConformers - 1) / 2; + } + + return system; +} + +TFDSystemHost buildTFDSystem(const RDKit::ROMol& mol, const TFDComputeOptions& options) { + return buildTFDSystemImpl(mol, options); +} + +} // namespace nvMolKit diff --git a/src/tfd_common.h b/src/tfd_common.h new file mode 100644 index 0000000..7941eac --- /dev/null +++ b/src/tfd_common.h @@ -0,0 +1,213 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_COMMON_H +#define NVMOLKIT_TFD_COMMON_H + +#include + +#include +#include + +#include "device_vector.h" +#include "tfd_types.h" + +namespace nvMolKit { + +//! Device to run TFD computation on +enum class TFDComputeBackend { + CPU, + GPU, +}; + +//! Maximum deviation mode for torsion normalization +enum class TFDMaxDevMode { + Equal, //!< All torsions normalized by 180.0 + Spec, //!< Each torsion normalized by its specific max deviation +}; + +//! A single torsion definition: four atom indices and normalization factor +struct TorsionDef { + //! Atom indices defining the torsion (a-b-c-d dihedral) + //! For symmetric cases, multiple quartets may define equivalent torsions + std::vector> atomQuartets; + //! Maximum deviation for normalization (180.0 for equal mode, or specific value) + float maxDev = 180.0f; +}; + +//! Torsion list for a single molecule (non-ring and ring torsions) +struct TorsionList { + std::vector nonRingTorsions; + std::vector ringTorsions; + + //! Total number of torsions (non-ring + ring) + size_t totalCount() const { return nonRingTorsions.size() + ringTorsions.size(); } +}; + +//! Performance and algorithm options for TFD computation +struct TFDComputeOptions { + //! Whether to use distance-based weights for torsions + bool useWeights = true; + //! Maximum deviation mode + TFDMaxDevMode maxDevMode = TFDMaxDevMode::Equal; + //! Radius for Morgan fingerprint atom invariants (for symmetry detection) + int symmRadius = 2; + //! Whether to ignore single bonds adjacent to triple bonds + bool ignoreColinearBonds = true; + //! Backend selection + TFDComputeBackend backend = TFDComputeBackend::GPU; +}; + +//! Flattened system data on host for a batch of molecules +struct TFDSystemHost { + //! Flattened 3D coordinates, tightly packed (no padding) + //! Stored as: conf0_atom0_xyz, conf0_atom1_xyz, ..., conf1_atom0_xyz, ... + std::vector positions; + + //! Position start offset per conformer [totalConformers] + //! confPositionStarts[i] = float offset into positions for conformer i + std::vector confPositionStarts; + + //! Flattened torsion atom indices: [totalQuartets][4] + //! Multiple quartets per torsion (when torsionTypes[t] is Ring or Symmetric) are stored contiguously. + std::vector> torsionAtoms; + + //! Weight per torsion [totalTorsions] + std::vector torsionWeights; + + //! Maximum deviation per torsion [totalTorsions] + std::vector torsionMaxDevs; + + //! Type per torsion [totalTorsions] - Single, Ring, or Symmetric + std::vector torsionTypes; + + //! CSR index: quartet boundaries per torsion [totalTorsions + 1] + //! quartetStarts[t] to quartetStarts[t+1] are indices into torsionAtoms for torsion t + std::vector quartetStarts = {0}; + + // ========== Per-molecule descriptors for GPU kernel dispatch ========== + + //! One descriptor per molecule with all offsets needed by both kernels + std::vector molDescriptors; + + //! CSR: cumulative dihedral work items per molecule [nMols + 1] + //! dihedralWorkStarts[m+1] - dihedralWorkStarts[m] = numConformers[m] * numQuartets[m] + std::vector dihedralWorkStarts = {0}; + + //! CSR: cumulative TFD pair work items per molecule [nMols + 1] + //! tfdWorkStarts[m+1] - tfdWorkStarts[m] = C[m] * (C[m]-1) / 2 + std::vector tfdWorkStarts = {0}; + + //! Total number of dihedral angle values (numConformers * totalQuartets across all molecules) + int totalDihedrals_ = 0; + + //! Total number of molecules + int numMolecules() const { return static_cast(molDescriptors.size()); } + + //! Total number of torsions across all molecules + int totalTorsions() const { return quartetStarts.empty() ? 0 : static_cast(quartetStarts.size()) - 1; } + + //! Total number of quartets across all molecules + int totalQuartets() const { return quartetStarts.empty() ? 0 : quartetStarts.back(); } + + //! Total number of TFD output values + int totalTFDOutputs() const { return tfdWorkStarts.empty() ? 0 : tfdWorkStarts.back(); } + + //! Total number of dihedral angle values to store + int totalDihedrals() const { return totalDihedrals_; } + + //! Total number of dihedral work items (for dihedral kernel launch) + int totalDihedralWorkItems() const { return dihedralWorkStarts.empty() ? 0 : dihedralWorkStarts.back(); } +}; + +//! Flattened system data on GPU device +//! Mirrors TFDSystemHost for device-side storage. +struct TFDSystemDevice { + // ========== Shared kernel inputs ========== + + //! Flattened 3D coordinates (tightly packed, no padding) + AsyncDeviceVector positions; + //! Position start offset per conformer [totalConformers] + AsyncDeviceVector confPositionStarts; + //! Flattened torsion atom indices [totalQuartets * 4] + AsyncDeviceVector torsionAtoms; + + //! Weight per torsion + AsyncDeviceVector torsionWeights; + //! Maximum deviation per torsion + AsyncDeviceVector torsionMaxDevs; + //! CSR index: quartet boundaries per torsion [totalTorsions + 1] + AsyncDeviceVector quartetStarts; + //! Type per torsion [totalTorsions] + AsyncDeviceVector torsionTypes; + + // ========== Per-molecule descriptors ========== + + //! One MolDescriptor per molecule [nMols] + AsyncDeviceVector molDescriptors; + //! CSR: cumulative dihedral work items [nMols + 1] + AsyncDeviceVector dihedralWorkStarts; + //! CSR: cumulative TFD pair work items [nMols + 1] + AsyncDeviceVector tfdWorkStarts; + + // ========== Output buffers ========== + + //! Output: computed dihedral angles [totalDihedrals] + AsyncDeviceVector dihedralAngles; + //! Output: TFD matrix values + AsyncDeviceVector tfdOutput; + + //! Set CUDA stream for all buffers + void setStream(cudaStream_t stream); +}; + +//! Extract torsion list from an RDKit molecule +//! @param mol The molecule to analyze +//! @param maxDevMode How to determine max deviation for normalization +//! @param symmRadius Radius for Morgan fingerprint (symmetry detection) +//! @param ignoreColinearBonds Whether to skip bonds adjacent to triple bonds +//! @return TorsionList containing non-ring and ring torsions +TorsionList extractTorsionList(const RDKit::ROMol& mol, + TFDMaxDevMode maxDevMode = TFDMaxDevMode::Equal, + int symmRadius = 2, + bool ignoreColinearBonds = true); + +//! Calculate distance-based weights for torsions +//! @param mol The molecule +//! @param torsionList Previously extracted torsion list +//! @param ignoreColinearBonds Must match the value used in extractTorsionList +//! @return Vector of weights (size = total torsion count) +std::vector computeTorsionWeights(const RDKit::ROMol& mol, + const TorsionList& torsionList, + bool ignoreColinearBonds = true); + +//! Transfer host system data to device, resizing and allocating output buffers +//! @param host Source host data +//! @param device Destination device data (resized as needed) +//! @param stream CUDA stream for async transfers +void transferToDevice(const TFDSystemHost& host, TFDSystemDevice& device, cudaStream_t stream); + +//! Build TFDSystemHost from a batch of molecules +//! @param mols Vector of molecules (each may have multiple conformers) +//! @param options Computation options +//! @return Populated TFDSystemHost ready for GPU transfer +TFDSystemHost buildTFDSystem(const std::vector& mols, const TFDComputeOptions& options); + +//! Build TFDSystemHost from a single molecule (convenience wrapper) +TFDSystemHost buildTFDSystem(const RDKit::ROMol& mol, const TFDComputeOptions& options); + +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_COMMON_H diff --git a/src/tfd_cpu.cpp b/src/tfd_cpu.cpp new file mode 100644 index 0000000..ef85ac9 --- /dev/null +++ b/src/tfd_cpu.cpp @@ -0,0 +1,214 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tfd_cpu.h" + +#include + +#include "tfd_detail.h" + +#ifdef _OPENMP +#include +#endif + +#include "nvtx.h" + +namespace nvMolKit { + +double TFDCpuGenerator::computeTFDPair(const float* anglesI, + const float* anglesJ, + const TorsionList& torsionList, + const std::vector& weights) { + double sumWeightedDev = 0.0; + double sumWeights = 0.0; + int qOffset = 0; + int torsIdx = 0; + + auto processTorsion = [&](const TorsionDef& torsion, bool isRing) { + int numQ = static_cast(torsion.atomQuartets.size()); + if (numQ == 0) { + torsIdx++; + return; + } + + double deviation; + if (numQ == 1) { + float diff = detail::circularDifference(anglesI[qOffset], anglesJ[qOffset]); + deviation = diff / torsion.maxDev; + } else if (isRing) { + // Average abs(signed dihedral) for each conformer, then compare averages. + // Our angles are in [0,360) with a 180 deg offset from RDKit's signed convention, + // so abs(signed) = abs(angle - 180). + double avgI = 0.0, avgJ = 0.0; + for (int q = 0; q < numQ; ++q) { + avgI += std::abs(static_cast(anglesI[qOffset + q]) - 180.0); + avgJ += std::abs(static_cast(anglesJ[qOffset + q]) - 180.0); + } + avgI /= numQ; + avgJ /= numQ; + deviation = std::abs(avgI - avgJ) / torsion.maxDev; + } else { + // Symmetric: minimum circular difference across all (qi, qj) pairs + double minDiff = 180.0; + for (int qi = 0; qi < numQ; ++qi) { + for (int qj = 0; qj < numQ; ++qj) { + float diff = detail::circularDifference(anglesI[qOffset + qi], anglesJ[qOffset + qj]); + minDiff = std::min(minDiff, static_cast(diff)); + } + } + deviation = minDiff / torsion.maxDev; + } + + float w = (torsIdx < static_cast(weights.size())) ? weights[torsIdx] : 1.0f; + sumWeightedDev += deviation * w; + sumWeights += w; + qOffset += numQ; + torsIdx++; + }; + + for (const auto& t : torsionList.nonRingTorsions) + processTorsion(t, false); + for (const auto& t : torsionList.ringTorsions) + processTorsion(t, true); + + return (sumWeights > 1e-10) ? (sumWeightedDev / sumWeights) : 0.0; +} + +std::vector TFDCpuGenerator::computeDihedralAngles(const RDKit::ROMol& mol, const TorsionList& torsionList) { + int numConformers = mol.getNumConformers(); + int totalQuartets = 0; + for (const auto& t : torsionList.nonRingTorsions) + totalQuartets += static_cast(t.atomQuartets.size()); + for (const auto& t : torsionList.ringTorsions) + totalQuartets += static_cast(t.atomQuartets.size()); + + if (totalQuartets == 0 || numConformers == 0) { + return {}; + } + + std::vector angles(numConformers * totalQuartets); + + // Collect all quartets into a flat list for indexed access + std::vector*> allQuartets; + allQuartets.reserve(totalQuartets); + for (const auto& t : torsionList.nonRingTorsions) + for (const auto& q : t.atomQuartets) + allQuartets.push_back(&q); + for (const auto& t : torsionList.ringTorsions) + for (const auto& q : t.atomQuartets) + allQuartets.push_back(&q); + + int numAtoms = mol.getNumAtoms(); + int confIdx = 0; + for (auto confIt = mol.beginConformers(); confIt != mol.endConformers(); ++confIt, ++confIdx) { + const auto& conf = **confIt; + std::vector pos(numAtoms * 3); + for (int a = 0; a < numAtoms; ++a) { + const auto& p = conf.getAtomPos(a); + pos[a * 3] = static_cast(p.x); + pos[a * 3 + 1] = static_cast(p.y); + pos[a * 3 + 2] = static_cast(p.z); + } + + for (int qIdx = 0; qIdx < totalQuartets; ++qIdx) { + const auto& quartet = *allQuartets[qIdx]; + angles[confIdx * totalQuartets + qIdx] = detail::computeDihedralAngle(&pos[quartet[0] * 3], + &pos[quartet[1] * 3], + &pos[quartet[2] * 3], + &pos[quartet[3] * 3]); + } + } + + return angles; +} + +std::vector TFDCpuGenerator::computeTFDMatrixFromAngles(const std::vector& angles, + int numConformers, + int totalQuartets, + const TorsionList& torsionList, + const std::vector& weights) { + int numPairs = numConformers * (numConformers - 1) / 2; + std::vector tfdMatrix(numPairs); + +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (int i = 1; i < numConformers; ++i) { + for (int j = 0; j < i; ++j) { + int pairIdx = i * (i - 1) / 2 + j; + double tfd = + computeTFDPair(angles.data() + i * totalQuartets, angles.data() + j * totalQuartets, torsionList, weights); + tfdMatrix[pairIdx] = tfd; + } + } + + return tfdMatrix; +} + +TFDCpuGenerator::TorsionData TFDCpuGenerator::extractTorsionData(const RDKit::ROMol& mol, + const TFDComputeOptions& options) { + TorsionData data; + data.torsionList = extractTorsionList(mol, options.maxDevMode, options.symmRadius, options.ignoreColinearBonds); + if (options.useWeights) { + data.weights = computeTorsionWeights(mol, data.torsionList, options.ignoreColinearBonds); + } + return data; +} + +std::vector TFDCpuGenerator::GetTFDMatrix(const RDKit::ROMol& mol, const TFDComputeOptions& options) { + int numConformers = mol.getNumConformers(); + if (numConformers < 2) { + return {}; + } + + TorsionData td; + { + ScopedNvtxRange range("CPU: extractTorsions", NvtxColor::kGreen); + td = extractTorsionData(mol, options); + } + + if (td.torsionList.totalCount() == 0) { + int numPairs = numConformers * (numConformers - 1) / 2; + return std::vector(numPairs, 0.0); + } + + std::vector angles; + int totalQuartets; + { + ScopedNvtxRange range("CPU: computeDihedralAngles", NvtxColor::kGreen); + angles = computeDihedralAngles(mol, td.torsionList); + totalQuartets = static_cast(angles.size()) / numConformers; + } + + ScopedNvtxRange range("CPU: computeTFDMatrix", NvtxColor::kGreen); + return computeTFDMatrixFromAngles(angles, numConformers, totalQuartets, td.torsionList, td.weights); +} + +std::vector> TFDCpuGenerator::GetTFDMatrices(const std::vector& mols, + const TFDComputeOptions& options) { + ScopedNvtxRange range("CPU: GetTFDMatrices (" + std::to_string(mols.size()) + " mols)", NvtxColor::kGreen); + std::vector> results(mols.size()); + +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for (size_t i = 0; i < mols.size(); ++i) { + results[i] = GetTFDMatrix(*mols[i], options); + } + + return results; +} + +} // namespace nvMolKit diff --git a/src/tfd_cpu.h b/src/tfd_cpu.h new file mode 100644 index 0000000..4f043f1 --- /dev/null +++ b/src/tfd_cpu.h @@ -0,0 +1,75 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_CPU_H +#define NVMOLKIT_TFD_CPU_H + +#include + +#include "tfd_common.h" + +namespace nvMolKit { + +//! CPU implementation of TFD computation +class TFDCpuGenerator { + public: + TFDCpuGenerator() = default; + + //! Compute TFD matrix for a single molecule + //! @param mol Molecule with conformers + //! @param options Computation options + //! @return Lower triangular TFD matrix as flat vector [C*(C-1)/2 values] + std::vector GetTFDMatrix(const RDKit::ROMol& mol, const TFDComputeOptions& options = TFDComputeOptions{}); + + //! Compute TFD matrices for multiple molecules + //! @param mols Vector of molecules + //! @param options Computation options + //! @return Vector of TFD matrices, one per molecule + std::vector> GetTFDMatrices(const std::vector& mols, + const TFDComputeOptions& options = TFDComputeOptions{}); + + //! Compute dihedral angles for all conformers of a molecule + //! @param mol Molecule with conformers + //! @param torsionList Previously extracted torsion list + //! @return Angles array [numConformers][totalQuartets], flattened + std::vector computeDihedralAngles(const RDKit::ROMol& mol, const TorsionList& torsionList); + + private: + //! Torsion list and weights for a single molecule + struct TorsionData { + TorsionList torsionList; + std::vector weights; + }; + + //! Extract torsion list and weights from a molecule + static TorsionData extractTorsionData(const RDKit::ROMol& mol, const TFDComputeOptions& options); + + //! Compute TFD matrix from precomputed angles + std::vector computeTFDMatrixFromAngles(const std::vector& angles, + int numConformers, + int totalQuartets, + const TorsionList& torsionList, + const std::vector& weights); + + //! Compute TFD between two conformers + static double computeTFDPair(const float* anglesI, + const float* anglesJ, + const TorsionList& torsionList, + const std::vector& weights); +}; + +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_CPU_H diff --git a/src/tfd_detail.h b/src/tfd_detail.h new file mode 100644 index 0000000..05e5077 --- /dev/null +++ b/src/tfd_detail.h @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_DETAIL_H +#define NVMOLKIT_TFD_DETAIL_H + +#include + +#ifdef __CUDACC__ +#define TFD_INLINE __host__ __device__ __forceinline__ +#else +#define TFD_INLINE inline +#endif + +namespace nvMolKit { +namespace detail { + +constexpr float kPi = 3.14159265358979323846f; +constexpr float kRadToDeg = 180.0f / kPi; + +//! Cross product of two 3D vectors: result = a x b +TFD_INLINE void cross3(const float* a, const float* b, float* result) { + result[0] = a[1] * b[2] - a[2] * b[1]; + result[1] = a[2] * b[0] - a[0] * b[2]; + result[2] = a[0] * b[1] - a[1] * b[0]; +} + +//! Dot product of two 3D vectors +TFD_INLINE float dot3(const float* a, const float* b) { + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; +} + +//! Vector subtraction: result = a - b +TFD_INLINE void sub3(const float* a, const float* b, float* result) { + result[0] = a[0] - b[0]; + result[1] = a[1] - b[1]; + result[2] = a[2] - b[2]; +} + +//! Vector length +TFD_INLINE float length3(const float* v) { + return sqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); +} + +//! Compute dihedral angle in degrees from four atom positions +//! @param p1, p2, p3, p4 The four atom positions (p2-p3 is the central bond) +//! @return Dihedral angle in degrees [0, 360) +TFD_INLINE float computeDihedralAngle(const float* p1, const float* p2, const float* p3, const float* p4) { + // Vector from p2 to p3 (central bond) + float b1[3]; + sub3(p3, p2, b1); + + // Vector from p2 to p1 + float v1[3]; + sub3(p1, p2, v1); + + // Vector from p3 to p4 + float v2[3]; + sub3(p4, p3, v2); + + // Normal to plane 1 (p1-p2-p3) + float n1[3]; + cross3(v1, b1, n1); + + // Normal to plane 2 (p2-p3-p4) + float n2[3]; + cross3(b1, v2, n2); + + // Lengths of normals + float n1Len = length3(n1); + float n2Len = length3(n2); + + if (n1Len < 1e-10f || n2Len < 1e-10f) { + return 0.0f; // Degenerate case + } + + // Compute angle between normals + float cosAngle = dot3(n1, n2) / (n1Len * n2Len); + cosAngle = fmaxf(-1.0f, fminf(1.0f, cosAngle)); // Clamp for numerical stability + float angle = acosf(cosAngle) * kRadToDeg; + + // Determine sign using cross product of normals dotted with central bond + float crossN[3]; + cross3(n1, n2, crossN); + if (dot3(crossN, b1) < 0) { + angle = -angle; + } + + // Normalize to [0, 360) + if (angle < 0) { + angle += 360.0f; + } + + return angle; +} + +//! Compute circular difference between two angles (both in degrees) +//! @param angle1, angle2 Angles in degrees [0, 360) +//! @return Minimum difference in degrees [0, 180] +TFD_INLINE float circularDifference(float angle1, float angle2) { + float diff = fabsf(angle1 - angle2); + if (360.0f - diff < diff) { + diff = 360.0f - diff; + } + return diff; +} + +} // namespace detail +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_DETAIL_H diff --git a/src/tfd_gpu.cpp b/src/tfd_gpu.cpp new file mode 100644 index 0000000..df46875 --- /dev/null +++ b/src/tfd_gpu.cpp @@ -0,0 +1,192 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tfd_gpu.h" + +#include + +#include + +#include "nvtx.h" +#include "tfd_kernels.h" + +namespace nvMolKit { + +// ========== TFDGpuResult ========== + +std::vector TFDGpuResult::extractMolecule(int molIdx) const { + ScopedNvtxRange range("GPU: extractMolecule", NvtxColor::kRed); + if (molIdx < 0 || molIdx >= static_cast(conformerCounts.size())) { + throw std::out_of_range("Invalid molecule index: " + std::to_string(molIdx)); + } + + int outStart = tfdOutputStarts[molIdx]; + int outEnd = tfdOutputStarts[molIdx + 1]; + int numValues = outEnd - outStart; + + if (numValues == 0) { + return {}; + } + + // Copy from device to host + std::vector hostFloats(numValues); + const_cast&>(tfdValues).copyToHost(hostFloats.data(), numValues, 0, outStart); + cudaStreamSynchronize(tfdValues.stream()); + + // Convert to double + std::vector result(numValues); + for (int i = 0; i < numValues; ++i) { + result[i] = static_cast(hostFloats[i]); + } + + return result; +} + +std::vector> TFDGpuResult::extractAll() const { + ScopedNvtxRange range("GPU: extractAll (D2H)", NvtxColor::kRed); + + int numMolecules = static_cast(conformerCounts.size()); + std::vector> results(numMolecules); + + if (tfdValues.size() == 0) { + return results; + } + + int totalValues = static_cast(tfdValues.size()); + + // Copy all data at once + std::vector allHostFloats(totalValues); + const_cast&>(tfdValues).copyToHost(allHostFloats.data(), totalValues); + cudaStreamSynchronize(tfdValues.stream()); + + // Pre-allocate all per-molecule vectors in one pass to reduce allocation overhead + for (int m = 0; m < numMolecules; ++m) { + results[m].resize(tfdOutputStarts[m + 1] - tfdOutputStarts[m]); + } + + // Convert float→double and scatter (parallelizable, cache-friendly) + const float* src = allHostFloats.data(); + for (int m = 0; m < numMolecules; ++m) { + int numValues = static_cast(results[m].size()); + double* dst = results[m].data(); + for (int i = 0; i < numValues; ++i) { + dst[i] = static_cast(src[i]); + } + src += numValues; + } + + return results; +} + +// ========== TFDGpuGenerator ========== + +TFDGpuGenerator::TFDGpuGenerator() : stream_() { + device_.setStream(stream_.stream()); +} + +TFDGpuResult TFDGpuGenerator::GetTFDMatricesGpuBuffer(const std::vector& mols, + const TFDComputeOptions& options) { + ScopedNvtxRange outerRange("GPU: GetTFDMatricesGpuBuffer (" + std::to_string(mols.size()) + " mols)", + NvtxColor::kBlue); + + TFDGpuResult result; + + if (mols.empty()) { + return result; + } + + // Build host system data (CPU preprocessing, parallelized across molecules) + TFDSystemHost system = buildTFDSystem(mols, options); + + // Build result metadata from MolDescriptors + result.tfdOutputStarts.resize(system.numMolecules() + 1); + result.tfdOutputStarts[0] = 0; + result.conformerCounts.resize(system.numMolecules()); + for (int i = 0; i < system.numMolecules(); ++i) { + const auto& desc = system.molDescriptors[i]; + result.conformerCounts[i] = desc.numConformers; + int numPairs = desc.numConformers * (desc.numConformers - 1) / 2; + result.tfdOutputStarts[i + 1] = result.tfdOutputStarts[i] + numPairs; + } + + // Handle edge case: no TFD outputs + if (system.totalTFDOutputs() == 0) { + return result; + } + + cudaStream_t stream = stream_.stream(); + + // Transfer to device (handles resize + copy + output buffer allocation) + { + ScopedNvtxRange range("GPU: transferToDevice (H2D)", NvtxColor::kYellow); + transferToDevice(system, device_, stream); + } + + // Launch dihedral kernel + { + ScopedNvtxRange range("GPU: launchDihedralKernel", NvtxColor::kOrange); + launchDihedralKernel(system.totalDihedralWorkItems(), + device_.positions.data(), + device_.confPositionStarts.data(), + device_.torsionAtoms.data(), + device_.molDescriptors.data(), + device_.dihedralWorkStarts.data(), + system.numMolecules(), + device_.dihedralAngles.data(), + stream); + } + + // Launch TFD matrix kernel (one block per molecule) + { + ScopedNvtxRange range("GPU: launchTFDMatrixKernel", NvtxColor::kOrange); + launchTFDMatrixKernel(system.numMolecules(), + device_.dihedralAngles.data(), + device_.torsionWeights.data(), + device_.torsionMaxDevs.data(), + device_.quartetStarts.data(), + device_.torsionTypes.data(), + device_.molDescriptors.data(), + device_.tfdOutput.data(), + stream); + } + + // Move output to result (transfer ownership of GPU memory) + result.tfdValues = std::move(device_.tfdOutput); + + // Reallocate the output buffer for next use + device_.tfdOutput = AsyncDeviceVector(); + device_.tfdOutput.setStream(stream); + + return result; +} + +std::vector> TFDGpuGenerator::GetTFDMatrices(const std::vector& mols, + const TFDComputeOptions& options) { + ScopedNvtxRange range("GPU: GetTFDMatrices (" + std::to_string(mols.size()) + " mols)", NvtxColor::kBlue); + TFDGpuResult gpuResult = GetTFDMatricesGpuBuffer(mols, options); + return gpuResult.extractAll(); +} + +std::vector TFDGpuGenerator::GetTFDMatrix(const RDKit::ROMol& mol, const TFDComputeOptions& options) { + std::vector mols = {&mol}; + auto results = GetTFDMatrices(mols, options); + + if (results.empty()) { + return {}; + } + return results[0]; +} + +} // namespace nvMolKit diff --git a/src/tfd_gpu.h b/src/tfd_gpu.h new file mode 100644 index 0000000..6560cb5 --- /dev/null +++ b/src/tfd_gpu.h @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_GPU_H +#define NVMOLKIT_TFD_GPU_H + +#include + +#include "device.h" +#include "tfd_common.h" + +namespace nvMolKit { + +//! Result structure for GPU TFD computation with GPU-resident data +struct TFDGpuResult { + //! GPU-resident TFD values (flattened across all molecules) + AsyncDeviceVector tfdValues; + + //! CSR index: TFD output boundaries per molecule [nMols + 1] + std::vector tfdOutputStarts; + + //! Number of conformers per molecule [nMols] + std::vector conformerCounts; + + //! Extract TFD matrix for a single molecule (copies to host) + //! @param molIdx Index of the molecule in the batch + //! @return Lower triangular TFD matrix as flat vector + std::vector extractMolecule(int molIdx) const; + + //! Extract all TFD matrices (copies to host) + //! @return Vector of TFD matrices, one per molecule + std::vector> extractAll() const; +}; + +//! GPU implementation of TFD computation +class TFDGpuGenerator { + public: + TFDGpuGenerator(); + + //! Compute TFD matrix for a single molecule + //! @param mol Molecule with conformers + //! @param options Computation options + //! @return Lower triangular TFD matrix as flat vector [C*(C-1)/2 values] + std::vector GetTFDMatrix(const RDKit::ROMol& mol, const TFDComputeOptions& options = TFDComputeOptions{}); + + //! Compute TFD matrices for multiple molecules + //! @param mols Vector of molecules + //! @param options Computation options + //! @return Vector of TFD matrices, one per molecule + std::vector> GetTFDMatrices(const std::vector& mols, + const TFDComputeOptions& options = TFDComputeOptions{}); + + //! Compute TFD matrices and keep results on GPU + //! @param mols Vector of molecules + //! @param options Computation options + //! @return TFDGpuResult with GPU-resident data + TFDGpuResult GetTFDMatricesGpuBuffer(const std::vector& mols, + const TFDComputeOptions& options = TFDComputeOptions{}); + + private: + // NOTE: Declaration order matters! Stream must be declared BEFORE device_ + // because members are destroyed in reverse order, and device_ buffers need + // a valid stream for cudaFreeAsync during destruction. + ScopedStream stream_; + TFDSystemDevice device_; +}; + +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_GPU_H diff --git a/src/tfd_kernels.cu b/src/tfd_kernels.cu new file mode 100644 index 0000000..f268b0d --- /dev/null +++ b/src/tfd_kernels.cu @@ -0,0 +1,218 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cuda_error_check.h" +#include "tfd_detail.h" +#include "tfd_kernels.h" + +namespace nvMolKit { + +using detail::circularDifference; +using detail::computeDihedralAngle; + +namespace { + +//! Binary search: find m such that starts[m] <= idx < starts[m+1] +__device__ __forceinline__ int findMolecule(const int* starts, int numMolecules, int idx) { + int lo = 0, hi = numMolecules; + while (lo < hi) { + int mid = (lo + hi) / 2; + if (starts[mid + 1] <= idx) { + lo = mid + 1; + } else { + hi = mid; + } + } + return lo; +} + +//! Kernel to compute dihedral angles for all conformers. +//! One thread per (conformer, quartet) work item. +//! Uses binary search on dihedralWorkStarts to find the molecule, +//! then computes (confIdx, quartetIdx, outIdx) arithmetically. +__global__ void dihedralKernel(const int totalWorkItems, + const float* __restrict__ positions, + const int* __restrict__ confPositionStarts, + const int* __restrict__ torsionAtoms, + const MolDescriptor* __restrict__ molDescriptors, + const int* __restrict__ dihedralWorkStarts, + const int numMolecules, + float* __restrict__ dihedralAngles) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= totalWorkItems) { + return; + } + + int m = findMolecule(dihedralWorkStarts, numMolecules, idx); + + const MolDescriptor& desc = molDescriptors[m]; + int localIdx = idx - dihedralWorkStarts[m]; + int c = localIdx / desc.numQuartets; + int q = localIdx % desc.numQuartets; + + int confIdx = desc.confStart + c; + int torsIdx = desc.quartetStart + q; + int outIdx = desc.dihedStart + localIdx; + + int a = torsionAtoms[torsIdx * 4 + 0]; + int b = torsionAtoms[torsIdx * 4 + 1]; + int cc = torsionAtoms[torsIdx * 4 + 2]; + int d = torsionAtoms[torsIdx * 4 + 3]; + + const float* posBase = positions + confPositionStarts[confIdx]; + + dihedralAngles[outIdx] = computeDihedralAngle(posBase + a * 3, posBase + b * 3, posBase + cc * 3, posBase + d * 3); +} + +//! Kernel to compute TFD matrix values. +//! One block per molecule. Threads within a block cooperatively process +//! all C*(C-1)/2 conformer pairs via grid-stride loop. +//! All threads in a block share the same torsion types — no cross-molecule divergence. +__global__ void tfdMatrixKernel(const int numMolecules, + const float* __restrict__ dihedralAngles, + const float* __restrict__ torsionWeights, + const float* __restrict__ torsionMaxDevs, + const int* __restrict__ quartetStarts, + const uint8_t* __restrict__ torsionTypes, + const MolDescriptor* __restrict__ molDescriptors, + float* __restrict__ tfdOutput) { + int m = blockIdx.x; + if (m >= numMolecules) { + return; + } + + const MolDescriptor& desc = molDescriptors[m]; + int numConf = desc.numConformers; + int numPairs = numConf * (numConf - 1) / 2; + int numTors = desc.numTorsions; + int qBase = quartetStarts[desc.torsStart]; + + if (numPairs == 0 || numTors == 0) { + return; + } + + // Each thread processes conformer pairs via grid-stride within this block + for (int pairIdx = threadIdx.x; pairIdx < numPairs; pairIdx += blockDim.x) { + // Decode lower-triangular (i, j) from flat pairIdx + // i*(i-1)/2 + j = pairIdx, i > j >= 0 + // i = floor((1 + sqrt(1 + 8*pairIdx)) / 2) + int i = static_cast((1.0f + sqrtf(1.0f + 8.0f * pairIdx)) * 0.5f); + if (i * (i - 1) / 2 > pairIdx) + i--; + int j = pairIdx - i * (i - 1) / 2; + + int aI = desc.dihedStart + i * desc.numQuartets; + int aJ = desc.dihedStart + j * desc.numQuartets; + + float sumWeightedDev = 0.0f; + float sumWeights = 0.0f; + + for (int t = 0; t < numTors; ++t) { + int globalT = desc.torsStart + t; + int qLocalStart = quartetStarts[globalT] - qBase; + int numQ = quartetStarts[globalT + 1] - quartetStarts[globalT]; + uint8_t type = torsionTypes[globalT]; + + float deviation; + if (type == 0) { // Single + deviation = circularDifference(dihedralAngles[aI + qLocalStart], dihedralAngles[aJ + qLocalStart]) / + torsionMaxDevs[globalT]; + } else if (type == 1) { // Ring + float avgI = 0.0f; + float avgJ = 0.0f; + for (int qq = 0; qq < numQ; ++qq) { + avgI += fabsf(dihedralAngles[aI + qLocalStart + qq] - 180.0f); + avgJ += fabsf(dihedralAngles[aJ + qLocalStart + qq] - 180.0f); + } + avgI /= numQ; + avgJ /= numQ; + deviation = fabsf(avgI - avgJ) / torsionMaxDevs[globalT]; + } else { // Symmetric + float minDiff = 180.0f; + for (int qi = 0; qi < numQ; ++qi) { + for (int qj = 0; qj < numQ; ++qj) { + float diff = + circularDifference(dihedralAngles[aI + qLocalStart + qi], dihedralAngles[aJ + qLocalStart + qj]); + minDiff = fminf(minDiff, diff); + } + } + deviation = minDiff / torsionMaxDevs[globalT]; + } + + float weight = torsionWeights[globalT]; + sumWeightedDev += deviation * weight; + sumWeights += weight; + } + + tfdOutput[desc.tfdOutStart + pairIdx] = (sumWeights > 1e-10f) ? (sumWeightedDev / sumWeights) : 0.0f; + } +} + +} // namespace + +void launchDihedralKernel(int totalWorkItems, + const float* positions, + const int* confPositionStarts, + const int* torsionAtoms, + const MolDescriptor* molDescriptors, + const int* dihedralWorkStarts, + int numMolecules, + float* dihedralAngles, + cudaStream_t stream) { + if (totalWorkItems == 0) { + return; + } + + int gridSize = (totalWorkItems + kTFDBlockSize - 1) / kTFDBlockSize; + + dihedralKernel<<>>(totalWorkItems, + positions, + confPositionStarts, + torsionAtoms, + molDescriptors, + dihedralWorkStarts, + numMolecules, + dihedralAngles); + + cudaCheckError(cudaGetLastError()); +} + +void launchTFDMatrixKernel(int numMolecules, + const float* dihedralAngles, + const float* torsionWeights, + const float* torsionMaxDevs, + const int* quartetStarts, + const uint8_t* torsionTypes, + const MolDescriptor* molDescriptors, + float* tfdOutput, + cudaStream_t stream) { + if (numMolecules == 0) { + return; + } + + // One block per molecule; threads within block process pairs via grid-stride + tfdMatrixKernel<<>>(numMolecules, + dihedralAngles, + torsionWeights, + torsionMaxDevs, + quartetStarts, + torsionTypes, + molDescriptors, + tfdOutput); + + cudaCheckError(cudaGetLastError()); +} + +} // namespace nvMolKit diff --git a/src/tfd_kernels.h b/src/tfd_kernels.h new file mode 100644 index 0000000..190a503 --- /dev/null +++ b/src/tfd_kernels.h @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_KERNELS_H +#define NVMOLKIT_TFD_KERNELS_H + +#include + +#include + +#include "tfd_types.h" + +namespace nvMolKit { + +//! Block size for TFD kernels +constexpr int kTFDBlockSize = 256; + +//! Launch kernel to compute dihedral angles for all conformers. +//! One thread per (conformer, quartet) work item; uses binary search on +//! dihedralWorkStarts to find the molecule, then computes indices arithmetically. +void launchDihedralKernel(int totalWorkItems, + const float* positions, + const int* confPositionStarts, + const int* torsionAtoms, + const MolDescriptor* molDescriptors, + const int* dihedralWorkStarts, + int numMolecules, + float* dihedralAngles, + cudaStream_t stream); + +//! Launch kernel to compute TFD matrix for all conformer pairs. +//! One block per molecule; threads within a block cooperatively process pairs. +//! Uses shared memory for torsion metadata to reduce global reads. +void launchTFDMatrixKernel(int numMolecules, + const float* dihedralAngles, + const float* torsionWeights, + const float* torsionMaxDevs, + const int* quartetStarts, + const uint8_t* torsionTypes, + const MolDescriptor* molDescriptors, + float* tfdOutput, + cudaStream_t stream); + +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_KERNELS_H diff --git a/src/tfd_transfer.cu b/src/tfd_transfer.cu new file mode 100644 index 0000000..530d5bc --- /dev/null +++ b/src/tfd_transfer.cu @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tfd_common.h" + +namespace nvMolKit { + +void TFDSystemDevice::setStream(cudaStream_t stream) { + positions.setStream(stream); + confPositionStarts.setStream(stream); + torsionAtoms.setStream(stream); + torsionWeights.setStream(stream); + torsionMaxDevs.setStream(stream); + quartetStarts.setStream(stream); + torsionTypes.setStream(stream); + molDescriptors.setStream(stream); + dihedralWorkStarts.setStream(stream); + tfdWorkStarts.setStream(stream); + dihedralAngles.setStream(stream); + tfdOutput.setStream(stream); +} + +void transferToDevice(const TFDSystemHost& host, TFDSystemDevice& device, cudaStream_t stream) { + device.setStream(stream); + + // Positions and conformer offsets + device.positions.setFromVector(host.positions); + device.confPositionStarts.setFromVector(host.confPositionStarts); + + // std::array is layout-compatible with int[4]; copy directly + device.torsionAtoms.setFromArray(reinterpret_cast(host.torsionAtoms.data()), + host.torsionAtoms.size() * 4); + + // Torsion metadata + device.torsionWeights.setFromVector(host.torsionWeights); + device.torsionMaxDevs.setFromVector(host.torsionMaxDevs); + device.quartetStarts.setFromVector(host.quartetStarts); + + // TorsionType is enum class : uint8_t; copy directly + device.torsionTypes.setFromArray(reinterpret_cast(host.torsionTypes.data()), + host.torsionTypes.size()); + + // Per-molecule descriptors (compact: ~32 bytes per molecule) + device.molDescriptors.setFromArray(host.molDescriptors.data(), host.molDescriptors.size()); + device.dihedralWorkStarts.setFromVector(host.dihedralWorkStarts); + device.tfdWorkStarts.setFromVector(host.tfdWorkStarts); + + // Allocate and zero output buffers + int totalDihedrals = host.totalDihedrals(); + if (static_cast(device.dihedralAngles.size()) < totalDihedrals) { + device.dihedralAngles.resize(totalDihedrals); + } + cudaMemsetAsync(device.dihedralAngles.data(), 0, totalDihedrals * sizeof(float), stream); + + int totalTFDOutputs = host.totalTFDOutputs(); + if (static_cast(device.tfdOutput.size()) < totalTFDOutputs) { + device.tfdOutput.resize(totalTFDOutputs); + } + cudaMemsetAsync(device.tfdOutput.data(), 0, totalTFDOutputs * sizeof(float), stream); +} + +} // namespace nvMolKit diff --git a/src/tfd_types.h b/src/tfd_types.h new file mode 100644 index 0000000..7c1d8c5 --- /dev/null +++ b/src/tfd_types.h @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NVMOLKIT_TFD_TYPES_H +#define NVMOLKIT_TFD_TYPES_H + +#include + +namespace nvMolKit { + +//! Torsion type for multi-quartet handling +enum class TorsionType : uint8_t { + Single, //!< 1 quartet: direct circularDifference + Ring, //!< N quartets: average abs(signed), compare averages + Symmetric, //!< N quartets: min circularDiff across all (qi,qj) pairs +}; + +//! Per-molecule descriptor for GPU kernel dispatch. +//! Replaces per-work-item index arrays with compact per-molecule metadata. +//! Kernels use binary search on cumulative work-item counts to find the molecule, +//! then compute local indices arithmetically. +struct MolDescriptor { + int confStart; //!< First conformer index (into confPositionStarts) + int numConformers; //!< Number of conformers for this molecule + int quartetStart; //!< First quartet index (into torsionAtoms) + int numQuartets; //!< Total quartets for this molecule + int dihedStart; //!< Offset into dihedralAngles output + int torsStart; //!< First torsion index (into weights/maxDevs/types/quartetStarts) + int numTorsions; //!< Number of torsions + int tfdOutStart; //!< Offset into tfdOutput +}; + +} // namespace nvMolKit + +#endif // NVMOLKIT_TFD_TYPES_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 03945da..c5bce6a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -280,10 +280,30 @@ target_link_libraries( test_substruct_label_integration PRIVATE substructure_search molecules device test_utils molData ${RDKit_LIBS}) +add_executable(test_tfd_cpu test_tfd_cpu.cpp) +target_link_libraries(test_tfd_cpu PRIVATE ${RDKit_LIBS} tfd_cpu test_utils) + +add_executable(test_tfd_kernels test_tfd_kernels.cu) +target_link_libraries( + test_tfd_kernels PRIVATE ${RDKit_LIBS} tfd_cpu tfd_kernels device_vector + test_utils) + +add_executable(test_tfd_gpu test_tfd_gpu.cpp) +target_link_libraries(test_tfd_gpu PRIVATE ${RDKit_LIBS} tfd_gpu device_vector + test_utils) + +add_executable(test_tfd test_tfd.cpp) +target_link_libraries(test_tfd PRIVATE ${RDKit_LIBS} tfd device_vector + test_utils) + set(TEST_LIST test_boolean_tree test_butina test_bfgs_hessian + test_tfd_cpu + test_tfd_kernels + test_tfd_gpu + test_tfd test_openmp_helpers test_conformer_checkers test_coordgen diff --git a/tests/test_tfd.cpp b/tests/test_tfd.cpp new file mode 100644 index 0000000..ec0609f --- /dev/null +++ b/tests/test_tfd.cpp @@ -0,0 +1,163 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include +#include + +#include "tfd.h" +#include "tfd_gpu.h" + +namespace { + +constexpr double kTolerance = 1e-3; + +void generateConformers(RDKit::ROMol& mol, int numConformers, int seed = 42) { + RDKit::DGeomHelpers::EmbedParameters params = RDKit::DGeomHelpers::ETKDGv3; + params.randomSeed = seed; + params.numThreads = 1; + RDKit::DGeomHelpers::EmbedMultipleConfs(mol, numConformers, params); +} + +} // namespace + +// Global check for CUDA availability (same pattern as test_tfd_gpu.cpp) +static bool gCudaAvailable = false; +static bool gCudaChecked = false; + +static bool checkCudaAvailable() { + if (!gCudaChecked) { + gCudaChecked = true; + try { + int deviceCount = 0; + cudaError_t err = cudaGetDeviceCount(&deviceCount); + if (err == cudaSuccess && deviceCount > 0) { + err = cudaSetDevice(0); + if (err == cudaSuccess) { + gCudaAvailable = true; + } else { + cudaGetLastError(); + } + } else { + cudaGetLastError(); + } + } catch (...) { + gCudaAvailable = false; + } + } + return gCudaAvailable; +} + +// ========== Unified TFDGenerator API Tests ========== + +class TFDGeneratorTest : public ::testing::Test { + protected: + void SetUp() override { + if (!checkCudaAvailable()) { + GTEST_SKIP() << "No CUDA devices available, skipping GPU tests"; + } + } +}; + +TEST_F(TFDGeneratorTest, UnifiedAPIBackendSelection) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4); + + nvMolKit::TFDGenerator generator; + + nvMolKit::TFDComputeOptions gpuOptions; + gpuOptions.backend = nvMolKit::TFDComputeBackend::GPU; + auto gpuResult = generator.GetTFDMatrix(*mol, gpuOptions); + + nvMolKit::TFDComputeOptions cpuOptions; + cpuOptions.backend = nvMolKit::TFDComputeBackend::CPU; + auto cpuResult = generator.GetTFDMatrix(*mol, cpuOptions); + + ASSERT_EQ(gpuResult.size(), cpuResult.size()); + for (size_t i = 0; i < gpuResult.size(); ++i) { + EXPECT_NEAR(gpuResult[i], cpuResult[i], kTolerance) << "Mismatch at index " << i; + } +} + +TEST_F(TFDGeneratorTest, UnifiedAPIBatchProcessing) { + std::vector> mols; + std::vector molPtrs; + + for (int i = 0; i < 3; ++i) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + generateConformers(*mol, 3 + i); + if (mol->getNumConformers() >= 2) { + molPtrs.push_back(mol.get()); + mols.push_back(std::move(mol)); + } + } + + ASSERT_GE(mols.size(), 2u); + + nvMolKit::TFDGenerator generator; + + nvMolKit::TFDComputeOptions gpuOptions; + gpuOptions.backend = nvMolKit::TFDComputeBackend::GPU; + auto gpuResults = generator.GetTFDMatrices(molPtrs, gpuOptions); + + nvMolKit::TFDComputeOptions cpuOptions; + cpuOptions.backend = nvMolKit::TFDComputeBackend::CPU; + auto cpuResults = generator.GetTFDMatrices(molPtrs, cpuOptions); + + ASSERT_EQ(gpuResults.size(), cpuResults.size()); + for (size_t m = 0; m < gpuResults.size(); ++m) { + ASSERT_EQ(gpuResults[m].size(), cpuResults[m].size()); + for (size_t i = 0; i < gpuResults[m].size(); ++i) { + EXPECT_NEAR(gpuResults[m][i], cpuResults[m][i], kTolerance) << "Mismatch at molecule " << m << " index " << i; + } + } +} + +TEST_F(TFDGeneratorTest, GpuBufferMethodWorks) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + generateConformers(*mol, 4); + + std::vector molPtrs = {mol.get()}; + + nvMolKit::TFDGpuGenerator gpuGenerator; + nvMolKit::TFDComputeOptions options; + + auto gpuResult = gpuGenerator.GetTFDMatricesGpuBuffer(molPtrs, options); + + auto extracted = gpuResult.extractAll(); + ASSERT_EQ(extracted.size(), 1u); + EXPECT_EQ(extracted[0].size(), 6u); // 4*(4-1)/2 = 6 +} + +TEST_F(TFDGeneratorTest, EmptyInput) { + nvMolKit::TFDGenerator generator; + std::vector emptyMols; + nvMolKit::TFDComputeOptions options; + + options.backend = nvMolKit::TFDComputeBackend::GPU; + auto gpuResults = generator.GetTFDMatrices(emptyMols, options); + EXPECT_TRUE(gpuResults.empty()); + + options.backend = nvMolKit::TFDComputeBackend::CPU; + auto cpuResults = generator.GetTFDMatrices(emptyMols, options); + EXPECT_TRUE(cpuResults.empty()); +} diff --git a/tests/test_tfd_cpu.cpp b/tests/test_tfd_cpu.cpp new file mode 100644 index 0000000..aacf09a --- /dev/null +++ b/tests/test_tfd_cpu.cpp @@ -0,0 +1,828 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "tfd_common.h" +#include "tfd_cpu.h" + +namespace { + +constexpr double kTolerance = 1e-4; + +//! Generate conformers for a molecule using RDKit +void generateConformers(RDKit::ROMol& mol, int numConformers, int seed = 42) { + RDKit::DGeomHelpers::EmbedParameters params = RDKit::DGeomHelpers::ETKDGv3; + params.randomSeed = seed; + params.numThreads = 1; + RDKit::DGeomHelpers::EmbedMultipleConfs(mol, numConformers, params); +} + +} // namespace + +class TFDCpuTest : public ::testing::Test { + protected: + nvMolKit::TFDCpuGenerator generator_; +}; + +// ============================================================================= +// extractTorsionList +// ============================================================================= + +TEST_F(TFDCpuTest, ExtractTorsionListRDKitReference) { + // Compare extractTorsionList output against RDKit CalculateTorsionLists. + // Parameters: maxDev='equal', symmRadius=2, ignoreColinearBonds=True + + // --- CCCC: 1 non-ring torsion, 0 ring --- + { + SCOPED_TRACE("CCCC"); + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + auto tl = nvMolKit::extractTorsionList(*mol); + + ASSERT_EQ(tl.nonRingTorsions.size(), 1u); + EXPECT_EQ(tl.ringTorsions.size(), 0u); + + ASSERT_EQ(tl.nonRingTorsions[0].atomQuartets.size(), 1u); + EXPECT_EQ(tl.nonRingTorsions[0].atomQuartets[0], (std::array{0, 1, 2, 3})); + EXPECT_NEAR(tl.nonRingTorsions[0].maxDev, 180.0f, 0.01f); + } + + // --- CCCCC: 2 non-ring torsions, 0 ring --- + { + SCOPED_TRACE("CCCCC"); + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + auto tl = nvMolKit::extractTorsionList(*mol); + + ASSERT_EQ(tl.nonRingTorsions.size(), 2u); + EXPECT_EQ(tl.ringTorsions.size(), 0u); + + ASSERT_EQ(tl.nonRingTorsions[0].atomQuartets.size(), 1u); + EXPECT_EQ(tl.nonRingTorsions[0].atomQuartets[0], (std::array{0, 1, 2, 3})); + + ASSERT_EQ(tl.nonRingTorsions[1].atomQuartets.size(), 1u); + EXPECT_EQ(tl.nonRingTorsions[1].atomQuartets[0], (std::array{1, 2, 3, 4})); + } + + // --- CCCCCC: 3 non-ring torsions, 0 ring --- + { + SCOPED_TRACE("CCCCCC"); + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCCC")); + ASSERT_NE(mol, nullptr); + auto tl = nvMolKit::extractTorsionList(*mol); + + ASSERT_EQ(tl.nonRingTorsions.size(), 3u); + EXPECT_EQ(tl.ringTorsions.size(), 0u); + + EXPECT_EQ(tl.nonRingTorsions[0].atomQuartets[0], (std::array{0, 1, 2, 3})); + EXPECT_EQ(tl.nonRingTorsions[1].atomQuartets[0], (std::array{1, 2, 3, 4})); + EXPECT_EQ(tl.nonRingTorsions[2].atomQuartets[0], (std::array{2, 3, 4, 5})); + } + + // --- c1ccccc1 (benzene): 0 non-ring, 1 ring torsion with 6 quartets --- + { + SCOPED_TRACE("c1ccccc1"); + auto mol = std::unique_ptr(RDKit::SmilesToMol("c1ccccc1")); + ASSERT_NE(mol, nullptr); + auto tl = nvMolKit::extractTorsionList(*mol); + + EXPECT_EQ(tl.nonRingTorsions.size(), 0u); + ASSERT_EQ(tl.ringTorsions.size(), 1u); + + const auto& ringTorsion = tl.ringTorsions[0]; + ASSERT_EQ(ringTorsion.atomQuartets.size(), 6u); + + // Ring torsion maxDev = 180 * exp(-0.025 * (6-14)^2) ≈ 36.34 + EXPECT_NEAR(ringTorsion.maxDev, 36.34f, 0.1f); + + // Quartets from RDKit (ring order: 0, 5, 4, 3, 2, 1) + EXPECT_EQ(ringTorsion.atomQuartets[0], (std::array{0, 5, 4, 3})); + EXPECT_EQ(ringTorsion.atomQuartets[1], (std::array{5, 4, 3, 2})); + EXPECT_EQ(ringTorsion.atomQuartets[2], (std::array{4, 3, 2, 1})); + EXPECT_EQ(ringTorsion.atomQuartets[3], (std::array{3, 2, 1, 0})); + EXPECT_EQ(ringTorsion.atomQuartets[4], (std::array{2, 1, 0, 5})); + EXPECT_EQ(ringTorsion.atomQuartets[5], (std::array{1, 0, 5, 4})); + } + + // --- CC(=O)[C@H]1CCCC[C@@H]1CN: 2 non-ring + 1 ring (cyclohexane with substituents) --- + { + SCOPED_TRACE("CC(=O)[C@H]1CCCC[C@@H]1CN"); + auto mol = std::unique_ptr(RDKit::SmilesToMol("CC(=O)[C@H]1CCCC[C@@H]1CN")); + ASSERT_NE(mol, nullptr); + auto tl = nvMolKit::extractTorsionList(*mol); + + ASSERT_EQ(tl.nonRingTorsions.size(), 2u); + ASSERT_EQ(tl.ringTorsions.size(), 1u); + + EXPECT_EQ(tl.nonRingTorsions[0].atomQuartets[0], (std::array{0, 1, 3, 4})); + EXPECT_EQ(tl.nonRingTorsions[1].atomQuartets[0], (std::array{3, 8, 9, 10})); + + ASSERT_EQ(tl.ringTorsions[0].atomQuartets.size(), 6u); + EXPECT_NEAR(tl.ringTorsions[0].maxDev, 36.34f, 0.1f); + EXPECT_EQ(tl.ringTorsions[0].atomQuartets[0], (std::array{3, 8, 7, 6})); + EXPECT_EQ(tl.ringTorsions[0].atomQuartets[5], (std::array{4, 3, 8, 7})); + } + + // --- C[C@@H](CN(C)C(=O)C#CCN)C1CC1: 4 non-ring + 1 ring (cyclopropane + triple bond) --- + { + SCOPED_TRACE("C[C@@H](CN(C)C(=O)C#CCN)C1CC1"); + auto mol = std::unique_ptr(RDKit::SmilesToMol("C[C@@H](CN(C)C(=O)C#CCN)C1CC1")); + ASSERT_NE(mol, nullptr); + auto tl = nvMolKit::extractTorsionList(*mol); + + ASSERT_EQ(tl.nonRingTorsions.size(), 4u); + ASSERT_EQ(tl.ringTorsions.size(), 1u); + + EXPECT_EQ(tl.nonRingTorsions[0].atomQuartets[0], (std::array{0, 1, 2, 3})); + EXPECT_EQ(tl.nonRingTorsions[1].atomQuartets[0], (std::array{1, 2, 3, 4})); + EXPECT_EQ(tl.nonRingTorsions[2].atomQuartets[0], (std::array{4, 3, 5, 7})); + ASSERT_EQ(tl.nonRingTorsions[3].atomQuartets.size(), 2u); + EXPECT_EQ(tl.nonRingTorsions[3].atomQuartets[0], (std::array{0, 1, 11, 12})); + EXPECT_EQ(tl.nonRingTorsions[3].atomQuartets[1], (std::array{0, 1, 11, 13})); + + ASSERT_EQ(tl.ringTorsions[0].atomQuartets.size(), 3u); + EXPECT_NEAR(tl.ringTorsions[0].maxDev, 8.74f, 0.1f); + } +} + +// ============================================================================= +// computeTorsionWeights +// ============================================================================= + +TEST_F(TFDCpuTest, ComputeTorsionWeightsRDKitReference) { + // Compare computeTorsionWeights output against RDKit CalculateTorsionWeights. + // Reference generated with: TorsionFingerprints.CalculateTorsionWeights(mol, ignoreColinearBonds=True) + + struct TestCase { + const char* smiles; + std::vector expectedWeights; + }; + + // clang-format off + std::vector cases = { + {"CCCC", {1.0f}}, // 1 torsion: central bond + {"CCCCC", {1.0f, 0.1f}}, // 2 torsions: central + terminal + {"CCCCCC", {0.1f, 1.0f, 0.1f}}, // 3 torsions: symmetric around center + {"CC(=O)[C@H]1CCCC[C@@H]1CN", {0.3593813664f, 0.3593813664f, 0.1748047999f}}, // cyclohexane + substituents + {"C[C@@H](CN(C)C(=O)C#CCN)C1CC1", {0.5623413252f, 1.0f, 0.5623413252f, 0.1f, 0.0025021508f}}, // cyclopropane + triple bond + }; + // clang-format on + + for (const auto& tc : cases) { + SCOPED_TRACE(tc.smiles); + auto mol = std::unique_ptr(RDKit::SmilesToMol(tc.smiles)); + ASSERT_NE(mol, nullptr); + + auto tl = nvMolKit::extractTorsionList(*mol); + auto weights = nvMolKit::computeTorsionWeights(*mol, tl); + + ASSERT_EQ(weights.size(), tc.expectedWeights.size()); + for (size_t i = 0; i < weights.size(); ++i) { + EXPECT_NEAR(weights[i], tc.expectedWeights[i], 5e-4f) << "Weight[" << i << "] mismatch"; + } + } +} + +// ============================================================================= +// buildTFDSystem +// ============================================================================= + +TEST_F(TFDCpuTest, BuildTFDSystem) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 5); + ASSERT_EQ(mol->getNumConformers(), 5); + + nvMolKit::TFDComputeOptions options; + auto system = nvMolKit::buildTFDSystem(*mol, options); + + EXPECT_EQ(system.numMolecules(), 1); + EXPECT_EQ(system.molDescriptors[0].numConformers, 5); + EXPECT_GT(system.totalTorsions(), 0); + + // TFD output size: 5 conformers = 5*4/2 = 10 pairs + EXPECT_EQ(system.totalTFDOutputs(), 10); + + // Single-quartet molecule: totalQuartets == totalTorsions + EXPECT_EQ(system.totalQuartets(), system.totalTorsions()); +} + +TEST_F(TFDCpuTest, BuildTFDSystemMultiQuartet) { + // Verify multi-quartet fields for CC(C)CC (isopentane) + auto mol = std::unique_ptr(RDKit::SmilesToMol("CC(C)CC")); + ASSERT_NE(mol, nullptr); + generateConformers(*mol, 3); + + nvMolKit::TFDComputeOptions options; + auto system = nvMolKit::buildTFDSystem(*mol, options); + + EXPECT_EQ(system.numMolecules(), 1); + EXPECT_EQ(system.totalTorsions(), 1); // 1 torsion + EXPECT_EQ(system.totalQuartets(), 2); // 2 quartets + + // Verify torsion type + ASSERT_EQ(system.torsionTypes.size(), 1u); + EXPECT_EQ(system.torsionTypes[0], nvMolKit::TorsionType::Symmetric); + + // Verify quartetStarts CSR + ASSERT_EQ(system.quartetStarts.size(), 2u); // 1 torsion + 1 + EXPECT_EQ(system.quartetStarts[0], 0); + EXPECT_EQ(system.quartetStarts[1], 2); + + // Verify 2 quartets in torsionAtoms + ASSERT_EQ(system.torsionAtoms.size(), 2u); +} + +TEST_F(TFDCpuTest, BuildTFDSystemRingTorsion) { + // Verify multi-quartet fields for C1CCCCC1 (cyclohexane) + auto mol = std::unique_ptr(RDKit::SmilesToMol("C1CCCCC1")); + ASSERT_NE(mol, nullptr); + generateConformers(*mol, 3); + + nvMolKit::TFDComputeOptions options; + auto system = nvMolKit::buildTFDSystem(*mol, options); + + EXPECT_EQ(system.totalTorsions(), 1); // 1 ring torsion + EXPECT_EQ(system.totalQuartets(), 6); // 6 quartets in the ring + + ASSERT_EQ(system.torsionTypes.size(), 1u); + EXPECT_EQ(system.torsionTypes[0], nvMolKit::TorsionType::Ring); +} + +// ============================================================================= +// computeDihedralAngles +// ============================================================================= + +TEST_F(TFDCpuTest, KnownDihedralAngle) { + // Test dihedral angle computation with hand-crafted geometry (RDKit-independent). + // Create n-butane and manually set conformer coordinates to known dihedral angles. + // + // Dihedral angle definition: Looking down the C1-C2 bond (central bond), + // the angle between C0 and C3 measured clockwise from C0. + // - Trans (180°): C0 and C3 on opposite sides + // - Gauche (60°): C0 and C3 at 60° apart + + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + ASSERT_EQ(mol->getNumAtoms(), 4); + + // Trans conformer (180°): C0-C1 and C2-C3 vectors are antiparallel + { + auto* conf = new RDKit::Conformer(4); + conf->setId(0); + conf->setAtomPos(0, RDGeom::Point3D(1.0, 0.0, 0.0)); + conf->setAtomPos(1, RDGeom::Point3D(0.0, 0.0, 0.0)); + conf->setAtomPos(2, RDGeom::Point3D(0.0, 1.5, 0.0)); + conf->setAtomPos(3, RDGeom::Point3D(1.0, 1.5, 0.0)); + mol->addConformer(conf, true); + } + + // Gauche conformer (60°): C3 rotated to give 60° dihedral + { + auto* conf = new RDKit::Conformer(4); + conf->setId(1); + conf->setAtomPos(0, RDGeom::Point3D(1.0, 0.0, 0.0)); + conf->setAtomPos(1, RDGeom::Point3D(0.0, 0.0, 0.0)); + conf->setAtomPos(2, RDGeom::Point3D(0.0, 1.5, 0.0)); + conf->setAtomPos(3, RDGeom::Point3D(-0.5, 1.5, 0.866)); + mol->addConformer(conf, true); + } + + ASSERT_EQ(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions options; + options.useWeights = false; + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + + auto tl = nvMolKit::extractTorsionList(*mol, options.maxDevMode, options.symmRadius, options.ignoreColinearBonds); + auto angles = generator_.computeDihedralAngles(*mol, tl); + + int totalQuartets = static_cast(tl.totalCount()); + ASSERT_GE(totalQuartets, 1); + + EXPECT_NEAR(angles[0], 180.0f, 5.0f) << "Trans conformer should have ~180° dihedral"; + EXPECT_NEAR(angles[totalQuartets], 60.0f, 5.0f) << "Gauche conformer should have ~60° dihedral"; + + // TFD: difference = |180 - 60| = 120°, normalized by 180° ≈ 0.667 + auto tfdMatrix = generator_.GetTFDMatrix(*mol, options); + ASSERT_EQ(tfdMatrix.size(), 1u); + EXPECT_NEAR(tfdMatrix[0], 0.667, 0.05); +} + +TEST_F(TFDCpuTest, ComputeDihedralAnglesRDKitReference) { + // Compare computeDihedralAngles output against RDKit rdMolTransforms.GetDihedralDeg. + // 4 conformers per molecule, seed=42, ETKDGv3. + // + // NOTE: Our dihedral convention is offset by 180° from RDKit's GetDihedralDeg. + // This does NOT affect TFD (circularDifference is invariant to shared offset). + // Reference values below are RDKit values + 180° (mod 360). + // + // Original RDKit values generated with: + // angle = rdMolTransforms.GetDihedralDeg(conf, a, b, c, d) + // our_angle = (angle + 180) % 360 + + struct TestCase { + const char* smiles; + int numTorsions; + std::vector expectedAngles; // [numConf * numTors], our convention + }; + + // clang-format off + std::vector cases = { + {"CCCC", 1, { + 120.0130f, // conf[0] tors[0] (RDKit: 300.013) + 0.0000f, // conf[1] tors[0] (RDKit: 180.000) + 120.0000f, // conf[2] tors[0] (RDKit: 300.000) + 0.0000f // conf[3] tors[0] (RDKit: 180.000) + }}, + {"CCCCC", 2, { + 239.9983f, 240.0002f, // conf[0] (RDKit: 60.0, 60.0) + 119.9983f, 240.0001f, // conf[1] (RDKit: 300.0, 60.0) + 359.9974f, 240.0025f, // conf[2] (RDKit: 180.0, 60.0) + 0.0037f, 120.0134f // conf[3] (RDKit: 180.0, 300.0) + }}, + {"CCCCCC", 3, { + 240.0055f, 359.9997f, 119.9928f, // conf[0] (RDKit: 60, 180, 300) + 359.9961f, 239.9959f, 119.9996f, // conf[1] (RDKit: 180, 60, 300) + 239.9980f, 359.9978f, 239.9987f, // conf[2] (RDKit: 60, 180, 60) + 239.9980f, 119.9980f, 119.9976f // conf[3] (RDKit: 60, 300, 300) + }}, + }; + // clang-format on + + // Use circular distance to handle 0°/360° wrap-around + constexpr float kAngleTolerance = 0.05f; // degrees; generous for float vs double + + nvMolKit::TFDComputeOptions options; + options.useWeights = true; + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + options.symmRadius = 2; + options.ignoreColinearBonds = true; + + for (const auto& tc : cases) { + SCOPED_TRACE(tc.smiles); + auto mol = std::unique_ptr(RDKit::SmilesToMol(tc.smiles)); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4, 42); + ASSERT_EQ(mol->getNumConformers(), 4); + + auto tl = nvMolKit::extractTorsionList(*mol, options.maxDevMode, options.symmRadius, options.ignoreColinearBonds); + auto angles = generator_.computeDihedralAngles(*mol, tl); + + int numConf = 4; + int numTors = tc.numTorsions; + ASSERT_EQ(static_cast(angles.size()), numConf * numTors); + ASSERT_EQ(static_cast(tc.expectedAngles.size()), numConf * numTors); + + for (size_t i = 0; i < angles.size(); ++i) { + // Circular distance handles 0°/360° boundary + float diff = std::abs(angles[i] - tc.expectedAngles[i]); + if (diff > 180.0f) { + diff = 360.0f - diff; + } + EXPECT_LE(diff, kAngleTolerance) << "Angle[" << i << "] (conf=" << i / numTors << " tors=" << i % numTors + << "): got " << angles[i] << ", expected " << tc.expectedAngles[i]; + } + } +} + +// ============================================================================= +// GetTFDMatrix (pipeline) +// ============================================================================= + +TEST_F(TFDCpuTest, ComputeTFDMatrixSelfComparison) { + // Identical conformers should produce TFD = 0 + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 1); + ASSERT_EQ(mol->getNumConformers(), 1); + + // Duplicate the conformer + RDKit::Conformer conf = mol->getConformer(0); + conf.setId(1); + mol->addConformer(new RDKit::Conformer(conf), true); + ASSERT_EQ(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions options; + auto tfdMatrix = generator_.GetTFDMatrix(*mol, options); + + ASSERT_EQ(tfdMatrix.size(), 1u); + EXPECT_NEAR(tfdMatrix[0], 0.0, kTolerance); +} + +TEST_F(TFDCpuTest, NoTorsionsMolecule) { + // Methane has no rotatable bonds — TFD should be zero + auto mol = std::unique_ptr(RDKit::SmilesToMol("C")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 3); + + nvMolKit::TFDComputeOptions options; + auto tfdMatrix = generator_.GetTFDMatrix(*mol, options); + + ASSERT_EQ(tfdMatrix.size(), 3u); // 3 conformers = 3 pairs + for (double tfd : tfdMatrix) { + EXPECT_NEAR(tfd, 0.0, kTolerance); + } +} + +TEST_F(TFDCpuTest, SingleConformer) { + // Single conformer should return empty matrix (no pairs) + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 1); + ASSERT_EQ(mol->getNumConformers(), 1); + + nvMolKit::TFDComputeOptions options; + auto tfdMatrix = generator_.GetTFDMatrix(*mol, options); + + EXPECT_TRUE(tfdMatrix.empty()); +} + +TEST_F(TFDCpuTest, UseWeightsOption) { + // Weighted and unweighted TFD should differ for multi-torsion molecules + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCCCCC")); // n-octane + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4); + ASSERT_GE(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions optionsWithWeights; + optionsWithWeights.useWeights = true; + + nvMolKit::TFDComputeOptions optionsNoWeights; + optionsNoWeights.useWeights = false; + + auto tfdWithWeights = generator_.GetTFDMatrix(*mol, optionsWithWeights); + auto tfdNoWeights = generator_.GetTFDMatrix(*mol, optionsNoWeights); + + ASSERT_EQ(tfdWithWeights.size(), tfdNoWeights.size()); + ASSERT_GT(tfdWithWeights.size(), 0u); + + for (size_t i = 0; i < tfdWithWeights.size(); ++i) { + EXPECT_GE(tfdWithWeights[i], 0.0); + EXPECT_GE(tfdNoWeights[i], 0.0); + } + + bool anyDifferent = false; + for (size_t i = 0; i < tfdWithWeights.size(); ++i) { + if (std::abs(tfdWithWeights[i] - tfdNoWeights[i]) > 1e-6) { + anyDifferent = true; + break; + } + } + EXPECT_TRUE(anyDifferent) << "Weighted and unweighted TFD should differ for n-octane"; +} + +TEST_F(TFDCpuTest, MaxDevModes) { + // Both Equal and Spec modes should produce valid finite results + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 3); + + nvMolKit::TFDComputeOptions optionsEqual; + optionsEqual.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + + nvMolKit::TFDComputeOptions optionsSpec; + optionsSpec.maxDevMode = nvMolKit::TFDMaxDevMode::Spec; + + auto tfdEqual = generator_.GetTFDMatrix(*mol, optionsEqual); + auto tfdSpec = generator_.GetTFDMatrix(*mol, optionsSpec); + + ASSERT_EQ(tfdEqual.size(), tfdSpec.size()); + + for (size_t i = 0; i < tfdEqual.size(); ++i) { + EXPECT_TRUE(std::isfinite(tfdEqual[i])); + EXPECT_TRUE(std::isfinite(tfdSpec[i])); + } +} + +TEST_F(TFDCpuTest, MaxDevModesMultiQuartet) { + // Spec maxDev mode uses torsion-specific normalization for non-ring torsions: + // - CC(C)CC symmetric torsion: maxDev=90 (Spec) vs 180 (Equal) + // Ring torsions always use the ring-specific formula (matching RDKit), + // so Equal vs Spec produces identical results for ring-only molecules. + + // --- CC(C)CC: Spec and Equal should differ --- + { + SCOPED_TRACE("CC(C)CC"); + + auto mol = std::unique_ptr(RDKit::SmilesToMol("CC(C)CC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4); + ASSERT_GE(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions optionsEqual; + optionsEqual.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + + nvMolKit::TFDComputeOptions optionsSpec; + optionsSpec.maxDevMode = nvMolKit::TFDMaxDevMode::Spec; + + auto tfdEqual = generator_.GetTFDMatrix(*mol, optionsEqual); + auto tfdSpec = generator_.GetTFDMatrix(*mol, optionsSpec); + + ASSERT_EQ(tfdEqual.size(), tfdSpec.size()); + ASSERT_GT(tfdEqual.size(), 0u); + + for (size_t i = 0; i < tfdEqual.size(); ++i) { + EXPECT_TRUE(std::isfinite(tfdEqual[i])); + EXPECT_TRUE(std::isfinite(tfdSpec[i])); + EXPECT_GE(tfdEqual[i], 0.0); + EXPECT_GE(tfdSpec[i], 0.0); + } + + // Symmetric non-ring torsion: maxDev=90 (Spec) vs 180 (Equal) + bool anyDifferent = false; + for (size_t i = 0; i < tfdEqual.size(); ++i) { + if (std::abs(tfdEqual[i] - tfdSpec[i]) > 1e-6) { + anyDifferent = true; + break; + } + } + EXPECT_TRUE(anyDifferent) << "Spec and Equal should differ for symmetric non-ring torsion"; + } + + // --- C1CCCCC1: Spec and Equal should be identical (ring maxDev is always formula-based) --- + { + SCOPED_TRACE("C1CCCCC1"); + + auto mol = std::unique_ptr(RDKit::SmilesToMol("C1CCCCC1")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4); + ASSERT_GE(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions optionsEqual; + optionsEqual.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + + nvMolKit::TFDComputeOptions optionsSpec; + optionsSpec.maxDevMode = nvMolKit::TFDMaxDevMode::Spec; + + auto tfdEqual = generator_.GetTFDMatrix(*mol, optionsEqual); + auto tfdSpec = generator_.GetTFDMatrix(*mol, optionsSpec); + + ASSERT_EQ(tfdEqual.size(), tfdSpec.size()); + ASSERT_GT(tfdEqual.size(), 0u); + + for (size_t i = 0; i < tfdEqual.size(); ++i) { + EXPECT_TRUE(std::isfinite(tfdEqual[i])); + EXPECT_GE(tfdEqual[i], 0.0); + // Ring-only molecule: maxDev mode has no effect + EXPECT_NEAR(tfdEqual[i], tfdSpec[i], 1e-10) << "Ring torsion maxDev should be identical for Equal and Spec modes"; + } + } +} + +TEST_F(TFDCpuTest, IgnoreColinearBondsFalse) { + // Test ignoreColinearBonds=false with a molecule containing a triple bond. + // With ignoreColinearBonds=true (default), bonds adjacent to triple bonds are skipped. + // With false, alternative atoms are found and torsions are included. + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC#CCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4); + ASSERT_GE(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions optionsIgnore; + optionsIgnore.ignoreColinearBonds = true; + + nvMolKit::TFDComputeOptions optionsKeep; + optionsKeep.ignoreColinearBonds = false; + + auto torsionsIgnore = nvMolKit::extractTorsionList(*mol, nvMolKit::TFDMaxDevMode::Equal, 2, true); + auto torsionsKeep = nvMolKit::extractTorsionList(*mol, nvMolKit::TFDMaxDevMode::Equal, 2, false); + + // With ignoreColinearBonds=false, we should get more (or at least as many) torsions + EXPECT_GE(torsionsKeep.totalCount(), torsionsIgnore.totalCount()); + + auto tfdIgnore = generator_.GetTFDMatrix(*mol, optionsIgnore); + auto tfdKeep = generator_.GetTFDMatrix(*mol, optionsKeep); + + // Both should produce valid results + for (double val : tfdIgnore) { + EXPECT_TRUE(std::isfinite(val)); + EXPECT_GE(val, 0.0); + } + for (double val : tfdKeep) { + EXPECT_TRUE(std::isfinite(val)); + EXPECT_GE(val, 0.0); + } +} + +TEST_F(TFDCpuTest, CompareWithRDKitReference) { + // Compare full TFD pipeline against pre-computed RDKit reference values. + // Includes single-quartet, multi-quartet (ring, symmetric), ring+substituent, + // and triple-bond molecules. + // + // Reference values generated with RDKit Python: + // mol = Chem.MolFromSmiles(smiles) + // params = AllChem.ETKDGv3() + // params.randomSeed = + // AllChem.EmbedMultipleConfs(mol, , params) + // tfd = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2) + + struct TestCase { + const char* smiles; + int numConfs; + int seed; + std::vector reference; + }; + + // clang-format off + std::vector cases = { + {"CCCC", 4, 42, { // n-butane, 1 torsion + 0.6667389132, 0.0000726610, 0.6666662521, + 0.6667387931, 0.0000001200, 0.6666661321 + }}, + {"CCCCC", 4, 42, { // n-pentane, 2 torsions + 0.6060606631, 0.6060573299, 0.6060662252, + 0.6666872992, 0.6666326206, 0.0606323184 + }}, + {"CCCCCC", 4, 42, { // n-hexane, 3 torsions + 0.6111276139, 0.0555704226, 0.6666744357, + 0.5555532106, 0.6111014381, 0.6111123144 + }}, + {"CC(C)CC", 4, 42, { // isopentane, 1 symmetric torsion (2 quartets) + 0.0000045777, 0.0433253929, 0.0433299706, + 0.0000027031, 0.0000072808, 0.0433226898 + }}, + {"C1CCCCC1", 4, 42, { // cyclohexane, 1 ring torsion (6 quartets) + 0.1343662730, 0.3653621455, 0.2309958724, + 0.1017138867, 0.2360801597, 0.4670760321 + }}, + {"c1ccccc1CC", 4, 42, { // ethylbenzene, 1 symmetric non-ring + 1 ring torsion + 0.0000114982, 0.0000104863, 0.0000020789, + 0.0000100311, 0.0000018047, 0.0000013199 + }}, + {"CC(=O)[C@H]1CCCC[C@@H]1CN", 5, 44, { // cyclohexane + ketone/amine substituents + 0.3538817018, 0.5924305446, 0.2977012405, 0.5450659700, 0.6307899400, + 0.3331152938, 0.5590067080, 0.3311160076, 0.3015480245, 0.5678678322 + }}, + {"C[C@@H](CN(C)C(=O)C#CCN)C1CC1", 5, 45, { // cyclopropane + triple bond + 0.6173250123, 0.2578501411, 0.8622535690, 0.4331903310, 0.7154140061, + 0.1978909084, 0.1683354525, 0.4489992971, 0.4239704017, 0.2760081465 + }}, + }; + // clang-format on + + nvMolKit::TFDComputeOptions options; + options.useWeights = true; + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + options.symmRadius = 2; + options.ignoreColinearBonds = true; + + for (const auto& tc : cases) { + SCOPED_TRACE(tc.smiles); + + auto mol = std::unique_ptr(RDKit::SmilesToMol(tc.smiles)); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, tc.numConfs, tc.seed); + auto tfdMatrix = generator_.GetTFDMatrix(*mol, options); + + ASSERT_EQ(tfdMatrix.size(), tc.reference.size()); + constexpr double kRDKitTolerance = 5e-4; + for (size_t i = 0; i < tfdMatrix.size(); ++i) { + EXPECT_NEAR(tfdMatrix[i], tc.reference[i], kRDKitTolerance) << "TFD[" << i << "] mismatch with RDKit reference"; + } + } +} + +TEST_F(TFDCpuTest, CompareWithRDKitReferenceAddHs) { + // Compare TFD with explicit hydrogens (AddHs) against RDKit reference values. + // AddHs produces more quartets per torsion, exercising multi-quartet handling + // on molecules that would otherwise be single-quartet. + // + // Reference values generated with RDKit Python: + // mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) + // params = AllChem.ETKDGv3() + // params.randomSeed = 42 + // AllChem.EmbedMultipleConfs(mol, 4, params) + // tfd = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2) + + struct TestCase { + const char* smiles; + std::vector reference; + }; + + // clang-format off + std::vector cases = { + {"CCCCC", { // n-pentane with AddHs (symmetric torsions from H) + 0.6666346588, + 0.0606342183, + 0.6666024736, + 0.6666717502, + 0.6666935910, + 0.6061117436 + }}, + {"CC(C)CC", { // isopentane with AddHs + 0.0155798214, + 0.0118375755, + 0.0180868258, + 0.0000140721, + 0.0091266798, + 0.0336807192 + }}, + }; + // clang-format on + + nvMolKit::TFDComputeOptions options; + options.useWeights = true; + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + options.symmRadius = 2; + options.ignoreColinearBonds = true; + + for (const auto& tc : cases) { + SCOPED_TRACE(std::string(tc.smiles) + " (AddHs)"); + + auto mol = std::unique_ptr(RDKit::SmilesToMol(tc.smiles)); + ASSERT_NE(mol, nullptr); + + RDKit::MolOps::addHs(*mol); + + generateConformers(*mol, 4, 42); + ASSERT_EQ(mol->getNumConformers(), 4); + + auto tfdMatrix = generator_.GetTFDMatrix(*mol, options); + + ASSERT_EQ(tfdMatrix.size(), tc.reference.size()); + constexpr double kRDKitTolerance = 5e-4; + for (size_t i = 0; i < tfdMatrix.size(); ++i) { + EXPECT_NEAR(tfdMatrix[i], tc.reference[i], kRDKitTolerance) << "TFD[" << i << "] mismatch with RDKit reference"; + } + } +} + +// ============================================================================= +// GetTFDMatrices (batch) +// ============================================================================= + +TEST_F(TFDCpuTest, BatchProcessing) { + // Test batch processing of multiple molecules via GetTFDMatrices + // clang-format off + const std::vector testSmiles = { + "CCCC", "CC(C)C", "c1ccccc1", "CCO", "CCCCC", + "CC(=O)O", "c1ccc(cc1)O", "CCCCCC", "CC(C)(C)C", "c1ccc2ccccc2c1", + }; + // clang-format on + + std::vector> mols; + std::vector molPtrs; + + for (const auto& smiles : testSmiles) { + auto mol = std::unique_ptr(RDKit::SmilesToMol(smiles)); + if (mol) { + generateConformers(*mol, 3); + if (mol->getNumConformers() >= 2) { + molPtrs.push_back(mol.get()); + mols.push_back(std::move(mol)); + } + } + } + + ASSERT_GE(mols.size(), 3u); + + nvMolKit::TFDComputeOptions options; + auto results = generator_.GetTFDMatrices(molPtrs, options); + + ASSERT_EQ(results.size(), molPtrs.size()); + + for (size_t i = 0; i < results.size(); ++i) { + int numConf = molPtrs[i]->getNumConformers(); + int expectedPairs = numConf * (numConf - 1) / 2; + + EXPECT_EQ(results[i].size(), static_cast(expectedPairs)) << "Mismatch for molecule " << i; + + for (double tfd : results[i]) { + EXPECT_TRUE(std::isfinite(tfd)); + EXPECT_GE(tfd, 0.0); + } + } +} diff --git a/tests/test_tfd_gpu.cpp b/tests/test_tfd_gpu.cpp new file mode 100644 index 0000000..4bcad16 --- /dev/null +++ b/tests/test_tfd_gpu.cpp @@ -0,0 +1,327 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include + +#include "tfd_cpu.h" +#include "tfd_gpu.h" + +namespace { + +constexpr double kTolerance = 1e-3; // GPU has lower precision due to float + +//! Generate conformers for a molecule using RDKit +void generateConformers(RDKit::ROMol& mol, int numConformers, int seed = 42) { + RDKit::DGeomHelpers::EmbedParameters params = RDKit::DGeomHelpers::ETKDGv3; + params.randomSeed = seed; + params.numThreads = 1; + RDKit::DGeomHelpers::EmbedMultipleConfs(mol, numConformers, params); +} + +} // namespace + +// Global check for CUDA availability +static bool gCudaAvailable = false; +static bool gCudaChecked = false; + +static bool checkCudaAvailable() { + if (!gCudaChecked) { + gCudaChecked = true; + try { + int deviceCount = 0; + cudaError_t err = cudaGetDeviceCount(&deviceCount); + if (err == cudaSuccess && deviceCount > 0) { + err = cudaSetDevice(0); + if (err == cudaSuccess) { + gCudaAvailable = true; + } else { + cudaGetLastError(); + } + } else { + cudaGetLastError(); + } + } catch (...) { + gCudaAvailable = false; + } + } + return gCudaAvailable; +} + +// ========== GPU Generator Tests ========== + +class TFDGpuTest : public ::testing::Test { + protected: + void SetUp() override { + if (!checkCudaAvailable()) { + GTEST_SKIP() << "No CUDA devices available, skipping GPU tests"; + } + gpuGenerator_ = std::make_unique(); + } + + nvMolKit::TFDCpuGenerator cpuGenerator_; + std::unique_ptr gpuGenerator_; +}; + +TEST_F(TFDGpuTest, MatchesCPUReference) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 5); + ASSERT_EQ(mol->getNumConformers(), 5); + + nvMolKit::TFDComputeOptions options; + + auto cpuTFD = cpuGenerator_.GetTFDMatrix(*mol, options); + ASSERT_FALSE(cpuTFD.empty()); + + auto gpuTFD = gpuGenerator_->GetTFDMatrix(*mol, options); + + ASSERT_EQ(gpuTFD.size(), cpuTFD.size()); + for (size_t i = 0; i < cpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], cpuTFD[i], kTolerance) << "Mismatch at TFD index " << i; + } +} + +TEST_F(TFDGpuTest, MatchesCPUReferenceUnweighted) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 5); + ASSERT_EQ(mol->getNumConformers(), 5); + + nvMolKit::TFDComputeOptions options; + options.useWeights = false; + + auto cpuTFD = cpuGenerator_.GetTFDMatrix(*mol, options); + ASSERT_FALSE(cpuTFD.empty()); + + auto gpuTFD = gpuGenerator_->GetTFDMatrix(*mol, options); + + ASSERT_EQ(gpuTFD.size(), cpuTFD.size()); + for (size_t i = 0; i < cpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], cpuTFD[i], kTolerance) << "Mismatch at TFD index " << i; + } +} + +TEST_F(TFDGpuTest, BatchMultipleMolecules) { + // Mix of single-quartet and multi-quartet molecules in a single batch + std::vector smilesList = { + "CCCC", // n-butane (single-quartet) + "CCCCC", // n-pentane (single-quartet) + "CC(C)CC", // isopentane (symmetric, 2 quartets) + "C1CCCCC1", // cyclohexane (ring, 6 quartets) + "c1ccccc1CC", // ethylbenzene (ring + symmetric) + "CCO", // ethanol + "CC(=O)O", // acetic acid + "CC(=O)[C@H]1CCCC[C@@H]1CN", // cyclohexane + ketone/amine substituents + "C[C@@H](CN(C)C(=O)C#CCN)C1CC1", // cyclopropane + triple bond + }; + + std::vector> mols; + std::vector molPtrs; + + for (const auto& smiles : smilesList) { + auto mol = std::unique_ptr(RDKit::SmilesToMol(smiles)); + if (mol) { + generateConformers(*mol, 4); + if (mol->getNumConformers() >= 2) { + molPtrs.push_back(mol.get()); + mols.push_back(std::move(mol)); + } + } + } + + ASSERT_GE(mols.size(), 3u); + + nvMolKit::TFDComputeOptions options; + + auto cpuResults = cpuGenerator_.GetTFDMatrices(molPtrs, options); + auto gpuResults = gpuGenerator_->GetTFDMatrices(molPtrs, options); + + ASSERT_EQ(gpuResults.size(), cpuResults.size()); + for (size_t m = 0; m < cpuResults.size(); ++m) { + ASSERT_EQ(gpuResults[m].size(), cpuResults[m].size()) << "Size mismatch for molecule " << m; + + for (size_t i = 0; i < cpuResults[m].size(); ++i) { + EXPECT_NEAR(gpuResults[m][i], cpuResults[m][i], kTolerance) << "Mismatch at molecule " << m << " TFD index " << i; + } + } +} + +TEST_F(TFDGpuTest, SingleConformer) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 1); + ASSERT_EQ(mol->getNumConformers(), 1); + + nvMolKit::TFDComputeOptions options; + auto tfdMatrix = gpuGenerator_->GetTFDMatrix(*mol, options); + + EXPECT_TRUE(tfdMatrix.empty()); +} + +TEST_F(TFDGpuTest, NoTorsionsMolecule) { + // Methane has no rotatable bonds + auto mol = std::unique_ptr(RDKit::SmilesToMol("C")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 3); + + nvMolKit::TFDComputeOptions options; + + auto cpuTFD = cpuGenerator_.GetTFDMatrix(*mol, options); + auto gpuTFD = gpuGenerator_->GetTFDMatrix(*mol, options); + + ASSERT_EQ(gpuTFD.size(), cpuTFD.size()); + for (size_t i = 0; i < gpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], 0.0, kTolerance); + EXPECT_NEAR(cpuTFD[i], 0.0, kTolerance); + } +} + +TEST_F(TFDGpuTest, GeneratorReuse) { + // Verify that calling GetTFDMatrix twice on the same generator works. + // The first call moves device_.tfdOutput into the result; the second call + // must work correctly with the reallocated output buffer. + auto mol1 = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol1, nullptr); + generateConformers(*mol1, 3); + + auto mol2 = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol2, nullptr); + generateConformers(*mol2, 4); + + nvMolKit::TFDComputeOptions options; + + auto gpuTFD1 = gpuGenerator_->GetTFDMatrix(*mol1, options); + auto gpuTFD2 = gpuGenerator_->GetTFDMatrix(*mol2, options); + + auto cpuTFD1 = cpuGenerator_.GetTFDMatrix(*mol1, options); + auto cpuTFD2 = cpuGenerator_.GetTFDMatrix(*mol2, options); + + ASSERT_EQ(gpuTFD1.size(), cpuTFD1.size()); + for (size_t i = 0; i < cpuTFD1.size(); ++i) { + EXPECT_NEAR(gpuTFD1[i], cpuTFD1[i], kTolerance) << "First call mismatch at index " << i; + } + + ASSERT_EQ(gpuTFD2.size(), cpuTFD2.size()); + for (size_t i = 0; i < cpuTFD2.size(); ++i) { + EXPECT_NEAR(gpuTFD2[i], cpuTFD2[i], kTolerance) << "Second call mismatch at index " << i; + } +} + +// ========== GPU Result Extraction Tests ========== + +TEST_F(TFDGpuTest, GpuResultExtraction) { + std::vector> mols; + std::vector molPtrs; + + auto mol1 = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + generateConformers(*mol1, 3); + molPtrs.push_back(mol1.get()); + mols.push_back(std::move(mol1)); + + auto mol2 = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + generateConformers(*mol2, 4); + molPtrs.push_back(mol2.get()); + mols.push_back(std::move(mol2)); + + nvMolKit::TFDComputeOptions options; + + auto gpuResult = gpuGenerator_->GetTFDMatricesGpuBuffer(molPtrs, options); + + // Test extractMolecule + auto mol0Extracted = gpuResult.extractMolecule(0); + auto mol1Extracted = gpuResult.extractMolecule(1); + + // Verify sizes: 3 conformers = 3 pairs, 4 conformers = 6 pairs + EXPECT_EQ(mol0Extracted.size(), 3u); // 3*(3-1)/2 = 3 + EXPECT_EQ(mol1Extracted.size(), 6u); // 4*(4-1)/2 = 6 + + // Test extractAll + auto allExtracted = gpuResult.extractAll(); + ASSERT_EQ(allExtracted.size(), 2u); + EXPECT_EQ(allExtracted[0].size(), mol0Extracted.size()); + EXPECT_EQ(allExtracted[1].size(), mol1Extracted.size()); + + // Values should match + for (size_t i = 0; i < mol0Extracted.size(); ++i) { + EXPECT_DOUBLE_EQ(allExtracted[0][i], mol0Extracted[i]); + } + for (size_t i = 0; i < mol1Extracted.size(); ++i) { + EXPECT_DOUBLE_EQ(allExtracted[1][i], mol1Extracted[i]); + } + + // Compare with CPU reference + auto cpuResults = cpuGenerator_.GetTFDMatrices(molPtrs, options); + for (size_t m = 0; m < allExtracted.size(); ++m) { + ASSERT_EQ(allExtracted[m].size(), cpuResults[m].size()); + for (size_t i = 0; i < allExtracted[m].size(); ++i) { + EXPECT_NEAR(allExtracted[m][i], cpuResults[m][i], kTolerance) << "Mismatch at molecule " << m << " index " << i; + } + } +} + +TEST_F(TFDGpuTest, GpuResultExtractionOutOfRange) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + generateConformers(*mol, 3); + + std::vector molPtrs = {mol.get()}; + + nvMolKit::TFDComputeOptions options; + auto gpuResult = gpuGenerator_->GetTFDMatricesGpuBuffer(molPtrs, options); + + EXPECT_NO_THROW(gpuResult.extractMolecule(0)); + EXPECT_THROW(gpuResult.extractMolecule(-1), std::out_of_range); + EXPECT_THROW(gpuResult.extractMolecule(1), std::out_of_range); +} + +TEST_F(TFDGpuTest, AddHsMatchesCPU) { + // Test with explicit hydrogens (AddHs) — produces more multi-quartet torsions. + // Verifies GPU generator matches CPU for molecules with hydrogen atoms. + const std::vector testSmiles = {"CCCCC", "CC(C)CC"}; + + for (const auto& smiles : testSmiles) { + SCOPED_TRACE(std::string(smiles) + " (AddHs)"); + + auto mol = std::unique_ptr(RDKit::SmilesToMol(smiles)); + ASSERT_NE(mol, nullptr); + + RDKit::MolOps::addHs(*mol); + generateConformers(*mol, 4, 42); + ASSERT_EQ(mol->getNumConformers(), 4); + + nvMolKit::TFDComputeOptions options; + + auto cpuTFD = cpuGenerator_.GetTFDMatrix(*mol, options); + ASSERT_FALSE(cpuTFD.empty()); + + auto gpuTFD = gpuGenerator_->GetTFDMatrix(*mol, options); + + ASSERT_EQ(gpuTFD.size(), cpuTFD.size()); + for (size_t i = 0; i < cpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], cpuTFD[i], kTolerance) << "Mismatch at TFD index " << i; + } + } +} diff --git a/tests/test_tfd_kernels.cu b/tests/test_tfd_kernels.cu new file mode 100644 index 0000000..4fe3c3a --- /dev/null +++ b/tests/test_tfd_kernels.cu @@ -0,0 +1,666 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include + +#include +#include + +#include "tfd_common.h" +#include "tfd_cpu.h" +#include "tfd_kernels.h" + +namespace { + +constexpr double kTolerance = 1e-3; // GPU has lower precision due to float + +//! Generate conformers for a molecule using RDKit +void generateConformers(RDKit::ROMol& mol, int numConformers, int seed = 42) { + RDKit::DGeomHelpers::EmbedParameters params = RDKit::DGeomHelpers::ETKDGv3; + params.randomSeed = seed; + params.numThreads = 1; + RDKit::DGeomHelpers::EmbedMultipleConfs(mol, numConformers, params); +} + +} // namespace + +// Global check for CUDA availability +static bool gCudaAvailable = false; +static bool gCudaChecked = false; + +static bool checkCudaAvailable() { + if (!gCudaChecked) { + gCudaChecked = true; + try { + int deviceCount = 0; + cudaError_t err = cudaGetDeviceCount(&deviceCount); + if (err == cudaSuccess && deviceCount > 0) { + // Try to actually use the device + err = cudaSetDevice(0); + if (err == cudaSuccess) { + gCudaAvailable = true; + } else { + cudaGetLastError(); // Reset error state + } + } else { + cudaGetLastError(); // Reset error state + } + } catch (...) { + gCudaAvailable = false; + } + } + return gCudaAvailable; +} + +class TFDKernelsTest : public ::testing::Test { + protected: + void SetUp() override { + // Check if CUDA is available + if (!checkCudaAvailable()) { + GTEST_SKIP() << "No CUDA devices available, skipping GPU tests"; + } + } + + nvMolKit::TFDCpuGenerator cpuGenerator_; +}; + +// ============================================================================= +// Dihedral kernel +// ============================================================================= + +TEST_F(TFDKernelsTest, DihedralKernelBasic) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 3); + ASSERT_EQ(mol->getNumConformers(), 3); + + nvMolKit::TFDComputeOptions options; + auto system = nvMolKit::buildTFDSystem(*mol, options); + + ASSERT_EQ(system.numMolecules(), 1); + ASSERT_EQ(system.molDescriptors[0].numConformers, 3); + ASSERT_GT(system.totalTorsions(), 0); + ASSERT_GT(system.totalDihedralWorkItems(), 0); + + // Transfer to device + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuAngles; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + gpuAngles.resize(device.dihedralAngles.size()); + device.dihedralAngles.copyToHost(gpuAngles.data(), gpuAngles.size()); + cudaStreamSynchronize(stream); + } + + // Compute CPU reference + auto tl = nvMolKit::extractTorsionList(*mol, options.maxDevMode, options.symmRadius, options.ignoreColinearBonds); + auto cpuAngles = cpuGenerator_.computeDihedralAngles(*mol, tl); + + // Compare + ASSERT_EQ(gpuAngles.size(), cpuAngles.size()); + for (size_t i = 0; i < cpuAngles.size(); ++i) { + EXPECT_NEAR(gpuAngles[i], cpuAngles[i], kTolerance) << "Mismatch at angle " << i; + } + + cudaStreamDestroy(stream); +} + +// ============================================================================= +// TFD pipeline (GPU vs CPU) +// ============================================================================= + +TEST_F(TFDKernelsTest, TFDMatrixKernelMatchesCPU) { + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 5); + ASSERT_EQ(mol->getNumConformers(), 5); + + nvMolKit::TFDComputeOptions options; + + // Compute CPU reference + auto cpuTFD = cpuGenerator_.GetTFDMatrix(*mol, options); + ASSERT_FALSE(cpuTFD.empty()); + + // Build system + auto system = nvMolKit::buildTFDSystem(*mol, options); + + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuTFD; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + // Compute dihedral angles first + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + // Compute TFD matrix + nvMolKit::launchTFDMatrixKernel(system.numMolecules(), + device.dihedralAngles.data(), + device.torsionWeights.data(), + device.torsionMaxDevs.data(), + device.quartetStarts.data(), + device.torsionTypes.data(), + device.molDescriptors.data(), + device.tfdOutput.data(), + stream); + + gpuTFD.resize(device.tfdOutput.size()); + device.tfdOutput.copyToHost(gpuTFD.data(), gpuTFD.size()); + cudaStreamSynchronize(stream); + } + + // Compare + ASSERT_EQ(gpuTFD.size(), cpuTFD.size()); + for (size_t i = 0; i < cpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], cpuTFD[i], kTolerance) << "Mismatch at TFD index " << i; + } + + cudaStreamDestroy(stream); +} + +TEST_F(TFDKernelsTest, BatchMultipleMolecules) { + // Mix of single-quartet and multi-quartet molecules in a single batch + const std::vector testSmiles = { + "CCCC", // n-butane (single-quartet) + "CCCCC", // n-pentane (single-quartet) + "CC(C)CC", // isopentane (symmetric, 2 quartets) + "c1ccccc1", // benzene (ring, 6 quartets) + "c1ccccc1CC", // ethylbenzene (ring + symmetric) + "CCO", // ethanol + "c1ccc(cc1)O", // phenol + }; + + std::vector> mols; + std::vector molPtrs; + + for (const auto& smiles : testSmiles) { + auto mol = std::unique_ptr(RDKit::SmilesToMol(smiles)); + if (mol) { + generateConformers(*mol, 3); + if (mol->getNumConformers() >= 2) { + molPtrs.push_back(mol.get()); + mols.push_back(std::move(mol)); + } + } + } + + ASSERT_GE(mols.size(), 3u); + + nvMolKit::TFDComputeOptions options; + + // Compute CPU reference + auto cpuResults = cpuGenerator_.GetTFDMatrices(molPtrs, options); + + // Build batch system + auto system = nvMolKit::buildTFDSystem(molPtrs, options); + + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuTFDFlat; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + // Launch dihedral kernel + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + // Launch TFD matrix kernel + nvMolKit::launchTFDMatrixKernel(system.numMolecules(), + device.dihedralAngles.data(), + device.torsionWeights.data(), + device.torsionMaxDevs.data(), + device.quartetStarts.data(), + device.torsionTypes.data(), + device.molDescriptors.data(), + device.tfdOutput.data(), + stream); + + gpuTFDFlat.resize(device.tfdOutput.size()); + if (!gpuTFDFlat.empty()) { + device.tfdOutput.copyToHost(gpuTFDFlat.data(), gpuTFDFlat.size()); + } + cudaStreamSynchronize(stream); + } + + // Compare per molecule + for (size_t m = 0; m < molPtrs.size(); ++m) { + int outStart = system.molDescriptors[m].tfdOutStart; + int outEnd = (static_cast(m) + 1 < system.numMolecules()) ? system.molDescriptors[m + 1].tfdOutStart : + system.totalTFDOutputs(); + + ASSERT_EQ(static_cast(outEnd - outStart), cpuResults[m].size()) << "Size mismatch for molecule " << m; + + for (int i = outStart; i < outEnd; ++i) { + EXPECT_NEAR(gpuTFDFlat[i], cpuResults[m][i - outStart], kTolerance) + << "Mismatch at molecule " << m << " TFD index " << (i - outStart); + } + } + + cudaStreamDestroy(stream); +} + +// ============================================================================= +// Direct RDKit reference +// ============================================================================= + +TEST_F(TFDKernelsTest, CompareWithRDKitReference) { + // Compare GPU TFD output directly against pre-computed RDKit reference values. + // Includes both single-quartet and multi-quartet (ring, symmetric) molecules. + // + // Reference generated with RDKit Python: + // mol = Chem.MolFromSmiles(smiles) + // params = AllChem.ETKDGv3() + // params.randomSeed = 42 + // AllChem.EmbedMultipleConfs(mol, 4, params) + // tfd = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2) + + struct TestCase { + const char* smiles; + std::vector reference; + }; + + // clang-format off + std::vector cases = { + {"CCCC", { // n-butane, 1 torsion + 0.6667389132, // TFD(1,0) + 0.0000726610, // TFD(2,0) + 0.6666662521, // TFD(2,1) + 0.6667387931, // TFD(3,0) + 0.0000001200, // TFD(3,1) + 0.6666661321 // TFD(3,2) + }}, + {"CCCCC", { // n-pentane, 2 torsions + 0.6060606631, // TFD[0] + 0.6060573299, // TFD[1] + 0.6060662252, // TFD[2] + 0.6666872992, // TFD[3] + 0.6666326206, // TFD[4] + 0.0606323184 // TFD[5] + }}, + {"CCCCCC", { // n-hexane, 3 torsions + 0.6111276139, // TFD[0] + 0.0555704226, // TFD[1] + 0.6666744357, // TFD[2] + 0.5555532106, // TFD[3] + 0.6111014381, // TFD[4] + 0.6111123144 // TFD[5] + }}, + {"CC(C)CC", { // isopentane, 1 symmetric torsion (2 quartets) + 0.0000045777, + 0.0433253929, + 0.0433299706, + 0.0000027031, + 0.0000072808, + 0.0433226898 + }}, + {"C1CCCCC1", { // cyclohexane, 1 ring torsion (6 quartets) + 0.1343662730, + 0.3653621455, + 0.2309958724, + 0.1017138867, + 0.2360801597, + 0.4670760321 + }}, + {"c1ccccc1CC", { // ethylbenzene, 1 symmetric non-ring + 1 ring + 0.0000114982, + 0.0000104863, + 0.0000020789, + 0.0000100311, + 0.0000018047, + 0.0000013199 + }}, + }; + // clang-format on + + nvMolKit::TFDComputeOptions options; + options.useWeights = true; + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + options.symmRadius = 2; + options.ignoreColinearBonds = true; + + for (const auto& tc : cases) { + SCOPED_TRACE(tc.smiles); + + auto mol = std::unique_ptr(RDKit::SmilesToMol(tc.smiles)); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 4, 42); + ASSERT_EQ(mol->getNumConformers(), 4); + + auto system = nvMolKit::buildTFDSystem(*mol, options); + + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuTFD; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + nvMolKit::launchTFDMatrixKernel(system.numMolecules(), + device.dihedralAngles.data(), + device.torsionWeights.data(), + device.torsionMaxDevs.data(), + device.quartetStarts.data(), + device.torsionTypes.data(), + device.molDescriptors.data(), + device.tfdOutput.data(), + stream); + + gpuTFD.resize(device.tfdOutput.size()); + device.tfdOutput.copyToHost(gpuTFD.data(), gpuTFD.size()); + cudaStreamSynchronize(stream); + } + + ASSERT_EQ(gpuTFD.size(), tc.reference.size()); + for (size_t i = 0; i < gpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], tc.reference[i], kTolerance) << "TFD[" << i << "] mismatch with RDKit reference"; + } + + cudaStreamDestroy(stream); + } +} + +// ============================================================================= +// Edge cases +// ============================================================================= + +TEST_F(TFDKernelsTest, TwoConformers) { + // Minimum pair case: 2 conformers produce exactly 1 TFD pair + auto mol = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(mol, nullptr); + + generateConformers(*mol, 2); + ASSERT_EQ(mol->getNumConformers(), 2); + + nvMolKit::TFDComputeOptions options; + + auto cpuTFD = cpuGenerator_.GetTFDMatrix(*mol, options); + ASSERT_EQ(cpuTFD.size(), 1u); + + auto system = nvMolKit::buildTFDSystem(*mol, options); + ASSERT_EQ(system.totalTFDOutputs(), 1); + ASSERT_EQ(system.totalTFDOutputs(), 1); + + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuTFD; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + nvMolKit::launchTFDMatrixKernel(system.numMolecules(), + device.dihedralAngles.data(), + device.torsionWeights.data(), + device.torsionMaxDevs.data(), + device.quartetStarts.data(), + device.torsionTypes.data(), + device.molDescriptors.data(), + device.tfdOutput.data(), + stream); + + gpuTFD.resize(device.tfdOutput.size()); + device.tfdOutput.copyToHost(gpuTFD.data(), gpuTFD.size()); + cudaStreamSynchronize(stream); + } + + ASSERT_EQ(gpuTFD.size(), 1u); + EXPECT_NEAR(gpuTFD[0], cpuTFD[0], kTolerance); + + cudaStreamDestroy(stream); +} + +TEST_F(TFDKernelsTest, BatchWithZeroTorsionMolecule) { + // A molecule with no torsions (ethane) should produce TFD=0 + // and not corrupt results of neighboring molecules in the batch + auto ethane = std::unique_ptr(RDKit::SmilesToMol("CC")); + ASSERT_NE(ethane, nullptr); + generateConformers(*ethane, 3); + ASSERT_GE(ethane->getNumConformers(), 2); + + auto butane = std::unique_ptr(RDKit::SmilesToMol("CCCC")); + ASSERT_NE(butane, nullptr); + generateConformers(*butane, 3); + ASSERT_GE(butane->getNumConformers(), 2); + + // CPU reference for butane alone + nvMolKit::TFDComputeOptions options; + auto cpuButane = cpuGenerator_.GetTFDMatrix(*butane, options); + + // GPU batch: ethane first, then butane + std::vector molPtrs = {ethane.get(), butane.get()}; + auto system = nvMolKit::buildTFDSystem(molPtrs, options); + ASSERT_EQ(system.numMolecules(), 2); + + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuTFD; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + nvMolKit::launchTFDMatrixKernel(system.numMolecules(), + device.dihedralAngles.data(), + device.torsionWeights.data(), + device.torsionMaxDevs.data(), + device.quartetStarts.data(), + device.torsionTypes.data(), + device.molDescriptors.data(), + device.tfdOutput.data(), + stream); + + gpuTFD.resize(device.tfdOutput.size()); + if (!gpuTFD.empty()) { + device.tfdOutput.copyToHost(gpuTFD.data(), gpuTFD.size()); + } + cudaStreamSynchronize(stream); + } + + // Check ethane: all TFD values must be exactly 0 (no torsions). + // The kernel skips molecules with 0 torsions, so output buffer must be pre-zeroed. + int ethaneStart = system.molDescriptors[0].tfdOutStart; + int ethaneEnd = system.molDescriptors[1].tfdOutStart; + for (int i = ethaneStart; i < ethaneEnd; ++i) { + EXPECT_EQ(gpuTFD[i], 0.0f) << "Ethane (no torsions) should have TFD=0 at index " << i; + } + + // Check butane: should match CPU reference (not corrupted by ethane) + int butaneStart = system.molDescriptors[1].tfdOutStart; + int butaneEnd = system.totalTFDOutputs(); + ASSERT_EQ(static_cast(butaneEnd - butaneStart), cpuButane.size()); + for (int i = butaneStart; i < butaneEnd; ++i) { + EXPECT_NEAR(gpuTFD[i], cpuButane[i - butaneStart], kTolerance) + << "Butane TFD mismatch at index " << (i - butaneStart); + } + + cudaStreamDestroy(stream); +} + +TEST_F(TFDKernelsTest, CompareWithRDKitReferenceAddHs) { + // Compare GPU TFD output with AddHs against RDKit reference values. + // AddHs produces more quartets per torsion, exercising multi-quartet handling + // on molecules that would otherwise be single-quartet. + // + // Reference generated with RDKit Python: + // mol = Chem.AddHs(Chem.MolFromSmiles(smiles)) + // params = AllChem.ETKDGv3() + // params.randomSeed = 42 + // AllChem.EmbedMultipleConfs(mol, 4, params) + // tfd = TorsionFingerprints.GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2) + + struct TestCase { + const char* smiles; + std::vector reference; + }; + + // clang-format off + std::vector cases = { + {"CCCCC", { + 0.6666346588, + 0.0606342183, + 0.6666024736, + 0.6666717502, + 0.6666935910, + 0.6061117436 + }}, + {"CC(C)CC", { + 0.0155798214, + 0.0118375755, + 0.0180868258, + 0.0000140721, + 0.0091266798, + 0.0336807192 + }}, + }; + // clang-format on + + nvMolKit::TFDComputeOptions options; + options.useWeights = true; + options.maxDevMode = nvMolKit::TFDMaxDevMode::Equal; + options.symmRadius = 2; + options.ignoreColinearBonds = true; + + for (const auto& tc : cases) { + SCOPED_TRACE(std::string(tc.smiles) + " (AddHs)"); + + auto mol = std::unique_ptr(RDKit::SmilesToMol(tc.smiles)); + ASSERT_NE(mol, nullptr); + + RDKit::MolOps::addHs(*mol); + + generateConformers(*mol, 4, 42); + ASSERT_EQ(mol->getNumConformers(), 4); + + auto system = nvMolKit::buildTFDSystem(*mol, options); + + cudaStream_t stream; + cudaStreamCreate(&stream); + + std::vector gpuTFD; + + { + nvMolKit::TFDSystemDevice device; + nvMolKit::transferToDevice(system, device, stream); + + nvMolKit::launchDihedralKernel(system.totalDihedralWorkItems(), + device.positions.data(), + device.confPositionStarts.data(), + device.torsionAtoms.data(), + device.molDescriptors.data(), + device.dihedralWorkStarts.data(), + system.numMolecules(), + device.dihedralAngles.data(), + stream); + + nvMolKit::launchTFDMatrixKernel(system.numMolecules(), + device.dihedralAngles.data(), + device.torsionWeights.data(), + device.torsionMaxDevs.data(), + device.quartetStarts.data(), + device.torsionTypes.data(), + device.molDescriptors.data(), + device.tfdOutput.data(), + stream); + + gpuTFD.resize(device.tfdOutput.size()); + device.tfdOutput.copyToHost(gpuTFD.data(), gpuTFD.size()); + cudaStreamSynchronize(stream); + } + + ASSERT_EQ(gpuTFD.size(), tc.reference.size()); + for (size_t i = 0; i < gpuTFD.size(); ++i) { + EXPECT_NEAR(gpuTFD[i], tc.reference[i], kTolerance) << "TFD[" << i << "] mismatch with RDKit reference"; + } + + cudaStreamDestroy(stream); + } +}