Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/boost.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ if(NVMOLKIT_BUILD_AGAINST_PIP_RDKIT)
endforeach()
set(Boost_LIBRARIES ${BOOST_LIBRARIES_FROM_PIP})
else()
set(BOOST_TARGET_LIBS system serialization iostreams)
set(BOOST_TARGET_LIBS serialization iostreams)
if(NVMOLKIT_BUILD_PYTHON_BINDINGS)
list(APPEND BOOST_TARGET_LIBS
"python${Python_VERSION_MAJOR}${Python_VERSION_MINOR}")
Expand Down
38 changes: 0 additions & 38 deletions nvmolkit/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import math

import pytest
import torch

from rdkit.Chem import MolFromSmiles

from nvmolkit.fingerprints import MorganFingerprintGenerator
from nvmolkit.types import (
AsyncGpuResult,
CoordinateOutput,
Expand All @@ -31,40 +27,6 @@
)


def _get_fps(num_mols):
    """Compute Morgan fingerprints for ``num_mols`` references to one "CC" molecule.

    The generator is built with ``radius=0`` and ``fpSize=2048``. The current
    CUDA device is synchronized before returning, and the object produced by
    ``GetFingerprints`` is handed back unchanged.
    """
    fp_generator = MorganFingerprintGenerator(radius=0, fpSize=2048)
    # One parsed molecule, referenced num_mols times, is enough to size the output.
    repeated_mols = [MolFromSmiles("CC")] * num_mols

    fingerprints = fp_generator.GetFingerprints(repeated_mols)
    torch.cuda.synchronize()
    return fingerprints


def test_async_gpu_result_release_frees_memory():
    """Dropping a fingerprint result must return its device memory.

    Free device memory is sampled three times: before the fingerprints are
    computed, while the result is still referenced, and after the reference is
    deleted. The drop and the subsequent recovery must each be at least the
    expected payload size.
    """
    # Settle pending work and release cached blocks before taking the baseline.
    torch.cuda.synchronize()
    gc.collect()
    torch.cuda.empty_cache()
    free_before = torch.cuda.mem_get_info()[0]

    mol_count = 210_000
    # 2048 comes from the fpSize used by _get_fps; dividing by 8 presumably
    # converts bits to bytes (assumes a bit-packed result -- TODO confirm).
    payload_bytes = mol_count * 2048 // 8
    held_result = _get_fps(mol_count)
    torch.cuda.synchronize()

    free_while_held = torch.cuda.mem_get_info()[0]
    assert free_while_held < free_before
    assert free_while_held + payload_bytes <= free_before

    # Drop the only reference, then collect and synchronize before re-sampling.
    del held_result
    gc.collect()
    torch.cuda.synchronize()

    free_after_release = torch.cuda.mem_get_info()[0]

    assert (free_after_release - free_while_held) >= payload_bytes


@pytest.mark.parametrize("invalid_value", [0, -2, -99])
def test_hardware_options_invalid_batches_per_gpu(invalid_value):
"""Test that invalid batchesPerGpu values are rejected at construction time and via setter."""
Expand Down
7 changes: 5 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ add_subdirectory(utils)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/versions.h.in
${CMAKE_CURRENT_BINARY_DIR}/versions.h @ONLY)

add_library(nvmolkit_versions INTERFACE)
target_include_directories(nvmolkit_versions
INTERFACE ${CMAKE_CURRENT_BINARY_DIR})

add_library(similarity_kernels similarity_kernels.cu)
target_link_libraries(similarity_kernels PRIVATE cuda_error_check device
cccl_interface)
Expand Down Expand Up @@ -77,9 +81,8 @@ target_include_directories(triangle_smooth PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

add_library(embedder_utils embedder_utils.cpp)
target_link_libraries(embedder_utils PRIVATE ${RDKit_LIBS} device_vector
triangle_smooth)
triangle_smooth nvmolkit_versions)
target_include_directories(embedder_utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(embedder_utils PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

add_library(etkdg_impl etkdg_impl.cpp etkdg_kernels.cu)
target_link_libraries(
Expand Down
9 changes: 7 additions & 2 deletions src/minimizer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ add_library(bfgs bfgs_minimize.cu bfgs_hessian.cu
target_link_libraries(
bfgs
PUBLIC host_vector device_vector
PRIVATE ${RDKit_LIBS} batched_forcefield rdkit_mmff_flattened dist_geom
cub_helpers cccl_interface)
PRIVATE ${RDKit_LIBS}
batched_forcefield
rdkit_mmff_flattened
dist_geom
cub_helpers
cccl_interface
nvmolkit_versions)
target_include_directories(bfgs PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

add_library(bfgs_common bfgs_common.cpp)
Expand Down
16 changes: 12 additions & 4 deletions src/minimizer/bfgs_minimize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "mmff.h"
#include "mmff_kernels.h"
#include "nvtx.h"
#include "versions.h"

namespace nvMolKit {
constexpr double FUNCTOL = 1e-4; //!< Default tolerance for function convergence in the minimizer
Expand Down Expand Up @@ -788,14 +789,20 @@ void BfgsBatchMinimizer::setDirection() {
cudaCheckError(cudaGetLastError());
}

// TODO: The RDKit scaling code only appears to scale positive gradients, investigate this.
// Mirrors RDKit's ForceField::minimize gradient cap (calcGradient in
// Code/ForceField/ForceField.cpp). RDKit historically tracked the signed max of
// gradient components; commit 5b1d04d23 (RDKit 2025.09) switched to |grad|.
// Follow whichever rule the linked RDKit uses so weighted MMFF/UFF minimization
// trajectories agree with the host reference.
template <bool scaleGrads>
__global__ void scaleGradKernel(const int16_t* statuses,
const int* atomStarts,
double* grads,
double* gradScales,
const int* activeSystemIndices,
const int DIM) {
constexpr bool kRdkitHasGradScaleFix =
RDKIT_VERSION_MAJOR > 2025 || (RDKIT_VERSION_MAJOR == 2025 && RDKIT_VERSION_MINOR >= 9);
const int sysIdx = activeSystemIndices == nullptr ? blockIdx.x : activeSystemIndices[blockIdx.x];
const int idxWithinSystem = threadIdx.x;
const int numTerms = DIM * (atomStarts[sysIdx + 1] - atomStarts[sysIdx]);
Expand All @@ -806,7 +813,7 @@ __global__ void scaleGradKernel(const int16_t* statuses,

double* localGrad = &grads[atomStarts[sysIdx] * DIM];

double maxGrad = -1e8;
double maxGrad = kRdkitHasGradScaleFix ? 0.0 : -1e8;
double gradScale = scaleGrads ? 0.1 : 1.0;
__shared__ double distributedMax[1];
if (idxWithinSystem == 0) {
Expand All @@ -815,8 +822,9 @@ __global__ void scaleGradKernel(const int16_t* statuses,

for (int i = idxWithinSystem; i < numTerms; i += blockDim.x) {
localGrad[i] *= gradScale;
if (localGrad[i] > maxGrad) {
maxGrad = localGrad[i];
const double cmp = kRdkitHasGradScaleFix ? fabs(localGrad[i]) : localGrad[i];
if (cmp > maxGrad) {
maxGrad = cmp;
}
}

Expand Down
11 changes: 8 additions & 3 deletions src/minimizer/bfgs_minimize_permol_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "dist_geom_kernels_device.cuh"
#include "mmff_kernels.h"
#include "mmff_kernels_device.cuh"
#include "versions.h"

namespace nvMolKit {

Expand Down Expand Up @@ -233,15 +234,19 @@ __device__ void scaleGrad(const int
double* grad,
double& gradScale,
typename cub::BlockReduce<double, BLOCK_SIZE>::TempStorage& tempStorage) {
// See scaleGradKernel in bfgs_minimize.cu for the RDKit 5b1d04d23 (2025.09) rationale.
constexpr bool kRdkitHasGradScaleFix =
RDKIT_VERSION_MAJOR > 2025 || (RDKIT_VERSION_MAJOR == 2025 && RDKIT_VERSION_MINOR >= 9);
Comment thread
scal444 marked this conversation as resolved.
gradScale = scaleGrads ? 0.1 : 1.0;

double maxGrad = -1e8;
double maxGrad = kRdkitHasGradScaleFix ? 0.0 : -1e8;
for (int i = threadIdx.x; i < numTerms; i += blockDim.x) {
if constexpr (scaleGrads) {
grad[i] *= gradScale;
}
if (grad[i] > maxGrad) {
maxGrad = grad[i];
const double cmp = kRdkitHasGradScaleFix ? fabs(grad[i]) : grad[i];
if (cmp > maxGrad) {
maxGrad = cmp;
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/minimizer/bfgs_mmff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ MMFFMinimizeResult MMFFMinimizeMoleculesConfs(std::vector<RDKit::ROMol*>&
cudaStreamSynchronize(streamPtr);
}
} else {
nvMolKit::MMFF::sendContribsAndIndicesToDevice(systemHost, systemDevice);
nvMolKit::MMFF::setStreams(systemDevice, streamPtr);
nvMolKit::MMFF::sendContribsAndIndicesToDevice(systemHost, systemDevice);
nvMolKit::MMFF::allocateIntermediateBuffers(systemHost, systemDevice);
systemDevice.positions.resize(systemHost.positions.size());
systemDevice.positions.copyFromHost(buffers.initialPositions.data(), systemHost.positions.size());
Expand Down
2 changes: 1 addition & 1 deletion src/tfd/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
add_library(tfd_cpu tfd_common.cpp tfd_cpu.cpp tfd_transfer.cu)
target_link_libraries(
tfd_cpu
PRIVATE ${RDKit_LIBS} OpenMP::OpenMP_CXX openmp_helpers nvtx
PRIVATE ${RDKit_LIBS} OpenMP::OpenMP_CXX openmp_helpers nvtx nvmolkit_versions
PUBLIC device_vector)
target_include_directories(tfd_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

Expand Down
19 changes: 11 additions & 8 deletions src/tfd/tfd_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <unordered_set>

#include "nvtx.h"
#include "versions.h"

namespace nvMolKit {

Expand Down Expand Up @@ -294,16 +295,18 @@ std::pair<int, int> findCentralBond(const RDKit::ROMol& mol, const double* distM
double calculateBeta(const RDKit::ROMol& mol, const double* distMat, int aid1) {
int numAtoms = mol.getNumAtoms();

// Get all non-terminal bonds
// NOTE: RDKit has a typo in _calculateBeta (TorsionFingerprints.py ~line 391):
// `if len(nb2) > 1 and len(nb2) > 1` checks nb2 twice instead of nb1 and nb2.
// This includes bonds where only the end atom is non-terminal, inflating dmax.
// We replicate this behavior for RDKit compatibility.
// TODO: Fix once RDKit corrects this, or add a flag for "correct" behavior.
// Match RDKit's _calculateBeta (TorsionFingerprints.py) version-for-version.
// Pre-2026.03.1 RDKit had a typo that checked nb2 twice, inflating dmax by
// including bonds where only the end atom was non-terminal. Commit b56f3dc68
// (RDKit 2026.03.1) fixed it to check both endpoints. We match the behavior of the RDKit version we are built against.
constexpr bool kRdkitHasBetaTypoFix =
RDKIT_VERSION_MAJOR > 2026 || (RDKIT_VERSION_MAJOR == 2026 && RDKIT_VERSION_MINOR >= 3);
double dmax = 0.0;
for (const auto* bond : mol.bonds()) {
auto nb2 = getHeavyAtomNeighbors(bond->getEndAtom());
if (nb2.size() > 1 && nb2.size() > 1) {
auto nb1 = getHeavyAtomNeighbors(bond->getBeginAtom());
auto nb2 = getHeavyAtomNeighbors(bond->getEndAtom());
const bool beginIsNonTerminal = kRdkitHasBetaTypoFix ? (nb1.size() > 1) : (nb2.size() > 1);
if (beginIsNonTerminal && nb2.size() > 1) {
int bid1 = bond->getBeginAtomIdx();
int bid2 = bond->getEndAtomIdx();
double d = std::max(distMat[aid1 * numAtoms + bid1], distMat[aid1 * numAtoms + bid2]);
Expand Down
Loading