From bf2c8dd4ac2e4fdd1fe47f19b5731101c89b24aa Mon Sep 17 00:00:00 2001
From: Kevin Boyd
Date: Fri, 15 May 2026 09:54:44 -0400
Subject: [PATCH] Make pip-built wheels load on minimal manylinux containers

The pip-build path was globbing every .so in rdkit.libs/ into RDKit_LIBS
and every libboost_* into Boost_LIBRARIES, then linking each into every
nvmolkit Python module. That dragged libcairo and libquadmath onto each
module's NEEDED list. libcairo in turn has NEEDED entries for
libXrender/libX11/libXext; those are on the manylinux lib_whitelist so
rdkit-pypi's auditwheel pass legally left them external, but the
nvidia/cuda runtime container we test in doesn't ship them, so
`import nvmolkit.fingerprints` failed at load.

Narrow RDKit_LIBS to the 16 components nvmolkit actually uses (mirroring
the conda path's explicit list) and Boost_LIBRARIES to the same 4
components the conda path requests (boost serialization / iostreams /
python / numpy).

Also patchelf with --force-rpath so the entry-point modules get DT_RPATH
instead of DT_RUNPATH. The libs inside rdkit.libs/ have no rpath of
their own and rely on RPATH inheritance to resolve second-level deps;
rdkit's own python bindings do the same.

Drop the unused ${RDKit_LIBS} link from _arrayHelpers, which touches
zero RDKit symbols.

Verified the rdkit==2026.3.1 py3.12 wheel built with this change loads
and passes the full pytest suite (390 passed, 10 long deselected) on an
H200 in the manylinux+CUDA container with no system X/font libs.
--- admin/distribute/repair_wheel.sh | 8 +++- cmake/boost.cmake | 49 ++++++++++++-------- cmake/rdkit.cmake | 79 ++++++++++++++++++++------------ nvmolkit/CMakeLists.txt | 1 - 4 files changed, 86 insertions(+), 51 deletions(-) diff --git a/admin/distribute/repair_wheel.sh b/admin/distribute/repair_wheel.sh index d3d266af..8f92bc8d 100755 --- a/admin/distribute/repair_wheel.sh +++ b/admin/distribute/repair_wheel.sh @@ -86,10 +86,14 @@ unzip -q "${REPAIRED_WHEEL}" -d "${WORK}" # Each nvmolkit/_*.so resolves siblings in nvmolkit.libs/ (auditwheel's vendor # dir), and externally-shipped libs in rdkit.libs/ and nvidia/cuda_runtime/lib -# under the same site-packages root. +# under the same site-packages root. Use DT_RPATH (--force-rpath) rather than +# DT_RUNPATH so the search applies recursively to second-level deps. The libs +# inside rdkit.libs/ have no rpath of their own and rely on RPATH inheritance +# from the entry-point module to find their rdkit.libs/ siblings - rdkit's +# own python bindings work the same way. NEW_RPATH='$ORIGIN/../nvmolkit.libs:$ORIGIN/../rdkit.libs:$ORIGIN/../nvidia/cuda_runtime/lib' find "${WORK}/nvmolkit" -maxdepth 1 -name '_*.so' -type f | while read -r so; do - patchelf --set-rpath "${NEW_RPATH}" "${so}" + patchelf --force-rpath --set-rpath "${NEW_RPATH}" "${so}" done # Repack the wheel (preserves original filename). 
diff --git a/cmake/boost.cmake b/cmake/boost.cmake index 8b2936da..a3477e97 100644 --- a/cmake/boost.cmake +++ b/cmake/boost.cmake @@ -15,28 +15,41 @@ # cmake-lint: disable=C0103 +set(BOOST_TARGET_LIBS serialization iostreams) +if(NVMOLKIT_BUILD_PYTHON_BINDINGS) + list(APPEND BOOST_TARGET_LIBS + "python${Python_VERSION_MAJOR}${Python_VERSION_MINOR}") + # Link Boost.Python.Numpy as we use boost::python::numpy in DataStructs.cpp + list(APPEND BOOST_TARGET_LIBS + "numpy${Python_VERSION_MAJOR}${Python_VERSION_MINOR}") +endif() + if(NVMOLKIT_BUILD_AGAINST_PIP_RDKIT) - message(STATUS "Using boost libs from pip RDKit") - # rdkit.cmake already enumerated every .so under rdkit.libs/ as an IMPORTED - # target and appended each to RDKit_LIBS. Filter out the boost ones so targets - # that link against ${Boost_LIBRARIES} (rather than ${RDKit_LIBS}) still pull - # in libboost_python312, libboost_serialization, etc. - set(BOOST_LIBRARIES_FROM_PIP "") - foreach(lib IN LISTS RDKit_LIBS) - if(lib MATCHES "^libboost_") - list(APPEND BOOST_LIBRARIES_FROM_PIP ${lib}) + message(STATUS "Using boost libs from pip RDKit: ${BOOST_TARGET_LIBS}") + # rdkit-pypi hash-mangles SONAMEs (e.g. + # libboost_python312-ed6a74e7.so.1.85.0), so we glob per component rather than + # calling find_package. 
+ set(Boost_LIBRARIES "") + foreach(component IN LISTS BOOST_TARGET_LIBS) + file(GLOB MATCHES + ${NVMOLKIT_BUILD_AGAINST_PIP_LIBDIR}/libboost_${component}-*.so.*) + list(LENGTH MATCHES NUM_MATCHES) + if(NOT NUM_MATCHES EQUAL 1) + message( + FATAL_ERROR + "Expected exactly one libboost_${component}-*.so.* under " + "${NVMOLKIT_BUILD_AGAINST_PIP_LIBDIR}, got ${NUM_MATCHES}: ${MATCHES}" + ) endif() + list(GET MATCHES 0 LIB_PATH) + get_filename_component(libname ${LIB_PATH} NAME_WE) + add_library(${libname} SHARED IMPORTED) + set_target_properties(${libname} PROPERTIES IMPORTED_LOCATION ${LIB_PATH}) + target_include_directories( + ${libname} SYSTEM INTERFACE ${NVMOLKIT_BUILD_AGAINST_PIP_BOOSTINCLUDEDIR}) + list(APPEND Boost_LIBRARIES ${libname}) endforeach() - set(Boost_LIBRARIES ${BOOST_LIBRARIES_FROM_PIP}) else() - set(BOOST_TARGET_LIBS serialization iostreams) - if(NVMOLKIT_BUILD_PYTHON_BINDINGS) - list(APPEND BOOST_TARGET_LIBS - "python${Python_VERSION_MAJOR}${Python_VERSION_MINOR}") - # Link Boost.Python.Numpy as we use boost::python::numpy in DataStructs.cpp - list(APPEND BOOST_TARGET_LIBS - "numpy${Python_VERSION_MAJOR}${Python_VERSION_MINOR}") - endif() message(STATUS "Finding boost libs: ${BOOST_TARGET_LIBS}") find_package(Boost REQUIRED COMPONENTS ${BOOST_TARGET_LIBS}) endif() diff --git a/cmake/rdkit.cmake b/cmake/rdkit.cmake index e38468f3..ebc394a9 100644 --- a/cmake/rdkit.cmake +++ b/cmake/rdkit.cmake @@ -13,25 +13,33 @@ # License for the specific language governing permissions and limitations under # the License. +# RDKit components nvmolkit actually links against. Used by both code paths +# below: find_package on the conda/system path, and resolved against +# rdkit.libs/libRDKit<Component>-*.so.* on the pip-build path. 
+set(NVMOLKIT_RDKIT_COMPONENTS + DataStructs + Depictor + Descriptors + DistGeomHelpers + FileParsers + Fingerprints + ForceField + ForceFieldHelpers + GraphMol + MolStandardize + MolTransforms + PartialCharges + RDGeneral + RDGeometryLib + SmilesParse + SubstructMatch) + if(NOT NVMOLKIT_BUILD_AGAINST_PIP_RDKIT) find_package(RDKit REQUIRED) - set(RDKit_LIBS - RDKit::DataStructs - RDKit::Depictor - RDKit::Descriptors - RDKit::DistGeomHelpers - RDKit::FileParsers - RDKit::Fingerprints - RDKit::ForceField - RDKit::ForceFieldHelpers - RDKit::GraphMol - RDKit::MolStandardize - RDKit::MolTransforms - RDKit::PartialCharges - RDKit::RDGeneral - RDKit::RDGeometryLib - RDKit::SmilesParse - RDKit::SubstructMatch) + set(RDKit_LIBS "") + foreach(component IN LISTS NVMOLKIT_RDKIT_COMPONENTS) + list(APPEND RDKit_LIBS RDKit::${component}) + endforeach() # For RDKit 2023.5 onwards, the rdkit::rdbase target improperly has hardcoded # interface include directories that use the python version they were built @@ -86,25 +94,36 @@ else() ) endif() - # make a list of all files ine the libdir - file(GLOB RDKIT_FILES ${NVMOLKIT_BUILD_AGAINST_PIP_LIBDIR}/*) - # for each file, make an imported library with that lib as source + # Resolve each component to its hash-mangled rdkit.libs/ filename. boost.cmake + # handles libboost_* separately; everything else in rdkit.libs/ (libcairo, + # libfontconfig, libfreetype, libxcb*, libXau, libpixman, libpng16, + # libquadmath, libuuid, libbz2) is an auditwheel transitive that nvmolkit does + # not consume. 
message( STATUS "Searched for RDKit libs in: ${NVMOLKIT_BUILD_AGAINST_PIP_LIBDIR}") - message(STATUS "Found RDKit pip libs: ${RDKIT_FILES}") - # Populate RDKIT_LIBS with the imported libraries - foreach(lib ${RDKIT_FILES}) - get_filename_component(libname ${lib} NAME_WE) + set(RDKit_LIBS "") + foreach(component IN LISTS NVMOLKIT_RDKIT_COMPONENTS) + file(GLOB MATCHES + ${NVMOLKIT_BUILD_AGAINST_PIP_LIBDIR}/libRDKit${component}-*.so.*) + list(LENGTH MATCHES NUM_MATCHES) + if(NOT NUM_MATCHES EQUAL 1) + message( + FATAL_ERROR + "Expected exactly one libRDKit${component}-*.so.* under " + "${NVMOLKIT_BUILD_AGAINST_PIP_LIBDIR}, got ${NUM_MATCHES}: ${MATCHES}" + ) + endif() + list(GET MATCHES 0 LIB_PATH) + get_filename_component(libname ${LIB_PATH} NAME_WE) add_library(${libname} SHARED IMPORTED) - set_target_properties(${libname} PROPERTIES IMPORTED_LOCATION ${lib}) - # Set include dirs to include both NVMOLKIT_BUILD_AGAINST_PIP_INCDIR and - # NVMOLKIT_BUILD_AGAINST_PIP_BOOSTINCLUDEDIR + set_target_properties(${libname} PROPERTIES IMPORTED_LOCATION ${LIB_PATH}) target_include_directories( ${libname} SYSTEM INTERFACE ${NVMOLKIT_BUILD_AGAINST_PIP_INCDIR} ${NVMOLKIT_BUILD_AGAINST_PIP_BOOSTINCLUDEDIR}) list(APPEND RDKit_LIBS ${libname}) endforeach() - # cmake-lint: disable=C0103 - set(Boost_INCLUDE_DIRS ${NVMOLKIT_BUILD_AGAINST_PIP_BOOSTINCLUDEDIR}) - message(STATUS "Using boost libs from pip RDKit") + message(STATUS "Imported RDKit pip libs: ${RDKit_LIBS}") + # cmake-format: off + set(Boost_INCLUDE_DIRS ${NVMOLKIT_BUILD_AGAINST_PIP_BOOSTINCLUDEDIR}) # cmake-lint: disable=C0103 + # cmake-format: on endif(NOT NVMOLKIT_BUILD_AGAINST_PIP_RDKIT) diff --git a/nvmolkit/CMakeLists.txt b/nvmolkit/CMakeLists.txt index adca1380..bdb00b1b 100644 --- a/nvmolkit/CMakeLists.txt +++ b/nvmolkit/CMakeLists.txt @@ -40,7 +40,6 @@ target_link_libraries(_arrayHelpers PUBLIC ${Boost_LIBRARIES} device_vector) target_include_directories( _arrayHelpers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/utils 
${Python_INCLUDE_DIRS}) target_include_directories(_arrayHelpers SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) -target_link_libraries(_arrayHelpers PRIVATE ${RDKit_LIBS}) installpythontarget(_arrayHelpers ./) add_library(_mmffOptimization MODULE mmffOptimization.cpp)