From 918f073cbbebd49031900667b66b649249d2d725 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 10:29:27 -0400 Subject: [PATCH 01/19] add ripserq note to readme + ignore R project user data --- .gitignore | 4 ++++ README.md | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 59f7e7c..db589b4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ DerivedData ripser.dSYM ripser ripser-coeff + +# ripserq +.DS_Store +.Rproj.user diff --git a/README.md b/README.md index 2b08dce..01aebdc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,14 @@ -# Ripser +# `ripserq` + +This branch exists to maintain a minimal R package structure around the current version of Ripser, into which future upgrades will be merged. +If and when experimental features are added, they will be maintained and combined in sub-branches of the form `ripserq-`. +The code will be minimally altered, most significantly (1) to toggle off C-only procedures like `main()` and (2) to ensure compatibility with R, the {Rcpp} interface, and the R package infrastructure. +Stable versions of `ripserq` will then be used to upgrade the R package {ripserr}. + +The Ripser README is unaltered below. + + +## Ripser Copyright © 2015–2021 [Ulrich Bauer]. From 1b047794bfc91d55fe2fb413fbf7d1ceafa14477 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 10:41:33 -0400 Subject: [PATCH 02/19] move source code files to src folder --- robin-hood-hashing | 1 - .clang-format => src/.clang-format | 0 .gitmodules => src/.gitmodules | 0 Makefile => src/Makefile | 0 {examples => src/examples}/o3_1024.txt | 0 {examples => src/examples}/o3_2048.txt | 0 {examples => src/examples}/o3_4096.txt | 0 {examples => src/examples}/o3_8192.txt | 0 {examples => src/examples}/pointsCycloOctane.csv | 0 {examples => src/examples}/projective_plane.csv | 0 {examples => src/examples}/projective_plane.dipha | Bin .../projective_plane.lower_distance_matrix | 0 .../examples}/random16.lower_distance_matrix | 0 .../examples}/random20.lower_distance_matrix | 0 .../examples}/rp2_600.lower_distance_matrix.csv | 0 .../examples}/sphere_3_192.lower_distance_matrix | 0 ripser.cpp => src/ripser.cpp | 0 17 files changed, 1 deletion(-) delete mode 160000 robin-hood-hashing rename .clang-format => src/.clang-format (100%) rename .gitmodules => src/.gitmodules (100%) rename Makefile => src/Makefile (100%) rename {examples => src/examples}/o3_1024.txt (100%) rename {examples => src/examples}/o3_2048.txt (100%) rename {examples => src/examples}/o3_4096.txt (100%) rename {examples => src/examples}/o3_8192.txt (100%) rename {examples => src/examples}/pointsCycloOctane.csv (100%) rename {examples => src/examples}/projective_plane.csv (100%) rename {examples => src/examples}/projective_plane.dipha (100%) rename {examples => src/examples}/projective_plane.lower_distance_matrix (100%) rename {examples => src/examples}/random16.lower_distance_matrix (100%) rename {examples => src/examples}/random20.lower_distance_matrix (100%) rename {examples => src/examples}/rp2_600.lower_distance_matrix.csv (100%) rename {examples => src/examples}/sphere_3_192.lower_distance_matrix (100%) rename ripser.cpp => src/ripser.cpp (100%) diff --git a/robin-hood-hashing b/robin-hood-hashing deleted file mode 160000 index 7b00237..0000000 --- a/robin-hood-hashing +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7b00237399cec62ee5795f9013cf5df8de596e69 diff --git a/.clang-format b/src/.clang-format similarity index 100% rename from .clang-format rename to src/.clang-format diff --git a/.gitmodules b/src/.gitmodules similarity index 100% rename from .gitmodules rename to src/.gitmodules diff --git a/Makefile b/src/Makefile similarity index 100% rename from Makefile rename to src/Makefile diff --git a/examples/o3_1024.txt b/src/examples/o3_1024.txt similarity index 100% rename from examples/o3_1024.txt rename to src/examples/o3_1024.txt diff --git a/examples/o3_2048.txt b/src/examples/o3_2048.txt similarity index 100% rename from examples/o3_2048.txt rename to src/examples/o3_2048.txt diff --git a/examples/o3_4096.txt b/src/examples/o3_4096.txt similarity index 100% rename from examples/o3_4096.txt rename to src/examples/o3_4096.txt diff --git a/examples/o3_8192.txt b/src/examples/o3_8192.txt similarity index 100% rename from examples/o3_8192.txt rename to src/examples/o3_8192.txt diff --git a/examples/pointsCycloOctane.csv b/src/examples/pointsCycloOctane.csv similarity index 100% rename from examples/pointsCycloOctane.csv rename to src/examples/pointsCycloOctane.csv diff --git a/examples/projective_plane.csv b/src/examples/projective_plane.csv similarity index 100% rename from examples/projective_plane.csv rename to src/examples/projective_plane.csv diff --git a/examples/projective_plane.dipha b/src/examples/projective_plane.dipha similarity index 100% rename from examples/projective_plane.dipha rename to src/examples/projective_plane.dipha diff --git a/examples/projective_plane.lower_distance_matrix b/src/examples/projective_plane.lower_distance_matrix similarity index 100% rename from examples/projective_plane.lower_distance_matrix rename to src/examples/projective_plane.lower_distance_matrix diff --git a/examples/random16.lower_distance_matrix b/src/examples/random16.lower_distance_matrix similarity index 100% rename from examples/random16.lower_distance_matrix rename to src/examples/random16.lower_distance_matrix diff --git a/examples/random20.lower_distance_matrix b/src/examples/random20.lower_distance_matrix similarity index 100% rename from examples/random20.lower_distance_matrix rename to src/examples/random20.lower_distance_matrix diff --git a/examples/rp2_600.lower_distance_matrix.csv b/src/examples/rp2_600.lower_distance_matrix.csv similarity index 100% rename from examples/rp2_600.lower_distance_matrix.csv rename to src/examples/rp2_600.lower_distance_matrix.csv diff --git a/examples/sphere_3_192.lower_distance_matrix b/src/examples/sphere_3_192.lower_distance_matrix similarity index 100% rename from examples/sphere_3_192.lower_distance_matrix rename to src/examples/sphere_3_192.lower_distance_matrix diff --git a/ripser.cpp b/src/ripser.cpp similarity index 100% rename from ripser.cpp rename to src/ripser.cpp From 2fe2a6adaa4486d47edb25d010ca19b096fa56d5 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 13:20:57 -0400 Subject: [PATCH 03/19] split src & pkg gitignore files + rm makefile --- .gitignore | 15 +++++++++------ src/.gitignore | 4 ++++ src/Makefile | 18 ------------------ 3 files changed, 13 insertions(+), 24 deletions(-) create mode 100644 src/.gitignore delete mode 100644 src/Makefile diff --git a/.gitignore b/.gitignore index db589b4..8c813a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,11 @@ -DerivedData -ripser.dSYM -ripser -ripser-coeff - -# ripserq +# Mac .DS_Store + +# R project .Rproj.user +.Rhistory + +# Rcpp +*.o +*.so +*.dll diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..59f7e7c --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,4 @@ +DerivedData +ripser.dSYM +ripser +ripser-coeff diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 921e438..0000000 --- a/src/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -build: ripser - - -all: ripser ripser-coeff ripser-debug - - -ripser: ripser.cpp - c++ -std=c++11 -Wall ripser.cpp -o ripser -O3 -D NDEBUG - -ripser-coeff: ripser.cpp - c++ -std=c++11 -Wall ripser.cpp -o ripser-coeff -O3 -D NDEBUG -D USE_COEFFICIENTS - -ripser-debug: ripser.cpp - c++ -std=c++11 -Wall ripser.cpp -o ripser-debug -g - - -clean: - rm -f ripser ripser-coeff ripser-debug From 5764d4b9d2aa4c5fa4a295f5890aaac97e5a1cc7 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 13:22:42 -0400 Subject: [PATCH 04/19] add proj & pkg infrastructure + harmonize with rcpp + export printing function --- DESCRIPTION | 15 ++++++ Makefile | 18 +++++++ NAMESPACE | 5 ++ R/RcppExports.R | 8 +++ R/ripserq-package.R | 4 ++ man/ripserq-package.Rd | 15 ++++++ ripser.Rproj | 13 +++++ src/RcppExports.cpp | 37 ++++++++++++++ src/ripser.cpp | 107 +++++++++++++++++++++++++++++------------ 9 files changed, 192 insertions(+), 30 deletions(-) create mode 100644 DESCRIPTION create mode 100644 Makefile create mode 100644 NAMESPACE create mode 100644 R/RcppExports.R create mode 100644 R/ripserq-package.R create mode 100644 man/ripserq-package.Rd create mode 100644 ripser.Rproj create mode 100644 src/RcppExports.cpp diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..5d543b0 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,15 @@ +Package: ripserq +Type: Package +Title: Ports the Ripser program to R with minimal package structure. +Version: 1.0 +Date: 2025-06-19 +Authors@R: person("Your", "Name", role = c("aut", "cre"), + email = "your@email.com") +Description: More about what it does (maybe more than one + line). +License: GPL (>= 2) +Imports: + Rcpp +LinkingTo: Rcpp +RoxygenNote: 7.3.2 +Encoding: UTF-8 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..921e438 --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +build: ripser + + +all: ripser ripser-coeff ripser-debug + + +ripser: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o ripser -O3 -D NDEBUG + +ripser-coeff: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o ripser-coeff -O3 -D NDEBUG -D USE_COEFFICIENTS + +ripser-debug: ripser.cpp + c++ -std=c++11 -Wall ripser.cpp -o ripser-debug -g + + +clean: + rm -f ripser ripser-coeff ripser-debug diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..06d3733 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,5 @@ +# Generated by roxygen2: do not edit by hand + +export(ripser_cpp_dist) +importFrom(Rcpp,sourceCpp) +useDynLib(ripserq, .registration = TRUE) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..ee26e91 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,8 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#' @export +ripser_cpp_dist <- function(dataset, dim, thresh, ratio, p) { + .Call(`_ripserq_ripser_cpp_dist`, dataset, dim, thresh, ratio, p) +} + diff --git a/R/ripserq-package.R b/R/ripserq-package.R new file mode 100644 index 0000000..50e0427 --- /dev/null +++ b/R/ripserq-package.R @@ -0,0 +1,4 @@ +#' @importFrom Rcpp sourceCpp +#' @useDynLib ripserq, .registration = TRUE +#' @keywords internal +"_PACKAGE" diff --git a/man/ripserq-package.Rd b/man/ripserq-package.Rd new file mode 100644 index 0000000..16344db --- /dev/null +++ b/man/ripserq-package.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ripserq-package.R +\docType{package} +\name{ripserq-package} +\alias{ripserq} +\alias{ripserq-package} +\title{ripserq: Ports the Ripser program to R with minimal package structure.} +\description{ +More about what it does (maybe more than one line). +} +\author{ +\strong{Maintainer}: Your Name \email{your@email.com} + +} +\keyword{internal} diff --git a/ripser.Rproj b/ripser.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/ripser.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..2c6d25b --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,37 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// ripser_cpp_dist +Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector& dataset, int dim, double thresh, float ratio, int p); +RcppExport SEXP _ripserq_ripser_cpp_dist(SEXP datasetSEXP, SEXP dimSEXP, SEXP threshSEXP, SEXP ratioSEXP, SEXP pSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type dataset(datasetSEXP); + Rcpp::traits::input_parameter< int >::type dim(dimSEXP); + Rcpp::traits::input_parameter< double >::type thresh(threshSEXP); + Rcpp::traits::input_parameter< float >::type ratio(ratioSEXP); + Rcpp::traits::input_parameter< int >::type p(pSEXP); + rcpp_result_gen = Rcpp::wrap(ripser_cpp_dist(dataset, dim, thresh, ratio, p)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_ripserq_ripser_cpp_dist", (DL_FUNC) &_ripserq_ripser_cpp_dist, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_ripserq(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/ripser.cpp b/src/ripser.cpp index 92b0e49..62e6598 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -53,6 +53,8 @@ #include #include #include +// ripserq +#include #ifdef USE_ROBINHOOD_HASHMAP @@ -511,7 +513,8 @@ template class ripser { }; diameter_entry_t get_zero_pivot_facet(const diameter_entry_t simplex, const index_t dim) { - static simplex_boundary_enumerator facets(0, *this); + // ripserq: rm `static` + simplex_boundary_enumerator facets(0, *this); facets.set_simplex(simplex, dim); while (facets.has_next()) { diameter_entry_t facet = facets.next(); @@ -521,7 +524,8 @@ template class ripser { } diameter_entry_t get_zero_pivot_cofacet(const diameter_entry_t simplex, const index_t dim) { - static simplex_coboundary_enumerator cofacets(*this); + // ripserq: rm `static` + simplex_coboundary_enumerator cofacets(*this); cofacets.set_simplex(simplex, dim); while (cofacets.has_next()) { diameter_entry_t cofacet = cofacets.next(); @@ -556,7 +560,8 @@ template class ripser { entry_hash_map& pivot_column_index, index_t dim) { #ifdef INDICATE_PROGRESS - std::cerr << clear_line << "assembling columns" << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << "assembling columns" << std::flush; std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; #endif @@ -571,7 +576,8 @@ template class ripser { while (cofacets.has_next(false)) { #ifdef INDICATE_PROGRESS if (std::chrono::steady_clock::now() > next) { - std::cerr << clear_line << "assembling " << next_simplices.size() + // ripserq + Rcpp::Rcerr << clear_line << "assembling " << next_simplices.size() << " columns (processing " << std::distance(&simplices[0], &simplex) << "/" << simplices.size() << " simplices)" << std::flush; next = std::chrono::steady_clock::now() + time_step; @@ -590,21 +596,24 @@ template class ripser { if (dim < dim_max) simplices.swap(next_simplices); #ifdef INDICATE_PROGRESS - std::cerr << clear_line << "sorting " << columns_to_reduce.size() << " columns" + // ripserq + Rcpp::Rcerr << clear_line << "sorting " << columns_to_reduce.size() << " columns" << std::flush; #endif std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), greater_diameter_or_smaller_index); #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif } void compute_dim_0_pairs(std::vector& edges, std::vector& columns_to_reduce) { #ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim 0:" << std::endl; + // ripserq + Rcpp::Rcout << "persistence intervals in dim 0:" << std::endl; #endif union_find dset(n); @@ -620,7 +629,8 @@ template class ripser { if (u != v) { #ifdef PRINT_PERSISTENCE_PAIRS if (get_diameter(e) != 0) - std::cout << " [0," << get_diameter(e) << ")" << std::endl; + // ripserq + Rcpp::Rcout << " [0," << get_diameter(e) << ")" << std::endl; #endif dset.link(u, v); } else if ((dim_max > 0) && (get_index(get_zero_apparent_cofacet(e, 1)) == -1)) @@ -630,7 +640,8 @@ template class ripser { #ifdef PRINT_PERSISTENCE_PAIRS for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) std::cout << " [0, )" << std::endl; + // ripserq + if (dset.find(i) == i) Rcpp::Rcout << " [0, )" << std::endl; #endif } @@ -669,7 +680,8 @@ template class ripser { diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex, Column& working_coboundary, const index_t& dim, entry_hash_map& pivot_column_index) { - static simplex_coboundary_enumerator cofacets(*this); + // ripserq: rm `static` + simplex_coboundary_enumerator cofacets(*this); bool check_for_emergent_pair = true; cofacet_entries.clear(); cofacets.set_simplex(simplex, dim); @@ -692,7 +704,8 @@ template class ripser { template void add_simplex_coboundary(const diameter_entry_t simplex, const index_t& dim, Column& working_reduction_column, Column& working_coboundary) { - static simplex_coboundary_enumerator cofacets(*this); + // ripserq: rm `static` + simplex_coboundary_enumerator cofacets(*this); working_reduction_column.push(simplex); cofacets.set_simplex(simplex, dim); while (cofacets.has_next()) { @@ -720,7 +733,8 @@ template class ripser { entry_hash_map& pivot_column_index, const index_t dim) { #ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim " << dim << ":" << std::endl; + // ripserq + Rcpp::Rcout << "persistence intervals in dim " << dim << ":" << std::endl; #endif compressed_sparse_matrix reduction_matrix; @@ -746,7 +760,8 @@ template class ripser { while (true) { #ifdef INDICATE_PROGRESS if (std::chrono::steady_clock::now() > next) { - std::cerr << clear_line << "reducing column " << index_column_to_reduce + 1 + // ripserq + Rcpp::Rcerr << clear_line << "reducing column " << index_column_to_reduce + 1 << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")" << std::flush; next = std::chrono::steady_clock::now() + time_step; @@ -777,9 +792,11 @@ template class ripser { value_t death = get_diameter(pivot); if (death > diameter * ratio) { #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif - std::cout << " [" << diameter << "," << death << ")" << std::endl; + // ripserq + Rcpp::Rcout << " [" << diameter << "," << death << ")" << std::endl; } #endif pivot_column_index.insert({get_entry(pivot), index_column_to_reduce}); @@ -795,16 +812,19 @@ template class ripser { } else { #ifdef PRINT_PERSISTENCE_PAIRS #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif - std::cout << " [" << diameter << ", )" << std::endl; + // ripserq + Rcpp::Rcout << " [" << diameter << ", )" << std::endl; #endif break; } } } #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif } @@ -1015,7 +1035,8 @@ euclidean_distance_matrix read_point_cloud(std::istream& input_stream) { euclidean_distance_matrix eucl_dist(std::move(points)); index_t n = eucl_dist.size(); - std::cout << "point cloud with " << n << " points in dimension " + // ripserq + Rcpp::Rcout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl; return eucl_dist; @@ -1090,13 +1111,13 @@ compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { if (read(input_stream) != 8067171840) { - std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl; - exit(-1); + // ripserq + Rcpp::stop("input is not a Dipha file (magic number: 8067171840)"); } if (read(input_stream) != 7) { - std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl; - exit(-1); + // ripserq + Rcpp::stop("input is not a Dipha distance matrix (file type: 7)"); } index_t n = read(input_stream); @@ -1137,7 +1158,8 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, const fil } void print_usage_and_exit(int exit_code) { - std::cerr + // ripserq + Rcpp::Rcerr << "Usage: " << "ripser " << "[options] [filename]" << std::endl @@ -1230,14 +1252,15 @@ int main(int argc, char** argv) { std::ifstream file_stream(filename); if (filename && file_stream.fail()) { - std::cerr << "couldn't open file " << filename << std::endl; - exit(-1); + // ripserq + Rcpp::stop("couldn't open file %s", filename); } if (format == SPARSE) { sparse_distance_matrix dist = read_sparse_distance_matrix(filename ? file_stream : std::cin); - std::cout << "sparse distance matrix with " << dist.size() << " points and " + // ripserq + Rcpp::Rcout << "sparse distance matrix with " << dist.size() << " points and " << dist.num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries" << std::endl; @@ -1270,17 +1293,20 @@ int main(int argc, char** argv) { if (d != std::numeric_limits::infinity()) max_finite = std::max(max_finite, d); if (d <= threshold) ++num_edges; } - std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl; + // ripserq + Rcpp::Rcout << "value range: [" << min << "," << max_finite << "]" << std::endl; if (threshold == std::numeric_limits::max()) { - std::cout << "distance matrix with " << dist.size() + // ripserq + Rcpp::Rcout << "distance matrix with " << dist.size() << " points, using threshold at enclosing radius " << enclosing_radius << std::endl; ripser(std::move(dist), dim_max, enclosing_radius, ratio, modulus) .compute_barcodes(); } else { - std::cout << "sparse distance matrix with " << dist.size() << " points and " + // ripserq + Rcpp::Rcout << "sparse distance matrix with " << dist.size() << " points and " << num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries" << std::endl; @@ -1291,3 +1317,24 @@ int main(int argc, char** argv) { exit(0); } } + +//' @export +// [[Rcpp::export()]] +Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector &dataset, int dim, double thresh, float ratio, int p) { + std::vector distances(dataset.begin(), dataset.end()); + + compressed_lower_distance_matrix dist(std::move(distances)); + index_t idx_dim = static_cast(dim); + value_t val_thresh = static_cast(thresh); + coefficient_t coeff_p = static_cast(p); + + using RipserType = ripser; + + auto ripser_ptr = new RipserType(std::move(dist), idx_dim, val_thresh, ratio, coeff_p); + Rcpp::XPtr ripser_obj(ripser_ptr, false); + ripser_obj->compute_barcodes(); + + Rcpp::List output; + + return output; +} From aaceec1322040c62f69005ef843a8ebf1805379b Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 15:34:54 -0400 Subject: [PATCH 05/19] resolve check notes & warnings --- .Rbuildignore | 9 +++++++++ DESCRIPTION | 2 +- NAMESPACE | 1 - R/RcppExports.R | 1 - man/ripserq-package.Rd | 2 +- ripser.Rproj | 4 ++++ src/ripser.cpp | 10 +++++++++- 7 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 .Rbuildignore diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..7a02beb --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,9 @@ +# Ripser +.clang-format +CONTRIBUTING.txt +COPYING.txt +Makefile + +# R project +^ripser\.Rproj$ +^\.Rproj\.user$ diff --git a/DESCRIPTION b/DESCRIPTION index 5d543b0..66e68aa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ripserq Type: Package -Title: Ports the Ripser program to R with minimal package structure. +Title: Ports the Ripser program to R with minimal package structure Version: 1.0 Date: 2025-06-19 Authors@R: person("Your", "Name", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 06d3733..a19a17a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,4 @@ # Generated by roxygen2: do not edit by hand -export(ripser_cpp_dist) importFrom(Rcpp,sourceCpp) useDynLib(ripserq, .registration = TRUE) diff --git a/R/RcppExports.R b/R/RcppExports.R index ee26e91..e5ab8d4 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,7 +1,6 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -#' @export ripser_cpp_dist <- function(dataset, dim, thresh, ratio, p) { .Call(`_ripserq_ripser_cpp_dist`, dataset, dim, thresh, ratio, p) } diff --git a/man/ripserq-package.Rd b/man/ripserq-package.Rd index 16344db..0765847 100644 --- a/man/ripserq-package.Rd +++ b/man/ripserq-package.Rd @@ -4,7 +4,7 @@ \name{ripserq-package} \alias{ripserq} \alias{ripserq-package} -\title{ripserq: Ports the Ripser program to R with minimal package structure.} +\title{ripserq: Ports the Ripser program to R with minimal package structure} \description{ More about what it does (maybe more than one line). } diff --git a/ripser.Rproj b/ripser.Rproj index 8e3c2eb..21a4da0 100644 --- a/ripser.Rproj +++ b/ripser.Rproj @@ -11,3 +11,7 @@ Encoding: UTF-8 RnwWeave: Sweave LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/src/ripser.cpp b/src/ripser.cpp index 62e6598..9573932 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -43,6 +43,9 @@ //#define USE_ROBINHOOD_HASHMAP +// ripserq: R does not tolerate use of `exit()`. +//#define COMMAND_LINE_IO + #include #include #include @@ -1157,6 +1160,9 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, const fil } } +// ripserq: R does not tolerate use of `exit()`. +#ifdef COMMAND_LINE_IO + void print_usage_and_exit(int exit_code) { // ripserq Rcpp::Rcerr @@ -1318,7 +1324,9 @@ int main(int argc, char** argv) { } } -//' @export +// ripserq +#endif + // [[Rcpp::export()]] Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector &dataset, int dim, double thresh, float ratio, int p) { std::vector distances(dataset.begin(), dataset.end()); From 4c85d7e3972bbcb2bd842112c33980dd928fec84 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 15:41:28 -0400 Subject: [PATCH 06/19] make code change notes clearer --- src/ripser.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ripser.cpp b/src/ripser.cpp index 9573932..a1a5139 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -516,7 +516,8 @@ template class ripser { }; diameter_entry_t get_zero_pivot_facet(const diameter_entry_t simplex, const index_t dim) { - // ripserq: rm `static` + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 simplex_boundary_enumerator facets(0, *this); facets.set_simplex(simplex, dim); while (facets.has_next()) { @@ -527,7 +528,8 @@ template class ripser { } diameter_entry_t get_zero_pivot_cofacet(const diameter_entry_t simplex, const index_t dim) { - // ripserq: rm `static` + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 simplex_coboundary_enumerator cofacets(*this); cofacets.set_simplex(simplex, dim); while (cofacets.has_next()) { @@ -683,7 +685,8 @@ template class ripser { diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex, Column& working_coboundary, const index_t& dim, entry_hash_map& pivot_column_index) { - // ripserq: rm `static` + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 simplex_coboundary_enumerator cofacets(*this); bool check_for_emergent_pair = true; cofacet_entries.clear(); @@ -707,7 +710,8 @@ template class ripser { template void add_simplex_coboundary(const diameter_entry_t simplex, const index_t& dim, Column& working_reduction_column, Column& working_coboundary) { - // ripserq: rm `static` + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 simplex_coboundary_enumerator cofacets(*this); working_reduction_column.push(simplex); cofacets.set_simplex(simplex, dim); From 660f9517a74b2f32e0a6b8be24de3206c10df62a Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 16:14:08 -0400 Subject: [PATCH 07/19] write & export r wrapper function --- NAMESPACE | 1 + R/ripser-dist.R | 59 ++++++++++++++++++++++++++++++++++++++++++++++ man/ripser_dist.Rd | 54 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 R/ripser-dist.R create mode 100644 man/ripser_dist.Rd diff --git a/NAMESPACE b/NAMESPACE index a19a17a..39baaa8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,5 @@ # Generated by roxygen2: do not edit by hand +export(ripser_dist) importFrom(Rcpp,sourceCpp) useDynLib(ripserq, .registration = TRUE) diff --git a/R/ripser-dist.R b/R/ripser-dist.R new file mode 100644 index 0000000..71dfe05 --- /dev/null +++ b/R/ripser-dist.R @@ -0,0 +1,59 @@ +#' @title Calculate Persistent Homology via a Vietoris-Rips Filtration +#' +#' @description This "externally exported" R function gives the user access to +#' the "internally exported" C++ function `ripser_cpp_dist()`. +#' +#' @param dataset "dist" object on which to compute persistent homology +#' @param max_dim maximum dimension +#' @param threshold maximum diameter +#' @returns an empty list +#' @examples +#' +#' # validate computation on toy data set +#' dist_vec <- c(4, 3, 5, 5, 3, 4) +#' result <- ripserq:::ripser_cpp_dist( +#' dist_vec, +#' dim = 1, +#' thresh = 6.0, +#' ratio = 1.0, +#' p = 2 +#' ) +#' result +#' +#' # validate use of default threshold +#' ripserq:::ripser_cpp_dist( +#' dist_vec, +#' dim = 1, +#' thresh = Inf, +#' ratio = 1.0, +#' p = 2 +#' ) +#' +#' # validate compatibility with 'dist' class +#' ripserq:::ripser_cpp_dist( +#' eurodist, +#' dim = 1, thresh = 5000, ratio = 1.0, p = 2 +#' ) +#' +#' # exposed R function with no explicit parameter settings +#' ripser_dist(dist_vec) +#' +#' @export +ripser_dist <- function( + dataset, + max_dim = 1L, + threshold = Inf +) { + + # pre-process parameters + if (threshold == Inf) threshold <- max(dataset) + + # run `compute_barcodes()` and save result + ans <- ripser_cpp_dist( + dataset, + dim = max_dim, thresh = threshold, ratio = 1., p = 2L + ) + + # return result + ans +} diff --git a/man/ripser_dist.Rd b/man/ripser_dist.Rd new file mode 100644 index 0000000..2373414 --- /dev/null +++ b/man/ripser_dist.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ripser-dist.R +\name{ripser_dist} +\alias{ripser_dist} +\title{Calculate Persistent Homology via a Vietoris-Rips Filtration} +\usage{ +ripser_dist(dataset, max_dim = 1L, threshold = Inf) +} +\arguments{ +\item{dataset}{"dist" object on which to compute persistent homology} + +\item{max_dim}{maximum dimension} + +\item{threshold}{maximum diameter} +} +\value{ +an empty list +} +\description{ +This "externally exported" R function gives the user access to + the "internally exported" C++ function `ripser_cpp_dist()`. +} +\examples{ + +# validate computation on toy data set +dist_vec <- c(4, 3, 5, 5, 3, 4) +result <- ripserq:::ripser_cpp_dist( + dist_vec, + dim = 1, + thresh = 6.0, + ratio = 1.0, + p = 2 +) +result + +# validate use of default threshold +ripserq:::ripser_cpp_dist( + dist_vec, + dim = 1, + thresh = Inf, + ratio = 1.0, + p = 2 +) + +# validate compatibility with 'dist' class +ripserq:::ripser_cpp_dist( + eurodist, + dim = 1, thresh = 5000, ratio = 1.0, p = 2 +) + +# exposed R function with no explicit parameter settings +ripser_dist(dist_vec) + +} From be210f4c75ec9abe9be13c9add0d40e46b691710 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 16:53:31 -0400 Subject: [PATCH 08/19] toggle off printing of persistence pairs --- src/ripser.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/ripser.cpp b/src/ripser.cpp index a1a5139..1cd8410 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -39,7 +39,11 @@ //#define USE_COEFFICIENTS //#define INDICATE_PROGRESS -#define PRINT_PERSISTENCE_PAIRS +// ripserq: Don't print pairs (accumulate them instead). +//#define PRINT_PERSISTENCE_PAIRS + +// ripserq: Accumulate pairs in an object to be returned to the user. +#define COLLECT_PERSISTENCE_PAIRS //#define USE_ROBINHOOD_HASHMAP @@ -423,6 +427,9 @@ template class ripser { typedef hash_map entry_hash_map; public: + // ripserq: Accumulate pairs in an object to be returned to the user. + std::vector>> persistence_pairs; + ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio, coefficient_t _modulus) : dist(std::move(_dist)), n(dist.size()), @@ -626,6 +633,10 @@ template class ripser { edges = get_edges(); std::sort(edges.rbegin(), edges.rend(), greater_diameter_or_smaller_index); + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + persistence_pairs.resize(dim_max + 1); +#endif std::vector vertices_of_edge(2); for (auto e : edges) { get_simplex_vertices(get_index(e), 1, n, vertices_of_edge.rbegin()); @@ -636,6 +647,10 @@ template class ripser { if (get_diameter(e) != 0) // ripserq Rcpp::Rcout << " [0," << get_diameter(e) << ")" << std::endl; +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + if (get_diameter(e) != 0) persistence_pairs[0].emplace_back(0.0, get_diameter(e)); #endif dset.link(u, v); } else if ((dim_max > 0) && (get_index(get_zero_apparent_cofacet(e, 1)) == -1)) @@ -647,6 +662,11 @@ template class ripser { for (index_t i = 0; i < n; ++i) // ripserq if (dset.find(i) == i) Rcpp::Rcout << " [0, )" << std::endl; +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + for (index_t i = 0; i < n; ++i) + if (dset.find(i) == i) persistence_pairs[0].emplace_back(0.0, std::numeric_limits::infinity()); #endif } @@ -743,6 +763,10 @@ template class ripser { // ripserq Rcpp::Rcout << "persistence intervals in dim " << dim << ":" << std::endl; #endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + persistence_pairs.resize(std::max(persistence_pairs.size(), static_cast(dim + 1))); +#endif compressed_sparse_matrix reduction_matrix; @@ -805,6 +829,12 @@ template class ripser { // ripserq Rcpp::Rcout << " [" << diameter << "," << death << ")" << std::endl; } +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + value_t death_diameter = get_diameter(pivot); + if (death_diameter > diameter * ratio) + persistence_pairs[dim].emplace_back(diameter, death_diameter); #endif pivot_column_index.insert({get_entry(pivot), index_column_to_reduce}); @@ -824,6 +854,10 @@ template class ripser { #endif // ripserq Rcpp::Rcout << " [" << diameter << ", )" << std::endl; +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + persistence_pairs[dim].emplace_back(diameter, std::numeric_limits::infinity()); #endif break; } @@ -837,7 +871,8 @@ template class ripser { std::vector get_edges(); - void compute_barcodes() { + // ripserq: Accumulate pairs in an object to be returned to the user. + std::vector>> compute_barcodes() { std::vector simplices, columns_to_reduce; compute_dim_0_pairs(simplices, columns_to_reduce); @@ -852,6 +887,9 @@ template class ripser { assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, dim + 1); } + + // ripserq: Accumulate pairs in an object to be returned to the user. + return persistence_pairs; } }; @@ -1341,12 +1379,23 @@ Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector &dataset, int dim, double t coefficient_t coeff_p = static_cast(p); using RipserType = ripser; + using PersistenceType = std::vector>>; auto ripser_ptr = new RipserType(std::move(dist), idx_dim, val_thresh, ratio, coeff_p); Rcpp::XPtr ripser_obj(ripser_ptr, false); ripser_obj->compute_barcodes(); - - Rcpp::List output; + PersistenceType result = ripser_obj->persistence_pairs; + + Rcpp::List output(result.size()); + for (size_t d = 0; d < result.size(); ++d) { + const auto& pairs = result[d]; + Rcpp::NumericMatrix mat(pairs.size(), 2); + for (size_t i = 0; i < pairs.size(); ++i) { + mat(i, 0) = pairs[i].first; + mat(i, 1) = pairs[i].second; + } + output[d] = mat; + } return output; } From af072012527370aeebc714f03442687b3a21ebec Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 19 Jun 2025 16:59:02 -0400 Subject: [PATCH 09/19] toggle off file-reading code --- src/ripser.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ripser.cpp b/src/ripser.cpp index 1cd8410..54d6697 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -47,6 +47,9 @@ //#define USE_ROBINHOOD_HASHMAP +// ripserq: R package need not read files. +//#define INPUT_TYPE + // ripserq: R does not tolerate use of `exit()`. //#define COMMAND_LINE_IO @@ -1041,6 +1044,9 @@ template <> std::vector ripser::get_ed return edges; } +// ripserq: R package need not read files. +#ifdef INPUT_TYPE + enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, @@ -1202,6 +1208,9 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, const fil } } +// ripserq: R package need not read files. +#endif + // ripserq: R does not tolerate use of `exit()`. #ifdef COMMAND_LINE_IO From 9c17f4fdb6532de86024c6361a5951c714de0c9a Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Fri, 4 Jul 2025 09:38:30 -0700 Subject: [PATCH 10/19] commit example added during transition to ripserr --- R/ripser-dist.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/ripser-dist.R b/R/ripser-dist.R index 71dfe05..dcaecf3 100644 --- a/R/ripser-dist.R +++ b/R/ripser-dist.R @@ -20,6 +20,15 @@ #' ) #' result #' +#' # validate clustering (only 0-degree homology) +#' ripserq:::ripser_cpp_dist( +#' dist_vec, +#' dim = 0, +#' thresh = Inf, +#' ratio = 1.0, +#' p = 2 +#' ) +#' #' # validate use of default threshold #' ripserq:::ripser_cpp_dist( #' dist_vec, From 159172a53fe583315c6fa565d80e02a9cda0cc3f Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Fri, 4 Jul 2025 09:51:41 -0700 Subject: [PATCH 11/19] document example --- man/ripser_dist.Rd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/man/ripser_dist.Rd b/man/ripser_dist.Rd index 2373414..458496d 100644 --- a/man/ripser_dist.Rd +++ b/man/ripser_dist.Rd @@ -33,6 +33,15 @@ result <- ripserq:::ripser_cpp_dist( ) result +# validate clustering (only 0-degree homology) +ripserq:::ripser_cpp_dist( + dist_vec, + dim = 0, + thresh = Inf, + ratio = 1.0, + p = 2 +) + # validate use of default threshold ripserq:::ripser_cpp_dist( dist_vec, From 26ba7cc2dc2c7bd4f1298b997fc3fde7c04965fe Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Fri, 4 Jul 2025 09:53:34 -0700 Subject: [PATCH 12/19] convert from upper to lower on c side - see ripserr e5beb5a --- src/ripser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ripser.cpp b/src/ripser.cpp index 54d6697..5177da5 100644 --- a/src/ripser.cpp +++ b/src/ripser.cpp @@ -1382,7 +1382,7 @@ int main(int argc, char** argv) { Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector &dataset, int dim, double thresh, float ratio, int p) { std::vector distances(dataset.begin(), dataset.end()); - compressed_lower_distance_matrix dist(std::move(distances)); + compressed_lower_distance_matrix dist(compressed_upper_distance_matrix(std::move(distances))); index_t idx_dim = static_cast(dim); value_t val_thresh = static_cast(thresh); coefficient_t coeff_p = static_cast(p); From 13f5dd581f78b78a6cd9aa0326935b424254b434 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Fri, 4 Jul 2025 10:40:10 -0700 Subject: [PATCH 13/19] add edge case examples --- R/ripser-dist.R | 29 ++++++++++++++++++++++++++--- man/ripser_dist.Rd | 26 +++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/R/ripser-dist.R b/R/ripser-dist.R index dcaecf3..fb6f9bf 100644 --- a/R/ripser-dist.R +++ b/R/ripser-dist.R @@ -38,15 +38,35 @@ #' p = 2 #' ) #' -#' # validate compatibility with 'dist' class +#' # test edge case #' ripserq:::ripser_cpp_dist( -#' eurodist, -#' dim = 1, thresh = 5000, ratio = 1.0, p = 2 +#' dist(matrix(c(0,0,0,1), ncol = 2)), +#' dim = 1, thresh = 1, ratio = 1, p = 2 #' ) +#' \dontrun{ +#' ripserq:::ripser_cpp_dist( +#' dist(0), +#' dim = 1, thresh = 1, ratio = 1, p = 2 +#' ) +#' } #' #' # exposed R function with no explicit parameter settings #' ripser_dist(dist_vec) #' +#' # validate compatibility with 'dist' class and different outputs +#' ripser_dist( +#' UScitiesD, +#' max_dim = 1, thresh = 1000 +#' ) +#' ripser_dist( +#' UScitiesD, +#' max_dim = 1, thresh = 930 +#' ) +#' ripser_dist( +#' UScitiesD, +#' max_dim = 1, thresh = 800 +#' ) +#' #' @export ripser_dist <- function( dataset, @@ -63,6 +83,9 @@ ripser_dist <- function( dim = max_dim, thresh = threshold, ratio = 1., p = 2L ) + # convert not-a-number values to missing + ans <- lapply(ans, function(x) { x[is.nan(x)] <- NA_real_; x }) + # return result ans } diff --git a/man/ripser_dist.Rd b/man/ripser_dist.Rd index 458496d..7a1a82d 100644 --- a/man/ripser_dist.Rd +++ b/man/ripser_dist.Rd @@ -51,13 +51,33 @@ ripserq:::ripser_cpp_dist( p = 2 ) -# validate compatibility with 'dist' class +# test edge case ripserq:::ripser_cpp_dist( - eurodist, - dim = 1, thresh = 5000, ratio = 1.0, p = 2 + dist(matrix(c(0,0,0,1), ncol = 2)), + dim = 1, thresh = 1, ratio = 1, p = 2 ) +\dontrun{ +ripserq:::ripser_cpp_dist( + dist(0), + dim = 1, thresh = 1, ratio = 1, p = 2 +) +} # exposed R function with no explicit parameter settings ripser_dist(dist_vec) +# validate compatibility with 'dist' class and different outputs +ripser_dist( + UScitiesD, + max_dim = 1, thresh = 1000 +) +ripser_dist( + UScitiesD, + max_dim = 1, thresh = 930 +) +ripser_dist( + UScitiesD, + max_dim = 1, thresh = 800 +) + } From e472dfb7bc5e6da96987987c0cdd989717355b0f Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Thu, 7 Aug 2025 16:43:06 -0500 Subject: [PATCH 14/19] track ripserq news --- NEWS-ripserq.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 NEWS-ripserq.md diff --git a/NEWS-ripserq.md b/NEWS-ripserq.md new file mode 100644 index 0000000..953840b --- /dev/null +++ b/NEWS-ripserq.md @@ -0,0 +1 @@ +This branch (`ripserq`) and its sub-branches are testing grounds for planned changes to {ripserr}. From 7d63ac776a9b3bdea02a0bba0bc0f71494a0cd41 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Mon, 11 Aug 2025 22:57:40 -0400 Subject: [PATCH 15/19] track draft benchmark script --- ignore/benchmark.R | 74 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 ignore/benchmark.R diff --git a/ignore/benchmark.R b/ignore/benchmark.R new file mode 100644 index 0000000..4cbaf0f --- /dev/null +++ b/ignore/benchmark.R @@ -0,0 +1,74 @@ +library(gert) + +# branches to benchmark +bench_branches <- c( + "ripserq", + "ripserq-double", + "ripserq-missing", "ripserq-missing-alt" +) +if (! all(bench_branches %in% git_branch_list()$name)) { + # missing_branches <- setdiff(bench_branches, git_branch_list()$name) + stop("Some branches were not found.") +} + +# loop over branches +nrep <- 3L +for (i in seq(nrep)) for (branch in bench_branches) { + + # checkout branch and compile source code + git_branch_checkout(branch) + devtools::load_all() + + # perform benchmark tests + set.seed(265879) + klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01)) + res <- bench::mark( + usa = ripser_dist(UScitiesD, max_dim = 1, thresh = 600), + euro = ripser_dist(eurodist, max_dim = 1, thresh = 500), + klein = ripser_dist(klein, max_dim = 2, thresh = .5), + check = FALSE + ) + + # save results + saveRDS(res, file = paste0("ignore/benchmark-", branch, "-", i, ".rds")) +} + +# return to "subtrunk" branch +git_branch_checkout("ripserq") + +library(tidyverse) +stop("Revise `gsub()` call below.") + +# collate benchmark results +list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |> + enframe(name = NULL, value = "path") |> + mutate(branch = gsub( + "benchmark\\-([a-z\\-]+)\\-[0-9]+\\.rds", "\\1", + path + )) |> + transmute( + subbranch = ifelse( + branch == "ripserq", " ", gsub("^ripserq\\-", "", branch) + ), + rep = gsub("benchmark\\-[a-z\\-]+\\-([0-9]+)\\.rds", "\\1", path), + results = map(file.path("ignore", path), readRDS) + ) |> + unnest(results) |> + select(expression, subbranch, median, mem_alloc) |> + arrange(expression, subbranch) |> + print() -> bench_results + +# plot benchmark results +bench_results |> + mutate(expression = fct_inorder(as.character(expression))) |> + mutate(across(c(median, mem_alloc), as.numeric)) |> + pivot_longer( + cols = c(median, mem_alloc), + names_to = "measure", values_to = "value" + ) |> + ggplot(aes(x = subbranch, y = value)) + + facet_wrap(facets = vars(measure, expression), scales = "free_y") + + # geom_col(aes(fill = subbranch)) + + geom_boxplot(aes(color = subbranch)) + + scale_y_log10() + + theme(axis.text.x = element_text(angle = -30, hjust = 0)) From ac5769c7d7f361df05a79d37b4ca9cb2e52a6659 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Tue, 19 Aug 2025 14:40:40 -0400 Subject: [PATCH 16/19] add dataset(s) to benchmark script --- ignore/benchmark.R | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/ignore/benchmark.R b/ignore/benchmark.R index 4cbaf0f..d78dbcc 100644 --- a/ignore/benchmark.R +++ b/ignore/benchmark.R @@ -1,3 +1,23 @@ +# prepare point clouds +rp2_n <- here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + readLines() |> + strsplit(split = ",") |> + sapply(length) |> + max() + 1L +here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + read.csv(col.names = paste0("X", seq(rp2_n)), header = FALSE) |> + as.matrix() |> + (\(m) rbind(NA_character_, m))() |> + # (\(m) m[seq(6), seq(6)])() + as.dist() -> + rp2_600 +here::here("src/examples/o3_1024.txt") |> + readr::read_tsv(col_names = FALSE, col_types = "d") |> + dist() -> + o3_1024 +set.seed(265879) +klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01)) + library(gert) # branches to benchmark @@ -20,12 +40,12 @@ for (i in seq(nrep)) for (branch in bench_branches) { devtools::load_all() # perform benchmark tests - set.seed(265879) - klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01)) res <- bench::mark( usa = ripser_dist(UScitiesD, max_dim = 1, thresh = 600), euro = ripser_dist(eurodist, max_dim = 1, thresh = 500), + rp2_600 = ripser_dist(rp2_600, max_dim = 1, thresh = 30), klein = ripser_dist(klein, max_dim = 2, thresh = .5), + # o3_1024 = ripser_dist(o3_1024, max_dim = 2, thresh = 3.5), check = FALSE ) @@ -37,7 +57,6 @@ for (i in seq(nrep)) for (branch in bench_branches) { git_branch_checkout("ripserq") library(tidyverse) -stop("Revise `gsub()` call below.") # collate benchmark results list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |> @@ -71,4 +90,10 @@ bench_results |> # geom_col(aes(fill = subbranch)) + geom_boxplot(aes(color = subbranch)) + scale_y_log10() + - theme(axis.text.x = element_text(angle = -30, hjust = 0)) + theme(axis.text.x = element_text(angle = -30, hjust = 0)) -> + bench_plot +print(bench_plot) +ggsave( + here::here("ignore/benchmark-plot.pdf"), bench_plot, + width = 8, height = 6 +) From e5c0df6d74f946abdec36e28b633969f0c229497 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Tue, 19 Aug 2025 14:48:32 -0400 Subject: [PATCH 17/19] one column per test --- ignore/benchmark.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ignore/benchmark.R b/ignore/benchmark.R index d78dbcc..f8a9d84 100644 --- a/ignore/benchmark.R +++ b/ignore/benchmark.R @@ -78,6 +78,7 @@ list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |> print() -> bench_results # plot benchmark results +test_n <- length(unique(bench_results$expression)) bench_results |> mutate(expression = fct_inorder(as.character(expression))) |> mutate(across(c(median, mem_alloc), as.numeric)) |> @@ -86,7 +87,10 @@ bench_results |> names_to = "measure", values_to = "value" ) |> ggplot(aes(x = subbranch, y = value)) + - facet_wrap(facets = vars(measure, expression), scales = "free_y") + + facet_wrap( + facets = vars(measure, expression), + ncol = test_n, scales = "free_y" + ) + # geom_col(aes(fill = subbranch)) + geom_boxplot(aes(color = subbranch)) + scale_y_log10() + From 04bfc11104af73001a8d1de3a9fccfd87d428827 Mon Sep 17 00:00:00 2001 From: Jason Cory Brunson Date: Tue, 19 Aug 2025 15:01:38 -0400 Subject: [PATCH 18/19] write preface & results for benchmark script --- ignore/benchmark.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ignore/benchmark.R b/ignore/benchmark.R index f8a9d84..03a7c4f 100644 --- a/ignore/benchmark.R +++ b/ignore/benchmark.R @@ -1,3 +1,21 @@ +# This script benchmarks several variations of {ripserq} to record censored +# death times as `NA_real_` rather than `Inf` or `NaN` or to use doubles rather +# than floats to store distance measurements. They work as follows: + +# `ripserq`, e472dfb7bc5e6da96987987c0cdd989717355b0f: +# no correction; reported as `Inf` +# `ripserq-double`, 2936f3b9455ff9f778b61b0216d7b902177fbfd3: +# type distances as double rather than float +# `ripserq-missing`, bfdfdfbd2569338dc511240b8106e9367736ff3d: +# stored in `std::vector` as `NaN`, then converted to `NA_real_` +# `ripserq-missing-alt`, 41689894007c5d91710f6ad1a7debd0b9d835386: +# stored in `Rcpp::NumericMatrix` as `NA_REAL` (corresponding to `NA_real_`) + +# Benchmark tests on small-to-moderate data sets found +# * negligible differences between float versus double +# * significant efficiencies with using `NaN` then converting +# * significant deficiencies to using `NumericMatrix` and not converting + # prepare point clouds rp2_n <- here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> readLines() |> From f14f632d6068090fe34cc14629e27119a1dd7a97 Mon Sep 17 00:00:00 2001 From: SeanHersh <32366781+SeanHersh@users.noreply.github.com> Date: Wed, 22 Oct 2025 01:34:45 -0400 Subject: [PATCH 19/19] Add enhanced benchmark script with intermediate datasets - Includes rp2_600 as intermediate-sized dataset - Focuses on datasets that complete in finite time - Adds documentation about o3_* dataset limitations - Enhanced plotting with better titles and formatting --- ignore/Benchmark_enhanced.R | 136 ++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 ignore/Benchmark_enhanced.R diff --git a/ignore/Benchmark_enhanced.R b/ignore/Benchmark_enhanced.R new file mode 100644 index 0000000..23c1128 --- /dev/null +++ b/ignore/Benchmark_enhanced.R @@ -0,0 +1,136 @@ +# Enhanced Benchmark Script for {ripserq} +# This script benchmarks several variations of {ripserq} to record censored +# death times as `NA_real_` rather than `Inf` or `NaN` or to use doubles rather +# than floats to store distance measurements. They work as follows: + +# `ripserq`, e472dfb7bc5e6da96987987c0cdd989717355b0f: +# no correction; reported as `Inf` +# `ripserq-double`, 2936f3b9455ff9f778b61b0216d7b902177fbfd3: +# type distances as double rather than float +# `ripserq-missing`, bfdfdfbd2569338dc511240b8106e9367736ff3d: +# stored in `std::vector` as `NaN`, then converted to `NA_real_` +# `ripserq-missing-alt`, 41689894007c5d91710f6ad1a7debd0b9d835386: +# stored in `Rcpp::NumericMatrix` as `NA_REAL` (corresponding to `NA_real_`) + +# Benchmark tests on small-to-moderate data sets found +# * negligible differences between float versus double +# * significant efficiencies with using `NaN` then converting +# * significant deficiencies to using `NumericMatrix` and not converting + +# ENHANCED VERSION: Includes intermediate-sized datasets +# This version focuses on datasets that run in finite time while still +# providing meaningful performance comparisons across branches. + +# prepare point clouds +rp2_n <- here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + readLines() |> + strsplit(split = ",") |> + sapply(length) |> + max() + 1L +here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + read.csv(col.names = paste0("X", seq(rp2_n)), header = FALSE) |> + as.matrix() |> + (\(m) rbind(NA_character_, m))() |> + as.dist() -> + rp2_600 + +# Note: o3_1024 and larger datasets are commented out as they may cause +# system hangs or crashes on some machines. Uncomment if your system +# can handle the computational load. +# here::here("src/examples/o3_1024.txt") |> +# readr::read_tsv(col_names = FALSE, col_types = "d") |> +# dist() -> +# o3_1024 + +set.seed(265879) +klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01)) + +library(gert) + +# branches to benchmark +bench_branches <- c( + "ripserq", + "ripserq-double", + "ripserq-missing", "ripserq-missing-alt" +) +if (! all(bench_branches %in% git_branch_list()$name)) { + stop("Some branches were not found.") +} + +# loop over branches +nrep <- 3L +for (i in seq(nrep)) for (branch in bench_branches) { + + # checkout branch and compile source code + git_branch_checkout(branch) + devtools::load_all() + + # perform benchmark tests + # Enhanced version focuses on datasets that complete in reasonable time + res <- bench::mark( + usa = ripser_dist(UScitiesD, max_dim = 1, thresh = 600), + euro = ripser_dist(eurodist, max_dim = 1, thresh = 500), + rp2_600 = ripser_dist(rp2_600, max_dim = 1, thresh = 30), # Intermediate dataset + klein = ripser_dist(klein, max_dim = 2, thresh = .5), + # Uncomment the following line if your system can handle o3_1024: + # o3_1024 = ripser_dist(o3_1024, max_dim = 2, thresh = 3.5), + check = FALSE + ) + + # save results + saveRDS(res, file = paste0("ignore/benchmark-", branch, "-", i, ".rds")) +} + +# return to "subtrunk" branch +git_branch_checkout("ripserq") + +library(tidyverse) + +# collate benchmark results +list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |> + enframe(name = NULL, value = "path") |> + mutate(branch = gsub( + "benchmark\\-([a-z\\-]+)\\-[0-9]+\\.rds", "\\1", + path + )) |> + transmute( + subbranch = ifelse( + branch == "ripserq", " ", gsub("^ripserq\\-", "", branch) + ), + rep = gsub("benchmark\\-[a-z\\-]+\\-([0-9]+)\\.rds", "\\1", path), + results = map(file.path("ignore", path), readRDS) + ) |> + unnest(results) |> + select(expression, subbranch, median, mem_alloc) |> + arrange(expression, subbranch) |> + print() -> bench_results + +# plot benchmark results +test_n <- length(unique(bench_results$expression)) +bench_results |> + mutate(expression = fct_inorder(as.character(expression))) |> + mutate(across(c(median, mem_alloc), as.numeric)) |> + pivot_longer( + cols = c(median, mem_alloc), + names_to = "measure", values_to = "value" + ) |> + ggplot(aes(x = subbranch, y = value)) + + facet_wrap( + facets = vars(measure, expression), + ncol = test_n, scales = "free_y" + ) + + geom_boxplot(aes(color = subbranch)) + + scale_y_log10() + + theme(axis.text.x = element_text(angle = -30, hjust = 0)) + + labs( + title = "Enhanced RipserQ Benchmark Results", + subtitle = "Including intermediate-sized datasets for reliable performance testing", + x = "Branch Variation", + y = "Value (log scale)" + ) -> + bench_plot +print(bench_plot) +ggsave( + here::here("ignore/benchmark-plot-enhanced.pdf"), bench_plot, + width = 10, height = 8 +)