diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..7a02beb --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,9 @@ +# Ripser +.clang-format +CONTRIBUTING.txt +COPYING.txt +Makefile + +# R project +^ripser\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 59f7e7c..8c813a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,11 @@ -DerivedData -ripser.dSYM -ripser -ripser-coeff +# Mac +.DS_Store + +# R project +.Rproj.user +.Rhistory + +# Rcpp +*.o +*.so +*.dll diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..66e68aa --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,15 @@ +Package: ripserq +Type: Package +Title: Ports the Ripser program to R with minimal package structure +Version: 1.0 +Date: 2025-06-19 +Authors@R: person("Your", "Name", role = c("aut", "cre"), + email = "your@email.com") +Description: More about what it does (maybe more than one + line). +License: GPL (>= 2) +Imports: + Rcpp +LinkingTo: Rcpp +RoxygenNote: 7.3.2 +Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..39baaa8 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,5 @@ +# Generated by roxygen2: do not edit by hand + +export(ripser_dist) +importFrom(Rcpp,sourceCpp) +useDynLib(ripserq, .registration = TRUE) diff --git a/NEWS-ripserq.md b/NEWS-ripserq.md new file mode 100644 index 0000000..953840b --- /dev/null +++ b/NEWS-ripserq.md @@ -0,0 +1 @@ +This branch (`ripserq`) and its sub-branches are testing grounds for planned changes to {ripserr}. diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..e5ab8d4 --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,7 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +ripser_cpp_dist <- function(dataset, dim, thresh, ratio, p) { + .Call(`_ripserq_ripser_cpp_dist`, dataset, dim, thresh, ratio, p) +} + diff --git a/R/ripser-dist.R b/R/ripser-dist.R new file mode 100644 index 0000000..fb6f9bf --- /dev/null +++ b/R/ripser-dist.R @@ -0,0 +1,91 @@ +#' @title Calculate Persistent Homology via a Vietoris-Rips Filtration +#' +#' @description This "externally exported" R function gives the user access to +#' the "internally exported" C++ function `ripser_cpp_dist()`. +#' +#' @param dataset "dist" object on which to compute persistent homology +#' @param max_dim maximum dimension +#' @param threshold maximum diameter +#' @returns an empty list +#' @examples +#' +#' # validate computation on toy data set +#' dist_vec <- c(4, 3, 5, 5, 3, 4) +#' result <- ripserq:::ripser_cpp_dist( +#' dist_vec, +#' dim = 1, +#' thresh = 6.0, +#' ratio = 1.0, +#' p = 2 +#' ) +#' result +#' +#' # validate clustering (only 0-degree homology) +#' ripserq:::ripser_cpp_dist( +#' dist_vec, +#' dim = 0, +#' thresh = Inf, +#' ratio = 1.0, +#' p = 2 +#' ) +#' +#' # validate use of default threshold +#' ripserq:::ripser_cpp_dist( +#' dist_vec, +#' dim = 1, +#' thresh = Inf, +#' ratio = 1.0, +#' p = 2 +#' ) +#' +#' # test edge case +#' ripserq:::ripser_cpp_dist( +#' dist(matrix(c(0,0,0,1), ncol = 2)), +#' dim = 1, thresh = 1, ratio = 1, p = 2 +#' ) +#' \dontrun{ +#' ripserq:::ripser_cpp_dist( +#' dist(0), +#' dim = 1, thresh = 1, ratio = 1, p = 2 +#' ) +#' } +#' +#' # exposed R function with no explicit parameter settings +#' ripser_dist(dist_vec) +#' +#' # validate compatibility with 'dist' class and different outputs +#' ripser_dist( +#' UScitiesD, +#' max_dim = 1, thresh = 1000 +#' ) +#' ripser_dist( +#' UScitiesD, +#' max_dim = 1, thresh = 930 +#' ) +#' ripser_dist( +#' UScitiesD, +#' max_dim = 1, thresh = 800 +#' ) +#' +#' @export +ripser_dist <- function( + dataset, + max_dim = 1L, + threshold = Inf +) { + + # pre-process parameters + if (threshold == Inf) threshold <- max(dataset) + + # run `compute_barcodes()` and save result + ans <- ripser_cpp_dist( + dataset, + dim = max_dim, thresh = threshold, ratio = 1., p = 2L + ) + + # convert not-a-number values to missing + ans <- lapply(ans, function(x) { x[is.nan(x)] <- NA_real_; x }) + + # return result + ans +} diff --git a/R/ripserq-package.R b/R/ripserq-package.R new file mode 100644 index 0000000..50e0427 --- /dev/null +++ b/R/ripserq-package.R @@ -0,0 +1,4 @@ +#' @importFrom Rcpp sourceCpp +#' @useDynLib ripserq, .registration = TRUE +#' @keywords internal +"_PACKAGE" diff --git a/README.md b/README.md index 2b08dce..01aebdc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,14 @@ -# Ripser +# `ripserq` + +This branch exists to maintain a minimal R package structure around the current version of Ripser, into which future upgrades will be merged. +If and when experimental features are added, they will be maintained and combined in sub-branches of the form `ripserq-`. +The code will be minimally altered, most significantly (1) to toggle off C-only procedures like `main()` and (2) to ensure compatibility with R, the {Rcpp} interface, and the R package infrastructure. +Stable versions of `ripserq` will then be used to upgrade the R package {ripserr}. + +The Ripser README is unaltered below. + + +## Ripser Copyright © 2015–2021 [Ulrich Bauer]. diff --git a/ignore/Benchmark_enhanced.R b/ignore/Benchmark_enhanced.R new file mode 100644 index 0000000..23c1128 --- /dev/null +++ b/ignore/Benchmark_enhanced.R @@ -0,0 +1,136 @@ +# Enhanced Benchmark Script for {ripserq} +# This script benchmarks several variations of {ripserq} to record censored +# death times as `NA_real_` rather than `Inf` or `NaN` or to use doubles rather +# than floats to store distance measurements. They work as follows: + +# `ripserq`, e472dfb7bc5e6da96987987c0cdd989717355b0f: +# no correction; reported as `Inf` +# `ripserq-double`, 2936f3b9455ff9f778b61b0216d7b902177fbfd3: +# type distances as double rather than float +# `ripserq-missing`, bfdfdfbd2569338dc511240b8106e9367736ff3d: +# stored in `std::vector` as `NaN`, then converted to `NA_real_` +# `ripserq-missing-alt`, 41689894007c5d91710f6ad1a7debd0b9d835386: +# stored in `Rcpp::NumericMatrix` as `NA_REAL` (corresponding to `NA_real_`) + +# Benchmark tests on small-to-moderate data sets found +# * negligible differences between float versus double +# * significant efficiencies with using `NaN` then converting +# * significant deficiencies to using `NumericMatrix` and not converting + +# ENHANCED VERSION: Includes intermediate-sized datasets +# This version focuses on datasets that run in finite time while still +# providing meaningful performance comparisons across branches. + +# prepare point clouds +rp2_n <- here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + readLines() |> + strsplit(split = ",") |> + sapply(length) |> + max() + 1L +here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + read.csv(col.names = paste0("X", seq(rp2_n)), header = FALSE) |> + as.matrix() |> + (\(m) rbind(NA_character_, m))() |> + as.dist() -> + rp2_600 + +# Note: o3_1024 and larger datasets are commented out as they may cause +# system hangs or crashes on some machines. Uncomment if your system +# can handle the computational load. +# here::here("src/examples/o3_1024.txt") |> +# readr::read_tsv(col_names = FALSE, col_types = "d") |> +# dist() -> +# o3_1024 + +set.seed(265879) +klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01)) + +library(gert) + +# branches to benchmark +bench_branches <- c( + "ripserq", + "ripserq-double", + "ripserq-missing", "ripserq-missing-alt" +) +if (! all(bench_branches %in% git_branch_list()$name)) { + stop("Some branches were not found.") +} + +# loop over branches +nrep <- 3L +for (i in seq(nrep)) for (branch in bench_branches) { + + # checkout branch and compile source code + git_branch_checkout(branch) + devtools::load_all() + + # perform benchmark tests + # Enhanced version focuses on datasets that complete in reasonable time + res <- bench::mark( + usa = ripser_dist(UScitiesD, max_dim = 1, thresh = 600), + euro = ripser_dist(eurodist, max_dim = 1, thresh = 500), + rp2_600 = ripser_dist(rp2_600, max_dim = 1, thresh = 30), # Intermediate dataset + klein = ripser_dist(klein, max_dim = 2, thresh = .5), + # Uncomment the following line if your system can handle o3_1024: + # o3_1024 = ripser_dist(o3_1024, max_dim = 2, thresh = 3.5), + check = FALSE + ) + + # save results + saveRDS(res, file = paste0("ignore/benchmark-", branch, "-", i, ".rds")) +} + +# return to "subtrunk" branch +git_branch_checkout("ripserq") + +library(tidyverse) + +# collate benchmark results +list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |> + enframe(name = NULL, value = "path") |> + mutate(branch = gsub( + "benchmark\\-([a-z\\-]+)\\-[0-9]+\\.rds", "\\1", + path + )) |> + transmute( + subbranch = ifelse( + branch == "ripserq", " ", gsub("^ripserq\\-", "", branch) + ), + rep = gsub("benchmark\\-[a-z\\-]+\\-([0-9]+)\\.rds", "\\1", path), + results = map(file.path("ignore", path), readRDS) + ) |> + unnest(results) |> + select(expression, subbranch, median, mem_alloc) |> + arrange(expression, subbranch) |> + print() -> bench_results + +# plot benchmark results +test_n <- length(unique(bench_results$expression)) +bench_results |> + mutate(expression = fct_inorder(as.character(expression))) |> + mutate(across(c(median, mem_alloc), as.numeric)) |> + pivot_longer( + cols = c(median, mem_alloc), + names_to = "measure", values_to = "value" + ) |> + ggplot(aes(x = subbranch, y = value)) + + facet_wrap( + facets = vars(measure, expression), + ncol = test_n, scales = "free_y" + ) + + geom_boxplot(aes(color = subbranch)) + + scale_y_log10() + + theme(axis.text.x = element_text(angle = -30, hjust = 0)) + + labs( + title = "Enhanced RipserQ Benchmark Results", + subtitle = "Including intermediate-sized datasets for reliable performance testing", + x = "Branch Variation", + y = "Value (log scale)" + ) -> + bench_plot +print(bench_plot) +ggsave( + here::here("ignore/benchmark-plot-enhanced.pdf"), bench_plot, + width = 10, height = 8 +) diff --git a/ignore/benchmark.R b/ignore/benchmark.R new file mode 100644 index 0000000..03a7c4f --- /dev/null +++ b/ignore/benchmark.R @@ -0,0 +1,121 @@ +# This script benchmarks several variations of {ripserq} to record censored +# death times as `NA_real_` rather than `Inf` or `NaN` or to use doubles rather +# than floats to store distance measurements. They work as follows: + +# `ripserq`, e472dfb7bc5e6da96987987c0cdd989717355b0f: +# no correction; reported as `Inf` +# `ripserq-double`, 2936f3b9455ff9f778b61b0216d7b902177fbfd3: +# type distances as double rather than float +# `ripserq-missing`, bfdfdfbd2569338dc511240b8106e9367736ff3d: +# stored in `std::vector` as `NaN`, then converted to `NA_real_` +# `ripserq-missing-alt`, 41689894007c5d91710f6ad1a7debd0b9d835386: +# stored in `Rcpp::NumericMatrix` as `NA_REAL` (corresponding to `NA_real_`) + +# Benchmark tests on small-to-moderate data sets found +# * negligible differences between float versus double +# * significant efficiencies with using `NaN` then converting +# * significant deficiencies to using `NumericMatrix` and not converting + +# prepare point clouds +rp2_n <- here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + readLines() |> + strsplit(split = ",") |> + sapply(length) |> + max() + 1L +here::here("src/examples/rp2_600.lower_distance_matrix.csv") |> + read.csv(col.names = paste0("X", seq(rp2_n)), header = FALSE) |> + as.matrix() |> + (\(m) rbind(NA_character_, m))() |> + # (\(m) m[seq(6), seq(6)])() + as.dist() -> + rp2_600 +here::here("src/examples/o3_1024.txt") |> + readr::read_tsv(col_names = FALSE, col_types = "d") |> + dist() -> + o3_1024 +set.seed(265879) +klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01)) + +library(gert) + +# branches to benchmark +bench_branches <- c( + "ripserq", + "ripserq-double", + "ripserq-missing", "ripserq-missing-alt" +) +if (! all(bench_branches %in% git_branch_list()$name)) { + # missing_branches <- setdiff(bench_branches, git_branch_list()$name) + stop("Some branches were not found.") +} + +# loop over branches +nrep <- 3L +for (i in seq(nrep)) for (branch in bench_branches) { + + # checkout branch and compile source code + git_branch_checkout(branch) + devtools::load_all() + + # perform benchmark tests + res <- bench::mark( + usa = ripser_dist(UScitiesD, max_dim = 1, thresh = 600), + euro = ripser_dist(eurodist, max_dim = 1, thresh = 500), + rp2_600 = ripser_dist(rp2_600, max_dim = 1, thresh = 30), + klein = ripser_dist(klein, max_dim = 2, thresh = .5), + # o3_1024 = ripser_dist(o3_1024, max_dim = 2, thresh = 3.5), + check = FALSE + ) + + # save results + saveRDS(res, file = paste0("ignore/benchmark-", branch, "-", i, ".rds")) +} + +# return to "subtrunk" branch +git_branch_checkout("ripserq") + +library(tidyverse) + +# collate benchmark results +list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |> + enframe(name = NULL, value = "path") |> + mutate(branch = gsub( + "benchmark\\-([a-z\\-]+)\\-[0-9]+\\.rds", "\\1", + path + )) |> + transmute( + subbranch = ifelse( + branch == "ripserq", " ", gsub("^ripserq\\-", "", branch) + ), + rep = gsub("benchmark\\-[a-z\\-]+\\-([0-9]+)\\.rds", "\\1", path), + results = map(file.path("ignore", path), readRDS) + ) |> + unnest(results) |> + select(expression, subbranch, median, mem_alloc) |> + arrange(expression, subbranch) |> + print() -> bench_results + +# plot benchmark results +test_n <- length(unique(bench_results$expression)) +bench_results |> + mutate(expression = fct_inorder(as.character(expression))) |> + mutate(across(c(median, mem_alloc), as.numeric)) |> + pivot_longer( + cols = c(median, mem_alloc), + names_to = "measure", values_to = "value" + ) |> + ggplot(aes(x = subbranch, y = value)) + + facet_wrap( + facets = vars(measure, expression), + ncol = test_n, scales = "free_y" + ) + + # geom_col(aes(fill = subbranch)) + + geom_boxplot(aes(color = subbranch)) + + scale_y_log10() + + theme(axis.text.x = element_text(angle = -30, hjust = 0)) -> + bench_plot +print(bench_plot) +ggsave( + here::here("ignore/benchmark-plot.pdf"), bench_plot, + width = 8, height = 6 +) diff --git a/man/ripser_dist.Rd b/man/ripser_dist.Rd new file mode 100644 index 0000000..7a1a82d --- /dev/null +++ b/man/ripser_dist.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ripser-dist.R +\name{ripser_dist} +\alias{ripser_dist} +\title{Calculate Persistent Homology via a Vietoris-Rips Filtration} +\usage{ +ripser_dist(dataset, max_dim = 1L, threshold = Inf) +} +\arguments{ +\item{dataset}{"dist" object on which to compute persistent homology} + +\item{max_dim}{maximum dimension} + +\item{threshold}{maximum diameter} +} +\value{ +an empty list +} +\description{ +This "externally exported" R function gives the user access to + the "internally exported" C++ function `ripser_cpp_dist()`. +} +\examples{ + +# validate computation on toy data set +dist_vec <- c(4, 3, 5, 5, 3, 4) +result <- ripserq:::ripser_cpp_dist( + dist_vec, + dim = 1, + thresh = 6.0, + ratio = 1.0, + p = 2 +) +result + +# validate clustering (only 0-degree homology) +ripserq:::ripser_cpp_dist( + dist_vec, + dim = 0, + thresh = Inf, + ratio = 1.0, + p = 2 +) + +# validate use of default threshold +ripserq:::ripser_cpp_dist( + dist_vec, + dim = 1, + thresh = Inf, + ratio = 1.0, + p = 2 +) + +# test edge case +ripserq:::ripser_cpp_dist( + dist(matrix(c(0,0,0,1), ncol = 2)), + dim = 1, thresh = 1, ratio = 1, p = 2 +) +\dontrun{ +ripserq:::ripser_cpp_dist( + dist(0), + dim = 1, thresh = 1, ratio = 1, p = 2 +) +} + +# exposed R function with no explicit parameter settings +ripser_dist(dist_vec) + +# validate compatibility with 'dist' class and different outputs +ripser_dist( + UScitiesD, + max_dim = 1, thresh = 1000 +) +ripser_dist( + UScitiesD, + max_dim = 1, thresh = 930 +) +ripser_dist( + UScitiesD, + max_dim = 1, thresh = 800 +) + +} diff --git a/man/ripserq-package.Rd b/man/ripserq-package.Rd new file mode 100644 index 0000000..0765847 --- /dev/null +++ b/man/ripserq-package.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ripserq-package.R +\docType{package} +\name{ripserq-package} +\alias{ripserq} +\alias{ripserq-package} +\title{ripserq: Ports the Ripser program to R with minimal package structure} +\description{ +More about what it does (maybe more than one line). +} +\author{ +\strong{Maintainer}: Your Name \email{your@email.com} + +} +\keyword{internal} diff --git a/ripser.Rproj b/ripser.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/ripser.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/robin-hood-hashing b/robin-hood-hashing deleted file mode 160000 index 7b00237..0000000 --- a/robin-hood-hashing +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7b00237399cec62ee5795f9013cf5df8de596e69 diff --git a/.clang-format b/src/.clang-format similarity index 100% rename from .clang-format rename to src/.clang-format diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..59f7e7c --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,4 @@ +DerivedData +ripser.dSYM +ripser +ripser-coeff diff --git a/.gitmodules b/src/.gitmodules similarity index 100% rename from .gitmodules rename to src/.gitmodules diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..2c6d25b --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,37 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// ripser_cpp_dist +Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector& dataset, int dim, double thresh, float ratio, int p); +RcppExport SEXP _ripserq_ripser_cpp_dist(SEXP datasetSEXP, SEXP dimSEXP, SEXP threshSEXP, SEXP ratioSEXP, SEXP pSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type dataset(datasetSEXP); + Rcpp::traits::input_parameter< int >::type dim(dimSEXP); + Rcpp::traits::input_parameter< double >::type thresh(threshSEXP); + Rcpp::traits::input_parameter< float >::type ratio(ratioSEXP); + Rcpp::traits::input_parameter< int >::type p(pSEXP); + rcpp_result_gen = Rcpp::wrap(ripser_cpp_dist(dataset, dim, thresh, ratio, p)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_ripserq_ripser_cpp_dist", (DL_FUNC) &_ripserq_ripser_cpp_dist, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_ripserq(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/examples/o3_1024.txt b/src/examples/o3_1024.txt similarity index 100% rename from examples/o3_1024.txt rename to src/examples/o3_1024.txt diff --git a/examples/o3_2048.txt b/src/examples/o3_2048.txt similarity index 100% rename from examples/o3_2048.txt rename to src/examples/o3_2048.txt diff --git a/examples/o3_4096.txt b/src/examples/o3_4096.txt similarity index 100% rename from examples/o3_4096.txt rename to src/examples/o3_4096.txt diff --git a/examples/o3_8192.txt b/src/examples/o3_8192.txt similarity index 100% rename from examples/o3_8192.txt rename to src/examples/o3_8192.txt diff --git a/examples/pointsCycloOctane.csv b/src/examples/pointsCycloOctane.csv similarity index 100% rename from examples/pointsCycloOctane.csv rename to src/examples/pointsCycloOctane.csv diff --git a/examples/projective_plane.csv b/src/examples/projective_plane.csv similarity index 100% rename from examples/projective_plane.csv rename to src/examples/projective_plane.csv diff --git a/examples/projective_plane.dipha b/src/examples/projective_plane.dipha similarity index 100% rename from examples/projective_plane.dipha rename to src/examples/projective_plane.dipha diff --git a/examples/projective_plane.lower_distance_matrix b/src/examples/projective_plane.lower_distance_matrix similarity index 100% rename from examples/projective_plane.lower_distance_matrix rename to src/examples/projective_plane.lower_distance_matrix diff --git a/examples/random16.lower_distance_matrix b/src/examples/random16.lower_distance_matrix similarity index 100% rename from examples/random16.lower_distance_matrix rename to src/examples/random16.lower_distance_matrix diff --git a/examples/random20.lower_distance_matrix b/src/examples/random20.lower_distance_matrix similarity index 100% rename from examples/random20.lower_distance_matrix rename to src/examples/random20.lower_distance_matrix diff --git a/examples/rp2_600.lower_distance_matrix.csv b/src/examples/rp2_600.lower_distance_matrix.csv similarity index 100% rename from examples/rp2_600.lower_distance_matrix.csv rename to src/examples/rp2_600.lower_distance_matrix.csv diff --git a/examples/sphere_3_192.lower_distance_matrix b/src/examples/sphere_3_192.lower_distance_matrix similarity index 100% rename from examples/sphere_3_192.lower_distance_matrix rename to src/examples/sphere_3_192.lower_distance_matrix diff --git a/ripser.cpp b/src/ripser.cpp similarity index 87% rename from ripser.cpp rename to src/ripser.cpp index 92b0e49..5177da5 100644 --- a/ripser.cpp +++ b/src/ripser.cpp @@ -39,10 +39,20 @@ //#define USE_COEFFICIENTS //#define INDICATE_PROGRESS -#define PRINT_PERSISTENCE_PAIRS +// ripserq: Don't print pairs (accumulate them instead). +//#define PRINT_PERSISTENCE_PAIRS + +// ripserq: Accumulate pairs in an object to be returned to the user. +#define COLLECT_PERSISTENCE_PAIRS //#define USE_ROBINHOOD_HASHMAP +// ripserq: R package need not read files. +//#define INPUT_TYPE + +// ripserq: R does not tolerate use of `exit()`. +//#define COMMAND_LINE_IO + #include #include #include @@ -53,6 +63,8 @@ #include #include #include +// ripserq +#include #ifdef USE_ROBINHOOD_HASHMAP @@ -418,6 +430,9 @@ template class ripser { typedef hash_map entry_hash_map; public: + // ripserq: Accumulate pairs in an object to be returned to the user. + std::vector>> persistence_pairs; + ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, float _ratio, coefficient_t _modulus) : dist(std::move(_dist)), n(dist.size()), @@ -511,7 +526,9 @@ template class ripser { }; diameter_entry_t get_zero_pivot_facet(const diameter_entry_t simplex, const index_t dim) { - static simplex_boundary_enumerator facets(0, *this); + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 + simplex_boundary_enumerator facets(0, *this); facets.set_simplex(simplex, dim); while (facets.has_next()) { diameter_entry_t facet = facets.next(); @@ -521,7 +538,9 @@ template class ripser { } diameter_entry_t get_zero_pivot_cofacet(const diameter_entry_t simplex, const index_t dim) { - static simplex_coboundary_enumerator cofacets(*this); + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 + simplex_coboundary_enumerator cofacets(*this); cofacets.set_simplex(simplex, dim); while (cofacets.has_next()) { diameter_entry_t cofacet = cofacets.next(); @@ -556,7 +575,8 @@ template class ripser { entry_hash_map& pivot_column_index, index_t dim) { #ifdef INDICATE_PROGRESS - std::cerr << clear_line << "assembling columns" << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << "assembling columns" << std::flush; std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step; #endif @@ -571,7 +591,8 @@ template class ripser { while (cofacets.has_next(false)) { #ifdef INDICATE_PROGRESS if (std::chrono::steady_clock::now() > next) { - std::cerr << clear_line << "assembling " << next_simplices.size() + // ripserq + Rcpp::Rcerr << clear_line << "assembling " << next_simplices.size() << " columns (processing " << std::distance(&simplices[0], &simplex) << "/" << simplices.size() << " simplices)" << std::flush; next = std::chrono::steady_clock::now() + time_step; @@ -590,21 +611,24 @@ template class ripser { if (dim < dim_max) simplices.swap(next_simplices); #ifdef INDICATE_PROGRESS - std::cerr << clear_line << "sorting " << columns_to_reduce.size() << " columns" + // ripserq + Rcpp::Rcerr << clear_line << "sorting " << columns_to_reduce.size() << " columns" << std::flush; #endif std::sort(columns_to_reduce.begin(), columns_to_reduce.end(), greater_diameter_or_smaller_index); #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif } void compute_dim_0_pairs(std::vector& edges, std::vector& columns_to_reduce) { #ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim 0:" << std::endl; + // ripserq + Rcpp::Rcout << "persistence intervals in dim 0:" << std::endl; #endif union_find dset(n); @@ -612,6 +636,10 @@ template class ripser { edges = get_edges(); std::sort(edges.rbegin(), edges.rend(), greater_diameter_or_smaller_index); + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + persistence_pairs.resize(dim_max + 1); +#endif std::vector vertices_of_edge(2); for (auto e : edges) { get_simplex_vertices(get_index(e), 1, n, vertices_of_edge.rbegin()); @@ -620,7 +648,12 @@ template class ripser { if (u != v) { #ifdef PRINT_PERSISTENCE_PAIRS if (get_diameter(e) != 0) - std::cout << " [0," << get_diameter(e) << ")" << std::endl; + // ripserq + Rcpp::Rcout << " [0," << get_diameter(e) << ")" << std::endl; +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + if (get_diameter(e) != 0) persistence_pairs[0].emplace_back(0.0, get_diameter(e)); #endif dset.link(u, v); } else if ((dim_max > 0) && (get_index(get_zero_apparent_cofacet(e, 1)) == -1)) @@ -630,7 +663,13 @@ template class ripser { #ifdef PRINT_PERSISTENCE_PAIRS for (index_t i = 0; i < n; ++i) - if (dset.find(i) == i) std::cout << " [0, )" << std::endl; + // ripserq + if (dset.find(i) == i) Rcpp::Rcout << " [0, )" << std::endl; +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + for (index_t i = 0; i < n; ++i) + if (dset.find(i) == i) persistence_pairs[0].emplace_back(0.0, std::numeric_limits::infinity()); #endif } @@ -669,7 +708,9 @@ template class ripser { diameter_entry_t init_coboundary_and_get_pivot(const diameter_entry_t simplex, Column& working_coboundary, const index_t& dim, entry_hash_map& pivot_column_index) { - static simplex_coboundary_enumerator cofacets(*this); + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 + simplex_coboundary_enumerator cofacets(*this); bool check_for_emergent_pair = true; cofacet_entries.clear(); cofacets.set_simplex(simplex, dim); @@ -692,7 +733,9 @@ template class ripser { template void add_simplex_coboundary(const diameter_entry_t simplex, const index_t& dim, Column& working_reduction_column, Column& working_coboundary) { - static simplex_coboundary_enumerator cofacets(*this); + // ripserq: Use of `static` variables induces an R end-of-program problem: + // https://github.com/Ripser/ripser/issues/55 + simplex_coboundary_enumerator cofacets(*this); working_reduction_column.push(simplex); cofacets.set_simplex(simplex, dim); while (cofacets.has_next()) { @@ -720,7 +763,12 @@ template class ripser { entry_hash_map& pivot_column_index, const index_t dim) { #ifdef PRINT_PERSISTENCE_PAIRS - std::cout << "persistence intervals in dim " << dim << ":" << std::endl; + // ripserq + Rcpp::Rcout << "persistence intervals in dim " << dim << ":" << std::endl; +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + persistence_pairs.resize(std::max(persistence_pairs.size(), static_cast(dim + 1))); #endif compressed_sparse_matrix reduction_matrix; @@ -746,7 +794,8 @@ template class ripser { while (true) { #ifdef INDICATE_PROGRESS if (std::chrono::steady_clock::now() > next) { - std::cerr << clear_line << "reducing column " << index_column_to_reduce + 1 + // ripserq + Rcpp::Rcerr << clear_line << "reducing column " << index_column_to_reduce + 1 << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")" << std::flush; next = std::chrono::steady_clock::now() + time_step; @@ -777,10 +826,18 @@ template class ripser { value_t death = get_diameter(pivot); if (death > diameter * ratio) { #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif - std::cout << " [" << diameter << "," << death << ")" << std::endl; + // ripserq + Rcpp::Rcout << " [" << diameter << "," << death << ")" << std::endl; } +#endif + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + value_t death_diameter = get_diameter(pivot); + if (death_diameter > diameter * ratio) + persistence_pairs[dim].emplace_back(diameter, death_diameter); #endif pivot_column_index.insert({get_entry(pivot), index_column_to_reduce}); @@ -795,22 +852,30 @@ template class ripser { } else { #ifdef PRINT_PERSISTENCE_PAIRS #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; +#endif + // ripserq + Rcpp::Rcout << " [" << diameter << ", )" << std::endl; #endif - std::cout << " [" << diameter << ", )" << std::endl; + // ripserq: Accumulate pairs in an object to be returned to the user. +#ifdef COLLECT_PERSISTENCE_PAIRS + persistence_pairs[dim].emplace_back(diameter, std::numeric_limits::infinity()); #endif break; } } } #ifdef INDICATE_PROGRESS - std::cerr << clear_line << std::flush; + // ripserq + Rcpp::Rcerr << clear_line << std::flush; #endif } std::vector get_edges(); - void compute_barcodes() { + // ripserq: Accumulate pairs in an object to be returned to the user. + std::vector>> compute_barcodes() { std::vector simplices, columns_to_reduce; compute_dim_0_pairs(simplices, columns_to_reduce); @@ -825,6 +890,9 @@ template class ripser { assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index, dim + 1); } + + // ripserq: Accumulate pairs in an object to be returned to the user. + return persistence_pairs; } }; @@ -976,6 +1044,9 @@ template <> std::vector ripser::get_ed return edges; } +// ripserq: R package need not read files. +#ifdef INPUT_TYPE + enum file_format { LOWER_DISTANCE_MATRIX, UPPER_DISTANCE_MATRIX, @@ -1015,7 +1086,8 @@ euclidean_distance_matrix read_point_cloud(std::istream& input_stream) { euclidean_distance_matrix eucl_dist(std::move(points)); index_t n = eucl_dist.size(); - std::cout << "point cloud with " << n << " points in dimension " + // ripserq + Rcpp::Rcout << "point cloud with " << n << " points in dimension " << eucl_dist.points.front().size() << std::endl; return eucl_dist; @@ -1090,13 +1162,13 @@ compressed_lower_distance_matrix read_distance_matrix(std::istream& input_stream compressed_lower_distance_matrix read_dipha(std::istream& input_stream) { if (read(input_stream) != 8067171840) { - std::cerr << "input is not a Dipha file (magic number: 8067171840)" << std::endl; - exit(-1); + // ripserq + Rcpp::stop("input is not a Dipha file (magic number: 8067171840)"); } if (read(input_stream) != 7) { - std::cerr << "input is not a Dipha distance matrix (file type: 7)" << std::endl; - exit(-1); + // ripserq + Rcpp::stop("input is not a Dipha distance matrix (file type: 7)"); } index_t n = read(input_stream); @@ -1136,8 +1208,15 @@ compressed_lower_distance_matrix read_file(std::istream& input_stream, const fil } } +// ripserq: R package need not read files. +#endif + +// ripserq: R does not tolerate use of `exit()`. +#ifdef COMMAND_LINE_IO + void print_usage_and_exit(int exit_code) { - std::cerr + // ripserq + Rcpp::Rcerr << "Usage: " << "ripser " << "[options] [filename]" << std::endl @@ -1230,14 +1309,15 @@ int main(int argc, char** argv) { std::ifstream file_stream(filename); if (filename && file_stream.fail()) { - std::cerr << "couldn't open file " << filename << std::endl; - exit(-1); + // ripserq + Rcpp::stop("couldn't open file %s", filename); } if (format == SPARSE) { sparse_distance_matrix dist = read_sparse_distance_matrix(filename ? file_stream : std::cin); - std::cout << "sparse distance matrix with " << dist.size() << " points and " + // ripserq + Rcpp::Rcout << "sparse distance matrix with " << dist.size() << " points and " << dist.num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries" << std::endl; @@ -1270,17 +1350,20 @@ int main(int argc, char** argv) { if (d != std::numeric_limits::infinity()) max_finite = std::max(max_finite, d); if (d <= threshold) ++num_edges; } - std::cout << "value range: [" << min << "," << max_finite << "]" << std::endl; + // ripserq + Rcpp::Rcout << "value range: [" << min << "," << max_finite << "]" << std::endl; if (threshold == std::numeric_limits::max()) { - std::cout << "distance matrix with " << dist.size() + // ripserq + Rcpp::Rcout << "distance matrix with " << dist.size() << " points, using threshold at enclosing radius " << enclosing_radius << std::endl; ripser(std::move(dist), dim_max, enclosing_radius, ratio, modulus) .compute_barcodes(); } else { - std::cout << "sparse distance matrix with " << dist.size() << " points and " + // ripserq + Rcpp::Rcout << "sparse distance matrix with " << dist.size() << " points and " << num_edges << "/" << (dist.size() * (dist.size() - 1)) / 2 << " entries" << std::endl; @@ -1291,3 +1374,37 @@ int main(int argc, char** argv) { exit(0); } } + +// ripserq +#endif + +// [[Rcpp::export()]] +Rcpp::List ripser_cpp_dist(const Rcpp::NumericVector &dataset, int dim, double thresh, float ratio, int p) { + std::vector distances(dataset.begin(), dataset.end()); + + compressed_lower_distance_matrix dist(compressed_upper_distance_matrix(std::move(distances))); + index_t idx_dim = static_cast(dim); + value_t val_thresh = static_cast(thresh); + coefficient_t coeff_p = static_cast(p); + + using RipserType = ripser; + using PersistenceType = std::vector>>; + + auto ripser_ptr = new RipserType(std::move(dist), idx_dim, val_thresh, ratio, coeff_p); + Rcpp::XPtr ripser_obj(ripser_ptr, false); + ripser_obj->compute_barcodes(); + PersistenceType result = ripser_obj->persistence_pairs; + + Rcpp::List output(result.size()); + for (size_t d = 0; d < result.size(); ++d) { + const auto& pairs = result[d]; + Rcpp::NumericMatrix mat(pairs.size(), 2); + for (size_t i = 0; i < pairs.size(); ++i) { + mat(i, 0) = pairs[i].first; + mat(i, 1) = pairs[i].second; + } + output[d] = mat; + } + + return output; +}