Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Ripser
.clang-format
CONTRIBUTING.txt
COPYING.txt
Makefile

# R project
^ripser\.Rproj$
^\.Rproj\.user$
15 changes: 11 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
DerivedData
ripser.dSYM
ripser
ripser-coeff
# Mac
.DS_Store

# R project
.Rproj.user
.Rhistory

# Rcpp
*.o
*.so
*.dll
15 changes: 15 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Package: ripserq
Type: Package
Title: Ports the Ripser program to R with minimal package structure
Version: 1.0
Date: 2025-06-19
Authors@R: person("Your", "Name", role = c("aut", "cre"),
email = "[email protected]")
Description: More about what it does (maybe more than one
line).
License: GPL (>= 2)
Imports:
Rcpp
LinkingTo: Rcpp
RoxygenNote: 7.3.2
Encoding: UTF-8
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Generated by roxygen2: do not edit by hand

export(ripser_dist)
importFrom(Rcpp,sourceCpp)
useDynLib(ripserq, .registration = TRUE)
1 change: 1 addition & 0 deletions NEWS-ripserq.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This branch (`ripserq`) and its sub-branches are testing grounds for planned changes to {ripserr}.
7 changes: 7 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

ripser_cpp_dist <- function(dataset, dim, thresh, ratio, p) {
.Call(`_ripserq_ripser_cpp_dist`, dataset, dim, thresh, ratio, p)
}

91 changes: 91 additions & 0 deletions R/ripser-dist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' @title Calculate Persistent Homology via a Vietoris-Rips Filtration
#'
#' @description This "externally exported" R function gives the user access to
#' the "internally exported" C++ function `ripser_cpp_dist()`.
#'
#' @param dataset "dist" object on which to compute persistent homology
#' @param max_dim maximum dimension
#' @param threshold maximum diameter
#' @returns an empty list
#' @examples
#'
#' # validate computation on toy data set
#' dist_vec <- c(4, 3, 5, 5, 3, 4)
#' result <- ripserq:::ripser_cpp_dist(
#' dist_vec,
#' dim = 1,
#' thresh = 6.0,
#' ratio = 1.0,
#' p = 2
#' )
#' result
#'
#' # validate clustering (only 0-degree homology)
#' ripserq:::ripser_cpp_dist(
#' dist_vec,
#' dim = 0,
#' thresh = Inf,
#' ratio = 1.0,
#' p = 2
#' )
#'
#' # validate use of default threshold
#' ripserq:::ripser_cpp_dist(
#' dist_vec,
#' dim = 1,
#' thresh = Inf,
#' ratio = 1.0,
#' p = 2
#' )
#'
#' # test edge case
#' ripserq:::ripser_cpp_dist(
#' dist(matrix(c(0,0,0,1), ncol = 2)),
#' dim = 1, thresh = 1, ratio = 1, p = 2
#' )
#' \dontrun{
#' ripserq:::ripser_cpp_dist(
#' dist(0),
#' dim = 1, thresh = 1, ratio = 1, p = 2
#' )
#' }
#'
#' # exposed R function with no explicit parameter settings
#' ripser_dist(dist_vec)
#'
#' # validate compatibility with 'dist' class and different outputs
#' ripser_dist(
#' UScitiesD,
#' max_dim = 1, thresh = 1000
#' )
#' ripser_dist(
#' UScitiesD,
#' max_dim = 1, thresh = 930
#' )
#' ripser_dist(
#' UScitiesD,
#' max_dim = 1, thresh = 800
#' )
#'
#' @export
ripser_dist <- function(
dataset,
max_dim = 1L,
threshold = Inf
) {

# pre-process parameters
if (threshold == Inf) threshold <- max(dataset)

# run `compute_barcodes()` and save result
ans <- ripser_cpp_dist(
dataset,
dim = max_dim, thresh = threshold, ratio = 1., p = 2L
)

# convert not-a-number values to missing
ans <- lapply(ans, function(x) { x[is.nan(x)] <- NA_real_; x })

# return result
ans
}
4 changes: 4 additions & 0 deletions R/ripserq-package.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#' @importFrom Rcpp sourceCpp
#' @useDynLib ripserq, .registration = TRUE
#' @keywords internal
"_PACKAGE"
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
# Ripser
# `ripserq`

This branch exists to maintain a minimal R package structure around the current version of Ripser, into which future upgrades will be merged.
If and when experimental features are added, they will be maintained and combined in sub-branches of the form `ripserq-<feature(s)>`.
The code will be minimally altered, most significantly (1) to toggle off C-only procedures like `main()` and (2) to ensure compatibility with R, the {Rcpp} interface, and the R package infrastructure.
Stable versions of `ripserq` will then be used to upgrade the R package {ripserr}.

The Ripser README is unaltered below.


## Ripser

Copyright © 2015–2021 [Ulrich Bauer].

Expand Down
136 changes: 136 additions & 0 deletions ignore/Benchmark_enhanced.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Enhanced Benchmark Script for {ripserq}
# This script benchmarks several variations of {ripserq} to record censored
# death times as `NA_real_` rather than `Inf` or `NaN` or to use doubles rather
# than floats to store distance measurements. They work as follows:

# `ripserq`, e472dfb7bc5e6da96987987c0cdd989717355b0f:
# no correction; reported as `Inf`
# `ripserq-double`, 2936f3b9455ff9f778b61b0216d7b902177fbfd3:
# type distances as double rather than float
# `ripserq-missing`, bfdfdfbd2569338dc511240b8106e9367736ff3d:
# stored in `std::vector` as `NaN`, then converted to `NA_real_`
# `ripserq-missing-alt`, 41689894007c5d91710f6ad1a7debd0b9d835386:
# stored in `Rcpp::NumericMatrix` as `NA_REAL` (corresponding to `NA_real_`)

# Benchmark tests on small-to-moderate data sets found
# * negligible differences between float versus double
# * significant efficiencies with using `NaN` then converting
# * significant deficiencies to using `NumericMatrix` and not converting

# ENHANCED VERSION: Includes intermediate-sized datasets
# This version focuses on datasets that run in finite time while still
# providing meaningful performance comparisons across branches.

# prepare point clouds
rp2_n <- here::here("src/examples/rp2_600.lower_distance_matrix.csv") |>
readLines() |>
strsplit(split = ",") |>
sapply(length) |>
max() + 1L
here::here("src/examples/rp2_600.lower_distance_matrix.csv") |>
read.csv(col.names = paste0("X", seq(rp2_n)), header = FALSE) |>
as.matrix() |>
(\(m) rbind(NA_character_, m))() |>
as.dist() ->
rp2_600

# Note: o3_1024 and larger datasets are commented out as they may cause
# system hangs or crashes on some machines. Uncomment if your system
# can handle the computational load.
# here::here("src/examples/o3_1024.txt") |>
# readr::read_tsv(col_names = FALSE, col_types = "d") |>
# dist() ->
# o3_1024

set.seed(265879)
klein <- dist(tdaunif::sample_klein_flat(n = 1280, sd = .01))

library(gert)

# branches to benchmark
bench_branches <- c(
"ripserq",
"ripserq-double",
"ripserq-missing", "ripserq-missing-alt"
)
if (! all(bench_branches %in% git_branch_list()$name)) {
stop("Some branches were not found.")
}

# loop over branches
nrep <- 3L
for (i in seq(nrep)) for (branch in bench_branches) {

# checkout branch and compile source code
git_branch_checkout(branch)
devtools::load_all()

# perform benchmark tests
# Enhanced version focuses on datasets that complete in reasonable time
res <- bench::mark(
usa = ripser_dist(UScitiesD, max_dim = 1, thresh = 600),
euro = ripser_dist(eurodist, max_dim = 1, thresh = 500),
rp2_600 = ripser_dist(rp2_600, max_dim = 1, thresh = 30), # Intermediate dataset
klein = ripser_dist(klein, max_dim = 2, thresh = .5),
# Uncomment the following line if your system can handle o3_1024:
# o3_1024 = ripser_dist(o3_1024, max_dim = 2, thresh = 3.5),
check = FALSE
)

# save results
saveRDS(res, file = paste0("ignore/benchmark-", branch, "-", i, ".rds"))
}

# return to "subtrunk" branch
git_branch_checkout("ripserq")

library(tidyverse)

# collate benchmark results
list.files(path = "ignore", pattern = "benchmark\\-.*\\-[0-9]+\\.rds") |>
enframe(name = NULL, value = "path") |>
mutate(branch = gsub(
"benchmark\\-([a-z\\-]+)\\-[0-9]+\\.rds", "\\1",
path
)) |>
transmute(
subbranch = ifelse(
branch == "ripserq", " ", gsub("^ripserq\\-", "", branch)
),
rep = gsub("benchmark\\-[a-z\\-]+\\-([0-9]+)\\.rds", "\\1", path),
results = map(file.path("ignore", path), readRDS)
) |>
unnest(results) |>
select(expression, subbranch, median, mem_alloc) |>
arrange(expression, subbranch) |>
print() -> bench_results

# plot benchmark results
test_n <- length(unique(bench_results$expression))
bench_results |>
mutate(expression = fct_inorder(as.character(expression))) |>
mutate(across(c(median, mem_alloc), as.numeric)) |>
pivot_longer(
cols = c(median, mem_alloc),
names_to = "measure", values_to = "value"
) |>
ggplot(aes(x = subbranch, y = value)) +
facet_wrap(
facets = vars(measure, expression),
ncol = test_n, scales = "free_y"
) +
geom_boxplot(aes(color = subbranch)) +
scale_y_log10() +
theme(axis.text.x = element_text(angle = -30, hjust = 0)) +
labs(
title = "Enhanced RipserQ Benchmark Results",
subtitle = "Including intermediate-sized datasets for reliable performance testing",
x = "Branch Variation",
y = "Value (log scale)"
) ->
bench_plot
print(bench_plot)
ggsave(
here::here("ignore/benchmark-plot-enhanced.pdf"), bench_plot,
width = 10, height = 8
)
Loading