Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using furrr instead of pbapply and letting the user set the parallelization. #17

Merged
merged 9 commits into from
Jan 6, 2025
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ Description: A flexible permutation framework for making
data or density-valued data.
License: GPL (>= 3)
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
Imports:
cli,
dials,
furrr,
ggplot2,
magrittr,
optimParallel,
pbapply,
progressr,
purrr,
R6,
Rcpp,
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ import(ggplot2)
import(rlang)
importFrom(R6,R6Class)
importFrom(Rcpp,sourceCpp)
importFrom(furrr,future_map)
importFrom(furrr,future_map_dbl)
importFrom(magrittr,"%>%")
importFrom(pbapply,pblapply)
importFrom(pbapply,pbsapply)
importFrom(progressr,progressor)
importFrom(tibble,tibble)
useDynLib(flipr, .registration = TRUE)
3 changes: 2 additions & 1 deletion R/flipr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#' density-valued data.
#'
#' @useDynLib flipr, .registration = TRUE
#' @importFrom furrr future_map future_map_dbl
#' @import ggplot2
#' @importFrom pbapply pblapply pbsapply
#' @importFrom progressr progressor
#' @importFrom R6 R6Class
#' @importFrom Rcpp sourceCpp
#' @import rlang
Expand Down
33 changes: 12 additions & 21 deletions R/plausibility-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,6 @@ PlausibilityFunction <- R6::R6Class(
#' @param upper_bound A scalar or numeric vector specifying the lower bounds
#' for each parameter under investigation. If it is a scalar, the value is
#' used as lower bound for all parameters. Defaults to `10`.
#' @param ncores An integer specifying the number of cores to use for
#' maximizing the plausibility function to get a point estimate of the
#' parameters. Defaults to `1L`.
#' @param estimate A boolean specifying whether the rough point estimate
#' provided by `val` should serve as initial point for maximizing the
#' plausibility function (`estimate = TRUE`) or as final point estimate
Expand Down Expand Up @@ -401,7 +398,6 @@ PlausibilityFunction <- R6::R6Class(
set_point_estimate = function(point_estimate = NULL,
lower_bound = -10,
upper_bound = 10,
ncores = 1L,
estimate = FALSE,
overwrite = FALSE) {
if (!anyNA(self$point_estimate) && !overwrite) {
Expand All @@ -424,8 +420,7 @@ PlausibilityFunction <- R6::R6Class(
pf = self,
guess = point_estimate,
lower_bound = lower_bound,
upper_bound = upper_bound,
ncores = ncores
upper_bound = upper_bound
)
self$point_estimate <- opt$par
names(self$point_estimate) <- names(self$parameters)
Expand Down Expand Up @@ -611,8 +606,6 @@ PlausibilityFunction <- R6::R6Class(
#'
#' @param grid A \code{\link[tibble]{tibble}} storing a grid that spans the
#' space of parameters under investigation.
#' @param ncores An integer specifying the number of cores to run
#' evaluations in parallel. Defaults to `1L`.
#'
#' @examples
#' x <- rnorm(10)
Expand All @@ -639,7 +632,7 @@ PlausibilityFunction <- R6::R6Class(
#' npoints = 2L
#' )
#' pf$evaluate_grid(grid = pf$grid)
evaluate_grid = function(grid, ncores = 1L) {
evaluate_grid = function(grid) {
if ("pvalue" %in% names(self$grid) && is_equal(grid, self$grid)) {
abort("The current grid has already been evaluated.")
}
Expand All @@ -649,18 +642,16 @@ PlausibilityFunction <- R6::R6Class(
self$grid <- grid
}

cl <- NULL
if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterEvalQ(cl, {
library(purrr)
})
}
self$grid$pvalue <- self$grid %>%
purrr::array_tree(margin = 1) %>%
pbapply::pbsapply(self$get_value, cl = cl)
if (ncores > 1L)
parallel::stopCluster(cl)
transformed_grid <- self$grid %>%
purrr::array_tree(margin = 1)

cli::cli_alert_info("Evaluating grid.")
p <- progressr::progressor(steps = length(transformed_grid)) #progress bar set up

self$grid$pvalue <- furrr::future_map_dbl(transformed_grid, function(.l) {
p() #progress bar update
self$get_value(.l)
}, .options = furrr::furrr_options(seed = TRUE))
},

#' @field seed A numeric value specifying the seed to be used. Defaults to `1234`.
Expand Down
79 changes: 26 additions & 53 deletions R/point-estimation.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,16 @@ compute_point_estimate <- function(pf,
guess = NULL,
lower_bound = -10,
upper_bound = 10,
ncores = 1L,
verbose = FALSE) {
nparams <- pf$nparams

if (!is.null(guess)) {
if (ncores == 1) {
opt <- stats::optim(
par = guess,
fn = pf$get_value,
method = "L-BFGS-B",
control = list(fnscale = -1)
)
} else {
parallel_opts <- list(
cl = parallel::makeCluster(ncores),
forward = FALSE,
loginfo = FALSE
)
opt <- optimParallel::optimParallel(
par = guess,
fn = pf$get_value,
control = list(fnscale = -1),
parallel = parallel_opts
)
parallel::stopCluster(parallel_opts$cl)
}
# uses user default cluster
opt <- optimParallel::optimParallel(
par = guess,
fn = pf$get_value,
control = list(fnscale = -1)
)
x0 <- opt$par
fval <- opt$value
} else {
Expand All @@ -37,37 +21,26 @@ compute_point_estimate <- function(pf,
if (length(upper_bound) != nparams)
abort("The number of provided upper bounds does not match the number of parameters.")

if (nparams == 1 && ncores == 1) {
opt <- stats::optimise(
f = pf$get_value,
interval = c(lower_bound, upper_bound),
maximum = TRUE
)
x0 <- opt$maximum
fval <- opt$objective
} else {
opt <- rgenoud::genoud(
fn = pf$get_value,
nvars = nparams,
Domains = cbind(lower_bound, upper_bound),
max = TRUE,
pop.size = 20 * nparams,
max.generations = 10 * nparams,
wait.generations = 2 * nparams + 1,
BFGSburnin = 2 * nparams + 1,
print.level = 0,
cluster = if (ncores == 1) FALSE else ncores,
balance = nparams > 2
)
opt <- compute_point_estimate(
pf = pf,
guess = opt$par,
ncores = ncores,
verbose = FALSE
)
x0 <- opt$par
fval <- opt$value
}
opt <- rgenoud::genoud(
fn = pf$get_value,
nvars = nparams,
Domains = cbind(lower_bound, upper_bound),
max = TRUE,
pop.size = 20 * nparams,
max.generations = 10 * nparams,
wait.generations = 2 * nparams + 1,
BFGSburnin = 2 * nparams + 1,
print.level = 0,
cluster = parallel::getDefaultCluster(),
balance = nparams > 2
)
opt <- compute_point_estimate(
pf = pf,
guess = opt$par,
verbose = FALSE
)
x0 <- opt$par
fval <- opt$value
}

if (verbose) {
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
4 changes: 1 addition & 3 deletions R/viz.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
#' function will be evaluated. Specifically if `K` is the number of parameters
#' under investigation, the grid will be of size `(ngrid + 1)^K`. Defaults to
#' `10L`.
#' @param ncores An integer specifying the number of cores to use for
#' parallelized computations. Defaults to `1L`.
#' @param subtitle A string for specifying a subtitle to the plot. Defaults to
#' `""` leading to no subtitle.
#'
Expand Down Expand Up @@ -42,7 +40,7 @@
#' )
#' pf$evaluate_grid(grid = pf$grid)
#' plot_pf(pf)
plot_pf <- function(pf, alpha = 0.05, ngrid = 10, ncores = 1, subtitle = "") {
plot_pf <- function(pf, alpha = 0.05, ngrid = 10, subtitle = "") {
if (pf$nparams > 2)
abort("Only one- or two-dimensional plausibility functions can currently be plotted.")

Expand Down
Binary file modified data-raw/alpha.rds
Binary file not shown.
2 changes: 2 additions & 0 deletions data-raw/build-sysdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ df_mean <- readRDS("data-raw/df_mean.rds")
df_sd <- readRDS("data-raw/df_sd.rds")
df_fisher <- readRDS("data-raw/df_fisher.rds")
df_tippett <- readRDS("data-raw/df_tippett.rds")
df_parallelization <- readRDS("data-raw/df_parallelization.rds")
usethis::use_data(
alpha_estimates,
pfa, pfb, pfc,
df_mean, df_sd,
df_fisher, df_tippett,
df_parallelization,
overwrite = TRUE,
internal = TRUE
)
Binary file modified data-raw/df_mean.rds
Binary file not shown.
Binary file added data-raw/df_parallelization.rds
Binary file not shown.
Binary file modified data-raw/df_sd.rds
Binary file not shown.
47 changes: 28 additions & 19 deletions data-raw/exactness-vignette.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

library(purrr)
library(parallel)
library(furrr)
library(flipr)

# Parallelization
future::plan("future::multisession", workers = availableCores())

# General setup
nreps <- 1e4
Expand All @@ -16,21 +21,23 @@ sim <- map(sample.int(.Machine$integer.max, nreps, replace = TRUE), ~ {
)
})

# Cluster setup
cl <- makeCluster(detectCores(logical = FALSE))
clusterEvalQ(cl, {
library(purrr)
library(flipr)
null_spec <- function(y, parameters) {
map(y, ~ .x - parameters)
}
stat_functions <- list(stat_t)
stat_assignments <- list(delta = 1)
nperms <- 20
alpha <- 0.05
})
null_spec <- function(y, parameters) {
map(y, ~ .x - parameters)
}
stat_functions <- list(stat_t)
stat_assignments <- list(delta = 1)
nperms <- 20
alpha <- 0.05


progressr::with_progress({
p <- progressr::progressor(steps = length(sim) / 10) # progress bar set up
ii <- 1

alpha_estimates <- furrr::future_map(sim, function(.l) {
if (ii %% 10 == 0) {p()} # progress bar update
ii <<- ii + 1

alpha_estimates <- pbapply::pblapply(sim, function(.l) {
pf <- PlausibilityFunction$new(
null_spec = null_spec,
stat_functions = stat_functions,
Expand All @@ -51,11 +58,13 @@ alpha_estimates <- pbapply::pblapply(sim, function(.l) {
upper_bound = pv_upper_bound <= alpha,
estimate = pv_estimate <= alpha
)
}, cl = cl) %>%
transpose() %>%
simplify_all() %>%
map(mean)
stopCluster(cl)
}, .options = furrr_options(seed = TRUE)) %>%
transpose() %>%
simplify_all() %>%
map(mean)
})


alpha_estimates

saveRDS(alpha_estimates, "data-raw/alpha.rds")
17 changes: 3 additions & 14 deletions data-raw/flipr-vignette.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Setup -------------------------------------------------------------------

library(purrr)
library(parallel)
library(flipr)

n1 <- 10
Expand All @@ -13,7 +12,6 @@ mu2 <- 4
sd1 <- 1
sd2 <- 1
nperms <- 100000
ncores <- 6

null_spec <- function(y, parameters) {
purrr::map(y, ~ .x - parameters[1])
Expand Down Expand Up @@ -48,10 +46,7 @@ pfa$set_grid(
)

pfa$set_nperms(nperms)
pfa$evaluate_grid(
grid = pfa$grid,
ncores = ncores
)
pfa$evaluate_grid(grid = pfa$grid)

saveRDS(pfa, "data-raw/pfa.rds")

Expand Down Expand Up @@ -82,10 +77,7 @@ pfb$set_grid(
)

pfb$set_nperms(nperms)
pfb$evaluate_grid(
grid = pfb$grid,
ncores = ncores
)
pfb$evaluate_grid(grid = pfb$grid)

saveRDS(pfb, "data-raw/pfb.rds")

Expand Down Expand Up @@ -116,9 +108,6 @@ pfc$set_grid(
)

pfc$set_nperms(nperms)
pfc$evaluate_grid(
grid = pfc$grid,
ncores = ncores
)
pfc$evaluate_grid(grid = pfb$grid)

saveRDS(pfc, "data-raw/pfc.rds")
Loading
Loading