Skip to content

Optimize .compress_mat #353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
^_pkgdown\.yml$
^docs$
^logs$
^dev$
^man-roxygen$
^staged_dependencies\.yaml$
^temp$
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
## formatters 0.5.11.9000
* Optimized pagination sub-routines to avoid `matrix_form()` calls when not needed.
* Optimized pagination sub-routine `.compress_mat()` to reduce computing time for long listings.

## formatters 0.5.11
* Fixed a bug in `mform_handle_newlines` that caused string matrix column names to be removed. This prevented paginated listing key column info from being repeated when vertically spanning multiple pages.
Expand Down
58 changes: 35 additions & 23 deletions R/matrix_form.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@ mform_handle_newlines <- function(matform) {

# pre-proc in case of wrapping and \n
line_grouping <- mf_lgrouping(matform)
strmat <- .compress_mat(strmat, line_grouping, "nl")
frmmat <- .compress_mat(frmmat, line_grouping, "unique") # never not unique
spamat <- .compress_mat(spamat, line_grouping, "unique")
alimat <- .compress_mat(alimat, line_grouping, "unique")
line_grouping <- unique(line_grouping)
if (any(duplicated(line_grouping))) {
strmat <- .compress_mat(strmat, line_grouping, "nl")
frmmat <- .compress_mat(frmmat, line_grouping, "unique") # never not unique
spamat <- .compress_mat(spamat, line_grouping, "unique")
alimat <- .compress_mat(alimat, line_grouping, "unique")
line_grouping <- unique(line_grouping)
}

# nlines detects if there is a newline character
# colwidths = NULL, max_width = NULL, fontspec = NULL
Expand Down Expand Up @@ -143,32 +145,42 @@ mform_handle_newlines <- function(matform) {

.quick_handle_nl <- function(str_v) {
if (any(grepl("\n", str_v))) {
return(unlist(strsplit(str_v, "\n", fixed = TRUE)))
unlist(strsplit(str_v, "\n", fixed = TRUE))
} else {
return(str_v)
str_v
}
}

# Helper function to recompact the lines following line groupings to then have them expanded again
#### Helper function to recompact the lines following line groupings to then have them expanded again
# This version now ensures its output has the exact same row order as the original function.
# -> see file in `dev/benchmark_compress_mat.R` for a benchmark of the two versions(+).
.compress_mat <- function(mat, line_grouping, collapse_method = c("nl", "unique")) {
list_compacted_mat <- lapply(unique(line_grouping), function(lg) {
apply(mat, 2, function(mat_cols) {
col_vec <- mat_cols[which(line_grouping == lg)]
if (collapse_method[1] == "nl") {
paste0(col_vec, collapse = "\n")
} else {
val <- unique(col_vec)
val <- val[nzchar(val)]
df <- as.data.frame(mat, stringsAsFactors = FALSE)

# The original function processes groups in the order they appear in `unique(line_grouping)`.
# We create a factor with levels set to that specific order to force `split` to maintain it.
factor_grouping <- factor(line_grouping, levels = unique(line_grouping))
list_of_dfs <- split(df, factor_grouping)

if (collapse_method[1] == "nl") {
result_list <- lapply(list_of_dfs, function(sub_df) {
sapply(sub_df, paste, collapse = "\n")
})
} else { # "unique" method
result_list <- lapply(list_of_dfs, function(sub_df) {
sapply(sub_df, function(col) {
val <- unique(col[nzchar(col)])
if (length(val) > 1) {
stop("Problem in linegroupings! Some do not have the same values.") # nocov
} else if (length(val) < 1) {
val <- "" # Case in which it is only ""
stop("Problem in linegroupings! Some do not have the same values.")
} else if (length(val) == 0) {
""
} else {
val
}
val[[1]]
}
})
})
})
do.call("rbind", list_compacted_mat)
}
do.call("rbind", result_list)
}

disp_from_spans <- function(spans) {
Expand Down
54 changes: 30 additions & 24 deletions R/pagination.R
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ pag_indices_inner <- function(pagdf,
#' @inheritParams pag_indices_inner
#' @inheritParams open_font_dev
#' @inheritParams format_value
#' @param obj (`ANY`)\cr object to be paginated. Must have a [matrix_form()] method.
#' @param mf (`MatrixPrintForm`)\cr object to be paginated.
#' @param cpp (`numeric(1)`)\cr number of characters per page (width).
#' @param colwidths (`numeric`)\cr vector of column widths (in characters) for use in vertical pagination.
#' @param rep_cols (`numeric(1)`)\cr number of *columns* (not including row labels) to be repeated on every page.
Expand All @@ -583,7 +583,7 @@ pag_indices_inner <- function(pagdf,
#' lapply(colpaginds, function(j) mtcars[, j, drop = FALSE])
#'
#' @export
vert_pag_indices <- function(obj,
vert_pag_indices <- function(mf,
cpp = 40,
colwidths = NULL,
verbose = FALSE,
Expand All @@ -594,7 +594,7 @@ vert_pag_indices <- function(obj,
if (is.list(nosplitin)) {
nosplitin <- nosplitin[["cols"]]
}
mf <- matrix_form(obj, indent_rownames = TRUE, fontspec = fontspec, round_type = round_type)
# mf <- matrix_form(obj, indent_rownames = TRUE, fontspec = fontspec, round_type = round_type)
clwds <- colwidths %||% propose_column_widths(mf, fontspec = fontspec)
if (is.null(mf_cinfo(mf))) { ## like always, ugh.
mf <- mpf_infer_cinfo(mf, colwidths = clwds, rep_cols = rep_cols, fontspec = fontspec)
Expand Down Expand Up @@ -951,6 +951,7 @@ splice_idx_lists <- function(lsts) {
#' @inheritParams page_lcpp
#' @inheritParams toString
#' @inheritParams propose_column_widths
#' @param obj (`ANY`)\cr object to be paginated. Must have a [matrix_form()] method.
#' @param lpp (`numeric(1)` or `NULL`)\cr lines per page. If `NA` (the default), this is calculated automatically
#' based on the specified page size). `NULL` indicates no vertical pagination should occur.
#' @param cpp (`numeric(1)` or `NULL`)\cr width (in characters) per page. If `NA` (the default), this is calculated
Expand Down Expand Up @@ -1014,6 +1015,8 @@ paginate_indices <- function(obj,
if (newdev) {
on.exit(close_font_dev())
}


## this MUST alsways return a list, inluding list(obj) when
## no forced pagination is needed! otherwise stuff breaks for things
## based on s3 classes that are lists underneath!!!
Expand All @@ -1034,15 +1037,17 @@ paginate_indices <- function(obj,

## order is annoying here, since we won't actually need the mpf if
## we run into forced pagination, but life is short and this should work fine.
# this step can be very slow. No need to do it internally
mpf <- matrix_form(obj, TRUE, TRUE, indent_size = indent_size, fontspec = fontspec, round_type = round_type)

if (is.null(colwidths)) {
colwidths <- mf_col_widths(mpf) %||% propose_column_widths(mpf, fontspec = fontspec, round_type = round_type)
} else {
mf_col_widths(mpf) <- colwidths
}

mf_colgap(mpf) <- col_gap
if (!is.null(rep_cols) && rep_cols != num_rep_cols(obj)) {
if (!is.null(rep_cols) && rep_cols != num_rep_cols(mpf)) {
num_rep_cols(mpf) <- rep_cols
}
if (NROW(mf_cinfo(mpf)) == 0) {
Expand Down Expand Up @@ -1078,27 +1083,29 @@ paginate_indices <- function(obj,

## this wraps the cell contents AND shoves referential footnote
## info into mf_rinfo(mpf)
mpf <- do_cell_fnotes_wrap(mpf, colwidths, max_width, tf_wrap = tf_wrap, fontspec = fontspec)
ori_mpf <- mpf # used for pulling right split labels
mpf <- do_cell_fnotes_wrap(ori_mpf, colwidths, max_width, tf_wrap = tf_wrap, fontspec = fontspec)

# rlistings note: if there is a wrapping in a keycol, it is not calculated correctly
# in the above call, so we need to keep this information in mf_rinfo
# and use it here.
mfri <- mf_rinfo(mpf)
keycols <- .get_keycols_from_listing(obj)
keycols <- .get_keycols_from_listing(mpf)
seq_nrh <- seq_len(mf_nrheader(mpf))
if (NROW(mfri) > 1 && .is_listing_mf(mpf) && length(keycols) > 0) {
# Lets determine the groupings created by keycols
keycols_grouping_df <- NULL
for (i in seq_along(keycols)) {
kcol <- keycols[i]
if (is(obj, "MatrixPrintForm")) {
# This makes the function work also in the case we have only matrix form (mainly for testing purposes)
kcolvec <- mf_strings(obj)[, mf_strings(obj)[1, , drop = TRUE] == kcol][-1]
while (any(kcolvec == "")) {
kcolvec[which(kcolvec == "")] <- kcolvec[which(kcolvec == "") - 1]
# This makes the function work also in the case we have only matrix form (mainly for testing purposes)
ori_mpf_colnames <- names(mf_strings(ori_mpf)[1, , drop = TRUE])
kcolvec <- mf_strings(ori_mpf)[, ori_mpf_colnames == kcol][-seq_nrh]

# if the keycol is not present, we just use the previous one
for (j in seq_along(kcolvec)) {
if (j > 1 && kcolvec[j] == "") {
kcolvec[j] <- kcolvec[j - 1]
}
} else {
kcolvec <- obj[[kcol]]
kcolvec <- vapply(kcolvec, format_value, "", format = obj_format(kcolvec), na_str = obj_na_str(kcolvec))
}
groupings <- as.numeric(factor(kcolvec, levels = unique(kcolvec)))
where_they_start <- which(c(1, diff(groupings)) > 0)
Expand Down Expand Up @@ -1310,19 +1317,18 @@ paginate_to_mpfs <- function(obj,
return(deep_pag)
} else if (has_page_title(fpags[[1]])) {
obj <- fpags[[1]]
## we run into forced pagination, but life is short and this should work fine.
mpf <- matrix_form(obj, TRUE, TRUE, indent_size = indent_size, fontspec = fontspec, round_type = round_type)
num_rep_cols(mpf) <- rep_cols
if (is.null(colwidths)) {
colwidths <- mf_col_widths(mpf) %||% propose_column_widths(mpf, fontspec = fontspec, round_type = round_type)
}
mf_col_widths(mpf) <- colwidths
mf_colgap(mpf) <- col_gap
}

## we run into forced pagination, but life is short and this should work fine.
mpf <- matrix_form(obj, TRUE, TRUE, indent_size = indent_size, fontspec = fontspec, round_type = round_type)
num_rep_cols(mpf) <- rep_cols
if (is.null(colwidths)) {
colwidths <- mf_col_widths(mpf) %||% propose_column_widths(mpf, fontspec = fontspec, round_type = round_type)
}
mf_col_widths(mpf) <- colwidths
mf_colgap(mpf) <- col_gap

page_indices <- paginate_indices(
obj = obj,
obj = mpf,
## page_type = page_type,
## font_family = font_family,
## font_size = font_size,
Expand Down
Loading
Loading