From 8b4e436cda7d6612dd8fed4cd6d8e20a7253910e Mon Sep 17 00:00:00 2001 From: Brown Date: Wed, 26 Apr 2023 09:57:13 -0400 Subject: [PATCH 01/20] Add test comment --- README.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.Rmd b/README.Rmd index 14c283dc..915b2a1b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -22,6 +22,8 @@ library(gnomeR) # gnomeR +hi + [![R-CMD-check](https://github.com/MSKCC-Epi-Bio/gnomeR/workflows/R-CMD-check/badge.svg)](https://github.com/MSKCC-Epi-Bio/gnomeR/actions) [![Codecov test coverage](https://codecov.io/gh/MSKCC-Epi-Bio/gnomeR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/MSKCC-Epi-Bio/gnomeR?branch=main) From fc0d9c03a716bc9e2b3eea11a33a5db8f9f70b21 Mon Sep 17 00:00:00 2001 From: Brown Date: Wed, 26 Apr 2023 09:57:51 -0400 Subject: [PATCH 02/20] Revert "Add test comment" This reverts commit 8b4e436cda7d6612dd8fed4cd6d8e20a7253910e. --- README.Rmd | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.Rmd b/README.Rmd index 915b2a1b..14c283dc 100644 --- a/README.Rmd +++ b/README.Rmd @@ -22,8 +22,6 @@ library(gnomeR) # gnomeR -hi - [![R-CMD-check](https://github.com/MSKCC-Epi-Bio/gnomeR/workflows/R-CMD-check/badge.svg)](https://github.com/MSKCC-Epi-Bio/gnomeR/actions) [![Codecov test coverage](https://codecov.io/gh/MSKCC-Epi-Bio/gnomeR/branch/main/graph/badge.svg)](https://app.codecov.io/gh/MSKCC-Epi-Bio/gnomeR?branch=main) From 17de7eb6a7387e6bcb1da4ee4b977e563ad15870 Mon Sep 17 00:00:00 2001 From: Brown Date: Thu, 27 Apr 2023 09:11:22 -0400 Subject: [PATCH 03/20] Add wide_format argument to stratify tbl by alteration type --- R/tbl_genomic.R | 152 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 2 deletions(-) diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index 19cd3a57..2fa429dd 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -44,6 +44,7 @@ tbl_genomic <- function(gene_binary, freq_cutoff = deprecated(), freq_cutoff_by_gene = deprecated(), gene_subset = deprecated(), + wide_format = FALSE, ...) { # Check arguments & prep data ------------------------------------------------ @@ -106,9 +107,156 @@ tbl_genomic <- function(gene_binary, any_of(order_genes)) %>% dplyr::select(-"sample_id") + # * Wide format by alteration type ----- + + if (wide_format) { + + # identify types of alterations in data + any_mut <- table_data %>% + dplyr::select(-ends_with(".Amp"), + -ends_with(".Del"), + -ends_with(".fus")) %>% + ncol() > 0 + + any_amp <- table_data %>% + dplyr::select(ends_with(".Amp")) %>% + ncol() > 0 + + any_del <- table_data %>% + dplyr::select(ends_with(".Del")) %>% + ncol() > 0 + + any_fus <- table_data %>% + dplyr::select(ends_with(".fus")) %>% + ncol() > 0 + + # get names of genes + names_genes <- gene_binary %>% + gnomeR::summarize_by_gene() %>% + dplyr::select(-"sample_id") %>% + colnames() %>% + as.vector() + + # create table of overall frequencies + tbl1 <- gene_binary %>% + gnomeR::summarize_by_gene() %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # create table of mutation frequencies + tbl_mut <- if (any_mut) { + + mut_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(-ends_with(".Amp"), + -ends_with(".Del"), + -ends_with(".fus")) + + mut_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(mut_df)))), + setdiff(names_genes, names(mut_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(-ends_with(".Amp"), + # -ends_with(".Del"), + # -ends_with(".fus")) %>% + # gtsummary::tbl_summary() + } + + # create table of .Amp frequencies + tbl_amp <- if (any_amp) { + + amp_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".Amp")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".Amp")) + + amp_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(amp_df)))), + setdiff(names_genes, names(amp_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".Amp")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.Amp')) %>% + # gtsummary::tbl_summary() + } + + # create table of .Del frequencies + tbl_del <- if (any_del) { + + del_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".Del")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".Del")) + + del_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(del_df)))), + setdiff(names_genes, names(del_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".Del")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.Del')) %>% + # gtsummary::tbl_summary() + } + + # create table of .fus frequencies + tbl_fus <- if (any_fus) { + + fus_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".fus")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".fus")) + + fus_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(fus_df)))), + setdiff(names_genes, names(fus_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".fus")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.fus')) %>% + # gtsummary::tbl_summary() + } + + # create list of tables + tbls_list_pre <- + list(tbl1, + if (any_mut) {tbl_mut}, + if (any_amp) {tbl_amp}, + if (any_del) {tbl_del}, + if (any_fus) {tbl_fus}) + + # drop NULL tables + tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) + + tab_spanner_vec <- c( + "**Overall**", + if (any_mut) "**Mutations**", + if (any_amp) "**Amplifications**", + if (any_del) "**Deletions**", + if (any_fus) "**Fusions**" + ) + + # merge tables + gtsummary::tbl_merge(tbls_list, + tab_spanner = tab_spanner_vec) + + } + # Construct Final Table --------------------------------------------------- - final_table <- table_data %>% + else {final_table <- table_data %>% gtsummary::tbl_summary(by = any_of(by),...) if (!is.null(by)) { @@ -116,6 +264,6 @@ tbl_genomic <- function(gene_binary, gtsummary::add_overall() } - final_table + final_table} } From 022d8c47b6c38064ca85329bd0b0967b89415ffa Mon Sep 17 00:00:00 2001 From: karissawhiting Date: Fri, 22 Sep 2023 15:17:45 -0400 Subject: [PATCH 04/20] Reverted tbl_genomic to version from upstream main branch --- R/tbl_genomic.R | 152 +----------------------------------------------- 1 file changed, 2 insertions(+), 150 deletions(-) diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index 2fa429dd..19cd3a57 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -44,7 +44,6 @@ tbl_genomic <- function(gene_binary, freq_cutoff = deprecated(), freq_cutoff_by_gene = deprecated(), gene_subset = deprecated(), - wide_format = FALSE, ...) { # Check arguments & prep data ------------------------------------------------ @@ -107,156 +106,9 @@ tbl_genomic <- function(gene_binary, any_of(order_genes)) %>% dplyr::select(-"sample_id") - # * Wide format by alteration type ----- - - if (wide_format) { - - # identify types of alterations in data - any_mut <- table_data %>% - dplyr::select(-ends_with(".Amp"), - -ends_with(".Del"), - -ends_with(".fus")) %>% - ncol() > 0 - - any_amp <- table_data %>% - dplyr::select(ends_with(".Amp")) %>% - ncol() > 0 - - any_del <- table_data %>% - dplyr::select(ends_with(".Del")) %>% - ncol() > 0 - - any_fus <- table_data %>% - dplyr::select(ends_with(".fus")) %>% - ncol() > 0 - - # get names of genes - names_genes <- gene_binary %>% - gnomeR::summarize_by_gene() %>% - dplyr::select(-"sample_id") %>% - colnames() %>% - as.vector() - - # create table of overall frequencies - tbl1 <- gene_binary %>% - gnomeR::summarize_by_gene() %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # create table of mutation frequencies - tbl_mut <- if (any_mut) { - - mut_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(-ends_with(".Amp"), - -ends_with(".Del"), - -ends_with(".fus")) - - mut_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(mut_df)))), - setdiff(names_genes, names(mut_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(-ends_with(".Amp"), - # -ends_with(".Del"), - # -ends_with(".fus")) %>% - # gtsummary::tbl_summary() - } - - # create table of .Amp frequencies - tbl_amp <- if (any_amp) { - - amp_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".Amp")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".Amp")) - - amp_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(amp_df)))), - setdiff(names_genes, names(amp_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".Amp")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.Amp')) %>% - # gtsummary::tbl_summary() - } - - # create table of .Del frequencies - tbl_del <- if (any_del) { - - del_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".Del")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".Del")) - - del_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(del_df)))), - setdiff(names_genes, names(del_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".Del")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.Del')) %>% - # gtsummary::tbl_summary() - } - - # create table of .fus frequencies - tbl_fus <- if (any_fus) { - - fus_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".fus")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".fus")) - - fus_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(fus_df)))), - setdiff(names_genes, names(fus_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".fus")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.fus')) %>% - # gtsummary::tbl_summary() - } - - # create list of tables - tbls_list_pre <- - list(tbl1, - if (any_mut) {tbl_mut}, - if (any_amp) {tbl_amp}, - if (any_del) {tbl_del}, - if (any_fus) {tbl_fus}) - - # drop NULL tables - tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) - - tab_spanner_vec <- c( - "**Overall**", - if (any_mut) "**Mutations**", - if (any_amp) "**Amplifications**", - if (any_del) "**Deletions**", - if (any_fus) "**Fusions**" - ) - - # merge tables - gtsummary::tbl_merge(tbls_list, - tab_spanner = tab_spanner_vec) - - } - # Construct Final Table --------------------------------------------------- - else {final_table <- table_data %>% + final_table <- table_data %>% gtsummary::tbl_summary(by = any_of(by),...) if (!is.null(by)) { @@ -264,6 +116,6 @@ tbl_genomic <- function(gene_binary, gtsummary::add_overall() } - final_table} + final_table } From fc7f572060bc4e85640a3e320c7a1d1d66e692a1 Mon Sep 17 00:00:00 2001 From: karissawhiting Date: Fri, 22 Sep 2023 15:18:39 -0400 Subject: [PATCH 05/20] add Sammi's changes to separate file --- R/tbl_genomic_wide.R | 269 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 R/tbl_genomic_wide.R diff --git a/R/tbl_genomic_wide.R b/R/tbl_genomic_wide.R new file mode 100644 index 00000000..ed704bbf --- /dev/null +++ b/R/tbl_genomic_wide.R @@ -0,0 +1,269 @@ +#' tbl_genomic_wide +#' +#' This function will select genes based on user inputs or on frequency counts and then +#' will pass the data.frame to `gtsummary::tbl_summary()`. You can specify a `by` variable and other +#' parameters that are accepted by `gtsummary::tbl_summary()`. Note the `by` variable must be merged on to +#' onto the data before using the `by` parameter in the function. +#' +#' @param gene_binary data.frame of genetic samples +#' @param freq_cutoff deprecated +#' @param freq_cutoff_by_gene deprecated +#' @param gene_subset deprecated +#' @param by A variable to be passed to `gtsummary::tbl_summary()`'s by parameter +#' @param ... Additional parameters that can be passed to `gtsummary::tbl_summary()`. To access the additional parameters you need to load `gtsummary`. +#' @return A `tbl_summary()` object +#' @export +#' +#' @examples +#' +#' samples <- unique(mutations$sampleId)[1:10] +#' +#' gene_binary <- create_gene_binary( +#' samples = samples, +#' mutation = gnomeR::mutations, +#' cna = gnomeR::cna, +#' mut_type = "somatic_only", snp_only = FALSE, +#' specify_panel = "no" +#' ) +#' +#' tbl1 <- tbl_genomic(gene_binary) +#' +#' # Example wth `by` variable +#' +#' gene_binary$sex <- sample( c("M", "F"), size = nrow(gene_binary), replace = TRUE) +#' +#' tbl2 <- tbl_genomic( +#' gene_binary = gene_binary, +#' by = sex +#' ) %>% +#' gtsummary::add_p() %>% +#' gtsummary::add_q() +#' +tbl_genomic <- function(gene_binary, + by = NULL, + freq_cutoff = deprecated(), + freq_cutoff_by_gene = deprecated(), + gene_subset = deprecated(), + wide_format = FALSE, + ...) { + + # Check arguments & prep data ------------------------------------------------ + + if (!inherits(gene_binary, "data.frame")) { + stop("`gene_binary=` argument must be a tibble or data frame.", call. = FALSE) + } + + .check_required_cols(gene_binary, "sample_id", "gene_binary") + + if("sample_id" %in% names(gene_binary)) { + if(any(table(gene_binary$sample_id) > 1)) { + cli::cli_abort("Duplicate `sample_ids` found in `gene_binary`. Samples IDs should be unique.") + } + + } + # * Deprecated Arguments (will remove this in the future) ---- + + if (lifecycle::is_present(freq_cutoff)) { + lifecycle::deprecate_stop(when = "1.3.0", + what = "tbl_genomic(freq_cutoff)", + details = c( + i = c("Please pre-select gene columns to summarize before passing to `tbl_genomic()`.", + "Use `gnomeR::subset_by_frequency(t)` to easily subset by a gene prevalance threshold."))) + } + + if (lifecycle::is_present(freq_cutoff_by_gene)) { + lifecycle::deprecate_stop(when = "1.3.0", + what = "tbl_genomic(freq_cutoff_by_gene)", + details = c( + i = c("Please pre-select gene columns to summarize before passing to `tbl_genomic()`.", + "Use `gnomeR::subset_by_frequency(t)` to easily subset by a gene prevalance threshold."))) + } + + if (lifecycle::is_present(gene_subset)) { + lifecycle::deprecate_stop(when = "1.3.0", + what = "tbl_genomic(gene_subset)", + details = c( + i = c("Please pre-select gene columns to summarize before passing to `tbl_genomic()`.", + "Use `gnomeR::subset_by_frequency(t)` to easily subset by a gene prevalance threshold."))) + } + + # * Other Args ----- + + by <- + .select_to_varnames({{ by }}, + data = gene_binary, + arg_name = "by", select_single = TRUE + ) + + # Order Genes for Final Table --------------------------------------------- + + order_genes <- gene_binary %>% + dplyr::select(-all_of(by)) %>% + gnomeR::subset_by_frequency(t = 0) %>% + names() + + table_data <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(-"sample_id") + + # * Wide format by alteration type ----- + + if (wide_format) { + + # identify types of alterations in data + any_mut <- table_data %>% + dplyr::select(-ends_with(".Amp"), + -ends_with(".Del"), + -ends_with(".fus")) %>% + ncol() > 0 + + any_amp <- table_data %>% + dplyr::select(ends_with(".Amp")) %>% + ncol() > 0 + + any_del <- table_data %>% + dplyr::select(ends_with(".Del")) %>% + ncol() > 0 + + any_fus <- table_data %>% + dplyr::select(ends_with(".fus")) %>% + ncol() > 0 + + # get names of genes + names_genes <- gene_binary %>% + gnomeR::summarize_by_gene() %>% + dplyr::select(-"sample_id") %>% + colnames() %>% + as.vector() + + # create table of overall frequencies + tbl1 <- gene_binary %>% + gnomeR::summarize_by_gene() %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # create table of mutation frequencies + tbl_mut <- if (any_mut) { + + mut_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(-ends_with(".Amp"), + -ends_with(".Del"), + -ends_with(".fus")) + + mut_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(mut_df)))), + setdiff(names_genes, names(mut_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(-ends_with(".Amp"), + # -ends_with(".Del"), + # -ends_with(".fus")) %>% + # gtsummary::tbl_summary() + } + + # create table of .Amp frequencies + tbl_amp <- if (any_amp) { + + amp_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".Amp")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".Amp")) + + amp_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(amp_df)))), + setdiff(names_genes, names(amp_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".Amp")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.Amp')) %>% + # gtsummary::tbl_summary() + } + + # create table of .Del frequencies + tbl_del <- if (any_del) { + + del_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".Del")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".Del")) + + del_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(del_df)))), + setdiff(names_genes, names(del_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".Del")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.Del')) %>% + # gtsummary::tbl_summary() + } + + # create table of .fus frequencies + tbl_fus <- if (any_fus) { + + fus_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".fus")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".fus")) + + fus_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(fus_df)))), + setdiff(names_genes, names(fus_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".fus")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.fus')) %>% + # gtsummary::tbl_summary() + } + + # create list of tables + tbls_list_pre <- + list(tbl1, + if (any_mut) {tbl_mut}, + if (any_amp) {tbl_amp}, + if (any_del) {tbl_del}, + if (any_fus) {tbl_fus}) + + # drop NULL tables + tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) + + tab_spanner_vec <- c( + "**Overall**", + if (any_mut) "**Mutations**", + if (any_amp) "**Amplifications**", + if (any_del) "**Deletions**", + if (any_fus) "**Fusions**" + ) + + # merge tables + gtsummary::tbl_merge(tbls_list, + tab_spanner = tab_spanner_vec) + + } + + # Construct Final Table --------------------------------------------------- + + else {final_table <- table_data %>% + gtsummary::tbl_summary(by = any_of(by),...) + + if (!is.null(by)) { + final_table <- final_table %>% + gtsummary::add_overall() + } + + final_table} + +} From 47affff8ecefa3119dfefcfbe0127eb4eca1533f Mon Sep 17 00:00:00 2001 From: Brown Date: Thu, 12 Oct 2023 14:32:28 -0400 Subject: [PATCH 06/20] Create add_q_tbl_wide.R Getting the following error: Error in get_theme_element("pkgwide-lgl:quiet") : could not find function "get_theme_element" --- R/add_q_tbl_wide.R | 143 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 R/add_q_tbl_wide.R diff --git a/R/add_q_tbl_wide.R b/R/add_q_tbl_wide.R new file mode 100644 index 00000000..549a6df0 --- /dev/null +++ b/R/add_q_tbl_wide.R @@ -0,0 +1,143 @@ +#' Add a column of q-values to account for +#' multiple comparisons +#' +#' Adjustments to p-values are performed with [stats::p.adjust]. +#' +#' @param x a `gtsummary` object +#' @param method String indicating method to be used for p-value +#' adjustment. Methods from +#' [stats::p.adjust] are accepted. Default is `method = "fdr"`. +#' @inheritParams tbl_regression +#' @inheritParams add_global_p +#' @author Esther Drill, Daniel D. Sjoberg +#' @family tbl_summary tools +#' @family tbl_svysummary tools +#' @family tbl_regression tools +#' @family tbl_uvregression tools +#' @export +#' @examplesIf broom.helpers::.assert_package("car", pkg_search = "gtsummary", boolean = TRUE) +#' \donttest{ +#' # Example 1 ---------------------------------- +#' add_q_ex1 <- +#' trial[c("trt", "age", "grade", "response")] %>% +#' tbl_summary(by = trt) %>% +#' add_p() %>% +#' add_q() +#' +#' # Example 2 ---------------------------------- +#' add_q_ex2 <- +#' trial[c("trt", "age", "grade", "response")] %>% +#' tbl_uvregression( +#' y = response, +#' method = glm, +#' method.args = list(family = binomial), +#' exponentiate = TRUE +#' ) %>% +#' add_global_p() %>% +#' add_q() +#' } +#' @section Example Output: +#' \if{html}{Example 1} +#' +#' \if{html}{\out{ +#' `r man_create_image_tag(file = "add_q_ex1.png", width = "65")` +#' }} +#' +#' \if{html}{Example 2} +#' +#' \if{html}{\out{ +#' `r man_create_image_tag(file = "add_q_ex2.png", width = "60")` +#' }} + +add_q_tbl_wide <- function(x, method = "fdr", pvalue_fun = NULL, n_comp = NULL, quiet = NULL) { + updated_call_list <- c(x$call_list, list(add_q = match.call())) + # setting defaults ----------------------------------------------------------- + quiet <- quiet %||% get_theme_element("pkgwide-lgl:quiet") %||% FALSE + + # checking inputs ------------------------------------------------------------ + # checking class of x + .assert_class(x, "gtsummary") + + # checking input table has a p.value column + if (!"p.value" %in% names(x$table_body)) { + stop("There is no p-value column. `x$table_body` must have a column called 'p.value'", + call. = FALSE + ) + } + + ## Sammi's edits + n_comp <- length(which(!is.na(x$table_body$p.value))) + ## end Sammi's edits + + # setting defaults from gtsummary theme -------------------------------------- + pvalue_fun <- + pvalue_fun %||% + # defaults from theme + get_theme_element("add_q-arg:pvalue_fun") %||% + get_theme_element("pkgwide-fn:pvalue_fun") %||% + # default from p-value formatting function + (filter(x$table_styling$fmt_fun, .data$column == "p.value") %>% pull("fmt_fun") %>% pluck(1)) %>% + gts_mapper("add_q(pvalue_fun=)") + + # checking pvalue_fun are functions + if (!is.function(pvalue_fun)) { + stop("Input 'pvalue_fun' must be a function.") + } + + # perform multiple comparisons ----------------------------------------------- + expr_p.adjust <- + ## Sammi: added `n = n_comp` argument + rlang::expr(stats::p.adjust(x$table_body$p.value, method = !!method, n = n_comp)) %>% + deparse() + if (quiet == FALSE) { + rlang::inform(glue("add_q: Adjusting p-values with\n`{expr_p.adjust}`")) + } + + x$table_body$q.value <- x$table_body$p.value %>% stats::p.adjust(method = method) + + # update table_styling ------------------------------------------------------- + # footnote text + footnote_text <- + add_q_method_lookup[add_q_method_lookup$method == method, ]$method_label %>% + translate_text() + x <- + modify_table_styling( + x, + columns = "q.value", + footnote = footnote_text, + fmt_fun = pvalue_fun + ) + + # adding column header + x <- modify_header(x, q.value = paste0("**", translate_text("q-value"), "**")) + + # return final object -------------------------------------------------------- + # fill in the Ns in the header table modify_stat_* columns + x <- .fill_table_header_modify_stats(x) + # adding call + x$call_list <- updated_call_list + + x +} + + +# match method input to display name +add_q_method_lookup <- + tibble::tibble( + method = stats::p.adjust.methods + ) %>% + left_join( + tibble::tribble( + ~method, ~method_label, + "holm", "Holm correction for multiple testing", + "hochberg", "Hochberg correction for multiple testing", + "hommel", "Hommel correction for multiple testing", + "bonferroni", "Bonferroni correction for multiple testing", + "BH", "Benjamini & Hochberg correction for multiple testing", + "BY", "Benjamini & Yekutieli correction for multiple testing", + "fdr", "False discovery rate correction for multiple testing", + "none", "No correction for multiple testing" + ), + by = "method" + ) %>% + mutate(method_label = coalesce(method_label, method)) From ee638350b2ae6cdec163081cffaa8b5df51dddf0 Mon Sep 17 00:00:00 2001 From: Hannah Fuchs <100386516+hfuchs5@users.noreply.github.com> Date: Thu, 2 Nov 2023 15:19:39 -0400 Subject: [PATCH 07/20] rename file back to tbl_genomic.R --- R/tbl_genomic.R | 158 ++++++++++++++++++++++++- R/tbl_genomic_wide.R | 269 ------------------------------------------- 2 files changed, 153 insertions(+), 274 deletions(-) delete mode 100644 R/tbl_genomic_wide.R diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index 19cd3a57..ed704bbf 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -1,4 +1,4 @@ -#' tbl_genomic +#' tbl_genomic_wide #' #' This function will select genes based on user inputs or on frequency counts and then #' will pass the data.frame to `gtsummary::tbl_summary()`. You can specify a `by` variable and other @@ -44,6 +44,7 @@ tbl_genomic <- function(gene_binary, freq_cutoff = deprecated(), freq_cutoff_by_gene = deprecated(), gene_subset = deprecated(), + wide_format = FALSE, ...) { # Check arguments & prep data ------------------------------------------------ @@ -90,8 +91,8 @@ tbl_genomic <- function(gene_binary, by <- .select_to_varnames({{ by }}, - data = gene_binary, - arg_name = "by", select_single = TRUE + data = gene_binary, + arg_name = "by", select_single = TRUE ) # Order Genes for Final Table --------------------------------------------- @@ -106,9 +107,156 @@ tbl_genomic <- function(gene_binary, any_of(order_genes)) %>% dplyr::select(-"sample_id") + # * Wide format by alteration type ----- + + if (wide_format) { + + # identify types of alterations in data + any_mut <- table_data %>% + dplyr::select(-ends_with(".Amp"), + -ends_with(".Del"), + -ends_with(".fus")) %>% + ncol() > 0 + + any_amp <- table_data %>% + dplyr::select(ends_with(".Amp")) %>% + ncol() > 0 + + any_del <- table_data %>% + dplyr::select(ends_with(".Del")) %>% + ncol() > 0 + + any_fus <- table_data %>% + dplyr::select(ends_with(".fus")) %>% + ncol() > 0 + + # get names of genes + names_genes <- gene_binary %>% + gnomeR::summarize_by_gene() %>% + dplyr::select(-"sample_id") %>% + colnames() %>% + as.vector() + + # create table of overall frequencies + tbl1 <- gene_binary %>% + gnomeR::summarize_by_gene() %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # create table of mutation frequencies + tbl_mut <- if (any_mut) { + + mut_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(-ends_with(".Amp"), + -ends_with(".Del"), + -ends_with(".fus")) + + mut_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(mut_df)))), + setdiff(names_genes, names(mut_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(-ends_with(".Amp"), + # -ends_with(".Del"), + # -ends_with(".fus")) %>% + # gtsummary::tbl_summary() + } + + # create table of .Amp frequencies + tbl_amp <- if (any_amp) { + + amp_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".Amp")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".Amp")) + + amp_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(amp_df)))), + setdiff(names_genes, names(amp_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".Amp")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.Amp')) %>% + # gtsummary::tbl_summary() + } + + # create table of .Del frequencies + tbl_del <- if (any_del) { + + del_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".Del")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".Del")) + + del_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(del_df)))), + setdiff(names_genes, names(del_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".Del")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.Del')) %>% + # gtsummary::tbl_summary() + } + + # create table of .fus frequencies + tbl_fus <- if (any_fus) { + + fus_df <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(sample_id, ends_with(".fus")) %>% + dplyr::rename_with( ~ stringr::str_remove(., ".fus")) + + fus_df %>% + dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(fus_df)))), + setdiff(names_genes, names(fus_df)))) %>% + dplyr::select(-"sample_id") %>% + gtsummary::tbl_summary() + + # table_data %>% + # dplyr::select(ends_with(".fus")) %>% + # dplyr::rename_with( ~ stringr::str_remove(., '.fus')) %>% + # gtsummary::tbl_summary() + } + + # create list of tables + tbls_list_pre <- + list(tbl1, + if (any_mut) {tbl_mut}, + if (any_amp) {tbl_amp}, + if (any_del) {tbl_del}, + if (any_fus) {tbl_fus}) + + # drop NULL tables + tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) + + tab_spanner_vec <- c( + "**Overall**", + if (any_mut) "**Mutations**", + if (any_amp) "**Amplifications**", + if (any_del) "**Deletions**", + if (any_fus) "**Fusions**" + ) + + # merge tables + gtsummary::tbl_merge(tbls_list, + tab_spanner = tab_spanner_vec) + + } + # Construct Final Table --------------------------------------------------- - final_table <- table_data %>% + else {final_table <- table_data %>% gtsummary::tbl_summary(by = any_of(by),...) if (!is.null(by)) { @@ -116,6 +264,6 @@ tbl_genomic <- function(gene_binary, gtsummary::add_overall() } - final_table + final_table} } diff --git a/R/tbl_genomic_wide.R b/R/tbl_genomic_wide.R deleted file mode 100644 index ed704bbf..00000000 --- a/R/tbl_genomic_wide.R +++ /dev/null @@ -1,269 +0,0 @@ -#' tbl_genomic_wide -#' -#' This function will select genes based on user inputs or on frequency counts and then -#' will pass the data.frame to `gtsummary::tbl_summary()`. You can specify a `by` variable and other -#' parameters that are accepted by `gtsummary::tbl_summary()`. Note the `by` variable must be merged on to -#' onto the data before using the `by` parameter in the function. -#' -#' @param gene_binary data.frame of genetic samples -#' @param freq_cutoff deprecated -#' @param freq_cutoff_by_gene deprecated -#' @param gene_subset deprecated -#' @param by A variable to be passed to `gtsummary::tbl_summary()`'s by parameter -#' @param ... Additional parameters that can be passed to `gtsummary::tbl_summary()`. To access the additional parameters you need to load `gtsummary`. -#' @return A `tbl_summary()` object -#' @export -#' -#' @examples -#' -#' samples <- unique(mutations$sampleId)[1:10] -#' -#' gene_binary <- create_gene_binary( -#' samples = samples, -#' mutation = gnomeR::mutations, -#' cna = gnomeR::cna, -#' mut_type = "somatic_only", snp_only = FALSE, -#' specify_panel = "no" -#' ) -#' -#' tbl1 <- tbl_genomic(gene_binary) -#' -#' # Example wth `by` variable -#' -#' gene_binary$sex <- sample( c("M", "F"), size = nrow(gene_binary), replace = TRUE) -#' -#' tbl2 <- tbl_genomic( -#' gene_binary = gene_binary, -#' by = sex -#' ) %>% -#' gtsummary::add_p() %>% -#' gtsummary::add_q() -#' -tbl_genomic <- function(gene_binary, - by = NULL, - freq_cutoff = deprecated(), - freq_cutoff_by_gene = deprecated(), - gene_subset = deprecated(), - wide_format = FALSE, - ...) { - - # Check arguments & prep data ------------------------------------------------ - - if (!inherits(gene_binary, "data.frame")) { - stop("`gene_binary=` argument must be a tibble or data frame.", call. = FALSE) - } - - .check_required_cols(gene_binary, "sample_id", "gene_binary") - - if("sample_id" %in% names(gene_binary)) { - if(any(table(gene_binary$sample_id) > 1)) { - cli::cli_abort("Duplicate `sample_ids` found in `gene_binary`. Samples IDs should be unique.") - } - - } - # * Deprecated Arguments (will remove this in the future) ---- - - if (lifecycle::is_present(freq_cutoff)) { - lifecycle::deprecate_stop(when = "1.3.0", - what = "tbl_genomic(freq_cutoff)", - details = c( - i = c("Please pre-select gene columns to summarize before passing to `tbl_genomic()`.", - "Use `gnomeR::subset_by_frequency(t)` to easily subset by a gene prevalance threshold."))) - } - - if (lifecycle::is_present(freq_cutoff_by_gene)) { - lifecycle::deprecate_stop(when = "1.3.0", - what = "tbl_genomic(freq_cutoff_by_gene)", - details = c( - i = c("Please pre-select gene columns to summarize before passing to `tbl_genomic()`.", - "Use `gnomeR::subset_by_frequency(t)` to easily subset by a gene prevalance threshold."))) - } - - if (lifecycle::is_present(gene_subset)) { - lifecycle::deprecate_stop(when = "1.3.0", - what = "tbl_genomic(gene_subset)", - details = c( - i = c("Please pre-select gene columns to summarize before passing to `tbl_genomic()`.", - "Use `gnomeR::subset_by_frequency(t)` to easily subset by a gene prevalance threshold."))) - } - - # * Other Args ----- - - by <- - .select_to_varnames({{ by }}, - data = gene_binary, - arg_name = "by", select_single = TRUE - ) - - # Order Genes for Final Table --------------------------------------------- - - order_genes <- gene_binary %>% - dplyr::select(-all_of(by)) %>% - gnomeR::subset_by_frequency(t = 0) %>% - names() - - table_data <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(-"sample_id") - - # * Wide format by alteration type ----- - - if (wide_format) { - - # identify types of alterations in data - any_mut <- table_data %>% - dplyr::select(-ends_with(".Amp"), - -ends_with(".Del"), - -ends_with(".fus")) %>% - ncol() > 0 - - any_amp <- table_data %>% - dplyr::select(ends_with(".Amp")) %>% - ncol() > 0 - - any_del <- table_data %>% - dplyr::select(ends_with(".Del")) %>% - ncol() > 0 - - any_fus <- table_data %>% - dplyr::select(ends_with(".fus")) %>% - ncol() > 0 - - # get names of genes - names_genes <- gene_binary %>% - gnomeR::summarize_by_gene() %>% - dplyr::select(-"sample_id") %>% - colnames() %>% - as.vector() - - # create table of overall frequencies - tbl1 <- gene_binary %>% - gnomeR::summarize_by_gene() %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # create table of mutation frequencies - tbl_mut <- if (any_mut) { - - mut_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(-ends_with(".Amp"), - -ends_with(".Del"), - -ends_with(".fus")) - - mut_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(mut_df)))), - setdiff(names_genes, names(mut_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(-ends_with(".Amp"), - # -ends_with(".Del"), - # -ends_with(".fus")) %>% - # gtsummary::tbl_summary() - } - - # create table of .Amp frequencies - tbl_amp <- if (any_amp) { - - amp_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".Amp")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".Amp")) - - amp_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(amp_df)))), - setdiff(names_genes, names(amp_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".Amp")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.Amp')) %>% - # gtsummary::tbl_summary() - } - - # create table of .Del frequencies - tbl_del <- if (any_del) { - - del_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".Del")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".Del")) - - del_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(del_df)))), - setdiff(names_genes, names(del_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".Del")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.Del')) %>% - # gtsummary::tbl_summary() - } - - # create table of .fus frequencies - tbl_fus <- if (any_fus) { - - fus_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".fus")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".fus")) - - fus_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(fus_df)))), - setdiff(names_genes, names(fus_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".fus")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.fus')) %>% - # gtsummary::tbl_summary() - } - - # create list of tables - tbls_list_pre <- - list(tbl1, - if (any_mut) {tbl_mut}, - if (any_amp) {tbl_amp}, - if (any_del) {tbl_del}, - if (any_fus) {tbl_fus}) - - # drop NULL tables - tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) - - tab_spanner_vec <- c( - "**Overall**", - if (any_mut) "**Mutations**", - if (any_amp) "**Amplifications**", - if (any_del) "**Deletions**", - if (any_fus) "**Fusions**" - ) - - # merge tables - gtsummary::tbl_merge(tbls_list, - tab_spanner = tab_spanner_vec) - - } - - # Construct Final Table --------------------------------------------------- - - else {final_table <- table_data %>% - gtsummary::tbl_summary(by = any_of(by),...) - - if (!is.null(by)) { - final_table <- final_table %>% - gtsummary::add_overall() - } - - final_table} - -} From 1b9ee81d7220d088f9ab12cf262d3d6490fb7edc Mon Sep 17 00:00:00 2001 From: Hannah Fuchs <100386516+hfuchs5@users.noreply.github.com> Date: Thu, 9 Nov 2023 10:20:52 -0500 Subject: [PATCH 08/20] updated documentation and create .remove_endings `tbl_genomic()` wide was using too much time on the `summarize_by_gene()` so I made `.remove_endings()`. it's still slow but here is the tictoc now: `tic() tbl1 <- tbl_genomic(gene_binary, wide = T) toc()` 76.4 sec elapsed --- NAMESPACE | 1 + NEWS.md | 3 +- R/tbl_genomic.R | 9 ++-- R/utils.R | 21 +++++++++ codemeta.json | 2 +- man/add_q_tbl_wide.Rd | 71 +++++++++++++++++++++++++++++++ man/tbl_genomic.Rd | 1 + tests/testthat/test-genomic_tbl.R | 28 ++++++++++++ 8 files changed, 130 insertions(+), 6 deletions(-) create mode 100644 man/add_q_tbl_wide.Rd diff --git a/NAMESPACE b/NAMESPACE index 680c7240..4637d023 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export("%>%") export(.mutations_gene_binary) export(.sum_alts_in_pathway) export(add_pathways) +export(add_q_tbl_wide) export(annotate_any_panel) export(annotate_specific_panel) export(create_gene_binary) diff --git a/NEWS.md b/NEWS.md index 85d7258c..3a1a6148 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,7 +13,8 @@ - A dictionary of old to new names for `rename_columns()` output is now an attribute of the returned object. Now messages can reference the original names of data columns (ex: `TumorAllele2` not `tumor_allele_2`) to make it more intuitive to users (#302). - Fixed bug that wasn't consistently filtering out germline samples - Enhanced `subset_by_frequency()` to users to select hugo_symbols if they reach a threshold in any level of a variable (ex: high risk vs low risk) (#305) - +- Added ability to create a "wide" table in `tbl_genomic()` where mutations, +fusions, and CNAs are their own sections (#197) # gnomeR 1.2.0 diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index ed704bbf..cafdd46d 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -1,4 +1,4 @@ -#' tbl_genomic_wide +#' tbl_genomic #' #' This function will select genes based on user inputs or on frequency counts and then #' will pass the data.frame to `gtsummary::tbl_summary()`. You can specify a `by` variable and other @@ -132,11 +132,12 @@ tbl_genomic <- function(gene_binary, # get names of genes names_genes <- gene_binary %>% - gnomeR::summarize_by_gene() %>% - dplyr::select(-"sample_id") %>% - colnames() %>% + names()%>% + .remove_endings()%>% as.vector() + names_genes <- names_genes[!names_genes %in% c('sample_id')] + # create table of overall frequencies tbl1 <- gene_binary %>% gnomeR::summarize_by_gene() %>% diff --git a/R/utils.R b/R/utils.R index 765d0e02..17d8df84 100644 --- a/R/utils.R +++ b/R/utils.R @@ -221,6 +221,27 @@ recode_cna <- function(alteration_vector){ return(names) } + +#' Remove descriptive endings to hugo symbol names (quicker summarize) +#' +#' @param names hugo symbols to check +#' @return a vector of hugo symbols where each entry DOES NOT have a descriptive ending +#' from the following list: ".Amp", ".Del", ".fus", ".cna", ".mut". +#' @keywords internal +#' try + +.remove_endings = function(names) { + + names[str_detect(names, ".Amp|.Del|.fus|.cna")] <- + stringr::str_remove_all( + names[str_detect(names, ".Amp|.Del|.fus|.cna")], + ".Amp|.Del|.fus|.cna") + + names <- unique(names) + + return(names) +} + #' Check if all gene_binary columns except sample_id and other_vars are numeric #' #' @param alt_data a binary data frame created from `create_gene_binary()` diff --git a/codemeta.json b/codemeta.json index 35d4ef66..7a043315 100644 --- a/codemeta.json +++ b/codemeta.json @@ -371,7 +371,7 @@ }, "SystemRequirements": null }, - "fileSize": "2582.696KB", + "fileSize": "2592.545KB", "releaseNotes": "https://github.com/MSKCC-Epi-Bio/gnomeR/blob/master/NEWS.md", "readme": "https://github.com/MSKCC-Epi-Bio/gnomeR/blob/main/README.md", "contIntegration": ["https://github.com/MSKCC-Epi-Bio/gnomeR/actions", "https://app.codecov.io/gh/MSKCC-Epi-Bio/gnomeR?branch=main"], diff --git a/man/add_q_tbl_wide.Rd b/man/add_q_tbl_wide.Rd new file mode 100644 index 00000000..b428377a --- /dev/null +++ b/man/add_q_tbl_wide.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_q_tbl_wide.R +\name{add_q_tbl_wide} +\alias{add_q_tbl_wide} +\title{Add a column of q-values to account for +multiple comparisons} +\usage{ +add_q_tbl_wide( + x, + method = "fdr", + pvalue_fun = NULL, + n_comp = NULL, + quiet = NULL +) +} +\arguments{ +\item{x}{a \code{gtsummary} object} + +\item{method}{String indicating method to be used for p-value +adjustment. Methods from +\link[stats:p.adjust]{stats::p.adjust} are accepted. Default is \code{method = "fdr"}.} +} +\description{ +Adjustments to p-values are performed with \link[stats:p.adjust]{stats::p.adjust}. +} +\section{Example Output}{ + +\if{html}{Example 1} + +\if{html}{\out{ +`r man_create_image_tag(file = "add_q_ex1.png", width = "65")` +}} + +\if{html}{Example 2} + +\if{html}{\out{ +`r man_create_image_tag(file = "add_q_ex2.png", width = "60")` +}} +} + +\examples{ +\dontshow{if (broom.helpers::.assert_package("car", pkg_search = "gtsummary", boolean = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\donttest{ +# Example 1 ---------------------------------- +add_q_ex1 <- + trial[c("trt", "age", "grade", "response")] \%>\% + tbl_summary(by = trt) \%>\% + add_p() \%>\% + add_q() + +# Example 2 ---------------------------------- +add_q_ex2 <- + trial[c("trt", "age", "grade", "response")] \%>\% + tbl_uvregression( + y = response, + method = glm, + method.args = list(family = binomial), + exponentiate = TRUE + ) \%>\% + add_global_p() \%>\% + add_q() +} +\dontshow{\}) # examplesIf} +} +\author{ +Esther Drill, Daniel D. Sjoberg +} +\concept{tbl_regression tools} +\concept{tbl_summary tools} +\concept{tbl_svysummary tools} +\concept{tbl_uvregression tools} diff --git a/man/tbl_genomic.Rd b/man/tbl_genomic.Rd index c9d93518..a22bf50b 100644 --- a/man/tbl_genomic.Rd +++ b/man/tbl_genomic.Rd @@ -10,6 +10,7 @@ tbl_genomic( freq_cutoff = deprecated(), freq_cutoff_by_gene = deprecated(), gene_subset = deprecated(), + wide_format = FALSE, ... ) } diff --git a/tests/testthat/test-genomic_tbl.R b/tests/testthat/test-genomic_tbl.R index cd1a8f26..42a1b793 100644 --- a/tests/testthat/test-genomic_tbl.R +++ b/tests/testthat/test-genomic_tbl.R @@ -165,3 +165,31 @@ test_that("check gene_subset is not used",{ }) +# Wide format ------------------------------------ + +test_that("test if output is correct if all three types of alt are present", + { + samples <- Reduce(intersect, list(gnomeR::mutations$sampleId, + gnomeR::cna$sampleId, + gnomeR::sv$sampleId))[1:10] + gene_binary <- create_gene_binary(samples = samples, + mutation = mutations, + cna = cna, + fusion = sv, + mut_type = "somatic_only") + + + + tbl <- tbl_genomic(gene_binary, wide = T) + + expect_true(map(c("Mutations", + "Amplifications", + "Deletions", + "Fusions"), + ~any(grepl(., (tbl$table_styling$header$spanning_header)))) %>% + unlist() %>% all()) + + + + + }) From 2bc408945bd00dddc9d7e422cd5540166af32cbc Mon Sep 17 00:00:00 2001 From: Hannah Fuchs <100386516+hfuchs5@users.noreply.github.com> Date: Thu, 9 Nov 2023 10:21:05 -0500 Subject: [PATCH 09/20] Create dot-remove_endings.Rd --- man/dot-remove_endings.Rd | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 man/dot-remove_endings.Rd diff --git a/man/dot-remove_endings.Rd b/man/dot-remove_endings.Rd new file mode 100644 index 00000000..7a213d56 --- /dev/null +++ b/man/dot-remove_endings.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{.remove_endings} +\alias{.remove_endings} +\title{Remove descriptive endings to hugo symbol names (quicker summarize)} +\usage{ +.remove_endings(names) +} +\arguments{ +\item{names}{hugo symbols to check} +} +\value{ +a vector of hugo symbols where each entry DOES NOT have a descriptive ending +from the following list: ".Amp", ".Del", ".fus", ".cna", ".mut". +} +\description{ +Remove descriptive endings to hugo symbol names (quicker summarize) +} +\keyword{internal} +\keyword{try} From e86f2576a561d270692c88eb1e45737142408913 Mon Sep 17 00:00:00 2001 From: Hannah Fuchs <100386516+hfuchs5@users.noreply.github.com> Date: Thu, 9 Nov 2023 10:55:43 -0500 Subject: [PATCH 10/20] update docs and test --- R/add_q_tbl_wide.R | 4 ++-- man/add_q_tbl_wide.Rd | 10 ++++++++++ tests/testthat/test-genomic_tbl.R | 8 ++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/R/add_q_tbl_wide.R b/R/add_q_tbl_wide.R index 549a6df0..97f823cb 100644 --- a/R/add_q_tbl_wide.R +++ b/R/add_q_tbl_wide.R @@ -7,8 +7,8 @@ #' @param method String indicating method to be used for p-value #' adjustment. Methods from #' [stats::p.adjust] are accepted. Default is `method = "fdr"`. -#' @inheritParams tbl_regression -#' @inheritParams add_global_p +#' @inheritParams gtsummary::tbl_regression +#' @inheritParams gtsummary::add_global_p #' @author Esther Drill, Daniel D. Sjoberg #' @family tbl_summary tools #' @family tbl_svysummary tools diff --git a/man/add_q_tbl_wide.Rd b/man/add_q_tbl_wide.Rd index b428377a..ff33ef80 100644 --- a/man/add_q_tbl_wide.Rd +++ b/man/add_q_tbl_wide.Rd @@ -19,6 +19,16 @@ add_q_tbl_wide( \item{method}{String indicating method to be used for p-value adjustment. Methods from \link[stats:p.adjust]{stats::p.adjust} are accepted. Default is \code{method = "fdr"}.} + +\item{pvalue_fun}{Function to round and format p-values. +Default is \link[gtsummary]{style_pvalue}. +The function must have a numeric vector input (the numeric, exact p-value), +and return a string that is the rounded/formatted p-value (e.g. +\code{pvalue_fun = function(x) style_pvalue(x, digits = 2)} or equivalently, +\code{purrr::partial(style_pvalue, digits = 2)}).} + +\item{quiet}{Logical indicating whether to print messages in console. Default is +\code{FALSE}} } \description{ Adjustments to p-values are performed with \link[stats:p.adjust]{stats::p.adjust}. diff --git a/tests/testthat/test-genomic_tbl.R b/tests/testthat/test-genomic_tbl.R index 42a1b793..74bbe9f6 100644 --- a/tests/testthat/test-genomic_tbl.R +++ b/tests/testthat/test-genomic_tbl.R @@ -180,16 +180,16 @@ test_that("test if output is correct if all three types of alt are present", - tbl <- tbl_genomic(gene_binary, wide = T) + tbl1 <- tbl_genomic(gene_binary, wide = T) - expect_true(map(c("Mutations", + expect_true(purrr::map(c("Mutations", "Amplifications", "Deletions", "Fusions"), - ~any(grepl(., (tbl$table_styling$header$spanning_header)))) %>% + ~any(grepl(., (tbl1$table_styling$header$spanning_header)))) %>% unlist() %>% all()) + }) - }) From 45c0028deaa756ad7432b7c7fd77bd61d4a3be58 Mon Sep 17 00:00:00 2001 From: Hannah Fuchs <100386516+hfuchs5@users.noreply.github.com> Date: Thu, 16 Nov 2023 15:42:36 -0500 Subject: [PATCH 11/20] streamline process with purrr --- R/tbl_genomic.R | 192 +++++++++++++++++++----------------------------- R/utils.R | 9 +-- 2 files changed, 79 insertions(+), 122 deletions(-) diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index cafdd46d..97fa42ab 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -107,146 +107,106 @@ tbl_genomic <- function(gene_binary, any_of(order_genes)) %>% dplyr::select(-"sample_id") + # * Wide format by alteration type ----- if (wide_format) { - # identify types of alterations in data - any_mut <- table_data %>% - dplyr::select(-ends_with(".Amp"), - -ends_with(".Del"), - -ends_with(".fus")) %>% - ncol() > 0 - - any_amp <- table_data %>% - dplyr::select(ends_with(".Amp")) %>% - ncol() > 0 - - any_del <- table_data %>% - dplyr::select(ends_with(".Del")) %>% - ncol() > 0 - - any_fus <- table_data %>% - dplyr::select(ends_with(".fus")) %>% - ncol() > 0 - - # get names of genes - names_genes <- gene_binary %>% - names()%>% + genes <- order_genes[!order_genes %in% by]%>% .remove_endings()%>% - as.vector() + unique() + + # identify types of alterations in data + + gene_endings <- c(".mut", ".Amp", ".Del", ".fus") + + # add .mut to endings to make easier - names_genes <- names_genes[!names_genes %in% c('sample_id')] + gb_alt_all <- order_genes %>% + .paste_endings() + + any_alt_types <- purrr::map(gene_endings, + ~any(grepl(.x, gb_alt_all))) + + names(any_alt_types) <- gene_endings # create table of overall frequencies + tbl1 <- gene_binary %>% - gnomeR::summarize_by_gene() %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() + pivot_longer(!sample_id)%>% + mutate(name = .remove_endings(name))%>% + group_by(sample_id, name)%>% + slice(which.max(value))%>% + ungroup()%>% + select(-"sample_id") + + + tbl2 <- tbl1 %>% + split(tbl1$name) + + tbl_overall <- purrr::map(1:length(names(tbl2)), function(x){ + tbl2[[x]] %>% + select(-"name")%>% + setNames(names(tbl2)[[x]]) + })%>% + do.call(cbind, .)%>% + gtsummary::tbl_summary(by = any_of(by)) # create table of mutation frequencies - tbl_mut <- if (any_mut) { - - mut_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(-ends_with(".Amp"), - -ends_with(".Del"), - -ends_with(".fus")) - - mut_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(mut_df)))), - setdiff(names_genes, names(mut_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(-ends_with(".Amp"), - # -ends_with(".Del"), - # -ends_with(".fus")) %>% - # gtsummary::tbl_summary() - } - # create table of .Amp frequencies - tbl_amp <- if (any_amp) { - - amp_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".Amp")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".Amp")) - - amp_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(amp_df)))), - setdiff(names_genes, names(amp_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".Amp")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.Amp')) %>% - # gtsummary::tbl_summary() - } + tbls_alt_types <- purrr::map2( + any_alt_types, + gene_endings, + function(yes_exist, gene) { - # create table of .Del frequencies - tbl_del <- if (any_del) { - - del_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".Del")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".Del")) - - del_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(del_df)))), - setdiff(names_genes, names(del_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".Del")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.Del')) %>% - # gtsummary::tbl_summary() - } + if(yes_exist[[1]]){ + data <- table_data + + # need to figure out how to add by variables HERE + names(data) <- c("sample_id", any_of(by), + .paste_endings(names(data)[2:length(names(data))])) + + data <- data %>% + select(any_of(by), + ends_with(gene)) + + names(data) <- .remove_endings(names(data)) - # create table of .fus frequencies - tbl_fus <- if (any_fus) { - - fus_df <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(sample_id, ends_with(".fus")) %>% - dplyr::rename_with( ~ stringr::str_remove(., ".fus")) - - fus_df %>% - dplyr::mutate(!!!setNames(rep(0, length(setdiff(names_genes, names(fus_df)))), - setdiff(names_genes, names(fus_df)))) %>% - dplyr::select(-"sample_id") %>% - gtsummary::tbl_summary() - - # table_data %>% - # dplyr::select(ends_with(".fus")) %>% - # dplyr::rename_with( ~ stringr::str_remove(., '.fus')) %>% - # gtsummary::tbl_summary() + genes_not_obs <- setdiff(genes, names(data))[setdiff(genes, names(data)) %in% by] + + data <- data %>% + # need to fill in 0 for all unobserved alterations in hugo_symbol + dplyr::mutate(!!!setNames(rep(0, length(genes_not_obs)), + genes_not_obs)) + + data %>% + gtsummary::tbl_summary(by = any_of(by)) + } else { + NULL } + } +) + + # create list of tables tbls_list_pre <- - list(tbl1, - if (any_mut) {tbl_mut}, - if (any_amp) {tbl_amp}, - if (any_del) {tbl_del}, - if (any_fus) {tbl_fus}) + append(list(tbl_overall), + purrr::map2(any_alt_types, + tbls_alt_types, function(x, y){ + if (x) {y} + })) # drop NULL tables tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) + tab_spanner_vec <- c( "**Overall**", - if (any_mut) "**Mutations**", - if (any_amp) "**Amplifications**", - if (any_del) "**Deletions**", - if (any_fus) "**Fusions**" + purrr::map2(any_alt_types, c("**Mutations**", "**Amplifications**", + "**Deletions**", "**Fusions**"), + ~if(.x){.y}) %>% + unlist() ) # merge tables diff --git a/R/utils.R b/R/utils.R index 17d8df84..6ee6eb83 100644 --- a/R/utils.R +++ b/R/utils.R @@ -232,12 +232,9 @@ recode_cna <- function(alteration_vector){ .remove_endings = function(names) { - names[str_detect(names, ".Amp|.Del|.fus|.cna")] <- - stringr::str_remove_all( - names[str_detect(names, ".Amp|.Del|.fus|.cna")], - ".Amp|.Del|.fus|.cna") - - names <- unique(names) + stringr::str_remove_all( + names, + ".Amp|.Del|.fus|.cna|.mut") return(names) } From adf7828005cf3a0aa92d17d6684315071567b0bf Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 09:37:59 -0400 Subject: [PATCH 12/20] Split out tbl_wide code to its own helper function --- R/tbl_genomic.R | 209 ++++++++++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 88 deletions(-) diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index 97fa42ab..0021f2bc 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -1,16 +1,23 @@ #' tbl_genomic #' -#' This function will select genes based on user inputs or on frequency counts and then -#' will pass the data.frame to `gtsummary::tbl_summary()`. You can specify a `by` variable and other -#' parameters that are accepted by `gtsummary::tbl_summary()`. Note the `by` variable must be merged on to -#' onto the data before using the `by` parameter in the function. +#' This function will select genes based on user inputs or on frequency counts +#' and then will pass the data.frame to `gtsummary::tbl_summary()`. You can +#' specify a `by` variable and other parameters that are accepted by +#' `gtsummary::tbl_summary()`. Note the `by` variable must be merged on to onto +#' the data before using the `by` parameter in the function. #' #' @param gene_binary data.frame of genetic samples #' @param freq_cutoff deprecated #' @param freq_cutoff_by_gene deprecated #' @param gene_subset deprecated -#' @param by A variable to be passed to `gtsummary::tbl_summary()`'s by parameter -#' @param ... Additional parameters that can be passed to `gtsummary::tbl_summary()`. To access the additional parameters you need to load `gtsummary`. +#' @param wide_format Specifies whether to stratify tbl_genomic by alteration +#' type such that the resulting table will include one column per alteration +#' type and an overall summary column. Default is FALSE. +#' @param by A variable to be passed to `gtsummary::tbl_summary()`'s by +#' parameter +#' @param ... Additional parameters that can be passed to +#' `gtsummary::tbl_summary()`. To access the additional parameters you need to +#' load `gtsummary`. #' @return A `tbl_summary()` object #' @export #' @@ -112,119 +119,145 @@ tbl_genomic <- function(gene_binary, if (wide_format) { - genes <- order_genes[!order_genes %in% by]%>% - .remove_endings()%>% - unique() + final_table <- .create_wide_format( + gene_binary_df = gene_binary, + order_genes_df = order_genes, + table_data_df = table_data + ) - # identify types of alterations in data + } - gene_endings <- c(".mut", ".Amp", ".Del", ".fus") + # Construct Final Table --------------------------------------------------- - # add .mut to endings to make easier + else {final_table <- table_data %>% + gtsummary::tbl_summary(by = any_of(by),...) - gb_alt_all <- order_genes %>% - .paste_endings() + if (!is.null(by)) { + final_table <- final_table %>% + gtsummary::add_overall() + } - any_alt_types <- purrr::map(gene_endings, - ~any(grepl(.x, gb_alt_all))) + final_table} - names(any_alt_types) <- gene_endings +} - # create table of overall frequencies +#' * Wide format by alteration type ----- +#' +#' This helper function will stratify tbl_genomic by alteration type such that +#' the resulting table will include one column per alteration type and an +#' overall summary column. +#' +#' @param gene_binary_df +#' @param order_genes_df +#' @param table_data_df +#' +#' @return wide format 'tbl_genomic' by alteration type +#' @keywords internal +#' @export +#' +.create_wide_format <- function(gene_binary_df, order_genes_df, table_data_df){ - tbl1 <- gene_binary %>% - pivot_longer(!sample_id)%>% - mutate(name = .remove_endings(name))%>% - group_by(sample_id, name)%>% - slice(which.max(value))%>% - ungroup()%>% - select(-"sample_id") + genes <- order_genes_df[!order_genes_df %in% by]%>% + .remove_endings()%>% + unique() + # identify types of alterations in data - tbl2 <- tbl1 %>% - split(tbl1$name) + gene_endings <- c(".mut", ".Amp", ".Del", ".fus") - tbl_overall <- purrr::map(1:length(names(tbl2)), function(x){ - tbl2[[x]] %>% - select(-"name")%>% - setNames(names(tbl2)[[x]]) - })%>% - do.call(cbind, .)%>% - gtsummary::tbl_summary(by = any_of(by)) + # add .mut to endings to make easier - # create table of mutation frequencies + gb_alt_all <- order_genes_df %>% + .paste_endings() - tbls_alt_types <- purrr::map2( - any_alt_types, - gene_endings, - function(yes_exist, gene) { + any_alt_types <- purrr::map(gene_endings, + ~any(grepl(.x, gb_alt_all))) - if(yes_exist[[1]]){ - data <- table_data + names(any_alt_types) <- gene_endings - # need to figure out how to add by variables HERE - names(data) <- c("sample_id", any_of(by), - .paste_endings(names(data)[2:length(names(data))])) + # create table of overall frequencies - data <- data %>% - select(any_of(by), - ends_with(gene)) + tbl1 <- gene_binary_df %>% + tidyr::pivot_longer(!sample_id)%>% + dplyr::mutate(name = .remove_endings(name))%>% + dplyr::group_by(sample_id, name)%>% + dplyr::slice(which.max(value))%>% + dplyr::ungroup()%>% + dplyr::select(-"sample_id") - names(data) <- .remove_endings(names(data)) - genes_not_obs <- setdiff(genes, names(data))[setdiff(genes, names(data)) %in% by] + tbl2 <- tbl1 %>% + split(tbl1$name) - data <- data %>% - # need to fill in 0 for all unobserved alterations in hugo_symbol - dplyr::mutate(!!!setNames(rep(0, length(genes_not_obs)), - genes_not_obs)) + tbl_overall <- purrr::map(1:length(names(tbl2)), function(x){ + tbl2[[x]] %>% + dplyr::select(-"name")%>% + stats::setNames(names(tbl2)[[x]]) + })%>% + do.call(cbind, .)%>% + gtsummary::tbl_summary(by = any_of(by)) - data %>% - gtsummary::tbl_summary(by = any_of(by)) - } else { - NULL - } + # create table of mutation frequencies - } -) + tbls_alt_types <- purrr::map2( + any_alt_types, + gene_endings, + function(yes_exist, gene) { + if(yes_exist[[1]]){ + data <- table_data - # create list of tables - tbls_list_pre <- - append(list(tbl_overall), - purrr::map2(any_alt_types, - tbls_alt_types, function(x, y){ - if (x) {y} - })) + # need to figure out how to add by variables HERE + names(data) <- c("sample_id", any_of(by), + .paste_endings(names(data)[2:length(names(data))])) - # drop NULL tables - tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) + data <- data %>% + dplyr::select(any_of(by), + ends_with(gene)) + names(data) <- .remove_endings(names(data)) - tab_spanner_vec <- c( - "**Overall**", - purrr::map2(any_alt_types, c("**Mutations**", "**Amplifications**", - "**Deletions**", "**Fusions**"), - ~if(.x){.y}) %>% - unlist() - ) + genes_not_obs <- dplyr::setdiff(genes, names(data))[dplyr::setdiff(genes, names(data)) %in% by] - # merge tables - gtsummary::tbl_merge(tbls_list, - tab_spanner = tab_spanner_vec) + data <- data %>% + # need to fill in 0 for all unobserved alterations in hugo_symbol + dplyr::mutate(!!!stats::setNames(rep(0, length(genes_not_obs)), + genes_not_obs)) - } + data %>% + gtsummary::tbl_summary(by = any_of(by)) + } else { + NULL + } - # Construct Final Table --------------------------------------------------- + } + ) - else {final_table <- table_data %>% - gtsummary::tbl_summary(by = any_of(by),...) + # create list of tables + tbls_list_wide_pre <- + append(list(tbl_overall), + purrr::map2(any_alt_types, + tbls_alt_types, function(x, y){ + if (x) {y} + })) - if (!is.null(by)) { - final_table <- final_table %>% - gtsummary::add_overall() - } + # drop NULL tables + tbls_list_wide <- tbls_list_wide_pre %>% purrr::keep( ~ !is.null(.) ) - final_table} + + tab_spanner_vec <- c( + "**Overall**", + purrr::map2(any_alt_types, c("**Mutations**", "**Amplifications**", + "**Deletions**", "**Fusions**"), + ~if(.x){.y}) %>% + unlist() + ) + + # merge tables + final_table_wide <- gtsummary::tbl_merge(tbls_list_wide, + tab_spanner = tab_spanner_vec) + + return(final_table_wide) } + From 3f74f08833636f2a110aff6f78900bc3a414eafa Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 09:38:12 -0400 Subject: [PATCH 13/20] Create dot-create_wide_format.Rd --- man/dot-create_wide_format.Rd | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 man/dot-create_wide_format.Rd diff --git a/man/dot-create_wide_format.Rd b/man/dot-create_wide_format.Rd new file mode 100644 index 00000000..6a61454a --- /dev/null +++ b/man/dot-create_wide_format.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tbl_genomic.R +\name{.create_wide_format} +\alias{.create_wide_format} +\title{\itemize{ +\item Wide format by alteration type ----- +}} +\usage{ +.create_wide_format(gene_binary_df, order_genes_df, table_data_df) +} +\arguments{ +\item{table_data_df}{} +} +\value{ +wide format 'tbl_genomic' by alteration type +} +\description{ +This helper function will stratify tbl_genomic by alteration type such that +the resulting table will include one column per alteration type and an +overall summary column. +} +\keyword{internal} From 940dd087e14b0f4e4f48783be3e30fbe9c64b3f9 Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 09:39:20 -0400 Subject: [PATCH 14/20] Update NAMESPACE --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 6438ba5d..982d2927 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(.create_wide_format) export(.mutations_gene_binary) export(.sum_alts_in_pathway) export(add_pathways) From 5b05cd7368fa9db80fd72d9b1c285ae6c239759f Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 10:14:38 -0400 Subject: [PATCH 15/20] Updated roxygen comment for wide_format default --- R/tbl_genomic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index 0021f2bc..ae98415d 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -12,7 +12,7 @@ #' @param gene_subset deprecated #' @param wide_format Specifies whether to stratify tbl_genomic by alteration #' type such that the resulting table will include one column per alteration -#' type and an overall summary column. Default is FALSE. +#' type and an overall summary column. Default is `wide_format = FALSE`. #' @param by A variable to be passed to `gtsummary::tbl_summary()`'s by #' parameter #' @param ... Additional parameters that can be passed to From 08d18812c0c821b998f724863e258a18eeb0ecce Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 10:24:43 -0400 Subject: [PATCH 16/20] Updated roxygen for create_wide_format helper function --- R/tbl_genomic.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index ae98415d..ddba183c 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -147,9 +147,9 @@ tbl_genomic <- function(gene_binary, #' the resulting table will include one column per alteration type and an #' overall summary column. #' -#' @param gene_binary_df -#' @param order_genes_df -#' @param table_data_df +#' @param gene_binary_df data.frame of genetic samples +#' @param order_genes_df data.frame +#' @param table_data_df data.frame #' #' @return wide format 'tbl_genomic' by alteration type #' @keywords internal From 4bc812cbbda1de1fa1fb849cad4fe747755efb04 Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 10:24:49 -0400 Subject: [PATCH 17/20] Update tbl_genomic.Rd --- man/tbl_genomic.Rd | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/man/tbl_genomic.Rd b/man/tbl_genomic.Rd index a22bf50b..93dc89ee 100644 --- a/man/tbl_genomic.Rd +++ b/man/tbl_genomic.Rd @@ -17,7 +17,8 @@ tbl_genomic( \arguments{ \item{gene_binary}{data.frame of genetic samples} -\item{by}{A variable to be passed to \code{gtsummary::tbl_summary()}'s by parameter} +\item{by}{A variable to be passed to \code{gtsummary::tbl_summary()}'s by +parameter} \item{freq_cutoff}{deprecated} @@ -25,16 +26,23 @@ tbl_genomic( \item{gene_subset}{deprecated} -\item{...}{Additional parameters that can be passed to \code{gtsummary::tbl_summary()}. To access the additional parameters you need to load \code{gtsummary}.} +\item{wide_format}{Specifies whether to stratify tbl_genomic by alteration +type such that the resulting table will include one column per alteration +type and an overall summary column. Default is \code{wide_format = FALSE}.} + +\item{...}{Additional parameters that can be passed to +\code{gtsummary::tbl_summary()}. To access the additional parameters you need to +load \code{gtsummary}.} } \value{ A \code{tbl_summary()} object } \description{ -This function will select genes based on user inputs or on frequency counts and then -will pass the data.frame to \code{gtsummary::tbl_summary()}. You can specify a \code{by} variable and other -parameters that are accepted by \code{gtsummary::tbl_summary()}. Note the \code{by} variable must be merged on to -onto the data before using the \code{by} parameter in the function. +This function will select genes based on user inputs or on frequency counts +and then will pass the data.frame to \code{gtsummary::tbl_summary()}. You can +specify a \code{by} variable and other parameters that are accepted by +\code{gtsummary::tbl_summary()}. Note the \code{by} variable must be merged on to onto +the data before using the \code{by} parameter in the function. } \examples{ From 7e27afe6f7a6b5cfbdb8594ebf8a6bc636c21e7a Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 10:24:56 -0400 Subject: [PATCH 18/20] Update dot-create_wide_format.Rd --- man/dot-create_wide_format.Rd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/man/dot-create_wide_format.Rd b/man/dot-create_wide_format.Rd index 6a61454a..2dd2b122 100644 --- a/man/dot-create_wide_format.Rd +++ b/man/dot-create_wide_format.Rd @@ -9,7 +9,11 @@ .create_wide_format(gene_binary_df, order_genes_df, table_data_df) } \arguments{ -\item{table_data_df}{} +\item{gene_binary_df}{data.frame of genetic samples} + +\item{order_genes_df}{data.frame} + +\item{table_data_df}{data.frame} } \value{ wide format 'tbl_genomic' by alteration type From c869952809ddf3b36ae61bbeeb7411b9252d31c3 Mon Sep 17 00:00:00 2001 From: BrownS7 Date: Thu, 11 Apr 2024 10:34:12 -0400 Subject: [PATCH 19/20] Updated documentation for add_q_tbl_wide --- R/add_q_tbl_wide.R | 12 ------------ man/add_q_tbl_wide.Rd | 15 --------------- 2 files changed, 27 deletions(-) diff --git a/R/add_q_tbl_wide.R b/R/add_q_tbl_wide.R index 97f823cb..623eae59 100644 --- a/R/add_q_tbl_wide.R +++ b/R/add_q_tbl_wide.R @@ -36,18 +36,6 @@ #' add_global_p() %>% #' add_q() #' } -#' @section Example Output: -#' \if{html}{Example 1} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_q_ex1.png", width = "65")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_q_ex2.png", width = "60")` -#' }} add_q_tbl_wide <- function(x, method = "fdr", pvalue_fun = NULL, n_comp = NULL, quiet = NULL) { updated_call_list <- c(x$call_list, list(add_q = match.call())) diff --git a/man/add_q_tbl_wide.Rd b/man/add_q_tbl_wide.Rd index ff33ef80..578311da 100644 --- a/man/add_q_tbl_wide.Rd +++ b/man/add_q_tbl_wide.Rd @@ -33,21 +33,6 @@ and return a string that is the rounded/formatted p-value (e.g. \description{ Adjustments to p-values are performed with \link[stats:p.adjust]{stats::p.adjust}. } -\section{Example Output}{ - -\if{html}{Example 1} - -\if{html}{\out{ -`r man_create_image_tag(file = "add_q_ex1.png", width = "65")` -}} - -\if{html}{Example 2} - -\if{html}{\out{ -`r man_create_image_tag(file = "add_q_ex2.png", width = "60")` -}} -} - \examples{ \dontshow{if (broom.helpers::.assert_package("car", pkg_search = "gtsummary", boolean = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ From 81002f939633f586428bdae8a3fdced0da344723 Mon Sep 17 00:00:00 2001 From: karissawhiting Date: Thu, 11 Apr 2024 10:40:32 -0400 Subject: [PATCH 20/20] update q functions and clean up tbl_genomic wide --- R/add_q.R | 107 ++++++++++++++++++++++++ R/add_q_tbl_wide.R | 13 --- R/tbl_genomic.R | 199 +++++++++++++++++++++++++++------------------ R/utils.R | 4 +- 4 files changed, 231 insertions(+), 92 deletions(-) create mode 100644 R/add_q.R diff --git a/R/add_q.R b/R/add_q.R new file mode 100644 index 00000000..30e61d16 --- /dev/null +++ b/R/add_q.R @@ -0,0 +1,107 @@ +#' Adds p-values to gtsummary table +#' +#' @param x Object created from a gtsummary function +#' @param ... Additional arguments passed to other methods. +#' @keywords internal +#' @author Daniel D. Sjoberg +#' @seealso [add_p.tbl_summary], [add_p.tbl_cross], [add_p.tbl_svysummary], [add_p.tbl_survfit], [add_p.tbl_continuous] +#' @export +add_q <- function(x, ...) { + UseMethod("add_q") +} + +add_q.default <- gtsummary::add_q + +add_q.tbl_genomic_wide <- function(x, + method = "fdr", + pvalue_fun = NULL, + quiet = NULL) { + + updated_call_list <- c(x$call_list, list(add_q = match.call())) + + # setting defaults ----------------------------------------------------------- + quiet <- quiet %||% get_theme_element("pkgwide-lgl:quiet") %||% FALSE + + # checking inputs ------------------------------------------------------------ + # checking class of x + .assert_class(x, "gtsummary") + + # checking input table has a p.value column + if (!"p.value" %in% names(x$table_body)) { + stop("There is no p-value column. `x$table_body` must have a column called 'p.value'", + call. = FALSE + ) + } + + # setting defaults from gtsummary theme -------------------------------------- + pvalue_fun <- + pvalue_fun %||% + # defaults from theme + get_theme_element("add_q-arg:pvalue_fun") %||% + get_theme_element("pkgwide-fn:pvalue_fun") %||% + # default from p-value formatting function + (filter(x$table_styling$fmt_fun, .data$column == "p.value") %>% pull("fmt_fun") %>% pluck(1)) %>% + gts_mapper("add_q(pvalue_fun=)") + + # checking pvalue_fun are functions + if (!is.function(pvalue_fun)) { + stop("Input 'pvalue_fun' must be a function.") + } + + # perform multiple comparisons ----------------------------------------------- + expr_p.adjust <- + rlang::expr(stats::p.adjust(x$table_body$p.value, method = !!method)) %>% + deparse() + if (quiet == FALSE) { + rlang::inform(glue("add_q: Adjusting p-values with\n`{expr_p.adjust}`")) + } + + x$table_body$q.value <- x$table_body$p.value %>% + stats::p.adjust(method = method) + + # update table_styling ------------------------------------------------------- + # footnote text + footnote_text <- + add_q_method_lookup[add_q_method_lookup$method == method, ]$method_label %>% + translate_text() + x <- + modify_table_styling( + x, + columns = "q.value", + footnote = footnote_text, + fmt_fun = pvalue_fun + ) + + # adding column header + x <- modify_header(x, q.value = paste0("**", translate_text("q-value"), "**")) + + # return final object -------------------------------------------------------- + # fill in the Ns in the header table modify_stat_* columns + x <- .fill_table_header_modify_stats(x) + # adding call + x$call_list <- updated_call_list + + x +} + + +# match method input to display name +add_q_method_lookup <- + tibble::tibble( + method = stats::p.adjust.methods + ) %>% + left_join( + tibble::tribble( + ~method, ~method_label, + "holm", "Holm correction for multiple testing", + "hochberg", "Hochberg correction for multiple testing", + "hommel", "Hommel correction for multiple testing", + "bonferroni", "Bonferroni correction for multiple testing", + "BH", "Benjamini & Hochberg correction for multiple testing", + "BY", "Benjamini & Yekutieli correction for multiple testing", + "fdr", "False discovery rate correction for multiple testing", + "none", "No correction for multiple testing" + ), + by = "method" + ) %>% + mutate(method_label = coalesce(method_label, method)) diff --git a/R/add_q_tbl_wide.R b/R/add_q_tbl_wide.R index 97f823cb..788c34c9 100644 --- a/R/add_q_tbl_wide.R +++ b/R/add_q_tbl_wide.R @@ -35,20 +35,7 @@ #' ) %>% #' add_global_p() %>% #' add_q() -#' } -#' @section Example Output: -#' \if{html}{Example 1} #' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_q_ex1.png", width = "65")` -#' }} -#' -#' \if{html}{Example 2} -#' -#' \if{html}{\out{ -#' `r man_create_image_tag(file = "add_q_ex2.png", width = "60")` -#' }} - add_q_tbl_wide <- function(x, method = "fdr", pvalue_fun = NULL, n_comp = NULL, quiet = NULL) { updated_call_list <- c(x$call_list, list(add_q = match.call())) # setting defaults ----------------------------------------------------------- diff --git a/R/tbl_genomic.R b/R/tbl_genomic.R index 97fa42ab..372a85b9 100644 --- a/R/tbl_genomic.R +++ b/R/tbl_genomic.R @@ -95,116 +95,117 @@ tbl_genomic <- function(gene_binary, arg_name = "by", select_single = TRUE ) - # Order Genes for Final Table --------------------------------------------- + # * Wide ------------ - order_genes <- gene_binary %>% - dplyr::select(-all_of(by)) %>% - gnomeR::subset_by_frequency(t = 0) %>% - names() + # create data set with only alterations + alts_only <- gene_binary %>% + dplyr::select(-all_of(by), -sample_id) - table_data <- gene_binary %>% - dplyr::select(all_of(by), - any_of(order_genes)) %>% - dplyr::select(-"sample_id") + # create data set with all endings (including .mut if needed) + alts_only_with_endings <- alts_only + names(alts_only_with_endings) <- .paste_endings(names(alts_only)) + # identify types of alterations in data + possible_alt_endings <- c(".mut", ".Amp", ".Del", ".fus") - # * Wide format by alteration type ----- + which_alt_types <- purrr::map_lgl(possible_alt_endings, + ~any(grepl(.x, names(alts_only_with_endings)))) - if (wide_format) { + names(which_alt_types) <- possible_alt_endings + which_alt_types_no_mut <- which_alt_types[names(which_alt_types) != ".mut"] - genes <- order_genes[!order_genes %in% by]%>% - .remove_endings()%>% - unique() + more_than_1_type <- (sum(which_alt_types_no_mut) > 0) - # identify types of alterations in data + if(wide_format == TRUE & !more_than_1_type) { - gene_endings <- c(".mut", ".Amp", ".Del", ".fus") + wide_format = FALSE + cli::cli_alert_warning("No .Amp, .Del or .fus found in your data. Ignoring `wide_format = TRUE` and returning a table of one column of summarized alterations (`wide_format = FALSE`).") - # add .mut to endings to make easier + } - gb_alt_all <- order_genes %>% - .paste_endings() +# Long Table ----------------------------------------------------------- - any_alt_types <- purrr::map(gene_endings, - ~any(grepl(.x, gb_alt_all))) - names(any_alt_types) <- gene_endings + if(wide_format == FALSE) { - # create table of overall frequencies + # * Order Genes for Final Table --------------------------------------------- - tbl1 <- gene_binary %>% - pivot_longer(!sample_id)%>% - mutate(name = .remove_endings(name))%>% - group_by(sample_id, name)%>% - slice(which.max(value))%>% - ungroup()%>% - select(-"sample_id") + order_genes <- gene_binary %>% + dplyr::select(-all_of(by)) %>% + gnomeR::subset_by_frequency(t = 0) %>% + names() + table_data <- gene_binary %>% + dplyr::select(all_of(by), + any_of(order_genes)) %>% + dplyr::select(-"sample_id") - tbl2 <- tbl1 %>% - split(tbl1$name) + # * Construct Final Table --------------------------------------------------- - tbl_overall <- purrr::map(1:length(names(tbl2)), function(x){ - tbl2[[x]] %>% - select(-"name")%>% - setNames(names(tbl2)[[x]]) - })%>% - do.call(cbind, .)%>% - gtsummary::tbl_summary(by = any_of(by)) + final_table <- table_data %>% + gtsummary::tbl_summary(by = any_of(by),...) + + if (!is.null(by)) { + final_table <- final_table %>% + gtsummary::add_overall() + } - # create table of mutation frequencies + final_table + } - tbls_alt_types <- purrr::map2( - any_alt_types, - gene_endings, - function(yes_exist, gene) { + # Wide Table ----------------------------------------------------------- - if(yes_exist[[1]]){ - data <- table_data + if (wide_format) { - # need to figure out how to add by variables HERE - names(data) <- c("sample_id", any_of(by), - .paste_endings(names(data)[2:length(names(data))])) + # * Get Gene Level Totals and Order ----------- + gene_level_binary <- summarize_by_gene(gene_binary, other_vars = any_of(by)) - data <- data %>% - select(any_of(by), - ends_with(gene)) + gene_level_alts_only <- gene_level_binary %>% + select(-sample_id, -any_of(by)) - names(data) <- .remove_endings(names(data)) + order_genes <- names(gene_level_alts_only)[order(apply(gene_level_alts_only, 2, sum), + decreasing = TRUE)] - genes_not_obs <- setdiff(genes, names(data))[setdiff(genes, names(data)) %in% by] + # * Create overall n table -------------- + gene_total_table <- gene_level_alts_only %>% + tbl_summary() - data <- data %>% - # need to fill in 0 for all unobserved alterations in hugo_symbol - dplyr::mutate(!!!setNames(rep(0, length(genes_not_obs)), - genes_not_obs)) - data %>% - gtsummary::tbl_summary(by = any_of(by)) - } else { - NULL - } + # * Create Each Alt Type Table ------------------ - } -) + gene_binary_with_endings <- gene_binary %>% + select(sample_id, any_of(by)) %>% + bind_rows(alts_only_with_endings) + + # # identify types of alterations in data + # possible_alt_endings <- c(".mut", ".Amp", ".Del", ".fus") + # + # # add .mut to endings to make easier + # order_alts_explicit <- order_alts %>% + # .paste_endings() +# - # create list of tables - tbls_list_pre <- - append(list(tbl_overall), - purrr::map2(any_alt_types, - tbls_alt_types, function(x, y){ - if (x) {y} - })) + # create each table + which_alt_types[which_alt_types == TRUE] - # drop NULL tables - tbls_list <- tbls_list_pre %>% purrr::keep( ~ !is.null(.) ) + tbls_list <- purrr::map(c(".mut", ".Amp", ".Del", ".fus"), + ~make_alt_table(alt_type = .x, + data_with_endings = gene_binary_with_endings, + order_genes = order_genes)) + names(tbls_list) <- c(".mut", ".Amp", ".Del", ".fus") + tbls_list <- tbls_list %>% purrr::keep( ~ !is.null(.) ) + tbls_list$overall <- gene_total_table + + + + # Construct Final Table --------------------------------------------------- tab_spanner_vec <- c( "**Overall**", - purrr::map2(any_alt_types, c("**Mutations**", "**Amplifications**", - "**Deletions**", "**Fusions**"), + purrr::map2(which_alt_types, + c("**Mutations**", "**Amplifications**", "**Deletions**", "**Fusions**"), ~if(.x){.y}) %>% unlist() ) @@ -215,7 +216,6 @@ tbl_genomic <- function(gene_binary, } - # Construct Final Table --------------------------------------------------- else {final_table <- table_data %>% gtsummary::tbl_summary(by = any_of(by),...) @@ -227,4 +227,49 @@ tbl_genomic <- function(gene_binary, final_table} +# Assign Class ------------------------------------------------------------ + + + if (wide_format) { + class(final_table) <- c("tbl_genomic_wide", class(final_table)) + } else { + class(final_table) <- c("tbl_genomic", class(final_table)) + + } + +} + + + + + +# Individual Alt Type Tables ---------------------------------------------- + +make_alt_table <- function(alt_type = ".mut", + data_with_endings = gene_binary_with_endings, + order_genes) { + + any_alts <- gene_binary_with_endings %>% + select(ends_with(alt_type)) + + + if(ncol(any_alts) > 0) { + alt_type_df <- gene_binary_with_endings %>% + select(sample_id, any_of(by), ends_with(alt_type)) + + names(alt_type_df) <- .remove_endings(names(alt_type_df)) + + # genes_not_obs <- setdiff(genes, names(data))[setdiff(genes, names(data)) %in% by] + # + # data <- data %>% + # # need to fill in 0 for all unobserved alterations in hugo_symbol + # dplyr::mutate(!!!setNames(rep(0, length(genes_not_obs)), + # genes_not_obs)) + + alt_type_df %>% + select(any_of(order_genes)) %>% + gtsummary::tbl_summary(by = any_of(by)) + } + + } diff --git a/R/utils.R b/R/utils.R index c42aae29..4fcbe908 100644 --- a/R/utils.R +++ b/R/utils.R @@ -254,11 +254,11 @@ extract_patient_id <- function(sample_id) { .remove_endings = function(names) { - stringr::str_remove_all( + new_names <- stringr::str_remove_all( names, ".Amp|.Del|.fus|.cna|.mut") - return(names) + return(new_names) } #' Check if all gene_binary columns except sample_id and other_vars are numeric