Skip to content
Merged

V1.0 #26

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@
^\.github$
^doc$
^Meta$
vignettes/quick_start_guide.Rmd
^\.claude$
^\.vscode$
^Dockerfile$
^_pkgdown\.yml$
^docs$
11 changes: 6 additions & 5 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: macos-latest, r: '4.4'}
- {os: windows-latest, r: '4.4'}
- {os: ubuntu-latest, r: '4.4'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -39,11 +39,12 @@ jobs:

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
dependencies: 'c("hard", "Config/Needs/check")'
extra-packages: any::rcmdcheck, any::testthat, any::knitr, any::rmarkdown, any::tidyr, any::glmGamPoi
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
args: 'c("--no-build-vignettes", "--no-manual")'
args: 'c("--no-build-vignettes", "--no-vignettes", "--no-manual", "--ignore-vignettes")'
build_args: 'c("--no-build-vignettes")'
upload-snapshots: true
16 changes: 11 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
Package: clustOpt
Version: 0.9
Date: 2023-10-11
Version: 1.0
Date: 2024-07-22
Title: Choosing a resolution parameter for single cell omics clustering
Description: Optimal clustering resolution parameters determined through Random Forests, a silhouette score-based assessment of clustering validity, and subject-wise cross-validation.
Author: Natalie Gill [aut, cre], Reuben Thomas [aut], Min-Gyoung Shin [aut], Ayushi Agrawal [aut]
Maintainer: Natalie Gill <natalie.gill@gladstone.ucsf.edu>
Authors@R: c(person("Natalie", "Gill", role = c("aut", "cre"), email = "natalie.gill@gladstone.ucsf.edu"),
person("Reuben", "Thomas", role = c("aut"), email = "reuben.thomas@gladstone.ucsf.edu"),
person("Min-Gyoung", "Shin", role = c("aut"), email = "mingyoung.shin@gladstone.ucsf.edu"),
Expand All @@ -11,7 +13,6 @@ Depends:
R (>= 4.4)
Imports:
progressr,
glmGamPoi,
future.apply,
ranger,
cluster,
Expand All @@ -21,8 +22,12 @@ Imports:
ggplot2 (>= 3.3.5),
purrr (>= 0.3.4),
tibble (>= 3.2.1),
profvis (>= 0.3.8)
methods,
stats,
rlang
License: MIT + file LICENSE
URL: https://gladstone-institutes.github.io/clustOpt, https://github.com/gladstone-institutes/clustOpt
BugReports: https://github.com/gladstone-institutes/clustOpt/issues
RoxygenNote: 7.3.2
Encoding: UTF-8
Collate:
Expand All @@ -35,6 +40,7 @@ Suggests:
rmarkdown,
tidyr,
testthat (>= 3.0.0),
BPCells
BPCells,
glmGamPoi
Config/testthat/edition: 3
VignetteBuilder: knitr
19 changes: 9 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,18 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
COPY . /opt/clustOpt
WORKDIR /opt/clustOpt

# renv is only used for local development of the package
RUN R -e 'renv::deactivate()'
RUN R -e "remotes::install_github('bnprks/BPCells/r')"
# Install pak for faster package installation
RUN R -e 'install.packages("pak", repos = "https://cloud.r-project.org")'
# Remove renv files that cause build issues
RUN rm -rf renv/ renv.lock .Rprofile


# Install GitHub dependencies first
RUN R -e "remotes::install_github('bnprks/BPCells/r')"

# Install using pak (update the DESCRIPTION for new builds)
RUN R -e 'pak::pkg_install(pkg = ".", dependencies = TRUE)'
# Install the local package with all dependencies
RUN R -e "devtools::install('.', dependencies = TRUE, upgrade = 'never')"

# Install some extra packages to make running clustOpt in Rscripts easier
RUN R -e 'pak::pkg_install(pkg = "optparse")'
RUN R -e 'pak::pkg_install(pkg = "readr")'
# Install extra packages for Rscripts
RUN R -e 'install.packages(c("optparse", "readr"), repos = "https://cloud.r-project.org")'

# Default command
CMD ["/bin/bash"]
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(dplyr,sym)
importFrom(dplyr,ungroup)
importFrom(future.apply,future_lapply)
importFrom(ggplot2,aes)
importFrom(ggplot2,geom_boxplot)
Expand All @@ -49,11 +50,16 @@ importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,theme_bw)
importFrom(methods,as)
importFrom(progressr,handlers)
importFrom(progressr,progressor)
importFrom(purrr,map_df)
importFrom(ranger,predictions)
importFrom(ranger,ranger)
importFrom(rlang,.data)
importFrom(stats,dist)
importFrom(stats,median)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(stats,var)
importFrom(tibble,as_tibble)
32 changes: 18 additions & 14 deletions R/clustOpt.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @include utils.R pca_split.R
#'
#' @importFrom rlang .data
NULL

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Expand Down Expand Up @@ -29,7 +29,7 @@ NULL
#' @param min_cells Minimum cells per subject, default is 50
#' @return A data.frame containing a distribution of silhouette scores for each
#' resolution.
#'
#'
#' @details
#' The clustOpt algorithm works by:
#' \enumerate{
Expand All @@ -39,23 +39,27 @@ NULL
#' \item Training random forests on cluster assignments
#' \item Evaluating clustering quality using silhouette scores
#' }
#'
#' Both scRNA-seq and CyTOF data types support sketching for improved performance
#' on large datasets. For CyTOF data, normalization is skipped as data should
#'
#' Both scRNA-seq and CyTOF data types support sketching for improved performance
#' on large datasets. For CyTOF data, normalization is skipped as data should
#' already be arcsinh transformed.
#'
#'
#' @examples
#' \dontrun{
#' # Basic usage with scRNA-seq data
#' results <- clust_opt(seurat_obj, ndim = 50, subject_ids = "donor_id")
#'
#'
#' # CyTOF data analysis
#' cytof_results <- clust_opt(cytof_obj, ndim = 30, dtype = "CyTOF",
#' subject_ids = "sample_id")
#'
#' cytof_results <- clust_opt(cytof_obj,
#' ndim = 30, dtype = "CyTOF",
#' subject_ids = "sample_id"
#' )
#'
#' # Large dataset with custom sketch size
#' large_results <- clust_opt(large_obj, ndim = 50, sketch_size = 10000,
#' subject_ids = "donor_id")
#' large_results <- clust_opt(large_obj,
#' ndim = 50, sketch_size = 10000,
#' subject_ids = "donor_id"
#' )
#' }
#'
#' @export
Expand Down Expand Up @@ -495,7 +499,7 @@ project_pca <- function(train_seurat,
train_seurat[[train_with_pcs]]
) |>
tibble::as_tibble(rownames = "features") |>
dplyr::filter(features %in% common_features) |>
dplyr::filter(.data$features %in% common_features) |>
as.matrix()

rownames(loadings_common_features) <- loadings_common_features[, 1]
Expand All @@ -513,7 +517,7 @@ project_pca <- function(train_seurat,
train_seurat[[clust_pcs]]
) |>
tibble::as_tibble(rownames = "features") |>
dplyr::filter(features %in% common_features) |>
dplyr::filter(.data$features %in% common_features) |>
as.matrix()

rownames(loadings_common_features) <- loadings_common_features[, 1]
Expand Down
10 changes: 8 additions & 2 deletions R/pca_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,14 @@ split_pca_dimensions <- function(input,
odd_dims <- seq(1, dims, by = 2)

even_pca <- pca
even_pca@cell.embeddings <- even_pca@cell.embeddings[, even_dims, drop = FALSE]
even_pca@feature.loadings <- even_pca@feature.loadings[, even_dims, drop = FALSE]
even_pca@cell.embeddings <- even_pca@cell.embeddings[,
even_dims,
drop = FALSE
]
even_pca@feature.loadings <- even_pca@feature.loadings[,
even_dims,
drop = FALSE
]
even_pca@stdev <- even_pca@stdev[even_dims]
even_pca@key <- "even_pca_"

Expand Down
22 changes: 11 additions & 11 deletions R/plot.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @include utils.R
#'
#' @importFrom rlang .data
NULL


Expand All @@ -20,7 +20,7 @@ create_sil_plots <- function(sil_dist) {
sil_summary <- sil_summary(sil_dist)

plot1 <- sil_dist |>
ggplot2::ggplot(ggplot2::aes(x = as.factor(resolution), y = avg_width)) +
ggplot2::ggplot(ggplot2::aes(x = as.factor(.data$resolution), y = .data$avg_width)) +
ggplot2::geom_boxplot() +
ggplot2::theme_bw() +
ggplot2::labs(
Expand All @@ -30,8 +30,8 @@ create_sil_plots <- function(sil_dist) {

plot2 <- sil_dist |>
ggplot2::ggplot(ggplot2::aes(
x = as.factor(resolution),
y = cluster_median_widths
x = as.factor(.data$resolution),
y = .data$cluster_median_widths
)) +
ggplot2::geom_boxplot() +
ggplot2::theme_bw() +
Expand All @@ -42,12 +42,12 @@ create_sil_plots <- function(sil_dist) {

plot3 <- ggplot2::ggplot(
sil_summary,
ggplot2::aes(x = as.factor(resolution), y = median_score, group = 1)
ggplot2::aes(x = as.factor(.data$resolution), y = .data$median_score, group = 1)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = median_score - (1.96 * standard_error_score),
ymax = median_score + (1.96 * standard_error_score),
ymin = .data$median_score - (1.96 * .data$standard_error_score),
ymax = .data$median_score + (1.96 * .data$standard_error_score),
width = .3
),
color = "red"
Expand All @@ -63,15 +63,15 @@ create_sil_plots <- function(sil_dist) {
plot4 <- ggplot2::ggplot(
sil_summary,
ggplot2::aes(
x = as.factor(resolution),
y = cluster_median_score,
x = as.factor(.data$resolution),
y = .data$cluster_median_score,
group = 1
)
) +
ggplot2::geom_errorbar(
ggplot2::aes(
ymin = cluster_median_score - (1.96 * standard_error_score),
ymax = cluster_median_score + (1.96 * standard_error_score),
ymin = .data$cluster_median_score - (1.96 * .data$standard_error_score),
ymax = .data$cluster_median_score + (1.96 * .data$standard_error_score),
width = .3
),
color = "red"
Expand Down
Loading
Loading