gladstone-institutes · natalie-23-gill · Jul 23, 2025 · Jul 22, 2025 · Jul 22, 2025 · Jul 22, 2025
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,4 +6,8 @@
 ^\.github$
 ^doc$
 ^Meta$
-vignettes/quick_start_guide.Rmd
+^\.claude$
+^\.vscode$
+^Dockerfile$
+^_pkgdown\.yml$
+^docs$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -18,9 +18,9 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: macos-latest,   r: 'release'}
-          - {os: windows-latest, r: 'release'}
-          - {os: ubuntu-latest,   r: 'release'}
+          - {os: macos-latest,   r: '4.4'}
+          - {os: windows-latest, r: '4.4'}
+          - {os: ubuntu-latest,   r: '4.4'}
 
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
@@ -39,11 +39,12 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::rcmdcheck
+          dependencies: 'c("hard", "Config/Needs/check")'
+          extra-packages: any::rcmdcheck, any::testthat, any::knitr, any::rmarkdown, any::tidyr, any::glmGamPoi
           needs: check
 
       - uses: r-lib/actions/check-r-package@v2
         with:
-          args: 'c("--no-build-vignettes", "--no-manual")'
+          args: 'c("--no-build-vignettes", "--no-vignettes", "--no-manual", "--ignore-vignettes")'
           build_args: 'c("--no-build-vignettes")'
           upload-snapshots: true
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,10 @@
 Package: clustOpt
-Version: 0.9
-Date: 2023-10-11
+Version: 1.0
+Date: 2025-07-22
 Title: Choosing a resolution parameter for single cell omics clustering
 Description: Optimal clustering resolution parameters determined through Random Forests, a silhouette score-based assessment of clustering validity, and subject-wise cross-validation.
+Author: Natalie Gill [aut, cre], Reuben Thomas [aut], Min-Gyoung Shin [aut], Ayushi Agrawal [aut]
+Maintainer: Natalie Gill <natalie.gill@gladstone.ucsf.edu>
 Authors@R: c(person("Natalie", "Gill", role = c("aut", "cre"), email = "natalie.gill@gladstone.ucsf.edu"),
 	person("Reuben", "Thomas", role = c("aut"), email = "reuben.thomas@gladstone.ucsf.edu"),
 	person("Min-Gyoung", "Shin", role = c("aut"), email = "mingyoung.shin@gladstone.ucsf.edu"),
@@ -11,7 +13,6 @@ Depends:
     R (>= 4.4)
 Imports:
     progressr,
-    glmGamPoi,
     future.apply,
     ranger,
     cluster,
@@ -21,8 +22,12 @@ Imports:
     ggplot2 (>= 3.3.5),
     purrr (>= 0.3.4),
     tibble (>= 3.2.1),
-    profvis (>= 0.3.8)
+    methods,
+    stats,
+    rlang
 License: MIT + file LICENSE
+URL: https://gladstone-institutes.github.io/clustOpt, https://github.com/gladstone-institutes/clustOpt
+BugReports: https://github.com/gladstone-institutes/clustOpt/issues
 RoxygenNote: 7.3.2
 Encoding: UTF-8
 Collate: 
@@ -35,6 +40,7 @@ Suggests:
     rmarkdown,
     tidyr,
     testthat (>= 3.0.0),
-    BPCells
+    BPCells,
+    glmGamPoi
 Config/testthat/edition: 3
 VignetteBuilder: knitr
diff --git a/Dockerfile b/Dockerfile
@@ -5,19 +5,18 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 COPY . /opt/clustOpt
 WORKDIR /opt/clustOpt
 
-# renv is only used for local development of the package
-RUN R -e 'renv::deactivate()'
-RUN R -e "remotes::install_github('bnprks/BPCells/r')"
-# Install pak for faster package installation 
-RUN R -e 'install.packages("pak", repos = "https://cloud.r-project.org")'
+# Remove renv files that cause build issues
+RUN rm -rf renv/ renv.lock .Rprofile
+
 
+# Install GitHub dependencies first
+RUN R -e "remotes::install_github('bnprks/BPCells/r')"
 
-# Install using using pak (update the DESCRIPTION for new builds)
-RUN R -e 'pak::pkg_install(pkg = ".", dependencies = TRUE)'
+# Install the local package with all dependencies
+RUN R -e "devtools::install('.', dependencies = TRUE, upgrade = 'never')"
 
-# Install some extra packages to make running clustOpt in Rscripts easier
-RUN R -e 'pak::pkg_install(pkg = "optparse")'
-RUN R -e 'pak::pkg_install(pkg = "readr")'
+# Install extra packages for Rscripts
+RUN R -e 'install.packages(c("optparse", "readr"), repos = "https://cloud.r-project.org")'
 
 # Default command
 CMD ["/bin/bash"]

diff --git a/NAMESPACE b/NAMESPACE
@@ -40,6 +40,7 @@ importFrom(dplyr,pull)
 importFrom(dplyr,select)
 importFrom(dplyr,summarize)
 importFrom(dplyr,sym)
+importFrom(dplyr,ungroup)
 importFrom(future.apply,future_lapply)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,geom_boxplot)
@@ -49,11 +50,16 @@ importFrom(ggplot2,geom_point)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,labs)
 importFrom(ggplot2,theme_bw)
+importFrom(methods,as)
 importFrom(progressr,handlers)
 importFrom(progressr,progressor)
 importFrom(purrr,map_df)
 importFrom(ranger,predictions)
 importFrom(ranger,ranger)
+importFrom(rlang,.data)
 importFrom(stats,dist)
+importFrom(stats,median)
 importFrom(stats,predict)
+importFrom(stats,sd)
+importFrom(stats,var)
 importFrom(tibble,as_tibble)
diff --git a/R/clustOpt.R b/R/clustOpt.R
@@ -1,5 +1,5 @@
 #' @include utils.R pca_split.R
-#'
+#' @importFrom rlang .data
 NULL
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -29,7 +29,7 @@ NULL
 #' @param min_cells Minimum cells per subject, default is 50
 #' @return A data.frame containing a distribution of silhouette scores for each
 #' resolution.
-#' 
+#'
 #' @details
 #' The clustOpt algorithm works by:
 #' \enumerate{
@@ -39,23 +39,27 @@ NULL
 #'   \item Training random forests on cluster assignments
 #'   \item Evaluating clustering quality using silhouette scores
 #' }
-#' 
-#' Both scRNA-seq and CyTOF data types support sketching for improved performance 
-#' on large datasets. For CyTOF data, normalization is skipped as data should 
+#'
+#' Both scRNA-seq and CyTOF data types support sketching for improved performance
+#' on large datasets. For CyTOF data, normalization is skipped as data should
 #' already be arcsinh transformed.
-#' 
+#'
 #' @examples
 #' \dontrun{
 #' # Basic usage with scRNA-seq data
 #' results <- clust_opt(seurat_obj, ndim = 50, subject_ids = "donor_id")
-#' 
+#'
 #' # CyTOF data analysis
-#' cytof_results <- clust_opt(cytof_obj, ndim = 30, dtype = "CyTOF", 
-#'                           subject_ids = "sample_id")
-#' 
+#' cytof_results <- clust_opt(cytof_obj,
+#'   ndim = 30, dtype = "CyTOF",
+#'   subject_ids = "sample_id"
+#' )
+#'
 #' # Large dataset with custom sketch size
-#' large_results <- clust_opt(large_obj, ndim = 50, sketch_size = 10000,
-#'                           subject_ids = "donor_id")
+#' large_results <- clust_opt(large_obj,
+#'   ndim = 50, sketch_size = 10000,
+#'   subject_ids = "donor_id"
+#' )
 #' }
 #'
 #' @export
@@ -495,7 +499,7 @@ project_pca <- function(train_seurat,
       train_seurat[[train_with_pcs]]
     ) |>
       tibble::as_tibble(rownames = "features") |>
-      dplyr::filter(features %in% common_features) |>
+      dplyr::filter(.data$features %in% common_features) |>
       as.matrix()
 
     rownames(loadings_common_features) <- loadings_common_features[, 1]
@@ -513,7 +517,7 @@ project_pca <- function(train_seurat,
       train_seurat[[clust_pcs]]
     ) |>
       tibble::as_tibble(rownames = "features") |>
-      dplyr::filter(features %in% common_features) |>
+      dplyr::filter(.data$features %in% common_features) |>
       as.matrix()
 
     rownames(loadings_common_features) <- loadings_common_features[, 1]

diff --git a/R/pca_split.R b/R/pca_split.R
@@ -35,8 +35,14 @@ split_pca_dimensions <- function(input,
   odd_dims <- seq(1, dims, by = 2)
 
   even_pca <- pca
-  even_pca@cell.embeddings <- even_pca@cell.embeddings[, even_dims, drop = FALSE]
-  even_pca@feature.loadings <- even_pca@feature.loadings[, even_dims, drop = FALSE]
+  even_pca@cell.embeddings <- even_pca@cell.embeddings[,
+    even_dims,
+    drop = FALSE
+  ]
+  even_pca@feature.loadings <- even_pca@feature.loadings[,
+    even_dims,
+    drop = FALSE
+  ]
   even_pca@stdev <- even_pca@stdev[even_dims]
   even_pca@key <- "even_pca_"
 

diff --git a/R/plot.R b/R/plot.R
@@ -1,5 +1,5 @@
 #' @include utils.R
-#'
+#' @importFrom rlang .data
 NULL
 
 
@@ -20,7 +20,7 @@ create_sil_plots <- function(sil_dist) {
   sil_summary <- sil_summary(sil_dist)
 
   plot1 <- sil_dist |>
-    ggplot2::ggplot(ggplot2::aes(x = as.factor(resolution), y = avg_width)) +
+    ggplot2::ggplot(ggplot2::aes(x = as.factor(.data$resolution), y = .data$avg_width)) +
     ggplot2::geom_boxplot() +
     ggplot2::theme_bw() +
     ggplot2::labs(
@@ -30,8 +30,8 @@ create_sil_plots <- function(sil_dist) {
 
   plot2 <- sil_dist |>
     ggplot2::ggplot(ggplot2::aes(
-      x = as.factor(resolution),
-      y = cluster_median_widths
+      x = as.factor(.data$resolution),
+      y = .data$cluster_median_widths
     )) +
     ggplot2::geom_boxplot() +
     ggplot2::theme_bw() +
@@ -42,12 +42,12 @@ create_sil_plots <- function(sil_dist) {
 
   plot3 <- ggplot2::ggplot(
     sil_summary,
-    ggplot2::aes(x = as.factor(resolution), y = median_score, group = 1)
+    ggplot2::aes(x = as.factor(.data$resolution), y = .data$median_score, group = 1)
   ) +
     ggplot2::geom_errorbar(
       ggplot2::aes(
-        ymin = median_score - (1.96 * standard_error_score),
-        ymax = median_score + (1.96 * standard_error_score),
+        ymin = .data$median_score - (1.96 * .data$standard_error_score),
+        ymax = .data$median_score + (1.96 * .data$standard_error_score),
         width = .3
       ),
       color = "red"
@@ -63,15 +63,15 @@ create_sil_plots <- function(sil_dist) {
   plot4 <- ggplot2::ggplot(
     sil_summary,
     ggplot2::aes(
-      x = as.factor(resolution),
-      y = cluster_median_score,
+      x = as.factor(.data$resolution),
+      y = .data$cluster_median_score,
       group = 1
     )
   ) +
     ggplot2::geom_errorbar(
       ggplot2::aes(
-        ymin = cluster_median_score - (1.96 * standard_error_score),
-        ymax = cluster_median_score + (1.96 * standard_error_score),
+        ymin = .data$cluster_median_score - (1.96 * .data$standard_error_score),
+        ymax = .data$cluster_median_score + (1.96 * .data$standard_error_score),
         width = .3
       ),
       color = "red"