diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 00000000..91114bf2 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.Rhistory b/.Rhistory deleted file mode 100644 index 8bd69bd8..00000000 --- a/.Rhistory +++ /dev/null @@ -1,335 +0,0 @@ ---- -title: "ArchR Hematopoiesis Tutorial" -author: "Jeffrey Granja" -date: "11/22/2019" -output: html_document ---- -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` -## R Markdown -knitr::opts_chunk$set(echo = TRUE) -knitr::opts_chunk$set(echo = TRUE) -summary(cars) -plot(pressure) -knitr::opts_chunk$set(echo = TRUE) -setwd("~/Documents/Heme_Tutorial/") -data("geneAnnoHg19") -data("genomeAnnoHg19") -threads <- 8 -pathFragments <- "Heme_Fragments" -inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) -names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) -inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] -inputFiles -knitr::opts_chunk$set(echo = TRUE) -#Input Libraries -library(ArchR) -#Set/Create Working Directory to Folder -setwd("~/Documents/Heme_Tutorial/") -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -pathFragments <- "Heme_Fragments" -inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) -names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) -inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] -inputFiles -#Input Libraries -library(ArchR) -#Set/Create Working Directory to Folder -setwd("~/Documents/Heme_Tutorial/") -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -knitr::opts_chunk$set(echo = TRUE) -opts_knit$set(root.dir = "~/Documents/Heme_Tutorial/") -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -opts_knit$set(root.dir = "~/Documents/Heme_Tutorial/") -#Input Libraries -library(ArchR) -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -pathFragments <- "Heme_Fragments" -inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) -names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) -inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] -inputFiles -#Input Libraries -library(ArchR) -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -opts_knit$set(root.dir = "~/Documents/Heme_Tutorial/") -#Input Libraries -library(ArchR) -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -pathFragments <- "Heme_Fragments" -inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) -names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) -inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] -inputFiles -#Input Libraries -library(ArchR) -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -opts_knit$set(root.dir = "~/Documents/Heme_Tutorial/") -library(ArchR) -#Input Libraries -library(ArchR) -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -pathFragments <- "Heme_Fragments" -inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) -names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) -inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] -inputFiles -library(knitr) -knitr::opts_chunk$set(echo = TRUE) -opts_knit$set(root.dir = "~/Documents/Heme_Tutorial/") -library(ArchR) -#Load Genome Annotations -data("geneAnnoHg19") -data("genomeAnnoHg19") -#Set Threads to be used -threads <- 8 -pathFragments <- "Heme_Fragments" -inputFiles <- list.files(pathFragments, pattern = ".gz", full.names = TRUE) -names(inputFiles) <- gsub(".fragments.tsv.gz", "", list.files(pathFragments, pattern = ".gz")) -inputFiles <- inputFiles[!grepl(".tbi", inputFiles)] -inputFiles -ArrowFiles <- createArrowFiles( -inputFiles = inputFiles[3], -sampleNames = names(inputFiles), -geneAnno = geneAnnoHg19, -genomeAnno = genomeAnnoHg19, -threads = threads, -force = TRUE -) -ArrowFiles <- createArrowFiles( -inputFiles = inputFiles[3], -sampleNames = names(inputFiles)[3], -geneAnno = geneAnnoHg19, -genomeAnno = genomeAnnoHg19, -threads = threads, -force = TRUE -) -ls -```{r eval=TRUE, cache=TRUE} -ArrowFiles <- createArrowFiles( -inputFiles = inputFiles[3], -sampleNames = names(inputFiles)[3], -geneAnno = geneAnnoHg19, -genomeAnno = genomeAnnoHg19, -threads = threads, -addGeneScoreMat = FALSE, -addTileMat = FALSE, -force = TRUE -) -knitr::opts_chunk$set(echo = TRUE) -knitr::opts_chunk$set(eval = FALSE) -# Small fig.width -knitr::include_graphics("~/Documents/Heme_Tutorial/Heme_Tutorial/Plots/Plot-UMAP-TileLSI_Date-2019-11-22_Time-01-35-25.pdf") -# Small fig.width -knitr::include_graphics("~/Documents/Heme_Tutorial/Heme_Tutorial/Plots/Plot-UMAP-TileLSI_Date-2019-11-22_Time-01-35-25.pdf") -# Small fig.width -knitr::include_graphics("~/Documents/Heme_Tutorial/Heme_Tutorial/Plots/Plot-UMAP-TileLSI_Date-2019-11-22_Time-01-35-25.pdf") -getwd() -"/Users/jeffreygranja/Documents/Heme_Tutorial/Heme_Tutorial/Plots/Plot-UMAP-TileLSI_Date-2019-11-22_Time-01-35-25.pdf -# Small fig.width -knitr::include_graphics("/Users/jeffreygranja/Documents/Heme_Tutorial/Heme_Tutorial/Plots/Plot-UMAP-TileLSI_Date-2019-11-22_Time-01-35-25.pdf") -# Small fig.width -knitr::include_graphics("/Users/jeffreygranja/Documents/Heme_Tutorial/Heme_Tutorial/Plots/Plot-UMAP-TileLSI_Date-2019-11-22_Time-01-35-25.pdf") -"~/Documents/HemeAll/heme/Plots/Plot-Tracks_Date-2019-11-17_Time-17-52-02.pdf" -knitr::include_graphics(path.expand("~/Documents/HemeAll/heme/Plots/Plot-Tracks_Date-2019-11-17_Time-17-52-02.pdf")) -file.copy -list.files() -list.files() -system("cp ~/Documents/HemeAll/heme/Plots/Plot-Tracks_Date-2019-11-17_Time-17-52-02.pdf mychart.pdf") -knitr::include_graphics(path = "mychart.pdf") -"mychart.pdf" -knitr::include_graphics(path = "mychart.pdf")#path.expand("~/Documents/HemeAll/heme/Plots/Plot-Tracks_Date-2019-11-17_Time-17-52-02.pdf")) -knitr::include_graphics(path = "mychart.pdf") -knitr::opts_chunk$set(echo = TRUE) -```{r pressure, echo=FALSE, eval = FALSE} -sessionInfo() -head(sessionInfo()) -suppressPackageStartupMessages() -Sys.info() -.Platform -parallel::detectCores() -try(system("grep MemTotal /proc/meminfo", intern = TRUE), silent = TRUE) -ram = substring(system("sysctl hw.memsize", intern = TRUE), 13) #nocov -ram -ram / 10^9 -as.numeric(ram) / 10^9 -get_windows_ram = function() { -ram = try(system("grep MemTotal /proc/meminfo", intern = TRUE), silent = TRUE) -if (class(ram) != "try-error" && length(ram) != 0) { -ram = strsplit(ram, " ")[[1]] -ram = as.numeric(ram[length(ram) - 1]) -ram_size = ram -} else { -# Fallback: This was the old method I used -# It worked for Windows 7 and below. -ram_size = system("wmic MemoryChip get Capacity", intern = TRUE)[-1] -} -return(ram_size) -} -system_ram = function(os) { -if (length(grep("^linux", os))) { -cmd = "awk '/MemTotal/ {print $2}' /proc/meminfo" -ram = system(cmd, intern = TRUE) -} else if (length(grep("^darwin", os))) { -ram = substring(system("sysctl hw.memsize", intern = TRUE), 13) #nocov -} else if (length(grep("^solaris", os))) { -cmd = "prtconf | grep Memory" # nocov -ram = system(cmd, intern = TRUE) ## Memory size: XXX Megabytes # nocov -} else { -ram = get_windows_ram() # nocov -} -ram -} -#' Get the amount of RAM -#' -#' Attempt to extract the amount of RAM on the current machine. This is OS -#' specific: -#' \itemize{ -#' \item Linux: \code{proc/meminfo} -#' \item Apple: \code{system_profiler -detailLevel mini} -#' \item Windows: \code{memory.size()} -#' \item Solaris: \code{prtconf} -#' } -#' A value of \code{NA} is return if it isn't possible to determine the amount of RAM. -#' @export -#' @references The \code{print.bytes} function was taken from the \pkg{pryr} package. -#' @examples -#' ## Return (and pretty print) the amount of RAM -#' get_ram() -get_ram = function() { -os = R.version$os -ram = suppressWarnings(try(system_ram(os), silent = TRUE)) -if (class(ram) == "try-error" || length(ram) == 0) { -message("\t Unable to detect your RAM. # nocov -Please raise an issue at https://github.com/csgillespie/benchmarkme") # nocov -ram = structure(NA, class = "ram") # nocov -} else { -cleaned_ram = suppressWarnings(try(clean_ram(ram, os), silent = TRUE)) -if (class(cleaned_ram) == "try-error" || length(ram) == 0) { -message("\t Unable to detect your RAM. # nocov -Please raise an issue at https://github.com/csgillespie/benchmarkme") # nocov -ram = structure(NA, class = "ram") #nocov -} else { -ram = structure(cleaned_ram, class = "ram") -} -} -return(ram) -} -#' @rawNamespace S3method(print,ram) -print.ram = function(x, digits = 3, unit_system = c("metric", "iec"), ...) { -#unit_system = match.arg(unit_system) -unit_system = "metric" -base = switch(unit_system, metric = 1000, iec = 1024) -power = min(floor(log(abs(x), base)), 8) -if (is.na(x) || power < 1) { -unit = "B" -} else { -unit_labels = switch( -unit_system, -metric = c("kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"), -iec = c("KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") -) -unit = unit_labels[[power]] -x = x / (base^power) -} -formatted = format(signif(x, digits = digits), big.mark = ",", -scientific = FALSE, ...) -cat(unclass(formatted), " ", unit, "\n", sep = "") -invisible(paste(unclass(formatted), unit)) -} -get_ram() -plot(cars) -```{r eval = TRUE} -```{r} -suppressPackageStartupMessages(library(SnapATAC)) -suppressPackageStartupMessages(library(SnapATAC)) -suppressPackageStartupMessages(library(magrittr)) -setwd("/Volumes/JG_SSD_2/Data/Analysis/Tutorial/test-snap") -parallel::detectCores() -Sys.Date() -sessionInfo() -#5k -if(!file.exists("atac_pbmc_5k_nextgem_fragments.tsv.gz")){ -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_5k_nextgem/atac_pbmc_5k_nextgem_fragments.tsv.gz") -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_5k_nextgem/atac_pbmc_5k_nextgem_fragments.tsv.gz.tbi") -} -#10k -if(!file.exists("atac_pbmc_10k_nextgem_fragments.tsv.gz")){ -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_10k_nextgem/atac_pbmc_10k_nextgem_fragments.tsv.gz") -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_10k_nextgem/atac_pbmc_10k_nextgem_fragments.tsv.gz.tbi") -} -#Analysis of 10x Fragment Files -#Install pip install -user snaptools -snapTools <- "/Users/jeffreygranja/Library/Python/2.7/bin/snaptools" -#Fragment Files -fragFiles <- c( -"atac_pbmc_5k_nextgem_fragments.tsv.gz", -"atac_pbmc_10k_nextgem_fragments.tsv.gz" -) -#Input Genome -genome <- "hg19" -if(!file.exists(paste0(genome,".chrom.sizes"))){ -system(paste0("wget http://hgdownload.cse.ucsc.edu/goldenpath/",genome,"/bigZips/",genome,".chrom.sizes")) -} -#Download/Set Up 10x Fragment Files -#5k -if(!file.exists("atac_pbmc_5k_nextgem_fragments.tsv.gz")){ -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_5k_nextgem/atac_pbmc_5k_nextgem_fragments.tsv.gz") -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_5k_nextgem/atac_pbmc_5k_nextgem_fragments.tsv.gz.tbi") -} -#10k -if(!file.exists("atac_pbmc_10k_nextgem_fragments.tsv.gz")){ -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_10k_nextgem/atac_pbmc_10k_nextgem_fragments.tsv.gz") -system("wget http://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_10k_nextgem/atac_pbmc_10k_nextgem_fragments.tsv.gz.tbi") -} -#Analysis of 10x Fragment Files -#Install pip install -user snaptools -snapTools <- "/Users/jeffreygranja/Library/Python/2.7/bin/snaptools" -#Fragment Files -fragFiles <- c( -"atac_pbmc_5k_nextgem_fragments.tsv.gz", -"atac_pbmc_10k_nextgem_fragments.tsv.gz" -) -#Input Genome -genome <- "hg19" -if(!file.exists(paste0(genome,".chrom.sizes"))){ -system(paste0("wget http://hgdownload.cse.ucsc.edu/goldenpath/",genome,"/bigZips/",genome,".chrom.sizes")) -} -setwd("~/Documents/GitHub/ArchR") -pkgdown::build_site() -warnings() -pkgdown::build_site() diff --git a/.gitignore b/.gitignore index 65d29924..54618640 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .DS_Store .DS_Store +.Rproj.user +.Rhistory diff --git a/R/.DS_Store b/R/.DS_Store deleted file mode 100644 index f00a9343..00000000 Binary files a/R/.DS_Store and /dev/null differ diff --git a/R/AllClasses.R b/R/AllClasses.R index 314c4182..c0211216 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -106,15 +106,15 @@ ArchRProject <- function( geneAnnotation <- .validGeneAnnoByGenomeAnno(geneAnnotation = geneAnnotation, genomeAnnotation = genomeAnnotation) .validInput(input = showLogo, name = "showLogo", valid = "boolean") .validInput(input = threads, name = "threads", valid = c("integer")) - - if(grepl(" ", outputDirectory)){ - stop("outputDirectory cannot have a space in the path! Path : ", outputDirectory) - } + # if(grepl(" ", outputDirectory)){ + # stop("outputDirectory cannot have a space in the path! Path : ", outputDirectory) + # } dir.create(outputDirectory,showWarnings=FALSE) - if(grepl(" ", normalizePath(outputDirectory))){ - stop("outputDirectory cannot have a space in the full path! Full path : ", normalizePath(outputDirectory)) - } - sampleDirectory <- file.path(normalizePath(outputDirectory), "ArrowFiles") + # if(grepl(" ", outputDirectory)){ + # stop("outputDirectory cannot have a space in the full path! Full path : ", outputDirectory) + # } + sampleDirectory <- file.path(outputDirectory, "ArrowFiles") + dir.create(sampleDirectory,showWarnings=FALSE) if(is.null(ArrowFiles)){ @@ -180,7 +180,7 @@ ArchRProject <- function( message("Initializing ArchRProject...") AProj <- new("ArchRProject", - projectMetadata = SimpleList(outputDirectory = normalizePath(outputDirectory)), + projectMetadata = SimpleList(outputDirectory = outputDirectory), projectSummary = SimpleList(), sampleColData = sampleColData, sampleMetadata = sampleMetadata, @@ -374,8 +374,7 @@ loadArchRProject <- function( ArchRProj <- recoverArchRProject(readRDS(path2Proj)) outputDir <- getOutputDirectory(ArchRProj) - outputDirNew <- normalizePath(path) - + outputDirNew <- path #1. Arrows Paths ArrowFilesNew <- file.path(outputDirNew, "ArrowFiles", basename(ArchRProj@sampleColData$ArrowFiles)) if(!all(file.exists(ArrowFilesNew))){ @@ -497,14 +496,12 @@ saveArchRProject <- function( .validInput(input = outputDirectory, name = "outputDirectory", valid = "character") .validInput(input = overwrite, name = "overwrite", valid = "boolean") .validInput(input = load, name = "load", valid = "boolean") - - if(grepl(" ", outputDirectory)){ - stop("outputDirectory cannot have a space in the path! Path : ", outputDirectory) - } + # if(grepl(" ", outputDirectory)){ + # stop("outputDirectory cannot have a space in the path! Path : ", outputDirectory) + # } dir.create(outputDirectory, showWarnings=FALSE) - outputDirectory <- normalizePath(outputDirectory) - outDirOld <- normalizePath(getOutputDirectory(ArchRProj)) + outDirOld <- getOutputDirectory(ArchRProj) newProj <- ArchRProj ArrowFiles <- getArrowFiles(ArchRProj) @@ -518,7 +515,7 @@ saveArchRProject <- function( names(ArrowFilesNew) <- names(ArrowFiles) if(outputDirectory != outDirOld){ - message("Copying ArchRProject to new outputDirectory : ", normalizePath(outputDirectory)) + message("Copying ArchRProject to new outputDirectory : ", outputDirectory) } if(!identical(paste0(ArrowFiles), paste0(ArrowFilesNew))){ diff --git a/R/BulkProjection.R b/R/BulkProjection.R index 882dd9a4..548d34cc 100644 --- a/R/BulkProjection.R +++ b/R/BulkProjection.R @@ -54,7 +54,7 @@ projectBulkATAC <- function( } .logThis(rDGR, "reducedDimsGRanges", logFile = logFile) subATAC <- subsetByOverlaps(seATAC, rDGR, ignore.strand = TRUE) - subATAC <- subATAC[order(rowSums(as.matrix(.getAssay(subATAC, "counts"))), decreasing = TRUE), ] + subATAC <- subATAC[order(rowSums(as.array(.getAssay(subATAC, "counts"))), decreasing = TRUE), ] o <- DataFrame(findOverlaps(subATAC, rDGR, ignore.strand = TRUE)) sumOverlap <- length(unique(o[,2])) .logThis(o, "overlapATAC", logFile = logFile) @@ -220,4 +220,198 @@ projectBulkATAC <- function( } +#' Project single cell data into single cell subspace +#' +#' This function will Project Bulk ATAC-seq data into single cell subspace. +#' +#' @param projector An `ArchRProject` object containing the dimensionality reduction matrix passed by `reducedDims`. +#' @param projectee a single cell Summarized Experiment containing the cells to be . +#' @param reducedDims A string specifying the reducedDims. +#' @param embedding A string specifying embedding. +#' @param verbose A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output. +#' @param threads The number of threads used for parallel execution +#' @param logFile The path to a file to be used for logging ArchR output. +#' @export +#' +projectData <- function( + projector = NULL, + projectee = NULL, + reducedDims = "IterativeLSI", + embedding = "UMAP", + verbose = TRUE, + threads = getArchRThreads(), + logFile = createLogFile("projectBulkATAC") +){ + + .validInput(input = projector, name = "projector", valid = c("ArchRProj")) + .validInput(input = projectee, name = "projectee", valid = c("SummarizedExperiment")) + .validInput(input = reducedDims, name = "reducedDims", valid = c("character")) + .validInput(input = embedding, name = "embedding", valid = c("character", "null")) + .validInput(input = verbose, name = "verbose", valid = c("boolean")) + .validInput(input = threads, name = "threads", valid = c("integer")) + .validInput(input = logFile, name = "logFile", valid = c("character")) + + tstart <- Sys.time() + + .startLogging(logFile = logFile) + .logThis(mget(names(formals()),sys.frame(sys.nframe())), "projectBulkATAC Input-Parameters", logFile = logFile) + + ################################################## + # Reduced Dimensions + ################################################## + rD <- getReducedDims(projector, reducedDims = reducedDims, returnMatrix = FALSE) + .logThis(names(rD), "reducedDimsNames", logFile = logFile) + .logThis(rD[[1]], "reducedDimsMat", logFile = logFile) + rDFeatures <- rD[[grep("Features", names(rD))]] + if("end" %in% colnames(rDFeatures)){ + rDGR <- GRanges(seqnames=rDFeatures$seqnames,IRanges(start=rDFeatures$start, end=rDFeatures$end)) + }else{ + rDGR <- GRanges(seqnames=rDFeatures$seqnames,IRanges(start=rDFeatures$start, width = (rDFeatures$start) / (rDFeatures$idx - 1))) + } + .logThis(rDGR, "reducedDimsGRanges", logFile = logFile) + subATAC <- subsetByOverlaps(projectee, rDGR, ignore.strand = TRUE) + subATAC <- subATAC[order(rowSums(as.array(.getAssay(subATAC, "counts"))), decreasing = TRUE), ] + o <- DataFrame(findOverlaps(subATAC, rDGR, ignore.strand = TRUE)) + sumOverlap <- length(unique(o[,2])) + .logThis(o, "overlapATAC", logFile = logFile) + + if(sumOverlap == 0){ + .logStop(paste0("No overlaps between bulk ATAC data and reduce dimensions feature found.", + "\nEither recreate counts matrix or most likely these data sets are incompatible!"), logFile = logFile) + } + if( (sumOverlap / length(rDGR)) < 0.25 ){ + if(force){ + .logMessage("Less than 25% of the features are present in this bulk ATAC data set! Continuing since force = TRUE!", verbose = TRUE, logFile = logFile) + }else{ + .logStop("Less than 25% of the features are present in this bulk ATAC data set! Set force = TRUE to continue!", logFile = logFile) + } + } + .logMessage("Overlap Ratio of Reduced Dims Features = ", (sumOverlap / length(rDGR)), verbose = TRUE, logFile = logFile) + + o <- o[!duplicated(o$subjectHits),] + subATAC <- subATAC[o$queryHits, ] + rownames(subATAC) <- paste0("f", o$subjectHits) + .logThis(subATAC, "subsettedATAC", logFile = logFile) + + ################################################## + # Create Bulk Matrix + ################################################## + bulkMat <- .safeSubset( + mat = .getAssay(subATAC, "counts"), + subsetRows = paste0("f", seq_along(rDGR)) + ) + .logThis(bulkMat, "bulkATACMat", logFile = logFile) + + simRD <- as.matrix(.projectLSI(bulkMat, LSI = rD, verbose = FALSE)) + if(is.null(embedding)){ + if(rD$scaleDims){ + simRD <- .scaleDims(simRD) + } + out <- SimpleList( + simulatedReducedDims = simRD + ) + return(out) + } + .logThis(simRD, "simulatedReducedDims", logFile = logFile) + + ################################################## + # Prep Reduced Dims + ################################################## + embedding <- getEmbedding(ArchRProj = projector, embedding = embedding, returnDF = FALSE) + corCutOff <- embedding$params$corCutOff + dimsToUse <- embedding$params$dimsToUse + scaleDims <- embedding$params$scaleDims + + if(is.null(scaleDims)){ + scaleDims <- rD$scaleDims + } + + simRD <- .scaleDims(simRD) + + if(embedding$params$nc != ncol(simRD)){ + + if(is.null(dimsToUse)){ + dimsToUse <- seq_len(ncol(rD[[1]])) + } + + if(!is.null(corCutOff)){ + if(scaleDims){ + corToDepth <- rD$corToDepth$scaled + dimsToUse <- dimsToUse[corToDepth < corCutOff] + }else{ + corToDepth <- rD$corToDepth$none + dimsToUse <- dimsToUse[corToDepth < corCutOff] + } + } + + if(embedding$params$nc != ncol(simRD)){ + .logMessage("Error incosistency found with matching LSI dimensions to those used in addEmbedding", + "\nReturning with simulated reduced dimension coordinates...", verbose = TRUE, logFile = logFile) + out <- SimpleList( + simulatedReducedDims = simRD + ) + return(out) + } + + simRD <- simRD[, dimsToUse, drop = FALSE] + + } + + ################################################## + # Get Previous UMAP Model + ################################################## + umapModel <- .loadUWOT(embedding$params$uwotModel, embedding$params$nc) + + idx <- sort(sample(seq_len(nrow(rD[[1]])), min(nrow(rD[[1]]), 5000))) #Try to use 5000 or total cells to check validity + rD2 <- getReducedDims( + ArchRProj = projector, + reducedDims = reducedDims, + dimsToUse = embedding$params$dimsToUse, + scaleDims = embedding$params$scaleDims, + corCutOff = embedding$params$corCutOff + )[idx,,drop=FALSE] + + ################################################## + # Project UMAP + ################################################## + set.seed(1) + threads2 <- max(floor(threads/2), 1) + simUMAP <- uwot::umap_transform( + X = rbind(rD2, simRD), + model = umapModel, + verbose = TRUE, + n_threads = threads2 + ) + rownames(simUMAP) <- c(rownames(rD2), rownames(simRD)) + .logThis(simUMAP, "simulatedUMAP", logFile = logFile) + + #Check if the projection matches using previous data + c1 <- cor(simUMAP[rownames(rD2), 1], embedding[[1]][rownames(rD2),1]) + c2 <- cor(simUMAP[rownames(rD2), 2], embedding[[1]][rownames(rD2),2]) + if(min(c1, c2) < 0.8){ + .logMessage("Warning projection correlation is less than 0.8 (R = ", round(min(c1,c2), 4),").\nThese results may not be accurate because of the lack of heterogeneity in the single cell data.", verbose = TRUE, logFile = logFile) + } + + dfUMAP <- embedding[[1]] + colnames(dfUMAP) <- c("UMAP1", "UMAP2") + colnames(simUMAP) <- c("UMAP1", "UMAP2") + dfUMAP <- DataFrame(dfUMAP) + dfUMAP$Type <- Rle("projector", lengths = nrow(dfUMAP)) + + simUMAP <- DataFrame(simUMAP[rownames(simRD),,drop=FALSE]) + simUMAP$Type <- Rle(stringr::str_split(rownames(simUMAP), pattern = "#", simplify = TRUE)[,1]) + + out <- SimpleList( + simulatedBulkUMAP = simUMAP, + singleCellUMAP = dfUMAP, + simulatedReducedDims = simRD + ) + .endLogging(logFile = logFile) + + return(out) + +} + + + diff --git a/R/CreateArrow.R b/R/CreateArrow.R index 08243139..6fa164c7 100644 --- a/R/CreateArrow.R +++ b/R/CreateArrow.R @@ -451,6 +451,7 @@ createArrowFiles <- function( ############################################################# #Compute Fragment Information! ############################################################# + .logDiffTime(sprintf("%s Adding Fragment Summary", prefix), t1 = tstart, verbose = FALSE, logFile = logFile) fragSummary <- .fastFragmentInfo( ArrowFile = ArrowFile, @@ -2282,7 +2283,7 @@ createArrowFiles <- function( tryCatch({ - system(paste0("mv ", from, " ", to)) + system(paste0("mv '", from, "' '", to, "'")) return(to) diff --git a/R/GRangesUtils.R b/R/GRangesUtils.R index 23cd8990..670f2332 100644 --- a/R/GRangesUtils.R +++ b/R/GRangesUtils.R @@ -164,5 +164,24 @@ extendGR <- function(gr = NULL, upstream = NULL, downstream = NULL){ return(gr) } - +.featureDFtoGR <- function(featureDF, seqlevels){ + ArchR:::.validInput(input = featureDF, name = "featureDF", valid = c("DataFrame")) + ArchR:::.validInput(input = seqlevels, name = "seqlevels", valid = c("character")) + #find what strand is F + if(length(levels(factor(featureDF$strand)))>2){stop("Error with featureDF; more than two strands provided")} + for(qstrand in levels(factor(featureDF$strand))){ + if(all(featureDF[featureDF$strand %in% qstrand,]$start>featureDF[featureDF$strand %in% qstrand,]$end)) + isMinus<-qstrand + else{ + isOther<-qstrand + } + } + minusDF<-featureDF[featureDF$strand %in% isMinus,] + otherDF<-featureDF[featureDF$strand %in% isOther,] + minusGR<-GRanges(seqnames = minusDF$seqnames, ranges = IRanges(start = minusDF$end, end = minusDF$start), names=minusDF$name, strand = "-") + otherGR<-GRanges(seqnames = otherDF$seqnames, ranges = IRanges(start = otherDF$start, end = otherDF$end), names=otherDF$name, strand = "+") + seqlevels(minusGR) <- seqlevels + seqlevels(otherGR) <- seqlevels + return(sort(sortSeqlevels(c(otherGR, minusGR)), ignore.strand=TRUE)) +} diff --git a/R/GroupExport.R b/R/GroupExport.R index fdb15a07..51a46d87 100644 --- a/R/GroupExport.R +++ b/R/GroupExport.R @@ -361,7 +361,6 @@ getGroupBW <- function( }else{ stop("NormMethod not recognized!") } - } tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] @@ -380,4 +379,209 @@ getGroupBW <- function( } +#' Export to Monocle3 +#' +#' This function will return a monocle3 cell_data_set for a assay in a ArchRProject. +#' +#' @param ArchRProj An `ArchRProject` object. +#' @param useMatrix The name of the matrix in the ArrowFiles. See getAvailableMatrices to see options +#' @param threads An integer specifying the number of threads for parallel. +#' @param verbose A boolean specifying to print messages during computation. +#' @param logFile The path to a file to be used for logging ArchR output. +#' @import monocle3 +#' @import SingleCellExperiment +#' @export + +exportMonocle3 <- function( + ArchRProj = NULL, + useMatrix = NULL, + threads = getArchRThreads(), + verbose = TRUE, + binarize = T, + logFile = createLogFile("exportMonocle3") +){ + require(SingleCellExperiment) + require(monocle3) + ArchR:::.validInput(input = ArchRProj, name = "ArchRProj", valid = c("ArchRProj")) + ArchR:::.validInput(input = useMatrix, name = "useMatrix", valid = c("character")) + ArchR:::.validInput(input = threads, name = "threads", valid = c("integer")) + ArchR:::.validInput(input = verbose, name = "verbose", valid = c("boolean")) + ArchR:::.validInput(input = logFile, name = "logFile", valid = c("character")) + ArchR:::.startLogging(logFile = logFile) + ArchR:::.logThis(mget(names(formals()),sys.frame(sys.nframe())), "exportMonocle3 Input-Parameters", logFile = logFile) + + ArrowFiles <- getArrowFiles(ArchRProj) + ArrowFiles <- sapply(ArrowFiles, function(file) ArchR:::.validArrow(file)) + featureDF <- ArchR:::.getFeatureDF(ArrowFiles, subGroup = useMatrix) + Groups <- getCellColData(ArchRProj = ArchRProj) + Cells <- ArchRProj$cellNames + if(is.null(featureDF$end)){ + starts<-featureDF[as.character(featureDF$seqnames) %in% as.character(featureDF$seqnames)[1],]$start + width<-getmode(starts[-1]-starts[-length(starts)]) + ranges<-GRanges(seqnames = featureDF$seqnames, ranges = IRanges(start = featureDF$start, width = width), names=featureDF$idx) + }else{ + ranges<-GRanges(seqnames = featureDF$seqnames, ranges = IRanges(start = featureDF$start, end = featureDF$end), names=featureDF$idx) + } + ArchR:::.logMessage("Getting Group Matrix", logFile=logFile) + mat <- tryCatch({ + getMatrixFromArrow(ArrowFiles, useMatrix = useMatrix, binarize = binarize, cellNames = Cells, useSeqnames = as.character(seqnames(ranges))) + }, error = function(e){ + errorList <- list( + ArrowFiles = ArrowFiles, + featureDF = featureDF, + useMatrix = useMatrix, + threads = threads, + verbose = verbose + ) + ArchR:::.logError(e, fn = ".getMatrixFromArrow", info = "", errorList = errorList, logFile = logFile) + }) + o <- h5read(file = ArrowFiles, name = paste0("/", useMatrix,"/Info/Params")) + h5closeAll() + window_size<-getmode(o$tileSize) + mat@assays@data$counts<-mat@assays@data[[useMatrix]] + mat@assays@data[[useMatrix]]<-NULL + rs<-ArchR:::.getRowSums(ArrowFiles = ArrowFiles, useMatrix = useMatrix) + rowRanges(mat)<-ranges + mat<-mat[, match(rownames(ArchRProj@reducedDims$IterativeLSI$matSVD), colnames(mat))] + cds<-new_cell_data_set(expression_data = mat@assays@data$counts, cell_metadata = colData(mat)) + rowRanges(cds)<-rowRanges(mat) + cds<-cds[,rownames(ArchRProj@reducedDims$IterativeLSI$matSVD)] + reducedDims(cds)<-SimpleList(LSI=ArchRProj@reducedDims$IterativeLSI$matSVD, + UMAP=ArchRProj@embeddings$UMAP$df) + # irlba_rotation = ArchRProj@reducedDims$IterativeLSI$svd$u + # rownames(irlba_rotation) = ArchRProj@reducedDims$IterativeLSI$LSIFeatures$idx + iLSI<-SimpleList(svd=ArchRProj@reducedDims$IterativeLSI$svd, + features=ArchRProj@reducedDims$IterativeLSI$LSIFeatures, + row_sums = ArchRProj@reducedDims$IterativeLSI$rowSm, + seed=ArchRProj@reducedDims$IterativeLSI$seed, + binarize=ArchRProj@reducedDims$IterativeLSI$binarize, + scale_to=ArchRProj@reducedDims$IterativeLSI$scaleTo, + num_dim=ArchRProj@reducedDims$IterativeLSI$nDimensions, + resolution=NULL, + granges=ArchRProj@reducedDims$IterativeLSI$LSIFeatures, + LSI_method=ArchRProj@reducedDims$IterativeLSI$LSIMethod, outliers=NULL) + pp_aux <- SimpleList(iLSI=iLSI, gene_loadings=NULL) + cds@preprocess_aux <- pp_aux + if(is.null(cds@preprocess_aux$iLSI$granges$end)){ + starts<-cds@preprocess_aux$iLSI$granges[as.character(cds@preprocess_aux$iLSI$granges$seqnames) %in% as.character(cds@preprocess_aux$iLSI$granges$seqnames)[1],]$start + width<-getmode(starts[-1]-starts[-length(starts)]) + ranges<-GRanges(seqnames = cds@preprocess_aux$iLSI$granges$seqnames, ranges = IRanges(start = cds@preprocess_aux$iLSI$granges$start, width = width), names=cds@preprocess_aux$iLSI$granges$idx) + }else{ + ranges<-GRanges(seqnames = cds@preprocess_aux$iLSI$granges$seqnames, ranges = IRanges(start = cds@preprocess_aux$iLSI$granges$start, end = cds@preprocess_aux$iLSI$granges$end), names=cds@preprocess_aux$iLSI$granges$idx) + } + cds@preprocess_aux$iLSI$granges<-ranges + cds@clusters[["UMAP"]]$clusters[colnames(exprs(cds))]<-as.character(ArchRProj@cellColData[colnames(exprs(cds)),]$Clusters) + cds@reduce_dim_aux<-SimpleList(UMAP=SimpleList(scale_info=NULL, model_file=ArchRProj@embeddings$UMAP$params$uwotModel, num_dim=cds@preprocess_aux$iLSI$num_dim)) + cds + +} + +#' @export +getmode<-function(v) { + uniqv <- unique(v) + uniqv[which.max(tabulate(match(v, uniqv)))] +} + +.createGroupBW <- function( + i = NULL, + cellGroups = NULL, + ArrowFiles = NULL, + cellsInArrow = NULL, + availableChr = NULL, + chromLengths = NULL, + tiles = NULL, + ceiling = NULL, + tileSize = 100, + normMethod = NULL, + normBy = NULL, + bwDir = "bigwigs", + tstart = NULL, + verbose = TRUE, + logFile = NULL, + threads = 1 +){ + + .logDiffTime(sprintf("%s (%s of %s) : Creating BigWig for Group", names(cellGroups)[i], i, length(cellGroups)), tstart, logFile = logFile, verbose = verbose) + + #Cells + cellGroupi <- cellGroups[[i]] + #print(sum(normBy[cellGroupi, 1])) + + #Bigwig File! + covFile <- file.path(bwDir, paste0(make.names(names(cellGroups)[i]), "-TileSize-",tileSize,"-normMethod-",normMethod,"-ArchR.bw")) + rmf <- .suppressAll(file.remove(covFile)) + + covList <- .safelapply(seq_along(availableChr), function(k){ + + it <- 0 + + for(j in seq_along(ArrowFiles)){ + cellsInI <- sum(cellsInArrow[[names(ArrowFiles)[j]]] %in% cellGroupi) + if(cellsInI > 0){ + it <- it + 1 + if(it == 1){ + fragik <- .getFragsFromArrow(ArrowFiles[j], chr = availableChr[k], out = "GRanges", cellNames = cellGroupi) + }else{ + fragik <- c(fragik, .getFragsFromArrow(ArrowFiles[j], chr = availableChr[k], out = "GRanges", cellNames = cellGroupi)) + } + } + } + + tilesk <- tiles[BiocGenerics::which(seqnames(tiles) %bcin% availableChr[k])] + + if(length(fragik) == 0){ + + tilesk$reads <- 0 + + }else{ + + #N Tiles + nTiles <- trunc(chromLengths[availableChr[k]] / tileSize) + 1 + + #Create Sparse Matrix + matchID <- S4Vectors::match(mcols(fragik)$RG, cellGroupi) + + mat <- Matrix::sparseMatrix( + i = c(trunc(start(fragik) / tileSize), trunc(end(fragik) / tileSize)) + 1, + j = as.vector(c(matchID, matchID)), + x = rep(1, 2*length(fragik)), + dims = c(nTiles, length(cellGroupi)) + ) + + if(!is.null(ceiling)){ + mat@x[mat@x > ceiling] <- ceiling + } + + mat <- Matrix::rowSums(mat) + + rm(fragik, matchID) + + tilesk$reads <- mat + + if(tolower(normMethod) %in% c("readsintss", "readsinpromoter", "nfrags")){ + tilesk$reads <- tilesk$reads * 10^4 / sum(normBy[cellGroupi, 1]) + }else if(tolower(normMethod) %in% c("ncells")){ + tilesk$reads <- tilesk$reads / length(cellGroupi) + }else if(tolower(normMethod) %in% c("none")){ + }else{ + stop("NormMethod not recognized!") + } + + } + + tilesk <- coverage(tilesk, weight = tilesk$reads)[[availableChr[k]]] + + tilesk + + }, threads = threads) + + names(covList) <- availableChr + + covList <- as(covList, "RleList") + + rtracklayer::export.bw(object = covList, con = covFile) + + return(covFile) + +} diff --git a/R/IntegrativeAnalysis.R b/R/IntegrativeAnalysis.R index 94116f75..c86e3acb 100644 --- a/R/IntegrativeAnalysis.R +++ b/R/IntegrativeAnalysis.R @@ -813,7 +813,6 @@ addCoAccessibility <- function( o[idx,]$correlation <- as.numeric(corVals) o[idx,]$Variability1 <- rowVars[o[idx,]$idx1] o[idx,]$Variability2 <- rowVars[o[idx,]$idx2] - .logThis(groupMat, paste0("SubsetGroupMat-", x), logFile = logFile) .logThis(o[idx,], paste0("SubsetCoA-", x), logFile = logFile) diff --git a/R/ReproduciblePeakSet.R b/R/ReproduciblePeakSet.R index a5e47a53..302d43db 100644 --- a/R/ReproduciblePeakSet.R +++ b/R/ReproduciblePeakSet.R @@ -772,7 +772,7 @@ addReproduciblePeakSet <- function( #Create MACS2 Command cmd <- sprintf("callpeak -g %s --name %s --treatment %s --outdir %s --format BED --call-summits --keep-dup all %s", - genomeSize, basename(bedName), bedFile, dirname(bedName), additionalParams) + genomeSize, basename(bedName), .cl_safe(bedFile), .cl_safe(dirname(bedName)), additionalParams) if(!is.null(shift) & !is.null(extsize)){ cmd <- sprintf("%s --shift %s --extsize %s", cmd , shift, extsize) @@ -860,3 +860,13 @@ findMacs2 <- function(){ stop("Could Not Find Macs2! Please install w/ pip, add to your $PATH, or just supply the macs2 path directly and avoid this function!") } + +.cl_safe<-function(string){ + #this is a simple function for system calls from R to a command line tool that will check for any spaces and enclose the string with single + #quote if the string contains a space + if(grepl("\\s+", string, perl = TRUE)){ + return(sQuote(string, q="'")) + }else{ + return(string) + } +} diff --git a/data/.DS_Store b/data/.DS_Store deleted file mode 100644 index 5008ddfc..00000000 Binary files a/data/.DS_Store and /dev/null differ diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index a0589a20..00000000 Binary files a/docs/.DS_Store and /dev/null differ diff --git a/docs/articles/.DS_Store b/docs/articles/.DS_Store deleted file mode 100644 index 4a2be0f2..00000000 Binary files a/docs/articles/.DS_Store and /dev/null differ diff --git a/docs/articles/Articles/.DS_Store b/docs/articles/Articles/.DS_Store deleted file mode 100644 index 5008ddfc..00000000 Binary files a/docs/articles/Articles/.DS_Store and /dev/null differ diff --git a/man/.DS_Store b/man/.DS_Store deleted file mode 100644 index 5008ddfc..00000000 Binary files a/man/.DS_Store and /dev/null differ diff --git a/man/exportMonocle3.Rd b/man/exportMonocle3.Rd new file mode 100644 index 00000000..e55fe5bb --- /dev/null +++ b/man/exportMonocle3.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GroupExport.R +\name{exportMonocle3} +\alias{exportMonocle3} +\title{Export to Monocle3} +\usage{ +exportMonocle3( + ArchRProj = NULL, + useMatrix = NULL, + threads = getArchRThreads(), + verbose = TRUE, + binarize = T, + logFile = createLogFile("exportMonocle3") +) +} +\arguments{ +\item{ArchRProj}{An \code{ArchRProject} object.} + +\item{useMatrix}{The name of the matrix in the ArrowFiles. See getAvailableMatrices to see options} + +\item{threads}{An integer specifying the number of threads for parallel.} + +\item{verbose}{A boolean specifying to print messages during computation.} + +\item{logFile}{The path to a file to be used for logging ArchR output.} +} +\description{ +This function will return a monocle3 cell_data_set for a assay in a ArchRProject. +} diff --git a/man/projectData.Rd b/man/projectData.Rd new file mode 100644 index 00000000..d44008e7 --- /dev/null +++ b/man/projectData.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/BulkProjection.R +\name{projectData} +\alias{projectData} +\title{Project single cell data into single cell subspace} +\usage{ +projectData( + projector = NULL, + projectee = NULL, + reducedDims = "IterativeLSI", + embedding = "UMAP", + verbose = TRUE, + threads = getArchRThreads(), + logFile = createLogFile("projectBulkATAC") +) +} +\arguments{ +\item{projector}{An \code{ArchRProject} object containing the dimensionality reduction matrix passed by \code{reducedDims}.} + +\item{projectee}{a single cell Summarized Experiment containing the cells to be .} + +\item{reducedDims}{A string specifying the reducedDims.} + +\item{embedding}{A string specifying embedding.} + +\item{verbose}{A boolean value indicating whether to use verbose output during execution of this function. Can be set to FALSE for a cleaner output.} + +\item{threads}{The number of threads used for parallel execution} + +\item{logFile}{The path to a file to be used for logging ArchR output.} +} +\description{ +This function will Project Bulk ATAC-seq data into single cell subspace. +} diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index 5008ddfc..00000000 Binary files a/src/.DS_Store and /dev/null differ