-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGenericHelpers.R
More file actions
executable file
·111 lines (94 loc) · 4.39 KB
/
GenericHelpers.R
File metadata and controls
executable file
·111 lines (94 loc) · 4.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
nun <- function(lst){
return( length(unique(lst)) )
}
substrRight <- function(string,start,stop = 0){
#expects negative numbers
l <- nchar( string )
return( substr(string, l+start+1, l+stop) )
}
substrLeft <- function(string,start,stop = 0){
#expects negative numbers
l <- nchar(string)
return(substr(string, start, l+stop))
}
strReverse <- function(x){
sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")}
#' @param replace 2 column data frame/table with the recognition patterns in the first columns and replacements in the second column.
#' @param remove a character vector of strings to remove
multiGsub <- function (characterVector, replace, remove = ""){
characterVector <- gsub(remove, "", characterVector)
for (x in 1:nrow(replace)){
print(x)
characterVector <- ifelse(grepl(replace[x,1],characterVector), gsub(replace[x,1], replace[x,2],characterVector),characterVector)
}
return(characterVector)
}
multiGrepl <- function(patterns, v){
logi <- grepl(patterns[1],v)
for (i in patterns[2:length(patterns)]){
logi <- grepl(i,v) | logi
}
return(logi)
}
collapseVecPairwise <- function(vec, sep = "_"){
n <- length(vec)
if (n==2){
return( paste(vec[1], vec[2], sep = sep) )
} else {
return( c( paste(vec[1], vec[2:n], sep = sep), collapseVecPairwise( vec[2:n], sep = sep )) )
}
}
LoadPackagesFromCSV <- function(fpath, inclBioc = T){
s
packageTable <- read.csv(fpath)
packageList <- packageTable$Package
packagesToInstall <- packageList[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(packagesToInstall)){
if ( sum(grepl("Bioc", packageTable[packagesToInstall,]$Depends)) > 0){
if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install( packageList[grepl("Bioc", packageTable[packagesToInstall,]$Depends)] )
install.packages(packageList[!grepl("Bioc", packageTable[packagesToInstall,]$Depends)] )
} else {
install.packages( packagesToInstall )
}
}
}
#' FUNCTION: freadDirectory()
#' This function reads all parse-able files into the parent environment as a data.table(s)
#' Some important functionalities include options to load files from all subdirectories, to supply names for each created table,
#' to limit the filetypes parsed, and to rbind all data into one data.table with the original file name (or supplied names) as a column.
#' @param directory character vector that says where to look
#' @param Names list of character vectors to name created data.tables, applied in alphabetical order to files in directory. if this is
#' shorter than the number of files to read, only the first files will be named and remaining files will use their filenames
#' @param recursive boolean to go into all subdirectories and look for files there too. this is all or nothing, so make sure only files you want to read are in there
#' @param patterns regex character vector to match the file extensions to parse. gzipped files matching this regex are also read.
#' @param separate boolean to assign individual data.tables in parent envir or if FALSE, rbind into one data.table THAT IS RETURNED
#' @param verbose boolean to print names of files read and objects created
freadDirectory <- function(directory = ".", Names = NULL, recursive = T, patterns = "\\.csv|\\.tsv|\\.txt|\\.table", separate = TRUE, verbose = T){
files <- list.files(path = directory, pattern = patterns, recursive = recursive)
if (is.null(Names)) {
Names <- gsub(paste0(patterns, "|\\.gz"),"", files)
} else {
if(length(Names) < length(files)){
append( Names, files[length(Names):length(files)] )
}
}
for (i in 1:length(files)){
if (separate){
assign(Names[i], data.table::fread(file.path(directory,files[i])), envir = parent.env(environment()))
if (verbose) print(sprintf("Created data.table of %s named %s", files[i], Names[i]))
} else {
rbind()
}
}
}
library(parallel)
availCoresForParallel <- function(leaveFree = 0.66){
# Check how many times session memory can fit into avail memory, as a rule, leave 2/3 of available memory
gc()
sessMem <- sum(memory.profile())#(gc()[, 6] |> sum()) * 1000
availMem <- as.numeric(system("awk '/MemFree/ {print $2}' /proc/meminfo", intern=TRUE))
nCoresMem <- floor((availMem * (1-leaveFree) ) / sessMem)
nCoresSys <- max(1L, round(detectCores()*0.8), na.rm = TRUE)
return(min(nCoresSys, nCoresMem))
}