@@ -101,10 +101,12 @@ get_bootstraps.kallisto <- function(kal, transcript, max_bs = 30) {
101
101
# @param kal a kallisto object
102
102
# @param column the column to pull out of the kallisto results (default = "tpm")
103
103
# @return a molten data.frame with columns "target_id", "sample" and the selected variable
104
+ # @importFrom dplyr %>%
104
105
# @export
105
106
melt_bootstrap <- function (kal , column = " tpm" , transform = identity ) {
106
107
stopifnot(is(kal , " kallisto" ))
107
108
stopifnot(length(kal $ bootstrap ) > 0 )
109
+ `%>%` <- dplyr :: `%>%`
108
110
109
111
all_boot <- kal $ bootstrap
110
112
boot <- data.frame (lapply(all_boot , select_ , .dots = list (column )))
@@ -129,11 +131,13 @@ melt_bootstrap <- function(kal, column = "tpm", transform = identity) {
129
131
# @param aggregate_fun a function to aggregate
130
132
# @return a data.frame nrow(mapping) rows that has been aggregated
131
133
# groupwise using \code{aggregate_fun}
134
+ # @importFrom dplyr %>%
132
135
# @export
133
136
aggregate_bootstrap <- function (kal , mapping , split_by = " gene_id" ,
134
137
column = " tpm" , aggregate_fun = sum ) {
135
138
136
139
stopifnot( is(kal , " kallisto" ) )
140
+ `%>%` <- dplyr :: `%>%`
137
141
138
142
if ( ! (column %in% c(" tpm" , " est_counts" )) ) {
139
143
stop(" Unit must be 'tpm' or 'est_counts'" )
@@ -177,9 +181,12 @@ aggregate_bootstrap <- function(kal, mapping, split_by = "gene_id",
177
181
# @param kal a kallisto object with a non-null bootstrap list
178
182
# @param column the column to select (rho, tpm, est_counts
179
183
# @return a summarized data.frame
184
+ # @importFrom dplyr %>%
180
185
# @export
181
186
summarize_bootstrap <- function (kal , column = " tpm" , transform = identity ) {
182
187
stopifnot(is(kal , " kallisto" ))
188
+ `%>%` <- dplyr :: `%>%`
189
+
183
190
bs <- melt_bootstrap(kal , column , transform )
184
191
185
192
mean_col <- paste0(" bs_mean_" , column )
@@ -256,11 +263,12 @@ get_bootstrap_summary <- function(obj, target_id, units = 'est_counts') {
256
263
stop(paste0(" '" , units , " ' is invalid for 'units'. please see documentation" ))
257
264
}
258
265
259
- if (is.null(obj $ bs_quants )) {
260
- if (units == ' est_counts' ) {
261
- stop(" bootstrap summary missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE'" )
266
+ if (is.null(obj $ bs_quants ) | is.null( obj $ bs_quants [[ 1 ]][[ units ]]) ) {
267
+ if (units %in% c( ' est_counts' , ' scaled_reads_per_base ' ) ) {
268
+ stop(" bootstrap summary appears to be missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE'" )
262
269
} else {
263
- stop(" bootstrap summary missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE' and 'read_bootstrap_tpm = TRUE'" )
270
+ stop(" bootstrap summary appears to be missing. rerun sleuth_prep() with argument 'extra_bootstrap_summary = TRUE' " ,
271
+ " and 'read_bootstrap_tpm = TRUE'" )
264
272
}
265
273
}
266
274
@@ -312,7 +320,7 @@ sample_bootstrap <- function(obj, n_samples = 100L) {
312
320
mat <- matrix (NA_real_ , nrow = nrow(obj $ kal [[1 ]]$ abundance ),
313
321
ncol = nrow(which_samp ))
314
322
rownames(mat ) <- obj $ kal [[1 ]]$ abundance $ target_id
315
- colnames(mat ) <- obj $ sample_to_condition $ sample
323
+ colnames(mat ) <- obj $ sample_to_covariates $ sample
316
324
mat
317
325
})
318
326
@@ -376,13 +384,15 @@ process_bootstrap <- function(i, samp_name, kal_path,
376
384
read_bootstrap_tpm , gene_mode ,
377
385
extra_bootstrap_summary ,
378
386
target_id , mappings , which_ids ,
379
- aggregation_column , transform_fun )
387
+ aggregation_column , transform_fun_counts ,
388
+ transform_fun_tpm , max_bootstrap )
380
389
{
381
390
dot(i )
382
391
bs_quants <- list ()
383
392
384
393
num_bootstrap <- as.integer(rhdf5 :: h5read(kal_path $ path ,
385
394
" aux/num_bootstrap" ))
395
+ num_bootstrap <- min(num_bootstrap , max_bootstrap )
386
396
if (num_bootstrap == 0 ) {
387
397
stop(paste0(" File " , kal_path , " has no bootstraps." ,
388
398
" Please generate bootstraps using \" kallisto quant -b\" ." ))
@@ -396,17 +406,16 @@ process_bootstrap <- function(i, samp_name, kal_path,
396
406
est_count_sf = est_count_sf )
397
407
398
408
if (read_bootstrap_tpm ) {
399
- bs_quant_tpm <- aperm(apply(bs_mat , 1 , counts_to_tpm ,
409
+ bs_tpm <- aperm(apply(bs_mat , 1 , counts_to_tpm ,
400
410
eff_len ))
401
- colnames(bs_quant_tpm ) <- colnames(bs_mat )
411
+ colnames(bs_tpm ) <- colnames(bs_mat )
402
412
403
413
# gene level code is analogous here to below code
404
414
if (gene_mode ) {
405
- colnames(bs_quant_tpm ) <- target_id
406
415
# Make bootstrap_num an explicit column; each is treated as a "sample"
407
416
bs_tpm_df <- data.frame (bootstrap_num = c(1 : num_bootstrap ),
408
- bs_quant_tpm , check.names = F )
409
- rm(bs_quant_tpm )
417
+ bs_tpm , check.names = F )
418
+ rm(bs_tpm )
410
419
# Make long tidy table; this step is much faster
411
420
# using data.table melt rather than tidyr gather
412
421
tidy_tpm <- data.table :: melt(bs_tpm_df , id.vars = " bootstrap_num" ,
@@ -423,13 +432,14 @@ process_bootstrap <- function(i, samp_name, kal_path,
423
432
# see: http://stackoverflow.com/a/31295592
424
433
quant_tpm_formula <- paste(" bootstrap_num ~" ,
425
434
aggregation_column )
426
- bs_quant_tpm <- data.table :: dcast(tidy_tpm ,
435
+ bs_tpm <- data.table :: dcast(tidy_tpm ,
427
436
quant_tpm_formula , value.var = " tpm" ,
428
437
fun.aggregate = sum )
429
- bs_quant_tpm <- as.matrix(bs_quant_tpm [, - 1 ])
438
+ bs_tpm <- as.matrix(bs_tpm [, - 1 ])
430
439
rm(tidy_tpm ) # these tables are very large
431
440
}
432
- bs_quant_tpm <- aperm(apply(bs_quant_tpm , 2 ,
441
+ bs_tpm <- transform_fun_tpm(bs_tpm [, which_ids ])
442
+ bs_quant_tpm <- aperm(apply(bs_tpm , 2 ,
433
443
quantile ))
434
444
colnames(bs_quant_tpm ) <- c(" min" , " lower" , " mid" ,
435
445
" upper" , " max" )
@@ -483,6 +493,7 @@ process_bootstrap <- function(i, samp_name, kal_path,
483
493
rm(tidy_bs , scaled_bs )
484
494
}
485
495
496
+ bs_mat <- transform_fun_counts(bs_mat [, which_ids ])
486
497
if (extra_bootstrap_summary ) {
487
498
bs_quant_est_counts <- aperm(apply(bs_mat , 2 ,
488
499
quantile ))
@@ -491,20 +502,24 @@ process_bootstrap <- function(i, samp_name, kal_path,
491
502
bs_quants $ est_counts <- bs_quant_est_counts
492
503
}
493
504
494
- bs_mat <- transform_fun(bs_mat )
495
505
# If bs_mat was made at gene-level, already has column names
496
506
# If at transcript-level, need to add target_ids
497
- if (! gene_mode ) {
498
- colnames(bs_mat ) <- target_id
499
- } else {
507
+ if (gene_mode & extra_bootstrap_summary ) {
500
508
# rename est_counts to scaled_reads_per_base
501
509
bs_quants $ scaled_reads_per_base <- bs_quants $ est_counts
502
510
bs_quants $ est_counts <- NULL
503
511
}
504
512
# all_sample_bootstrap[, i] bootstrap point estimate of the inferential
505
513
# variability in sample i
506
514
# NOTE: we are only keeping the ones that pass the filter
507
- bootstrap_result <- matrixStats :: colVars(bs_mat [, which_ids ] )
515
+ bootstrap_result <- matrixStats :: colVars(bs_mat )
508
516
509
- list (index = i , bs_quants = bs_quants , bootstrap_result = bootstrap_result )
517
+ if (read_bootstrap_tpm ) {
518
+ tpm_result <- matrixStats :: colVars(bs_tpm )
519
+ list (index = i , bs_quants = bs_quants , bootstrap_result = bootstrap_result ,
520
+ bootstrap_tpm_result = tpm_result )
521
+ } else {
522
+ list (index = i , bs_quants = bs_quants , bootstrap_result = bootstrap_result ,
523
+ bootstrap_tpm_result = NULL )
524
+ }
510
525
}
0 commit comments