@@ -78,8 +78,9 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
     X <- formula
   }
   rownames(X) <- obj$sample_to_covariates$sample
-  A <- solve( t(X) %*% X )
+  A <- solve(t(X) %*% X)

+  msg("fitting measurement error models")
   mes <- me_model_by_row(obj, X, obj$bs_summary)
   tid <- names(mes)
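For orientation: `me_model_by_row` fits one ordinary least squares model per transcript, and the `sigma_q_sq` passed in alongside the observed counts is the bootstrap (inferential) variance, which the variable names in this file suggest is subtracted from the residual variance to leave a biological component. A minimal sketch of that per-transcript fit, for illustration only; the helper name and the returned fields are assumptions, not the committed code:

# sketch only (not part of this commit): per-transcript OLS fit with the
# inferential (bootstrap) variance removed from the residual variance
me_model_sketch <- function(X, y, sigma_q_sq) {
  fit <- lm.fit(X, y)                 # ordinary least squares on the design matrix
  rss <- sum(fit$residuals ^ 2)       # residual sum of squares
  df  <- nrow(X) - ncol(X)            # residual degrees of freedom
  sigma_sq <- rss / df - sigma_q_sq   # raw residual variance minus bootstrap variance
  list(ols_fit = fit, sigma_sq = sigma_sq, sigma_q_sq = sigma_q_sq)
}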
@@ -109,13 +110,12 @@ sleuth_fit <- function(obj, formula = NULL, fit_name = NULL, ...) {
   l_smooth <- dplyr::mutate(l_smooth,
     smooth_sigma_sq_pmax = pmax(smooth_sigma_sq, sigma_sq))

-
   msg('computing variance of betas')
   beta_covars <- lapply(1:nrow(l_smooth),
     function(i) {
       row <- l_smooth[i, ]
       with(row,
-        covar_beta(smooth_sigma_sq_pmax + sigma_q_sq , X, A)
+        covar_beta(smooth_sigma_sq_pmax + sigma_q_sq, X, A)
         )
     })
   names(beta_covars) <- l_smooth$target_id
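Here `A` is the `(X'X)^{-1}` computed above, so `covar_beta(smooth_sigma_sq_pmax + sigma_q_sq, X, A)` presumably returns the usual OLS covariance of the coefficient estimates scaled by the total (smoothed biological plus inferential) variance. A hedged sketch under that assumption; the package's actual `covar_beta` may differ:

# sketch only: Var(beta_hat) = sigma^2 (X'X)^{-1} for a given total variance
covar_beta_sketch <- function(total_var, X, A = solve(t(X) %*% X)) {
  total_var * A
}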
@@ -266,8 +266,7 @@ me_model_by_row <- function(obj, design, bs_summary) {
   stopifnot(length(bs_summary$sigma_q_sq) == nrow(bs_summary$obs_counts))

   models <- lapply(1:nrow(bs_summary$obs_counts),
-    function(i)
-    {
+    function(i) {
       me_model(design, bs_summary$obs_counts[i, ], bs_summary$sigma_q_sq[i])
     })
   names(models) <- rownames(bs_summary$obs_counts)
@@ -305,7 +304,7 @@ me_heteroscedastic_by_row <- function(obj, design, samp_bs_summary, obs_counts)
   models <- lapply(1:nrow(bs_summary$obs_counts),
     function(i) {
       res <- me_white_model(design, obs_counts[i, ], sigma_q_sq[i, ], A)
-      res$df$target_id = rownames(obs_counts)[i]
+      res$df$target_id <- rownames(obs_counts)[i]
       res
     })
   names(models) <- rownames(obs_counts)
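The `me_white_*` naming suggests a White (heteroscedasticity-consistent) covariance estimate, with `tXX_inv` playing the role of `(X'X)^{-1}` in the sandwich formula. A sketch of that estimator under this assumption; it is not the committed implementation:

# sketch only: sandwich estimator (X'X)^{-1} X' diag(e^2) X (X'X)^{-1}
white_covar_sketch <- function(X, resid_sq, tXX_inv = solve(t(X) %*% X)) {
  meat <- t(X) %*% (X * resid_sq)   # X' diag(resid_sq) X; each row of X scaled by its squared residual
  tXX_inv %*% meat %*% tXX_inv
}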
@@ -354,8 +353,14 @@ me_white_var <- function(df, sigma_col, sigma_q_col, X, tXX_inv) {
   res
 }

+
+
 #' @export
-bs_sigma_summary <- function(obj, transform = identity) {
+bs_sigma_summary <- function(obj, transform = identity, norm_by_length = FALSE) {
+  # if (norm_by_length) {
+  #   scaling_factor <- get_scaling_factors(obj$obs_raw)
+  #   reads_per_base_transform()
+  # }
   obs_counts <- obs_to_matrix(obj, "est_counts")
   obs_counts <- transform(obs_counts)
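`bs_sigma_summary` gains a `norm_by_length` argument here, but the code that would use it is still commented out, so the function continues to summarize `est_counts`. A hypothetical call with an illustrative offset-log transform (the transform is a placeholder, not something this commit mandates):

# hypothetical usage: transformed count matrix plus per-target bootstrap variance
bs <- bs_sigma_summary(obj, transform = function(x) log(x + 0.5))
str(bs$obs_counts)    # matrix of transformed counts, one row per target_id
head(bs$sigma_q_sq)   # mean bootstrap (inferential) variance per target_id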
@@ -373,8 +378,114 @@ bs_sigma_summary <- function(obj, transform = identity) {
   list(obs_counts = obs_counts, sigma_q_sq = bs_sigma)
 }

-me_model <- function(X, y, sigma_q_sq)
-{
+# transform reads into reads per base
+#
+#
+reads_per_base_transform <- function(reads_table, scale_factor_input,
+  collapse_column = NULL,
+  mapping = NULL,
+  norm_by_length = TRUE) {
+
+  if (is(scale_factor_input, 'data.frame')) {
+    # message('USING NORMALIZATION BY EFFECTIVE LENGTH')
+    # browser()
+    reads_table <- dplyr::left_join(
+      data.table::as.data.table(reads_table),
+      data.table::as.data.table(dplyr::select(scale_factor_input, target_id, sample, scale_factor)),
+      by = c('sample', 'target_id'))
+  } else {
+    reads_table <- dplyr::mutate(reads_table, scale_factor = scale_factor_input)
+  }
+  # browser()
+  reads_table <- dplyr::mutate(reads_table,
+    reads_per_base = est_counts / eff_len,
+    scaled_reads_per_base = scale_factor * reads_per_base
+    )
+
+  reads_table <- data.table::as.data.table(reads_table)
+
+  if (!is.null(collapse_column)) {
+    mapping <- data.table::as.data.table(mapping)
+    # old stuff
+    if (!(collapse_column %in% colnames(reads_table))) {
+      reads_table <- dplyr::left_join(reads_table, mapping, by = 'target_id')
+    }
+    # browser()
+    # reads_table <- dplyr::left_join(reads_table, mapping, by = 'target_id')
+
+    rows_to_remove <- !is.na(reads_table[[collapse_column]])
+    reads_table <- dplyr::filter(reads_table, rows_to_remove)
+    if ('sample' %in% colnames(reads_table)) {
+      reads_table <- dplyr::group_by_(reads_table, 'sample', collapse_column)
+    } else {
+      reads_table <- dplyr::group_by_(reads_table, collapse_column)
+    }
+
+    reads_table <- dplyr::summarize(reads_table,
+      scaled_reads_per_base = sum(scaled_reads_per_base))
+    data.table::setnames(reads_table, collapse_column, 'target_id')
+  }
+
+  as_df(reads_table)
+}
+
+gene_summary <- function(obj, which_column, transform = identity, norm_by_length = TRUE) {
+  # stopifnot(is(obj, 'sleuth'))
+  msg(paste0('aggregating by column: ', which_column))
+  obj_mod <- obj
+  if (norm_by_length) {
+    tmp <- obj$obs_raw
+    # tmp <- as.data.table(tmp)
+    tmp <- dplyr::left_join(
+      data.table::as.data.table(tmp),
+      data.table::as.data.table(obj$target_mapping),
+      by = 'target_id')
+    tmp <- dplyr::group_by_(tmp, 'sample', which_column)
+    scale_factor <- dplyr::mutate(tmp, scale_factor = median(eff_len))
+  } else {
+    scale_factor <- median(obj_mod$obs_norm_filt$eff_len)
+  }
+  # scale_factor <- median(obj_mod$obs_norm_filt$eff_len)
+  obj_mod$obs_norm_filt <- reads_per_base_transform(obj_mod$obs_norm_filt,
+    scale_factor, which_column, obj$target_mapping, norm_by_length)
+  obj_mod$obs_norm <- reads_per_base_transform(obj_mod$obs_norm,
+    scale_factor, which_column, obj$target_mapping, norm_by_length)
+
+  obs_counts <- obs_to_matrix(obj_mod, "scaled_reads_per_base")
+  obs_counts <- transform(obs_counts)
+
+  obj_mod$kal <- parallel::mclapply(seq_along(obj_mod$kal),
+    function(i) {
+      k <- obj_mod$kal[[i]]
+      current_sample <- obj_mod$sample_to_covariates$sample[i]
+      msg(paste('aggregating across sample: ', current_sample))
+      k$bootstrap <- lapply(k$bootstrap, function(b) {
+        b <- dplyr::mutate(b, sample = current_sample)
+        reads_per_base_transform(b, scale_factor, which_column,
+          obj$target_mapping, norm_by_length)
+      })
+
+      k
+    })
+
+  bs_summary <- sleuth_summarize_bootstrap_col(obj_mod, "scaled_reads_per_base",
+    transform)
+
+  bs_summary <- dplyr::group_by(bs_summary, target_id)
+  # FIXME: the column name 'bs_var_est_counts' is incorrect. should actually rename it above
+  bs_summary <- dplyr::summarise(bs_summary,
+    sigma_q_sq = mean(bs_var_scaled_reads_per_base))
+
+  bs_summary <- as_df(bs_summary)
+
+  bs_sigma <- bs_summary$sigma_q_sq
+  names(bs_sigma) <- bs_summary$target_id
+  bs_sigma <- bs_sigma[rownames(obs_counts)]
+
+  list(obs_counts = obs_counts, sigma_q_sq = bs_sigma)
+}
+
+me_model <- function(X, y, sigma_q_sq) {
   n <- nrow(X)
   degrees_free <- n - ncol(X)
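The new `gene_summary` mirrors `bs_sigma_summary` but first collapses transcripts to genes: estimated counts are divided by effective length, scaled, summed within the column named by `which_column` (taken from `obj$target_mapping`), and each sample's bootstraps are re-aggregated the same way before the variance summary. A hypothetical call; the gene column name and transform below are placeholders, not values fixed by this commit:

# hypothetical usage: build a gene-level summary before model fitting,
# assuming obj$target_mapping has a column named 'ens_gene'
gene_bs <- gene_summary(obj, which_column = 'ens_gene',
  transform = function(x) log(x + 0.5), norm_by_length = TRUE)
dim(gene_bs$obs_counts)    # one row per gene after aggregation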