@@ -7,6 +7,7 @@ suppressPackageStartupMessages(library(magrittr))
7
7
suppressPackageStartupMessages(library(tidyr ))
8
8
suppressPackageStartupMessages(library(reshape2 ))
9
9
suppressPackageStartupMessages(library(tibble ))
10
+ suppressPackageStartupMessages(library(stringr ))
10
11
11
12
# # check_replicate_cor
12
13
# # checks that technical and biological replicates are all well correlated with each other
@@ -18,7 +19,7 @@ suppressPackageStartupMessages(library(tibble))
18
19
check_replicate_cor = function (normalized_counts , out ) {
19
20
tech_rep_cor = normalized_counts %> %
20
21
filter(is.na(Name )) %> %
21
- dcast(CCLE_name ~ sample_ID + bio_rep + tech_rep , value.var = " log_normalized_n" ) %> %
22
+ dcast(CCLE_name ~ profile_id + bio_rep + tech_rep , value.var = " log_normalized_n" ) %> %
22
23
dplyr :: select(- CCLE_name ) %> %
23
24
cor(use = " complete.obs" ) %> % as.data.frame()
24
25
@@ -27,31 +28,25 @@ check_replicate_cor = function(normalized_counts, out) {
27
28
28
29
tech_rep_cor_long = tech_rep_cor %> %
29
30
rownames_to_column(" sample_1" ) %> %
30
- melt(id.vars = " sample_1" , variable.name = " sample_2" , value.name = " cor" ) %> %
31
- mutate(sample_ID_1 = as.character(sample_1 ) %> % purrr :: map(strsplit , " _" ) %> % purrr :: map(`[[` , 1 ) %> % purrr :: map(`[` , 1 ) %> % unlist(),
32
- sample_ID_2 = as.character(sample_2 ) %> % purrr :: map(strsplit , " _" ) %> % purrr :: map(`[[` , 1 ) %> % purrr :: map(`[` , 1 ) %> % unlist()) %> %
33
- filter(sample_ID_1 == sample_ID_2 ) %> %
34
- mutate(bio_rep_1 = as.character(sample_1 ) %> % purrr :: map(strsplit , " _" ) %> % purrr :: map(`[[` , 1 ) %> % purrr :: map(`[` , 2 ) %> % unlist(),
35
- bio_rep_2 = as.character(sample_2 ) %> % purrr :: map(strsplit , " _" ) %> % purrr :: map(`[[` , 1 ) %> % purrr :: map(`[` , 2 ) %> % unlist()) %> %
36
- filter(bio_rep_1 == bio_rep_2 ) %> %
37
- mutate(tech_rep_1 = as.character(sample_1 ) %> % purrr :: map(strsplit , " _" ) %> % purrr :: map(`[[` , 1 ) %> % purrr :: map(`[` , 3 ) %> % unlist(),
38
- tech_rep_2 = as.character(sample_2 ) %> % purrr :: map(strsplit , " _" ) %> % purrr :: map(`[[` , 1 ) %> % purrr :: map(`[` , 3 ) %> % unlist()) %> %
39
- filter(tech_rep_2 > tech_rep_1 ) %> %
40
- dplyr :: rename(sample_ID = sample_ID_1 , bio_rep = bio_rep_1 ) %> %
41
- dcast(sample_ID + bio_rep ~ tech_rep_1 + tech_rep_2 , value.var = " cor" )
31
+ melt(id.vars = " sample_1" , variable.name = " sample_2" , value.name = " tech_rep_cor" ) %> %
32
+ mutate(sample_1 = gsub(' .{2}$' , ' ' , sample_1 ),
33
+ sample_2 = gsub(' .{2}$' , ' ' , sample_2 )) %> %
34
+ filter(sample_1 == sample_2 ) %> %
35
+ dplyr :: rename(profile_id = sample_1 ) %> %
36
+ dplyr :: select(profile_id , tech_rep_cor )
42
37
43
38
trep_long_out = paste(args $ out , " tech_rep_cor_long.csv" , sep = ' /' )
44
- write.csv(tech_rep_cor_long , trep_long_out , row.names = T , quote = F )
39
+ write.csv(tech_rep_cor_long , trep_long_out , row.names = F , quote = F )
45
40
46
41
tech_collapsed_counts = normalized_counts %> %
47
42
filter(is.na(Name )) %> %
48
- dplyr :: select(- Name , - log_dose , - n , - log_n , - log_normalized_n , - profile_id ) %> %
43
+ dplyr :: select(- Name , - log_dose , - n , - log_n , - log_normalized_n ) %> %
49
44
group_by_at(setdiff(names(. ), c(" normalized_n" , " tech_rep" ))) %> %
50
45
dplyr :: summarise(sum_normalized_n = sum(normalized_n )) %> %
51
46
ungroup()
52
47
53
48
bio_rep_cor = tech_collapsed_counts %> %
54
- dcast(CCLE_name ~ sample_ID + bio_rep , value.var = " sum_normalized_n" ) %> %
49
+ dcast(CCLE_name ~ profile_id + bio_rep , value.var = " sum_normalized_n" ) %> %
55
50
dplyr :: select(- CCLE_name ) %> %
56
51
cor(use = " complete.obs" ) %> %
57
52
as.data.frame()
@@ -62,17 +57,17 @@ check_replicate_cor = function(normalized_counts, out) {
62
57
bio_rep_cor_long = bio_rep_cor %> %
63
58
rownames_to_column(" sample_1" ) %> %
64
59
melt(id.vars = " sample_1" , variable.name = " sample_2" , value.name = " cor" ) %> %
65
- mutate(sample_ID_1 = as.character( sample_1 ) % > % purrr :: map( strsplit , " _ " ) % > % purrr :: map( `[[` , 1 ) % > % purrr :: map( `[` , 1 ) % > % unlist( ),
66
- sample_ID_2 = as.character( sample_2 ) % > % purrr :: map( strsplit , " _ " ) % > % purrr :: map( `[[` , 1 ) % > % purrr :: map( `[` , 1 ) % > % unlist( )) %> %
60
+ mutate(sample_ID_1 = gsub( ' .{2}$ ' , ' ' , sample_1 ),
61
+ sample_ID_2 = gsub( ' .{2}$ ' , ' ' , sample_2 )) %> %
67
62
filter(sample_ID_1 == sample_ID_2 ) %> %
68
- mutate(bio_rep_1 = as.character(sample_1 ) %> % purrr :: map(strsplit , " _ " ) % > % purrr :: map( `[[` , 1 ) % > % purrr :: map( `[` , 2 ) %> % unlist(),
69
- bio_rep_2 = as.character(sample_2 ) %> % purrr :: map(strsplit , " _ " ) % > % purrr :: map( `[[` , 1 ) % > % purrr :: map( `[` , 2 ) %> % unlist()) %> %
63
+ mutate(bio_rep_1 = as.character(sample_1 ) %> % purrr :: map(str_sub , - 1 , - 1 ) %> % unlist(),
64
+ bio_rep_2 = as.character(sample_2 ) %> % purrr :: map(str_sub , - 1 , - 1 ) %> % unlist()) %> %
70
65
filter(bio_rep_2 > bio_rep_1 ) %> %
71
- dplyr :: rename(sample_ID = sample_ID_1 ) %> %
72
- dcast(sample_ID ~ bio_rep_1 + bio_rep_2 , value.var = " cor" )
66
+ dplyr :: rename(profile_id = sample_ID_1 ) %> %
67
+ dcast(profile_id ~ bio_rep_1 + bio_rep_2 , value.var = " cor" )
73
68
74
69
brep_long_out = paste(args $ out , " bio_rep_cor_long.csv" , sep = ' /' )
75
- write.csv(bio_rep_cor_long , brep_long_out , row.names = T , quote = F )
70
+ write.csv(bio_rep_cor_long , brep_long_out , row.names = F , quote = F )
76
71
}
77
72
78
73
0 commit comments