@@ -7,10 +7,20 @@ library(Matrix)
7
7
library(matrixStats )
8
8
9
9
# # VIASH START
10
+ dir <- " work/66/65ddd5a686b4c712172ff7aa70cce8/_viash_par"
10
11
par <- list (
11
- input_spatial_dataset = " resources_test/spatialsimbench_mobnew/dataset_sp.h5ad" ,
12
- input_singlecell_dataset = " resources_test/spatialsimbench_mobnew/dataset_sc.h5ad" ,
13
- input_simulated_dataset = " resources_test/spatialsimbench_mobnew/simulated_dataset.h5ad" ,
12
+ input_spatial_dataset = paste0(
13
+ dir ,
14
+ " /input_spatial_dataset_1/dataset_sp.h5ad"
15
+ ),
16
+ input_singlecell_dataset = paste0(
17
+ dir ,
18
+ " /input_singlecell_dataset_1/dataset_sc.h5ad"
19
+ ),
20
+ input_simulated_dataset = paste0(
21
+ dir ,
22
+ " /input_simulated_dataset_1/spatialsimbench_mobnew.negative_normal.generate_sim_spatialcluster.output_sp.h5ad"
23
+ ),
14
24
output = " output.h5ad"
15
25
)
16
26
meta <- list (
@@ -20,88 +30,177 @@ meta <- list(
20
30
21
31
# Read the three h5ad files named in `par` and pull out the "counts" layer of
# the real spatial dataset and of the simulated dataset.
cat("Reading input files\n")
input_spatial_dataset <- anndata::read_h5ad(
  par[["input_spatial_dataset"]]
)
input_singlecell_dataset <- anndata::read_h5ad(
  par[["input_singlecell_dataset"]]
)
input_simulated_dataset <- anndata::read_h5ad(
  par[["input_simulated_dataset"]]
)

# Count matrices compared by every metric computed further down.
real_counts <- input_spatial_dataset$layers[["counts"]]
sim_counts <- input_simulated_dataset$layers[["counts"]]
28
40
41
#' Run a two-sample KDE test, retrying once with jittered data on error.
#'
#' The test is first run on the data as given. If it raises an error, a
#' warning carrying the original error message is emitted and the test is
#' repeated after adding independent uniform noise to every value of both
#' samples.
#'
#' @param x1,x2 The two samples: numeric vectors, matrices or data frames of
#'   numeric columns.
#' @param kde_fun Function invoked as `kde_fun(x1 = , x2 = )`. Defaults to
#'   `ks::kde.test`.
#' @param noise Half-width of the uniform jitter used for the retry.
#' @return Whatever `kde_fun` returns. An error raised by the retry itself is
#'   not caught.
try_kde_test <- function(x1, x2, kde_fun = ks::kde.test, noise = 1e-8) {
  # Jitter every value independently. `length()` of a data frame is its
  # number of columns, so a single `runif(length(x))` vector would be recycled
  # over the rows and leave tied values tied; jitter column by column instead.
  add_noise <- function(x) {
    if (is.data.frame(x)) {
      x[] <- lapply(x, function(column) {
        column + stats::runif(length(column), -noise, noise)
      })
      return(x)
    }
    x + stats::runif(length(x), -noise, noise)
  }

  tryCatch(
    kde_fun(x1 = x1, x2 = x2),
    error = function(e) {
      warning(
        "Caught error in ks::kde.test: ",
        conditionMessage(e),
        "\n\nTrying again with some random noise added to the vectors."
      )
      # Draw the noise eagerly (x1 first, then x2) so the random number
      # stream does not depend on argument evaluation order inside kde_fun.
      x1_noisy <- add_noise(x1)
      x2_noisy <- add_noise(x2)
      kde_fun(x1 = x1_noisy, x2 = x2_noisy)
    }
  )
}
58
+
29
59
# Per-column share of zero entries in each count matrix, compared between the
# real and the simulated data.
cat("Computing ks statistic of fraction of zeros per gene\n")
frac_zero_real_genes <- colMeans(real_counts == 0)
frac_zero_sim_genes <- colMeans(sim_counts == 0)
ks_statistic_frac_zero_genes <- try_kde_test(
  frac_zero_real_genes,
  frac_zero_sim_genes
)
33
66
34
67
# Per-row share of zero entries in each count matrix, compared between the
# real and the simulated data.
cat("Computing ks statistic of fraction of zeros per cell\n")
frac_zero_real_cells <- rowMeans(real_counts == 0)
frac_zero_sim_cells <- rowMeans(sim_counts == 0)
ks_statistic_frac_zero_cells <- try_kde_test(
  frac_zero_real_cells,
  frac_zero_sim_cells
)
38
74
39
75
# log1p of the per-row count totals, compared between real and simulated data.
cat("Computing ks statistic of the library size\n")
lib_size_real_cells <- log1p(rowSums(real_counts))
lib_size_sim_cells <- log1p(rowSums(sim_counts))
ks_statistic_lib_size_cells <- try_kde_test(
  lib_size_real_cells,
  lib_size_sim_cells
)
43
82
44
83
# log1p of the per-row count totals, compared between real and simulated data.
# NOTE(review): this is computed exactly like the library size metric
# (log1p(rowSums(...))), so both metrics yield the same inputs. Confirm
# whether a normalisation-factor-adjusted library size was intended here.
cat("Computing ks statistic of the effective library size\n")
efflib_size_real_cells <- log1p(rowSums(real_counts))
efflib_size_sim_cells <- log1p(rowSums(sim_counts))
ks_statistic_efflib_size_cells <- try_kde_test(
  efflib_size_real_cells,
  efflib_size_sim_cells
)
48
90
49
91
# Build one DGEList per dataset from the transposed counts, then compare the
# "TMM" normalisation factors stored in `$samples$norm.factors`.
cat("Computing ks statistic of TMM\n")
real_dge <- edgeR::DGEList(counts = Matrix::t(real_counts))
sim_dge <- edgeR::DGEList(counts = Matrix::t(sim_counts))
tmm_real_cells <-
  edgeR::calcNormFactors(real_dge, method = "TMM")$samples$norm.factors
tmm_sim_cells <-
  edgeR::calcNormFactors(sim_dge, method = "TMM")$samples$norm.factors
ks_statistic_tmm_cells <- try_kde_test(tmm_real_cells, tmm_sim_cells)
55
103
56
104
# Scaled per-row variance of the counts (column variances of the transposed
# matrix), compared between the real and the simulated data.
cat("Computing ks statistic of the cell-level scaled variance\n")
scaled_var_real_cells <- scale(
  sparseMatrixStats::colVars(Matrix::t(real_counts))
)
scaled_var_sim_cells <- scale(
  sparseMatrixStats::colVars(Matrix::t(sim_counts))
)
# x1 must be the real data: comparing the simulated vector with itself
# makes the statistic independent of the real dataset.
ks_statistic_scaled_var_cells <- try_kde_test(
  x1 = as.numeric(scaled_var_real_cells),
  x2 = as.numeric(scaled_var_sim_cells)
)
60
113
61
114
# Scaled per-row mean of the counts (column means of the transposed matrix),
# compared between the real and the simulated data.
cat("Computing ks statistic of the cell-level scaled mean\n")
scaled_mean_real_cells <- scale(colMeans(Matrix::t(real_counts)))
scaled_mean_sim_cells <- scale(colMeans(Matrix::t(sim_counts)))
# x1 must be the real data: comparing the simulated vector with itself
# makes the statistic independent of the real dataset.
ks_statistic_scaled_mean_cells <- try_kde_test(
  x1 = as.numeric(scaled_mean_real_cells),
  x2 = as.numeric(scaled_mean_sim_cells)
)
65
121
66
# Two-column comparison: library size paired with the fraction of zeros, one
# row per row of the count matrix. Both inputs are computed earlier in the
# script.
cat(
  "Computing ks statistic of the library size vs fraction of zeros per cell\n"
)
lib_fraczero_real_cells <- data.frame(
  lib = lib_size_real_cells,
  fraczero = frac_zero_real_cells
)
lib_fraczero_sim_cells <- data.frame(
  lib = lib_size_sim_cells,
  fraczero = frac_zero_sim_cells
)
ks_statistic_lib_fraczero_cells <- try_kde_test(
  lib_fraczero_real_cells,
  lib_fraczero_sim_cells
)
70
137
71
138
# Pairwise "correlation" similarity computed by proxyC::simil with its default
# margin, followed by a KDE test on a random subsample of the values.
cat("Computing ks statistic of the sample Pearson correlation\n")
# pearson_real_cells <- reshape2::melt(cor(as.matrix(Matrix::t(real_counts)), method = "pearson"))
pearson_real_cells <- proxyC::simil(real_counts, method = "correlation")
# pearson_sim_cells <- reshape2::melt(cor(as.matrix(Matrix::t(sim_counts)), method = "pearson"))
pearson_sim_cells <- proxyC::simil(sim_counts, method = "correlation")

# Draw at most `size` values without replacement. Capping at the number of
# available values avoids the error `sample()` raises when asked for more
# elements than exist (small datasets).
sample_at_most <- function(x, size = 10000) {
  values <- as.numeric(x)
  sample(values, min(length(values), size))
}

ks_statistic_pearson_cells <- try_kde_test(
  x1 = sample_at_most(pearson_real_cells),
  x2 = sample_at_most(pearson_sim_cells)
)
78
148
79
149
# Scaled per-column variance of the counts, compared between the real and the
# simulated data.
cat("Computing ks statistic of the gene-level scaled variance\n")
scaled_var_real_genes <- scale(sparseMatrixStats::colVars(real_counts))
scaled_var_sim_genes <- scale(sparseMatrixStats::colVars(sim_counts))
# x1 must be the real data: comparing the simulated vector with itself
# makes the statistic independent of the real dataset.
ks_statistic_scaled_var_genes <- try_kde_test(
  x1 = as.numeric(scaled_var_real_genes),
  x2 = as.numeric(scaled_var_sim_genes)
)
83
156
84
157
# Scaled per-column mean of the counts, compared between the real and the
# simulated data.
cat("Computing ks statistic of the gene-level scaled mean\n")
scaled_mean_real_genes <- scale(colMeans(real_counts))
scaled_mean_sim_genes <- scale(colMeans(sim_counts))
ks_statistic_scaled_mean_genes <- try_kde_test(
  as.numeric(scaled_mean_real_genes),
  as.numeric(scaled_mean_sim_genes)
)
88
164
89
165
# Pairwise correlation between the columns of each count matrix, followed by a
# KDE test on a random subsample of the values.
cat("Computing ks statistic of the gene Pearson correlation\n")
# `margin = 2` makes proxyC::simil work column-wise, matching the dense
# computation it replaced (kept below for reference). With the default margin
# the result is the same row-wise similarity used for the sample-level metric.
# pearson_real_genes <- reshape2::melt(cor(as.matrix(real_counts), method = "pearson"))
pearson_real_genes <- proxyC::simil(
  real_counts,
  margin = 2,
  method = "correlation"
)
# pearson_sim_genes <- reshape2::melt(cor(as.matrix(sim_counts), method = "pearson"))
pearson_sim_genes <- proxyC::simil(
  sim_counts,
  margin = 2,
  method = "correlation"
)

# Draw at most `size` values without replacement. Capping at the number of
# available values avoids the error `sample()` raises when asked for more
# elements than exist (small datasets).
sample_at_most <- function(x, size = 10000) {
  values <- as.numeric(x)
  sample(values, min(length(values), size))
}

ks_statistic_pearson_genes <- try_kde_test(
  x1 = sample_at_most(pearson_real_genes),
  x2 = sample_at_most(pearson_sim_genes)
)
95
174
96
175
# Two-column comparison: per-column mean paired with per-column variance of
# the counts, real versus simulated.
cat("Computing ks statistic of the mean expression vs variance expression\n")
mean_var_real_genes <- data.frame(
  mean = colMeans(real_counts),
  var = sparseMatrixStats::colVars(real_counts)
)
mean_var_sim_genes <- data.frame(
  mean = colMeans(sim_counts),
  var = sparseMatrixStats::colVars(sim_counts)
)
ks_statistic_mean_var_genes <- try_kde_test(
  mean_var_real_genes,
  mean_var_sim_genes
)
100
188
101
# Two-column comparison: per-column mean of the counts paired with the
# per-column fraction of zeros computed earlier in the script.
cat(
  "Computing ks statistic of the mean expression vs fraction of zeros per gene\n"
)
mean_fraczero_real_genes <- data.frame(
  mean = colMeans(real_counts),
  fraczero = frac_zero_real_genes
)
mean_fraczero_sim_genes <- data.frame(
  mean = colMeans(sim_counts),
  fraczero = frac_zero_sim_genes
)
ks_statistic_mean_fraczero_genes <- try_kde_test(
  mean_fraczero_real_genes,
  mean_fraczero_sim_genes
)
105
204
106
205
cat(" Combining metric values\n " )
107
206
uns_metric_ids <- c(
@@ -134,7 +233,6 @@ uns_metric_ids <- c(
134
233
" ks_statistic_pearson_genes_tstat" ,
135
234
" ks_statistic_mean_var_genes_tstat" ,
136
235
" ks_statistic_mean_fraczero_genes_tstat"
137
-
138
236
)
139
237
uns_metric_values <- c(
140
238
ks_statistic_frac_zero_genes $ zstat ,
@@ -151,7 +249,7 @@ uns_metric_values <- c(
151
249
ks_statistic_pearson_genes $ zstat ,
152
250
ks_statistic_mean_var_genes $ zstat ,
153
251
ks_statistic_mean_fraczero_genes $ zstat ,
154
-
252
+
155
253
ks_statistic_frac_zero_genes $ tstat ,
156
254
ks_statistic_frac_zero_cells $ tstat ,
157
255
ks_statistic_lib_size_cells $ tstat ,
0 commit comments