Skip to content

Commit 0ac6b16

Browse files
authored
fix ks_statistic metrics (#8)
1 parent e681cf1 commit 0ac6b16

File tree

3 files changed

+204
-49
lines changed

3 files changed

+204
-49
lines changed

src/methods/scdesign3_nb/config.vsh.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ engines:
4141
setup:
4242
- type: r
4343
github:
44-
- SONGDONGYUAN1994/scDesign3
44+
- SONGDONGYUAN1994/scDesign3
4545
url:
4646
- https://cran.r-project.org/src/contrib/Archive/reticulate/reticulate_1.40.0.tar.gz
4747

src/metrics/ks_statistic_gene_cell/script.R

+129-31
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,20 @@ library(Matrix)
77
library(matrixStats)
88

99
## VIASH START
10+
dir <- "work/66/65ddd5a686b4c712172ff7aa70cce8/_viash_par"
1011
par <- list(
11-
input_spatial_dataset = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
12-
input_singlecell_dataset = "resources_test/spatialsimbench_mobnew/dataset_sc.h5ad",
13-
input_simulated_dataset = "resources_test/spatialsimbench_mobnew/simulated_dataset.h5ad",
12+
input_spatial_dataset = paste0(
13+
dir,
14+
"/input_spatial_dataset_1/dataset_sp.h5ad"
15+
),
16+
input_singlecell_dataset = paste0(
17+
dir,
18+
"/input_singlecell_dataset_1/dataset_sc.h5ad"
19+
),
20+
input_simulated_dataset = paste0(
21+
dir,
22+
"/input_simulated_dataset_1/spatialsimbench_mobnew.negative_normal.generate_sim_spatialcluster.output_sp.h5ad"
23+
),
1424
output = "output.h5ad"
1525
)
1626
meta <- list(
@@ -20,88 +30,177 @@ meta <- list(
2030

2131
cat("Reading input files\n")
2232
input_spatial_dataset <- anndata::read_h5ad(par[["input_spatial_dataset"]])
23-
input_singlecell_dataset <- anndata::read_h5ad(par[["input_singlecell_dataset"]])
33+
input_singlecell_dataset <- anndata::read_h5ad(par[[
34+
"input_singlecell_dataset"
35+
]])
2436
input_simulated_dataset <- anndata::read_h5ad(par[["input_simulated_dataset"]])
2537

2638
real_counts <- input_spatial_dataset$layers[["counts"]]
2739
sim_counts <- input_simulated_dataset$layers[["counts"]]
2840

41+
try_kde_test <- function(x1, x2) {
42+
tryCatch(
43+
{
44+
ks::kde.test(x1 = x1, x2 = x2)
45+
},
46+
error = function(e) {
47+
warning(
48+
"Caught error in ks::kde.test: ",
49+
e$message,
50+
"\n\nTrying again with some random noise added to the vectors."
51+
)
52+
x1_noise <- stats::runif(length(x1), -1e-8, 1e-8)
53+
x2_noise <- stats::runif(length(x2), -1e-8, 1e-8)
54+
ks::kde.test(x1 = x1 + x1_noise, x2 = x2 + x2_noise)
55+
}
56+
)
57+
}
58+
2959
cat("Computing ks statistic of fraction of zeros per gene\n")
3060
frac_zero_real_genes <- colMeans(real_counts == 0)
3161
frac_zero_sim_genes <- colMeans(sim_counts == 0)
32-
ks_statistic_frac_zero_genes <- ks::kde.test(x1 = frac_zero_real_genes, x2 = frac_zero_sim_genes)
62+
ks_statistic_frac_zero_genes <- try_kde_test(
63+
x1 = frac_zero_real_genes,
64+
x2 = frac_zero_sim_genes
65+
)
3366

3467
cat("Computing ks statistic of fraction of zeros per cell\n")
3568
frac_zero_real_cells <- rowMeans(real_counts == 0)
3669
frac_zero_sim_cells <- rowMeans(sim_counts == 0)
37-
ks_statistic_frac_zero_cells <- ks::kde.test(x1 = frac_zero_real_cells, x2 = frac_zero_sim_cells)
70+
ks_statistic_frac_zero_cells <- try_kde_test(
71+
x1 = frac_zero_real_cells,
72+
x2 = frac_zero_sim_cells
73+
)
3874

3975
cat("Computing ks statistic of the library size\n")
4076
lib_size_real_cells <- log1p(rowSums(real_counts))
4177
lib_size_sim_cells <- log1p(rowSums(sim_counts))
42-
ks_statistic_lib_size_cells <- ks::kde.test(x1 = lib_size_real_cells, x2 = lib_size_sim_cells)
78+
ks_statistic_lib_size_cells <- try_kde_test(
79+
x1 = lib_size_real_cells,
80+
x2 = lib_size_sim_cells
81+
)
4382

4483
cat("Computing ks statistic of the effective library size\n")
4584
efflib_size_real_cells <- log1p(rowSums(real_counts))
4685
efflib_size_sim_cells <- log1p(rowSums(sim_counts))
47-
ks_statistic_efflib_size_cells <- ks::kde.test(x1 = efflib_size_real_cells, x2 = efflib_size_sim_cells)
86+
ks_statistic_efflib_size_cells <- try_kde_test(
87+
x1 = efflib_size_real_cells,
88+
x2 = efflib_size_sim_cells
89+
)
4890

4991
cat("Computing ks statistic of TMM\n")
5092
real_dge <- edgeR::DGEList(counts = Matrix::t(real_counts))
5193
sim_dge <- edgeR::DGEList(counts = Matrix::t(sim_counts))
52-
tmm_real_cells <- edgeR::calcNormFactors(real_dge, method = "TMM")$samples$norm.factors
53-
tmm_sim_cells <- edgeR::calcNormFactors(sim_dge, method = "TMM")$samples$norm.factors
54-
ks_statistic_tmm_cells <- ks::kde.test(x1 = tmm_real_cells, x2 = tmm_sim_cells)
94+
tmm_real_cells <- edgeR::calcNormFactors(
95+
real_dge,
96+
method = "TMM"
97+
)$samples$norm.factors
98+
tmm_sim_cells <- edgeR::calcNormFactors(
99+
sim_dge,
100+
method = "TMM"
101+
)$samples$norm.factors
102+
ks_statistic_tmm_cells <- try_kde_test(x1 = tmm_real_cells, x2 = tmm_sim_cells)
55103

56104
cat("Computing ks statistic of the cell-level scaled variance\n")
57-
scaled_var_real_cells <- scale(sparseMatrixStats::colVars(Matrix::t(real_counts)))
105+
scaled_var_real_cells <- scale(sparseMatrixStats::colVars(Matrix::t(
106+
real_counts
107+
)))
58108
scaled_var_sim_cells <- scale(sparseMatrixStats::colVars(Matrix::t(sim_counts)))
59-
ks_statistic_scaled_var_cells <- ks::kde.test(x1 = as.numeric(scaled_var_sim_cells), x2 = as.numeric(scaled_var_sim_cells))
109+
ks_statistic_scaled_var_cells <- try_kde_test(
110+
x1 = as.numeric(scaled_var_sim_cells),
111+
x2 = as.numeric(scaled_var_sim_cells)
112+
)
60113

61114
cat("Computing ks statistic of the cell-level scaled mean\n")
62115
scaled_mean_real_cells <- scale(colMeans(Matrix::t(real_counts)))
63116
scaled_mean_sim_cells <- scale(colMeans(Matrix::t(sim_counts)))
64-
ks_statistic_scaled_mean_cells <- ks::kde.test(x1 = as.numeric(scaled_mean_sim_cells), x2 = as.numeric(scaled_mean_sim_cells))
117+
ks_statistic_scaled_mean_cells <- try_kde_test(
118+
x1 = as.numeric(scaled_mean_sim_cells),
119+
x2 = as.numeric(scaled_mean_sim_cells)
120+
)
65121

66-
cat("Computing ks statistic of the library size vs fraction of zeros per cell\n")
67-
lib_fraczero_real_cells <- data.frame(lib = lib_size_real_cells, fraczero = frac_zero_real_cells)
68-
lib_fraczero_sim_cells <- data.frame(lib = lib_size_sim_cells, fraczero = frac_zero_sim_cells)
69-
ks_statistic_lib_fraczero_cells <- ks::kde.test(x1 = lib_fraczero_real_cells, x2 = lib_fraczero_sim_cells)
122+
cat(
123+
"Computing ks statistic of the library size vs fraction of zeros per cell\n"
124+
)
125+
lib_fraczero_real_cells <- data.frame(
126+
lib = lib_size_real_cells,
127+
fraczero = frac_zero_real_cells
128+
)
129+
lib_fraczero_sim_cells <- data.frame(
130+
lib = lib_size_sim_cells,
131+
fraczero = frac_zero_sim_cells
132+
)
133+
ks_statistic_lib_fraczero_cells <- try_kde_test(
134+
x1 = lib_fraczero_real_cells,
135+
x2 = lib_fraczero_sim_cells
136+
)
70137

71138
cat("Computing ks statistic of the sample Pearson correlation\n")
72139
# pearson_real_cells <- reshape2::melt(cor(as.matrix(Matrix::t(real_counts)), method = "pearson"))
73140
pearson_real_cells <- proxyC::simil(real_counts, method = "correlation")
74141
# pearson_sim_cells <- reshape2::melt(cor(as.matrix(Matrix::t(sim_counts)), method = "pearson"))
75142
pearson_sim_cells <- proxyC::simil(sim_counts, method = "correlation")
76143

77-
ks_statistic_pearson_cells <- ks::kde.test(x1 = sample(as.numeric(pearson_real_cells), 10000), x2 = sample(as.numeric(pearson_sim_cells), 10000))
144+
ks_statistic_pearson_cells <- try_kde_test(
145+
x1 = sample(as.numeric(pearson_real_cells), 10000),
146+
x2 = sample(as.numeric(pearson_sim_cells), 10000)
147+
)
78148

79149
cat("Computing ks statistic of the gene-level scaled variance\n")
80150
scaled_var_real_genes <- scale(sparseMatrixStats::colVars(real_counts))
81151
scaled_var_sim_genes <- scale(sparseMatrixStats::colVars(sim_counts))
82-
ks_statistic_scaled_var_genes <- ks::kde.test(x1 = as.numeric(scaled_var_sim_genes), x2 = as.numeric(scaled_var_sim_genes))
152+
ks_statistic_scaled_var_genes <- try_kde_test(
153+
x1 = as.numeric(scaled_var_sim_genes),
154+
x2 = as.numeric(scaled_var_sim_genes)
155+
)
83156

84157
cat("Computing ks statistic of the gene-level scaled mean\n")
85158
scaled_mean_real_genes <- scale(colMeans(real_counts))
86159
scaled_mean_sim_genes <- scale(colMeans(sim_counts))
87-
ks_statistic_scaled_mean_genes <- ks::kde.test(x1 = as.numeric(scaled_mean_real_genes), x2 = as.numeric(scaled_mean_sim_genes))
160+
ks_statistic_scaled_mean_genes <- try_kde_test(
161+
x1 = as.numeric(scaled_mean_real_genes),
162+
x2 = as.numeric(scaled_mean_sim_genes)
163+
)
88164

89165
cat("Computing ks statistic of the gene Pearson correlation\n")
90166
# pearson_real_genes <- reshape2::melt(cor(as.matrix(real_counts), method = "pearson"))
91167
pearson_real_genes <- proxyC::simil(real_counts, method = "correlation")
92168
# pearson_sim_genes <- reshape2::melt(cor(as.matrix(sim_counts), method = "pearson"))
93169
pearson_sim_genes <- proxyC::simil(sim_counts, method = "correlation")
94-
ks_statistic_pearson_genes <- ks::kde.test(x1 = sample(as.numeric(pearson_real_genes), 10000), x2 = sample(as.numeric(pearson_sim_genes), 10000))
170+
ks_statistic_pearson_genes <- try_kde_test(
171+
x1 = sample(as.numeric(pearson_real_genes), 10000),
172+
x2 = sample(as.numeric(pearson_sim_genes), 10000)
173+
)
95174

96175
cat("Computing ks statistic of the mean expression vs variance expression\n")
97-
mean_var_real_genes <- data.frame(mean = colMeans(real_counts), var = sparseMatrixStats::colVars(real_counts))
98-
mean_var_sim_genes <- data.frame(mean = colMeans(sim_counts), var = sparseMatrixStats::colVars(sim_counts))
99-
ks_statistic_mean_var_genes <- ks::kde.test(x1 = mean_var_real_genes, x2 = mean_var_sim_genes)
176+
mean_var_real_genes <- data.frame(
177+
mean = colMeans(real_counts),
178+
var = sparseMatrixStats::colVars(real_counts)
179+
)
180+
mean_var_sim_genes <- data.frame(
181+
mean = colMeans(sim_counts),
182+
var = sparseMatrixStats::colVars(sim_counts)
183+
)
184+
ks_statistic_mean_var_genes <- try_kde_test(
185+
x1 = mean_var_real_genes,
186+
x2 = mean_var_sim_genes
187+
)
100188

101-
cat("Computing ks statistic of the mean expression vs fraction of zeros per gene\n")
102-
mean_fraczero_real_genes <- data.frame(mean = colMeans(real_counts), fraczero = frac_zero_real_genes)
103-
mean_fraczero_sim_genes <- data.frame(mean = colMeans(sim_counts), fraczero = frac_zero_sim_genes)
104-
ks_statistic_mean_fraczero_genes <- ks::kde.test(x1 = mean_fraczero_real_genes, x2 = mean_fraczero_sim_genes)
189+
cat(
190+
"Computing ks statistic of the mean expression vs fraction of zeros per gene\n"
191+
)
192+
mean_fraczero_real_genes <- data.frame(
193+
mean = colMeans(real_counts),
194+
fraczero = frac_zero_real_genes
195+
)
196+
mean_fraczero_sim_genes <- data.frame(
197+
mean = colMeans(sim_counts),
198+
fraczero = frac_zero_sim_genes
199+
)
200+
ks_statistic_mean_fraczero_genes <- try_kde_test(
201+
x1 = mean_fraczero_real_genes,
202+
x2 = mean_fraczero_sim_genes
203+
)
105204

106205
cat("Combining metric values\n")
107206
uns_metric_ids <- c(
@@ -134,7 +233,6 @@ uns_metric_ids <- c(
134233
"ks_statistic_pearson_genes_tstat",
135234
"ks_statistic_mean_var_genes_tstat",
136235
"ks_statistic_mean_fraczero_genes_tstat"
137-
138236
)
139237
uns_metric_values <- c(
140238
ks_statistic_frac_zero_genes$zstat,
@@ -151,7 +249,7 @@ uns_metric_values <- c(
151249
ks_statistic_pearson_genes$zstat,
152250
ks_statistic_mean_var_genes$zstat,
153251
ks_statistic_mean_fraczero_genes$zstat,
154-
252+
155253
ks_statistic_frac_zero_genes$tstat,
156254
ks_statistic_frac_zero_cells$tstat,
157255
ks_statistic_lib_size_cells$tstat,

src/metrics/ks_statistic_sc_features/script.R

+74-17
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,33 @@ requireNamespace("SingleCellExperiment", quietly = TRUE)
99
requireNamespace("SummarizedExperiment", quietly = TRUE)
1010

1111

12-
packages <- c("SingleCellExperiment", "SummarizedExperiment",
13-
"concaveman", "sp", "Matrix", "methods",
14-
"ggplot2", "ggcorrplot", "MuSiC", "fields",
15-
"MCMCpack", "dplyr", "sf", "RANN", "stats",
16-
"reshape2", "RColorBrewer", "scatterpie",
17-
"grDevices", "nnls", "pbmcapply", "spatstat",
18-
"gtools", "RcppML", "NMF")
12+
packages <- c(
13+
"SingleCellExperiment",
14+
"SummarizedExperiment",
15+
"concaveman",
16+
"sp",
17+
"Matrix",
18+
"methods",
19+
"ggplot2",
20+
"ggcorrplot",
21+
"MuSiC",
22+
"fields",
23+
"MCMCpack",
24+
"dplyr",
25+
"sf",
26+
"RANN",
27+
"stats",
28+
"reshape2",
29+
"RColorBrewer",
30+
"scatterpie",
31+
"grDevices",
32+
"nnls",
33+
"pbmcapply",
34+
"spatstat",
35+
"gtools",
36+
"RcppML",
37+
"NMF"
38+
)
1939

2040
# Load each package
2141
lapply(packages, library, character.only = TRUE)
@@ -43,8 +63,12 @@ real_log_count <- t(input_real_sp$layers[["logcounts"]])
4363
real_prob_matrix <- input_real_sp$obsm[["celltype_proportions"]]
4464
colnames(real_prob_matrix) <- paste0("ct", seq_len(ncol(real_prob_matrix)))
4565

46-
sim_log_count <- scuttle::normalizeCounts(t(input_simulated_sp$layers[["counts"]]))
47-
real_log_count <- real_log_count[ , colnames(real_log_count) %in% rownames(real_prob_matrix)]
66+
sim_log_count <- scuttle::normalizeCounts(t(input_simulated_sp$layers[[
67+
"counts"
68+
]]))
69+
real_log_count <- real_log_count[,
70+
colnames(real_log_count) %in% rownames(real_prob_matrix)
71+
]
4872

4973

5074
sim_sce <- scater::logNormCounts(SingleCellExperiment::SingleCellExperiment(
@@ -55,10 +79,31 @@ sim_sce <- scater::logNormCounts(SingleCellExperiment::SingleCellExperiment(
5579

5680
sim_log_count <- SummarizedExperiment::assay(sim_sce, "logcounts")
5781

82+
# helper function
83+
try_kde_test <- function(x1, x2) {
84+
tryCatch(
85+
{
86+
ks::kde.test(x1 = x1, x2 = x2)
87+
},
88+
error = function(e) {
89+
warning(
90+
"Caught error in ks::kde.test: ",
91+
e$message,
92+
"\n\nTrying again with some random noise added to the vectors."
93+
)
94+
x1_noise <- stats::runif(length(x1), -1e-8, 1e-8)
95+
x2_noise <- stats::runif(length(x2), -1e-8, 1e-8)
96+
ks::kde.test(x1 = x1 + x1_noise, x2 = x2 + x2_noise)
97+
}
98+
)
99+
}
100+
58101
# build cell type deconvolution in simulated data ## error
59102
sim_prob_matrix <- CARD_processing(input_real_sp, input_sc)
60103

61-
sim_log_count <- sim_log_count[ , colnames(sim_log_count) %in% rownames(sim_prob_matrix)]
104+
sim_log_count <- sim_log_count[,
105+
colnames(sim_log_count) %in% rownames(sim_prob_matrix)
106+
]
62107

63108
feat_types <- c("L_stats", "celltype_interaction", "nn_correlation", "morans_I")
64109

@@ -69,7 +114,7 @@ real_scfeatures_result <- scFeatures::scFeatures(
69114
feature_types = feat_types,
70115
type = "spatial_t",
71116
species = sc_species,
72-
spotProbability = t(real_prob_matrix)
117+
spotProbability = t(real_prob_matrix)
73118
)
74119

75120
sim_scfeatures_result <- scFeatures::scFeatures(
@@ -79,13 +124,25 @@ sim_scfeatures_result <- scFeatures::scFeatures(
79124
feature_types = feat_types,
80125
type = "spatial_t",
81126
species = sc_species,
82-
spotProbability = t(sim_prob_matrix)
127+
spotProbability = t(sim_prob_matrix)
83128
)
84129

85-
ks_statistic_L_stats <- ks::kde.test(x1 = as.numeric(real_scfeatures_result$L_stats), x2 = as.numeric(sim_scfeatures_result$L_stats))
86-
ks_statistic_celltype_interaction <- ks::kde.test(x1 = as.numeric(real_scfeatures_result$celltype_interaction), x2 = as.numeric(sim_scfeatures_result$celltype_interaction))
87-
ks_statistic_nn_correlation <- ks::kde.test(x1 = as.numeric(real_scfeatures_result$nn_correlation), x2 = as.numeric(sim_scfeatures_result$nn_correlation))
88-
ks_statistic_morans_I <- ks::kde.test(x1 = as.numeric(real_scfeatures_result$morans_I), x2 = as.numeric(sim_scfeatures_result$morans_I))
130+
ks_statistic_L_stats <- try_kde_test(
131+
x1 = as.numeric(real_scfeatures_result$L_stats),
132+
x2 = as.numeric(sim_scfeatures_result$L_stats)
133+
)
134+
ks_statistic_celltype_interaction <- try_kde_test(
135+
x1 = as.numeric(real_scfeatures_result$celltype_interaction),
136+
x2 = as.numeric(sim_scfeatures_result$celltype_interaction)
137+
)
138+
ks_statistic_nn_correlation <- try_kde_test(
139+
x1 = as.numeric(real_scfeatures_result$nn_correlation),
140+
x2 = as.numeric(sim_scfeatures_result$nn_correlation)
141+
)
142+
ks_statistic_morans_I <- try_kde_test(
143+
x1 = as.numeric(real_scfeatures_result$morans_I),
144+
x2 = as.numeric(sim_scfeatures_result$morans_I)
145+
)
89146

90147
cat("Combining metric values\n")
91148
uns_metric_ids <- c(
@@ -112,4 +169,4 @@ output <- anndata::AnnData(
112169
),
113170
shape = c(0L, 0L)
114171
)
115-
output$write_h5ad(par[["output"]], compression = "gzip")
172+
output$write_h5ad(par[["output"]], compression = "gzip")

0 commit comments

Comments
 (0)