diff --git a/CHANGELOG.md b/CHANGELOG.md index 859869e4..2a051099 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ * Added `ARI_batch` and `NMI_batch` to `metrics/clustering_overlap` (PR #68). +* Added `metrics/cilisi` new metric component (PR #57). + - ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing + the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring + overcorrected datasets with removed cell type signals. + We propose adding this metric to substitute iLISI. + ## Minor changes * Un-pin the scPRINT version and update parameters (PR #51) diff --git a/src/metrics/cilisi/config.vsh.yaml b/src/metrics/cilisi/config.vsh.yaml new file mode 100644 index 00000000..b82656eb --- /dev/null +++ b/src/metrics/cilisi/config.vsh.yaml @@ -0,0 +1,51 @@ +__merge__: ../../api/comp_metric.yaml +name: cilisi +info: + metrics: + - name: cilisi + label: CiLISI + summary: Cell-type aware version of iLISI (Local inverse Simpson's Index). + iLISI is computed separately for each cell type or cluster, normalized between 0 and 1, and averaged across all cells (global mean). + By default, CiLISI is calculated only for groups with at least 10 cells and 2 distinct batch labels (configurable). + description: | + ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing + the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring + overcorrected datasets with removed cell type signals. + references: + doi: 10.1038/s41467-024-45240-z + links: + documentation: https://github.com/carmonalab/scIntegrationMetrics + repository: https://github.com/carmonalab/scIntegrationMetrics + min: 0 + max: 1 + maximize: true + + - name: cilisi_means + label: CiLISI_means + summary: As CiLISI, but returns mean of per-group CiLISI values (i.e., average of the means per group). instead of a global average. + description: | + ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing + the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring + overcorrected datasets with removed cell type signals. + references: + doi: 10.1038/s41467-024-45240-z + links: + documentation: https://github.com/carmonalab/scIntegrationMetrics + repository: https://github.com/carmonalab/scIntegrationMetrics + min: 0 + max: 1 + maximize: true +resources: + - type: r_script + path: script.R +engines: + - type: docker + image: openproblems/base_r:1 + setup: + - type: r + github: https://github.com/carmonalab/scIntegrationMetrics.git@1.2.0 +runners: + - type: executable + - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/metrics/cilisi/script.R b/src/metrics/cilisi/script.R new file mode 100644 index 00000000..4b7cba27 --- /dev/null +++ b/src/metrics/cilisi/script.R @@ -0,0 +1,49 @@ +library(anndata) +library(scIntegrationMetrics) + +## VIASH START +par <- list( + input_integrated = "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_processed.h5ad", + input_solution = "resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad", + output = "output.h5ad" +) +meta <- list( + name = "cilisi" +) +## VIASH END + +cat("Reading input files\n") +adata <- anndata::read_h5ad(par[["input_integrated"]]) +solution <- anndata::read_h5ad(par[["input_solution"]]) +embeddings <- adata$obsm[["X_emb"]] +metadata <- solution$obs + +cat("Compute CiLISI metrics...\n") +lisisplit <- + scIntegrationMetrics::compute_lisi_splitBy( + X = embeddings, + meta_data = metadata, + label_colnames = "batch", + perplexity = 30, + split_by_colname = "cell_type", + normalize = TRUE, + min.cells.split = 10, + min.vars.label = 2 +) +# average CiLISI +cilisi <- mean(unlist(lisisplit)) +# Mean per cell type +cilisi_means <- mean(sapply(lisisplit, function(x) mean(x[, 1]))) + +cat("Write output AnnData to file\n") +output <- anndata::AnnData( + shape = c(1,2), + uns = list( + dataset_id = adata$uns[["dataset_id"]], + normalization_id = adata$uns[["normalization_id"]], + method_id = adata$uns[["method_id"]], + metric_ids = c("cilisi", "cilisi_means"), + metric_values = list(cilisi, cilisi_means) + ) +) +output$write_h5ad(par[["output"]], compression = "gzip")