From 29176c912cc83b652a5f4ca06bf6c5e06c817a02 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Tue, 15 Apr 2025 14:30:34 +0200 Subject: [PATCH 1/6] add metric: CiLISI --- src/metrics/cilisi/config.vsh.yaml | 105 +++++++++++++++++++++++++++++ src/metrics/cilisi/script.R | 48 +++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 src/metrics/cilisi/config.vsh.yaml create mode 100644 src/metrics/cilisi/script.R diff --git a/src/metrics/cilisi/config.vsh.yaml b/src/metrics/cilisi/config.vsh.yaml new file mode 100644 index 00000000..ff88532e --- /dev/null +++ b/src/metrics/cilisi/config.vsh.yaml @@ -0,0 +1,105 @@ +# The API specifies which type of component this is. +# It contains specifications for: +# - The input/output files +# - Common parameters +# - A unit test +__merge__: ../../api/comp_metric.yaml + +# A unique identifier for your component (required). +# Can contain only lowercase letters or underscores. +name: cilisi + + + +# Metadata for your component +info: + metrics: + # A unique identifier for your metric (required). + # Can contain only lowercase letters or underscores. + - name: cilisi + # A relatively short label, used when rendering visualisations (required) + label: CiLISI + # A one sentence summary of how this metric works (required). Used when + # rendering summary tables. + summary: Cell-type aware version of iLISI (Local inverse Simpson's Index). + iLISI is computed separately for each cell type or cluster, normalized between 0 and 1, and averaged across all cells (global mean). + By default, CiLISI is calculated only for groups with at least 10 cells and 2 distinct batch labels (configurable). + # A multi-line description of how this component works (required). Used + # when rendering reference documentation. + description: | + ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing + the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring + overcorrected datasets with removed cell type signals. + references: + doi: 10.1038/s41467-024-45240-z + links: + # URL to the documentation for this metric (required). + documentation: https://github.com/carmonalab/scIntegrationMetrics + # URL to the code repository for this metric (required). + repository: https://github.com/carmonalab/scIntegrationMetrics + # The minimum possible value for this metric (required) + min: 0 + # The maximum possible value for this metric (required) + max: 1 + # Whether a higher value represents a 'better' solution (required) + maximize: true + + - name: cilisi_means + # A relatively short label, used when rendering visualisations (required) + label: CiLISI_means + # A one sentence summary of how this metric works (required). Used when + # rendering summary tables. + summary: As CiLISI, but returns mean of per-group CiLISI values (i.e., average of the means per group). instead of a global average. + # A multi-line description of how this component works (required). Used + # when rendering reference documentation. + description: | + ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing + the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring + overcorrected datasets with removed cell type signals. + references: + doi: 10.1038/s41467-024-45240-z + links: + # URL to the documentation for this metric (required). + documentation: https://github.com/carmonalab/scIntegrationMetrics + # URL to the code repository for this metric (required). + repository: https://github.com/carmonalab/scIntegrationMetrics + # The minimum possible value for this metric (required) + min: 0 + # The maximum possible value for this metric (required) + max: 1 + # Whether a higher value represents a 'better' solution (required) + maximize: true + +# Component-specific parameters (optional) +# arguments: +# - name: "--n_neighbors" +# type: "integer" +# default: 5 +# description: Number of neighbors to use. + +# Resources required to run the component +resources: + # The script of your component (required) + - type: r_script + path: script.R + # Additional resources your script needs (optional) + # - type: file + # path: weights.pt + +engines: + # Specifications for the Docker image for this component. + - type: docker + image: openproblems/base_r:1.0.0 + # Add custom dependencies here (optional). For more information, see + # https://viash.io/reference/config/engines/docker/#setup . + setup: + - type: r + github: https://github.com/carmonalab/scIntegrationMetrics.git@1.2.0 + +runners: + # This platform allows running the component natively + - type: executable + # Allows turning the component into a Nextflow module / pipeline. + - type: nextflow + directives: + label: [midtime,midmem,midcpu] diff --git a/src/metrics/cilisi/script.R b/src/metrics/cilisi/script.R new file mode 100644 index 00000000..a63fdab1 --- /dev/null +++ b/src/metrics/cilisi/script.R @@ -0,0 +1,48 @@ +library(anndata) +library(scIntegrationMetrics) + +## VIASH START +par <- list( + input_integrated = "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_processed.h5ad", + input_solution = "resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad", + output = "output.h5ad" +) +meta <- list( + name = "cilisi" +) +## VIASH END + +cat("Reading input files\n") +adata <- anndata::read_h5ad(par[["input_integrated"]]) +solution <- anndata::read_h5ad(par[["input_solution"]]) +embeddings <- adata$obsm[["X_emb"]] +metadata <- solution$obs + +cat("Compute CiLISI metrics...\n") +lisisplit <- + scIntegrationMetrics::compute_lisi_splitBy( + X = embeddings, + meta_data = metadata, + label_colnames = "batch", + perplexity = 30, + split_by_colname = "cell_type", + normalize = TRUE, + min.cells.split = 10, + min.vars.label = 2) +# average CiLISI +cilisi <- mean(unlist(lisisplit)) +# Mean per cell type +cilisi_means <- mean(sapply(lisisplit, function(x) mean(x[, 1]))) + +cat("Write output AnnData to file\n") +output <- anndata::AnnData( + shape = c(1,2), + uns = list( + dataset_id = adata$uns[["dataset_id"]], + normalization_id = adata$uns[["normalization_id"]], + method_id = adata$uns[["method_id"]], + metric_ids = c("cilisi", "cilisi_means"), + metric_values = list(cilisi, cilisi_means) + ) +) +output$write_h5ad(par[["output"]], compression = "gzip") From 36b1a74e6c10615b16c882ab81ef918a6fe69a6f Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Tue, 15 Apr 2025 14:49:39 +0200 Subject: [PATCH 2/6] udpate change log --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09d672d0..75ae5038 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # task_batch_integration devel +## New functionality +* Add `metrics/cilisi` new metric component. + - ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing + the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring + overcorrected datasets with removed cell type signals. + We propose adding this metric to substitute iLISI. + ## New functionality * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52). From 5e86f8d37f89c255a34eddf7dbf7734c57b84808 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 23 Jul 2025 08:58:11 +0200 Subject: [PATCH 3/6] fix: move clisi comment below the kBET note to avoid having 2 new functionality sections --- CHANGELOG.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75ae5038..c1e9fb86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,15 @@ # task_batch_integration devel ## New functionality -* Add `metrics/cilisi` new metric component. + +* Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52). + +* Added `metrics/cilisi` new metric component (PR #57). - ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring overcorrected datasets with removed cell type signals. We propose adding this metric to substitute iLISI. -## New functionality - -* Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52). - ## Minor changes * Un-pin the scPRINT version and update parameters (PR #51) From f3fa1dbda4fa0cea629a61e4e1f2f81c419d1d82 Mon Sep 17 00:00:00 2001 From: JGarnica22 Date: Wed, 23 Jul 2025 09:02:26 +0200 Subject: [PATCH 4/6] fix: remove boilerplate comments for better readability --- src/metrics/cilisi/config.vsh.yaml | 54 ------------------------------ 1 file changed, 54 deletions(-) diff --git a/src/metrics/cilisi/config.vsh.yaml b/src/metrics/cilisi/config.vsh.yaml index ff88532e..c762fd7a 100644 --- a/src/metrics/cilisi/config.vsh.yaml +++ b/src/metrics/cilisi/config.vsh.yaml @@ -1,31 +1,12 @@ -# The API specifies which type of component this is. -# It contains specifications for: -# - The input/output files -# - Common parameters -# - A unit test __merge__: ../../api/comp_metric.yaml - -# A unique identifier for your component (required). -# Can contain only lowercase letters or underscores. name: cilisi - - - -# Metadata for your component info: metrics: - # A unique identifier for your metric (required). - # Can contain only lowercase letters or underscores. - name: cilisi - # A relatively short label, used when rendering visualisations (required) label: CiLISI - # A one sentence summary of how this metric works (required). Used when - # rendering summary tables. summary: Cell-type aware version of iLISI (Local inverse Simpson's Index). iLISI is computed separately for each cell type or cluster, normalized between 0 and 1, and averaged across all cells (global mean). By default, CiLISI is calculated only for groups with at least 10 cells and 2 distinct batch labels (configurable). - # A multi-line description of how this component works (required). Used - # when rendering reference documentation. description: | ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring @@ -33,25 +14,15 @@ info: references: doi: 10.1038/s41467-024-45240-z links: - # URL to the documentation for this metric (required). documentation: https://github.com/carmonalab/scIntegrationMetrics - # URL to the code repository for this metric (required). repository: https://github.com/carmonalab/scIntegrationMetrics - # The minimum possible value for this metric (required) min: 0 - # The maximum possible value for this metric (required) max: 1 - # Whether a higher value represents a 'better' solution (required) maximize: true - name: cilisi_means - # A relatively short label, used when rendering visualisations (required) label: CiLISI_means - # A one sentence summary of how this metric works (required). Used when - # rendering summary tables. summary: As CiLISI, but returns mean of per-group CiLISI values (i.e., average of the means per group). instead of a global average. - # A multi-line description of how this component works (required). Used - # when rendering reference documentation. description: | ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring @@ -59,47 +30,22 @@ info: references: doi: 10.1038/s41467-024-45240-z links: - # URL to the documentation for this metric (required). documentation: https://github.com/carmonalab/scIntegrationMetrics - # URL to the code repository for this metric (required). repository: https://github.com/carmonalab/scIntegrationMetrics - # The minimum possible value for this metric (required) min: 0 - # The maximum possible value for this metric (required) max: 1 - # Whether a higher value represents a 'better' solution (required) maximize: true - -# Component-specific parameters (optional) -# arguments: -# - name: "--n_neighbors" -# type: "integer" -# default: 5 -# description: Number of neighbors to use. - -# Resources required to run the component resources: - # The script of your component (required) - type: r_script path: script.R - # Additional resources your script needs (optional) - # - type: file - # path: weights.pt - engines: - # Specifications for the Docker image for this component. - type: docker image: openproblems/base_r:1.0.0 - # Add custom dependencies here (optional). For more information, see - # https://viash.io/reference/config/engines/docker/#setup . setup: - type: r github: https://github.com/carmonalab/scIntegrationMetrics.git@1.2.0 - runners: - # This platform allows running the component natively - type: executable - # Allows turning the component into a Nextflow module / pipeline. - type: nextflow directives: label: [midtime,midmem,midcpu] From c451f46f6fe638ce5a4ca1d60e1513cc61790170 Mon Sep 17 00:00:00 2001 From: Josep Garnica <61703467+JGarnica22@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:00:00 +0200 Subject: [PATCH 5/6] Update base_r container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michaela Müller <51025211+mumichae@users.noreply.github.com> --- src/metrics/cilisi/config.vsh.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/metrics/cilisi/config.vsh.yaml b/src/metrics/cilisi/config.vsh.yaml index c762fd7a..b82656eb 100644 --- a/src/metrics/cilisi/config.vsh.yaml +++ b/src/metrics/cilisi/config.vsh.yaml @@ -40,7 +40,7 @@ resources: path: script.R engines: - type: docker - image: openproblems/base_r:1.0.0 + image: openproblems/base_r:1 setup: - type: r github: https://github.com/carmonalab/scIntegrationMetrics.git@1.2.0 From 135949822639c059a5476405525ffeb49d23299d Mon Sep 17 00:00:00 2001 From: Josep Garnica <61703467+JGarnica22@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:02:11 +0200 Subject: [PATCH 6/6] fix: standardize R identing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michaela Müller <51025211+mumichae@users.noreply.github.com> --- src/metrics/cilisi/script.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/metrics/cilisi/script.R b/src/metrics/cilisi/script.R index a63fdab1..4b7cba27 100644 --- a/src/metrics/cilisi/script.R +++ b/src/metrics/cilisi/script.R @@ -21,14 +21,15 @@ metadata <- solution$obs cat("Compute CiLISI metrics...\n") lisisplit <- scIntegrationMetrics::compute_lisi_splitBy( - X = embeddings, - meta_data = metadata, - label_colnames = "batch", - perplexity = 30, - split_by_colname = "cell_type", - normalize = TRUE, - min.cells.split = 10, - min.vars.label = 2) + X = embeddings, + meta_data = metadata, + label_colnames = "batch", + perplexity = 30, + split_by_colname = "cell_type", + normalize = TRUE, + min.cells.split = 10, + min.vars.label = 2 +) # average CiLISI cilisi <- mean(unlist(lisisplit)) # Mean per cell type