From 0c3f6b74b64971d18072c4486cde76496cf8dbae Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 25 Nov 2024 11:53:53 +0100 Subject: [PATCH 1/7] Populate gene symbols --- vignettes/laminr.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index b297618..cfd67ca 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -126,6 +126,8 @@ seurat <- SeuratObject::CreateSeuratObject( counts = as(Matrix::t(adata$X), "CsparseMatrix"), meta.data = adata$obs ) +# add gene metadata +seurat[["RNA"]] <- AddMetaData(GetAssay(seurat), adata$var) # Set cell identities to the provided cell type annotation SeuratObject::Idents(seurat) <- "cell_type" # Normalise the data From b66e16cebf86df51000a0a33eb076fafb4f722e6 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Mon, 25 Nov 2024 12:43:04 +0100 Subject: [PATCH 2/7] Add an exemplary query of Census --- vignettes/laminr.Rmd | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index cfd67ca..ded5f18 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -115,42 +115,66 @@ This artifact contains an [`AnnData`](https://anndata.readthedocs.io) object. If you prefer a path to a local file or folder, call `path <- artifact$cache()`. -# Work with the data +# Work with the dataset Once you have loaded a dataset you can perform any analysis with it as you would normally. Here, marker genes are calculated for each of the provided cell type labels using [**{Seurat}**](https://satijalab.org/seurat/). ```{r create-seurat} # Create a Seurat object -seurat <- SeuratObject::CreateSeuratObject( +seurat_obj <- SeuratObject::CreateSeuratObject( counts = as(Matrix::t(adata$X), "CsparseMatrix"), meta.data = adata$obs ) # add gene metadata -seurat[["RNA"]] <- AddMetaData(GetAssay(seurat), adata$var) +seurat_obj[["RNA"]] <- AddMetaData(GetAssay(seurat_obj), adata$var) # Set cell identities to the provided cell type annotation -SeuratObject::Idents(seurat) <- "cell_type" +SeuratObject::Idents(seurat_obj) <- "cell_type" # Normalise the data -seurat <- Seurat::NormalizeData(seurat) +seurat_obj <- Seurat::NormalizeData(seurat_obj) # Test for marker genes (the output is a data.frame) markers <- Seurat::FindAllMarkers( - seurat, - features = SeuratObject::Features(seurat)[1:100] # Only test a few features for speed + seurat_obj, + features = SeuratObject::Features(seurat_obj)[1:100] # Only test a few features for speed ) # Display the marker genes knitr::kable(markers) # Plot the marker genes -Seurat::DotPlot(seurat, features = unique(markers$gene)) + +Seurat::DotPlot(seurat_obj, features = unique(markers$gene)) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5)) ``` +# Slice the tiledbsoma array store + +Alternatively to querying individual datasets, you can also slice the `.tiledbsoma` array store, which stores Census, a concatenated version of most datasets in CELLxGENE. + +```{r create-seurat} + +library("cellxgene.census") + +census <- open_soma() + +organism <- "Homo sapiens" +gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')" +cell_filter <- "cell_type == 'sympathetic neuron'" +cell_columns <- c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease") + +seurat_obj2 <- get_seurat( + census = census, + organism = organism, + var_value_filter = gene_filter, + obs_value_filter = cell_filter, + obs_column_names = cell_columns +) +``` + # Save the results Save results as new artifacts to the default LaminDB instance. ```{r save-results, eval = submit_eval} seurat_path <- tempfile(fileext = ".rds") -saveRDS(seurat, seurat_path) +saveRDS(seurat_obj, seurat_path) db$Artifact$from_df( markers, From 6e25d8e996a585ce27142332db3c72ea5a72af55 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Tue, 26 Nov 2024 07:59:56 +0100 Subject: [PATCH 3/7] Fix build Co-authored-by: Luke Zappia --- vignettes/laminr.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index ded5f18..1f72260 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -148,7 +148,7 @@ Seurat::DotPlot(seurat_obj, features = unique(markers$gene)) + Alternatively to querying individual datasets, you can also slice the `.tiledbsoma` array store, which stores Census, a concatenated version of most datasets in CELLxGENE. -```{r create-seurat} +```{r slice-tiledb} library("cellxgene.census") From ebc195914f7ab4f58af5d60830e824f05dc1e21b Mon Sep 17 00:00:00 2001 From: Luke Zappia Date: Tue, 26 Nov 2024 08:12:46 +0100 Subject: [PATCH 4/7] Adjust CELLxGENE Census chunk in Get Started --- vignettes/laminr.Rmd | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index 1f72260..66701ba 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -16,6 +16,7 @@ knitr::opts_chunk$set( # actually upload results to the LaminDB instance # -> testuser1 is a test account that cannot upload results submit_eval <- laminr:::.get_user_settings()$handle != "testuser1" +submit_eval <- FALSE ``` This vignette introduces the basic **{laminr}** workflow. @@ -126,8 +127,10 @@ seurat_obj <- SeuratObject::CreateSeuratObject( counts = as(Matrix::t(adata$X), "CsparseMatrix"), meta.data = adata$obs ) -# add gene metadata -seurat_obj[["RNA"]] <- AddMetaData(GetAssay(seurat_obj), adata$var) +# Add gene metadata +seurat_obj[["RNA"]] <- SeuratObject::AddMetaData( + Seurat::GetAssay(seurat_obj), adata$var +) # Set cell identities to the provided cell type annotation SeuratObject::Idents(seurat_obj) <- "cell_type" # Normalise the data @@ -146,10 +149,9 @@ Seurat::DotPlot(seurat_obj, features = unique(markers$gene)) + # Slice the tiledbsoma array store -Alternatively to querying individual datasets, you can also slice the `.tiledbsoma` array store, which stores Census, a concatenated version of most datasets in CELLxGENE. - -```{r slice-tiledb} +Alternatively to accessing individual CELLxGENE datasets from LaminDB, the **{cellxgene.census}** package can be used to slice the TileDB-SOMA array store for CELLxGENE Census, a concatenated version of most datasets in CELLxGENE. +```{r cellxgene-census} library("cellxgene.census") census <- open_soma() @@ -157,7 +159,9 @@ census <- open_soma() organism <- "Homo sapiens" gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')" cell_filter <- "cell_type == 'sympathetic neuron'" -cell_columns <- c("assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease") +cell_columns <- c( + "assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease" +) seurat_obj2 <- get_seurat( census = census, From 68a1278edc4b22dcb1b1540af21f9530186433b5 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 Nov 2024 08:49:45 +0100 Subject: [PATCH 5/7] minor changes --- vignettes/laminr.Rmd | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index 66701ba..5c44c1c 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -122,28 +122,30 @@ Once you have loaded a dataset you can perform any analysis with it as you would Here, marker genes are calculated for each of the provided cell type labels using [**{Seurat}**](https://satijalab.org/seurat/). ```{r create-seurat} +library(Seurat) + # Create a Seurat object -seurat_obj <- SeuratObject::CreateSeuratObject( +seurat_obj <- CreateSeuratObject( counts = as(Matrix::t(adata$X), "CsparseMatrix"), meta.data = adata$obs ) # Add gene metadata -seurat_obj[["RNA"]] <- SeuratObject::AddMetaData( - Seurat::GetAssay(seurat_obj), adata$var +seurat_obj[["RNA"]] <- AddMetaData( + GetAssay(seurat_obj), adata$var ) # Set cell identities to the provided cell type annotation -SeuratObject::Idents(seurat_obj) <- "cell_type" +Idents(seurat_obj) <- "cell_type" # Normalise the data -seurat_obj <- Seurat::NormalizeData(seurat_obj) +seurat_obj <- NormalizeData(seurat_obj) # Test for marker genes (the output is a data.frame) -markers <- Seurat::FindAllMarkers( +markers <- FindAllMarkers( seurat_obj, - features = SeuratObject::Features(seurat_obj)[1:100] # Only test a few features for speed + features = Features(seurat_obj)[1:100] # Only test a few features for speed ) # Display the marker genes knitr::kable(markers) # Plot the marker genes -Seurat::DotPlot(seurat_obj, features = unique(markers$gene)) + +DotPlot(seurat_obj, features = unique(markers$gene)) + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5)) ``` From c22b4a80196f6a37cc8a07351f45e70f3bc44665 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 Nov 2024 11:05:08 +0100 Subject: [PATCH 6/7] don't evaluate cellxgene.census block (otherwise would need to be a dependency but is not on cran yet) --- vignettes/laminr.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index 5c44c1c..85b36d1 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -153,7 +153,7 @@ DotPlot(seurat_obj, features = unique(markers$gene)) + Alternatively to accessing individual CELLxGENE datasets from LaminDB, the **{cellxgene.census}** package can be used to slice the TileDB-SOMA array store for CELLxGENE Census, a concatenated version of most datasets in CELLxGENE. -```{r cellxgene-census} +```{r slice-tiledbsoma, eval=FALSE} library("cellxgene.census") census <- open_soma() From 6df6a70a1ba394458e893e0a0d5e31dd31fda64a Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 26 Nov 2024 11:05:17 +0100 Subject: [PATCH 7/7] style code --- vignettes/laminr.Rmd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vignettes/laminr.Rmd b/vignettes/laminr.Rmd index 85b36d1..151ae0d 100644 --- a/vignettes/laminr.Rmd +++ b/vignettes/laminr.Rmd @@ -154,23 +154,23 @@ DotPlot(seurat_obj, features = unique(markers$gene)) + Alternatively to accessing individual CELLxGENE datasets from LaminDB, the **{cellxgene.census}** package can be used to slice the TileDB-SOMA array store for CELLxGENE Census, a concatenated version of most datasets in CELLxGENE. ```{r slice-tiledbsoma, eval=FALSE} -library("cellxgene.census") +library(cellxgene.census) census <- open_soma() organism <- "Homo sapiens" gene_filter <- "feature_id %in% c('ENSG00000107317', 'ENSG00000106034')" -cell_filter <- "cell_type == 'sympathetic neuron'" +cell_filter <- "cell_type == 'sympathetic neuron'" cell_columns <- c( "assay", "cell_type", "tissue", "tissue_general", "suspension_type", "disease" ) seurat_obj2 <- get_seurat( - census = census, - organism = organism, - var_value_filter = gene_filter, - obs_value_filter = cell_filter, - obs_column_names = cell_columns + census = census, + organism = organism, + var_value_filter = gene_filter, + obs_value_filter = cell_filter, + obs_column_names = cell_columns ) ```