Skip to content

Commit 659d541

Browse files
committed
New functionality: function to add SCENIC metadata to an existing scanpy AnnData object.
1 parent 293e303 commit 659d541

File tree

2 files changed

+66
-2
lines changed

2 files changed

+66
-2
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@ docs/_build/
1616
**/dask-worker-space/
1717
scripts/cli_test_data/
1818
.python-version
19+
.vscode/
20+
.mypy_cache/

src/pyscenic/export.py

+64-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sklearn.manifold.t_sne import TSNE
88
from .aucell import aucell
99
from .genesig import Regulon
10-
from typing import List, Mapping, Sequence, Optional
10+
from typing import List, Mapping, Union, Sequence, Optional
1111
from operator import attrgetter
1212
from multiprocessing import cpu_count
1313
from .binarization import binarize
@@ -53,7 +53,7 @@ def export2loom(ex_mtx: pd.DataFrame, regulons: List[Regulon], out_fname: str,
5353
# Information on the SCope specific alterations: https://github.com/aertslab/SCope/wiki/Data-Format
5454

5555
if cell_annotations is None:
56-
cell_annotations=dict(zip(ex_matrix.index, ['-']*ex_matrix.shape[0]))
56+
cell_annotations=dict(zip(ex_mtx.index, ['-']*ex_mtx.shape[0]))
5757

5858
if(regulons[0].name.find(' ')==-1):
5959
print("Regulon name does not seem to be compatible with SCOPE. It should include a space to allow selection of the TF.",
@@ -176,6 +176,68 @@ def fetch_logo(context):
176176
file_attrs=general_attrs)
177177

178178

179+
def add_scenic_metadata(adata: 'sc.AnnData',
                        auc_mtx: pd.DataFrame,
                        regulons: Union[None, Sequence[Regulon]] = None,
                        bin_rep: bool = False,
                        copy: bool = False) -> 'sc.AnnData':
    """
    Add AUCell values and regulon metadata to AnnData object.

    The following slots are written:

    - `obsm['X_aucell']`: the AUCell matrix as an extra representation.
    - `obsm['X_aucell_bin']`: the binarized AUCell matrix (only when `bin_rep` is set).
    - `var['Regulon(<name>)']`: boolean gene membership per regulon (only when `regulons` is supplied).
    - `uns['aucell']`: dictionary holding `regulon_names` and `regulon_motifs`.
    - `obs['Regulon(<name>)']`: the AUCell value of every regulon for every observation.

    Columns named `Regulon(<name>)` that are already present in `obs`/`var` are replaced, so the function
    can safely be re-applied to the same object.

    :param adata: The AnnData object.
    :param auc_mtx: The dataframe containing the AUCell values (#observations x #regulons). When its index
        holds exactly the observation names of `adata` but in a different order, the rows are reordered to
        match `adata` before they are stored.
    :param regulons: Optional sequence of regulons. When supplied, the membership of the genes in each
        regulon is added to `var`, and for each regulon the first element of its context ending in '.png'
        is stored as its motif logo (empty string if there is none).
    :param bin_rep: Also add binarized version of AUCell values as separate representation. This representation
        is stored as `adata.obsm['X_aucell_bin']`.
    :param copy: Return a copy instead of writing to adata.
    :return: The annotated AnnData object; this is `adata` itself unless `copy` is set.
    """
    # To avoid dependency with scanpy package the type hinting intentionally uses string literals.
    # For the same reason the runtime type of adata is not asserted.
    assert isinstance(auc_mtx, pd.DataFrame)
    assert len(auc_mtx) == adata.n_obs

    REGULON_SUFFIX_PATTERN = 'Regulon({})'

    def merge_replacing(frame, additions):
        # Drop columns left behind by a previous call first; otherwise pd.merge would keep both versions
        # and rename them with '_x'/'_y' suffixes.
        stale = [name for name in additions.columns if name in frame.columns]
        return pd.merge(frame.drop(columns=stale), additions,
                        left_index=True, right_index=True, how='left')

    def fetch_logo(context):
        for elem in context:
            if elem.endswith('.png'):
                return elem
        return ""

    result = adata.copy() if copy else adata

    # The representations in obsm are assigned by position while the annotations in obs are joined on the
    # index. Reorder the AUCell matrix when it is a mere permutation of the observations so both agree.
    obs_index = result.obs.index
    if (not auc_mtx.index.equals(obs_index)
            and auc_mtx.index.is_unique
            and obs_index.is_unique
            and auc_mtx.index.isin(obs_index).all()):
        auc_mtx = auc_mtx.reindex(obs_index)

    # Add AUCell values as new representation (similar to a PCA). This facilitates the usage of
    # AUCell as initial dimensional reduction.
    result.obsm['X_aucell'] = auc_mtx.values.copy()
    if bin_rep:
        bin_mtx, _ = binarize(auc_mtx)
        result.obsm['X_aucell_bin'] = bin_mtx.values

    # Encode genes in regulons as "binary" membership matrix.
    if regulons is not None:
        genes = np.array(adata.var_names)
        data = np.zeros(shape=(adata.n_vars, len(regulons)), dtype=bool)
        for idx, regulon in enumerate(regulons):
            data[:, idx] = np.isin(genes, regulon.genes)
        regulon_assignment = pd.DataFrame(
            data=data, index=genes,
            columns=[REGULON_SUFFIX_PATTERN.format(regulon.name) for regulon in regulons])
        result.var = merge_replacing(result.var, regulon_assignment)

    # Add additional meta-data/information on the regulons.
    result.uns['aucell'] = {
        'regulon_names': auc_mtx.columns.map(lambda s: REGULON_SUFFIX_PATTERN.format(s)).values,
        'regulon_motifs': np.array([fetch_logo(reg.context) for reg in regulons] if regulons is not None else [])
    }

    # Add the AUCell values also as annotations of observations. This way regulon activity can be
    # depicted on cellular scatterplots.
    mtx = auc_mtx.copy()
    mtx.columns = result.uns['aucell']['regulon_names']
    result.obs = merge_replacing(result.obs, mtx)

    return result
239+
240+
179241
def export_regulons(regulons: Sequence[Regulon], fname: str) -> None:
180242
"""
181243

0 commit comments

Comments
 (0)