|
7 | 7 | from sklearn.manifold.t_sne import TSNE
|
8 | 8 | from .aucell import aucell
|
9 | 9 | from .genesig import Regulon
|
10 |
| -from typing import List, Mapping, Sequence, Optional |
| 10 | +from typing import List, Mapping, Union, Sequence, Optional |
11 | 11 | from operator import attrgetter
|
12 | 12 | from multiprocessing import cpu_count
|
13 | 13 | from .binarization import binarize
|
@@ -53,7 +53,7 @@ def export2loom(ex_mtx: pd.DataFrame, regulons: List[Regulon], out_fname: str,
|
53 | 53 | # Information on the SCope specific alterations: https://github.com/aertslab/SCope/wiki/Data-Format
|
54 | 54 |
|
55 | 55 | if cell_annotations is None:
|
56 |
| - cell_annotations=dict(zip(ex_matrix.index, ['-']*ex_matrix.shape[0])) |
| 56 | + cell_annotations=dict(zip(ex_mtx.index, ['-']*ex_mtx.shape[0])) |
57 | 57 |
|
58 | 58 | if(regulons[0].name.find(' ')==-1):
|
59 | 59 | print("Regulon name does not seem to be compatible with SCOPE. It should include a space to allow selection of the TF.",
|
@@ -176,6 +176,68 @@ def fetch_logo(context):
|
176 | 176 | file_attrs=general_attrs)
|
177 | 177 |
|
178 | 178 |
|
def add_scenic_metadata(adata: 'sc.AnnData',
                        auc_mtx: pd.DataFrame,
                        regulons: Union[None, Sequence[Regulon]] = None,
                        bin_rep: bool = False,
                        copy: bool = False) -> 'sc.AnnData':
    """
    Add AUCell values and regulon metadata to AnnData object.

    The following slots of the AnnData object are written:

    - `obsm['X_aucell']`: the AUCell matrix as an extra representation (and `obsm['X_aucell_bin']`
      when `bin_rep` is set).
    - `var`: one boolean column `Regulon(<name>)` per supplied regulon flagging the member genes.
    - `uns['aucell']`: a dict with the keys `regulon_names` and `regulon_motifs`.
    - `obs`: one column `Regulon(<name>)` per column of `auc_mtx` holding the AUCell values.

    :param adata: The AnnData object.
    :param auc_mtx: The dataframe containing the AUCell values (#observations x #regulons). If its index holds
        exactly the observation names of `adata` but in another order, its rows are reordered to match `adata`
        before being stored. Any other index is used positionally, as-is.
    :param regulons: Optional sequence of regulons. When supplied, gene membership is added to `adata.var` and
        the first element of each regulon's context ending in '.png' is stored as its motif logo.
    :param bin_rep: Also add binarized version of AUCell values as separate representation. This representation
        is stored as `adata.obsm['X_aucell_bin']`.
    :param copy: Return a copy instead of writing to adata.
    :return: The annotated AnnData object; this is `adata` itself unless `copy` is set.
    """
    # To avoid dependency on the scanpy package the type hinting intentionally uses string literals.
    # For the same reason the runtime type of `adata` is not checked.
    assert isinstance(auc_mtx, pd.DataFrame), "auc_mtx must be a pandas DataFrame."
    assert len(auc_mtx) == adata.n_obs, "auc_mtx must have one row per observation in adata."

    REGULON_SUFFIX_PATTERN = 'Regulon({})'

    # The obsm representation below is filled positionally whereas the obs annotations are merged on the
    # index. Reorder the rows when the index is merely a permutation of the observation names so that both
    # end up describing the same cells. A new frame is created; the caller's dataframe is left untouched.
    obs_names = pd.Index(adata.obs_names)
    if (not auc_mtx.index.equals(obs_names)
            and auc_mtx.index.is_unique
            and obs_names.is_unique
            and obs_names.isin(auc_mtx.index).all()):
        auc_mtx = auc_mtx.loc[obs_names]

    result = adata.copy() if copy else adata

    # Add AUCell values as new representation (similar to a PCA). This facilitates the usage of
    # AUCell as initial dimensional reduction.
    result.obsm['X_aucell'] = auc_mtx.values.copy()
    if bin_rep:
        bin_mtx, _ = binarize(auc_mtx)
        result.obsm['X_aucell_bin'] = bin_mtx.values

    # Encode genes in regulons as "binary" membership matrix.
    if regulons is not None:
        genes = np.array(adata.var_names)
        data = np.zeros(shape=(adata.n_vars, len(regulons)), dtype=bool)
        for idx, regulon in enumerate(regulons):
            data[:, idx] = np.isin(genes, list(regulon.genes))
        regulon_assignment = pd.DataFrame(
            data=data, index=genes,
            columns=[REGULON_SUFFIX_PATTERN.format(regulon.name) for regulon in regulons])
        result.var = pd.merge(result.var, regulon_assignment, left_index=True, right_index=True, how='left')

    # Add additional meta-data/information on the regulons.
    def fetch_logo(context):
        # First context element that looks like a logo image; empty string when there is none.
        return next((elem for elem in context if elem.endswith('.png')), "")

    result.uns['aucell'] = {
        'regulon_names': auc_mtx.columns.map(REGULON_SUFFIX_PATTERN.format).values,
        'regulon_motifs': np.array([fetch_logo(reg.context) for reg in regulons] if regulons is not None else [])
    }

    # Add the AUCell values also as annotations of observations. This way regulon activity can be
    # depicted on cellular scatterplots.
    mtx = auc_mtx.copy()
    mtx.columns = result.uns['aucell']['regulon_names']
    result.obs = pd.merge(result.obs, mtx, left_index=True, right_index=True, how='left')

    return result
| 239 | + |
| 240 | + |
179 | 241 | def export_regulons(regulons: Sequence[Regulon], fname: str) -> None:
|
180 | 242 | """
|
181 | 243 |
|
|
0 commit comments