Skip to content

Commit f3923ca

Browse files
author
ktroule
committed
first commit
0 parents  commit f3923ca

File tree

5 files changed

+96
-0
lines changed

5 files changed

+96
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.pyc

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# sctools
2+
3+
Repository with utilities that I've create for the analysis of single-cell data.

__init__.py

Whitespace-only changes.

metrics.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from joblib import Parallel, delayed
2+
import multiprocessing
3+
import scanpy as sc
4+
import pandas as pd
5+
import glob
6+
import os
7+
8+
def soupy_ratio(data_dir, num_cores = -1):
9+
10+
'''
11+
Calculates the ratio of gene counts associated to empty droplets to all droplets.
12+
13+
This aims to identify soupy genes, those whose expression ratio is high in the empty droplets.
14+
A higher score (i.e close to 1), the more likely the gene is soupy.
15+
16+
Parameters:
17+
- data_dir (int): Folder containing all the libraries associated to a given experiment.
18+
- num_cores (int): Number of cores to use, default all unless specified.
19+
20+
Parameters:
21+
- a dataframe with the ratio of soupiness associated to every gene.
22+
23+
'''
24+
25+
# -- List all subfolders containing raw and filtered matrices
26+
dirs_dropl = glob.glob(os.path.join(data_dir, '*', 'raw_feature_bc_matrix'))
27+
dirs_cells = glob.glob(os.path.join(data_dir, '*', 'filtered_feature_bc_matrix'))
28+
29+
# -- Function to load adata and modify cell barcodes
30+
def load_adata(obj_dir):
31+
adata = sc.read_10x_mtx(obj_dir)
32+
adata.obs.index = os.path.basename(obj_dir.split('/')[-2]) + '_' + adata.obs.index
33+
return adata
34+
35+
36+
# -- Set number of cores for parallelization
37+
if num_cores <= 0:
38+
num_cores = multiprocessing.cpu_count()
39+
40+
# -- Read all matrices and concateante into a single object for raw and filtered
41+
if __name__ == "__main__":
42+
print('Reading raw_feature_bc_matrix objects')
43+
dropl_holder = Parallel(n_jobs = num_cores)(delayed(load_adata)(obj_dir = i)
44+
for i in dirs_dropl)
45+
46+
print('Reading filtered_feature_bc_matrix objects')
47+
cells_holder = Parallel(n_jobs = num_cores)(delayed(load_adata)(obj_dir = i)
48+
for i in dirs_cells)
49+
50+
# -- Concatenate objets
51+
adata_dropl = dropl_holder[0].concatenate(dropl_holder[1:], join = 'outer')
52+
del dropl_holder
53+
54+
adata_cells = cells_holder[0].concatenate(cells_holder[1:], join = 'outer')
55+
del cells_holder
56+
57+
# -- From droplets remove barcodes associtate to cells
58+
adata_dropl = adata_dropl[~adata_dropl.obs.index.isin(list(adata_cells.obs.index))]
59+
60+
# -- Calculate number of gene counts
61+
counts_dropl = adata_dropl.X.sum(axis = 0).A1
62+
counts_cells = adata_cells.X.sum(axis = 0).A1
63+
64+
# -- Join counts in a dataframe and discard genes whose counts is 0 in both objects
65+
soupy_ratio = pd.DataFrame({'droplets' : counts_dropl, 'cells' : counts_cells},
66+
index = adata_dropl.var.index)
67+
68+
# -- Remove genes 0 counts for droplets and cells
69+
soupy_ratio = soupy_ratio[soupy_ratio[['droplets', 'cells']].sum(axis = 1) != 0]
70+
71+
# -- Calculate ratio of soupiness
72+
soupy_ratio['soupy_ratio'] = soupy_ratio['droplets']/soupy_ratio[['droplets', 'cells']].sum(axis = 1)
73+
74+
return(soupy_ratio)

pyproject.toml

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[project]
2+
name = "sctools"
3+
version = "0.0.1"
4+
authors = [
5+
{ name="Kevin", email="" },
6+
]
7+
description = "Utilities for single-cell analyses."
8+
readme = "README.md"
9+
requires-python = ">=3.7"
10+
classifiers = [
11+
"Programming Language :: Python :: 3",
12+
"License :: OSI Approved :: MIT License",
13+
"Operating System :: OS Independent",
14+
]
15+
16+
[project.urls]
17+
"Homepage" = ""
18+
"Bug Tracker" = "https://github.com/ktroule/sctools"

0 commit comments

Comments
 (0)