Skip to content

Commit 15fa63a

Browse files
committed
Created using Colab
1 parent 8d74aea commit 15fa63a

File tree

1 file changed

+117
-107
lines changed

1 file changed

+117
-107
lines changed

docs/UNIFORM.ipynb

+117-107
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,20 @@
9999
"\n",
100100
"! curl -S --proto '=https' --tlsv1.2 -LsSf https://github.com/COMBINE-lab/simpleaf/releases/download/v0.17.2/simpleaf-installer.sh | sh > /dev/null 2>&1\n",
101101
"%env ALEVIN_FRY_HOME=\"$HOME/.cargo/bin/alevin-fry\"\n",
102-
"! $HOME/.cargo/bin/simpleaf --version"
102+
"! $HOME/.cargo/bin/simpleaf --version\n",
103+
"\n",
104+
"!wget --quiet --show-progress https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64\n",
105+
"!chmod +x jq-linux-amd64\n",
106+
"!mv jq-linux-amd64 /usr/bin/jq"
103107
],
104108
"metadata": {
105109
"id": "4EWVWe1hgpAG",
106-
"outputId": "f58e5593-e264-421a-eb6a-0482b38714e1",
110+
"outputId": "b8a9391b-71ec-47c3-b141-0e94e915d651",
107111
"colab": {
108112
"base_uri": "https://localhost:8080/"
109113
}
110114
},
111-
"execution_count": 1,
115+
"execution_count": 2,
112116
"outputs": [
113117
{
114118
"output_type": "stream",
@@ -130,11 +134,12 @@
130134
" -h, --help Show this help message and exit\n",
131135
" --list Display list of supported single-cell technologies\n",
132136
"gget version: 0.28.6\n",
133-
"2.7.11b.tar.gz [ <=> ] 11.89M 7.43MB/s in 1.6s \n",
137+
"2.7.11b.tar.gz [ <=> ] 11.89M 7.50MB/s in 1.6s \n",
134138
"2.7.11b\n",
135139
"alevin-fry 0.10.0\n",
136140
"env: ALEVIN_FRY_HOME=\"$HOME/.cargo/bin/alevin-fry\"\n",
137-
"simpleaf 0.17.2\n"
141+
"simpleaf 0.17.2\n",
142+
"jq-linux-amd64 100%[===================>] 2.21M --.-KB/s in 0.02s \n"
138143
]
139144
}
140145
]
@@ -155,35 +160,35 @@
155160
],
156161
"metadata": {
157162
"id": "hoDCIDwhkEKU",
158-
"outputId": "0d293a8d-ee0d-4055-e80b-fc5041f87e34",
163+
"outputId": "b167779c-568a-4682-f537-4fbe2b3f939b",
159164
"colab": {
160165
"base_uri": "https://localhost:8080/"
161166
}
162167
},
163-
"execution_count": 2,
168+
"execution_count": 3,
164169
"outputs": [
165170
{
166171
"output_type": "stream",
167172
"name": "stdout",
168173
"text": [
169-
"\rspec.yaml 0%[ ] 0 --.-KB/s \rspec.yaml 100%[===================>] 14.17K --.-KB/s in 0.001s \n"
174+
"\rspec.yaml 0%[ ] 0 --.-KB/s \rspec.yaml 100%[===================>] 14.09K --.-KB/s in 0s \n"
170175
]
171176
}
172177
]
173178
},
174179
{
175180
"cell_type": "code",
176181
"source": [
177-
"!seqspec print spec.yaml"
182+
"! seqspec print spec.yaml"
178183
],
179184
"metadata": {
180185
"id": "RIEiCVzNom8_",
181-
"outputId": "14d9e06a-c21f-4ada-8c43-acb82da096ae",
186+
"outputId": "fe89372f-9236-4d64-88ef-c5960c407993",
182187
"colab": {
183188
"base_uri": "https://localhost:8080/"
184189
}
185190
},
186-
"execution_count": 3,
191+
"execution_count": 4,
187192
"outputs": [
188193
{
189194
"output_type": "stream",
@@ -215,146 +220,150 @@
215220
}
216221
]
217222
},
218-
{
219-
"cell_type": "markdown",
220-
"source": [
221-
"## Single-cell/nuclei RNAseq quantification"
222-
],
223-
"metadata": {
224-
"id": "zhQsQD42giYi"
225-
}
226-
},
227-
{
228-
"cell_type": "markdown",
229-
"source": [
230-
"### `kb-python (kallisto bustools)`"
231-
],
232-
"metadata": {
233-
"id": "joQ-Vzgagi_9"
234-
}
235-
},
236223
{
237224
"cell_type": "code",
238225
"source": [
239-
"!seqspec file"
226+
"! seqspec file -m rna -f json -s file -k all spec.yaml"
240227
],
241228
"metadata": {
242-
"id": "E86RJ-9Eq0gE",
243-
"outputId": "bbb791f9-9301-4415-a653-3016212d43c2",
229+
"id": "8AUi7mH31BiL",
230+
"outputId": "010c4942-27e2-4623-b7c1-86504e2ef4df",
244231
"colab": {
245232
"base_uri": "https://localhost:8080/"
246233
}
247234
},
248-
"execution_count": 13,
235+
"execution_count": 12,
249236
"outputs": [
250237
{
251238
"output_type": "stream",
252239
"name": "stdout",
253240
"text": [
254-
"usage: seqspec file [-h] [-o OUT] [-i IDs] -m MODALITY [-s SELECTOR] [-f FORMAT] [-k KEY] yaml\n",
255-
"\n",
256-
"List files present in seqspec file.\n",
257-
"\n",
258-
"Examples:\n",
259-
"seqspec file -m rna spec.yaml # List paired read files\n",
260-
"seqspec file -m rna -f interleaved spec.yaml # List interleaved read files\n",
261-
"seqspec file -m rna -f list -k url spec.yaml # List urls of all read files\n",
262-
"seqspec file -m rna -f list -s onlist -k all spec.yaml # List onlist files\n",
263-
"---\n",
264-
"\n",
265-
"positional arguments:\n",
266-
" yaml Sequencing specification yaml file\n",
267-
"\n",
268-
"options:\n",
269-
" -h, --help show this help message and exit\n",
270-
" -o OUT Path to output file\n",
271-
" -i IDs Ids to list\n",
272-
" -s SELECTOR Selector for ID, [read, region, file, onlist] (default: read)\n",
273-
" -f FORMAT Format, [paired, interleaved, index, list], default: paired\n",
274-
" -k KEY Key, [file_id, filename, filetype, filesize, url, urltype, md5, all], default: file_id\n",
275-
"\n",
276-
"required arguments:\n",
277-
" -m MODALITY Modality\n"
241+
"[\n",
242+
" {\n",
243+
" \"file_id\": \"rna_R1_SRR18677638.fastq.gz\",\n",
244+
" \"filename\": \"rna_R1_SRR18677638.fastq.gz\",\n",
245+
" \"filetype\": \"fastq\",\n",
246+
" \"filesize\": 18499436,\n",
247+
" \"url\": \"https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R1_SRR18677638.fastq.gz\",\n",
248+
" \"urltype\": \"https\",\n",
249+
" \"md5\": \"7eb15a70da9b729b5a87e30b6596b641\"\n",
250+
" },\n",
251+
" {\n",
252+
" \"file_id\": \"rna_R2_SRR18677638.fastq.gz\",\n",
253+
" \"filename\": \"rna_R2_SRR18677638.fastq.gz\",\n",
254+
" \"filetype\": \"fastq\",\n",
255+
" \"filesize\": 45812569,\n",
256+
" \"url\": \"https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R2_SRR18677638.fastq.gz\",\n",
257+
" \"urltype\": \"https\",\n",
258+
" \"md5\": \"5e6915770e50f72e462e5b2575089c66\"\n",
259+
" },\n",
260+
" {\n",
261+
" \"file_id\": \"RNA-737K-arc-v1.txt\",\n",
262+
" \"filename\": \"RNA-737K-arc-v1.txt\",\n",
263+
" \"filetype\": \"txt\",\n",
264+
" \"filesize\": 2142553,\n",
265+
" \"url\": \"https://github.com/pachterlab/qcbc/raw/main/tests/10xMOME/RNA-737K-arc-v1.txt.gz\",\n",
266+
" \"urltype\": \"https\",\n",
267+
" \"md5\": \"a88cd21e801ae6f9a7d9a48b67ccf693\"\n",
268+
" }\n",
269+
"]\n"
278270
]
279271
}
280272
]
281273
},
282274
{
283275
"cell_type": "code",
284276
"source": [
285-
"!seqspec file -m tag -f list -s onlist -k all spec.yaml"
277+
"! seqspec file -m rna -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress\n",
278+
"! seqspec file -m atac -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress\n",
279+
"! seqspec file -m tag -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress\n",
280+
"! seqspec file -m protein -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress"
286281
],
287282
"metadata": {
288-
"id": "YMlhrWCjrLir",
289-
"outputId": "93811de4-5995-4105-94ec-b5da54435b35",
283+
"id": "c1ZjLfnb1EL2",
284+
"outputId": "d87fb519-e67d-448e-df2b-2ca5b74e7452",
290285
"colab": {
291286
"base_uri": "https://localhost:8080/"
292287
}
293288
},
294-
"execution_count": 18,
289+
"execution_count": 13,
295290
"outputs": [
296291
{
297292
"output_type": "stream",
298293
"name": "stdout",
299294
"text": [
300-
"tag_cell_bc\tRNA-737K-arc-v1.txt\tRNA-737K-arc-v1.txt\ttxt\t2142553\thttps://github.com/pachterlab/qcbc/raw/main/tests/10xMOME/RNA-737K-arc-v1.txt.gz\thttps\ta88cd21e801ae6f9a7d9a48b67ccf693\n",
301-
"tag_seq\ttag_0419_feature_barcodes.txt\ttag_0419_feature_barcodes.txt\ttxt\t0\thttps://raw.githubusercontent.com/pachterlab/seqspec/devel/examples/specs/dogmaseq-dig/tag_0419_feature_barcodes.txt\thttps\tde44ad6d5c4b9f381a352283a6831112\n"
295+
"rna_R1_SRR18677638. 100%[===================>] 17.64M --.-KB/s in 0.07s \n",
296+
"rna_R2_SRR18677638. 100%[===================>] 43.69M 270MB/s in 0.2s \n",
297+
"RNA-737K-arc-v1.txt 100%[===================>] 2.04M --.-KB/s in 0.03s \n",
298+
"atac_R1_SRR18677642 100%[===================>] 38.33M 165MB/s in 0.2s \n",
299+
"atac_R2_SRR18677642 100%[===================>] 20.01M --.-KB/s in 0.1s \n",
300+
"atac_R3_SRR18677642 100%[===================>] 34.88M --.-KB/s in 0.1s \n",
301+
"ATA-737K-arc-v1.txt 100%[===================>] 2.35M --.-KB/s in 0.03s \n",
302+
"tag_R1_SRR18677640. 100%[===================>] 17.20M --.-KB/s in 0.07s \n",
303+
"tag_R2_SRR18677640. 100%[===================>] 7.13M --.-KB/s in 0.05s \n",
304+
"RNA-737K-arc-v1.txt 100%[===================>] 2.04M --.-KB/s in 0.03s \n",
305+
"tag_feature_barcode 100%[===================>] 208 --.-KB/s in 0s \n",
306+
"protein_R1_SRR18677 100%[===================>] 17.33M --.-KB/s in 0.1s \n",
307+
"protein_R2_SRR18677 100%[===================>] 8.98M --.-KB/s in 0.05s \n",
308+
"RNA-737K-arc-v1.txt 100%[===================>] 2.04M --.-KB/s in 0.03s \n",
309+
"protein_feature_bar 100%[===================>] 4.55K --.-KB/s in 0s \n"
302310
]
303311
}
304312
]
305313
},
306314
{
307315
"cell_type": "code",
308316
"source": [
309-
"!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt\n",
310-
"!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt\n",
311-
"!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt\n",
312-
"!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt"
317+
"! gunzip *.txt.gz"
313318
],
314319
"metadata": {
315-
"id": "Z3Jv3tKSqSSv",
316-
"outputId": "b2ff09ba-1023-48b4-f99d-0c07c78f8713",
317-
"colab": {
318-
"base_uri": "https://localhost:8080/"
319-
}
320+
"id": "SmUey5ga1xnB"
320321
},
321-
"execution_count": 16,
322-
"outputs": [
323-
{
324-
"output_type": "stream",
325-
"name": "stdout",
326-
"text": [
327-
"https://github.com/pachterlab/qcbc/raw/main/tests/10xMOME/RNA-737K-arc-v1.txt.gz\n"
328-
]
329-
}
330-
]
322+
"execution_count": 14,
323+
"outputs": []
324+
},
325+
{
326+
"cell_type": "markdown",
327+
"source": [
328+
"## Single-cell/nuclei RNAseq quantification"
329+
],
330+
"metadata": {
331+
"id": "zhQsQD42giYi"
332+
}
333+
},
334+
{
335+
"cell_type": "markdown",
336+
"source": [
337+
"### `kb-python (kallisto bustools)`"
338+
],
339+
"metadata": {
340+
"id": "joQ-Vzgagi_9"
341+
}
331342
},
332343
{
333344
"cell_type": "code",
334345
"source": [
335-
"! # seqspec commands to get onlist, technology string, and files\n",
336-
"! w=$(seqspec onlist -m rna -o onlist.txt -s region-type -i barcode spec.yaml) && echo \"Onlist: \" $w\n",
337-
"\n",
338-
"! x=$(seqspec index -m rna -t kb -s file spec.yaml) && echo \"Technology string: \" $x\n",
339-
"\n",
340-
"! f=$(seqspec file -m rna -s read -f paired -k url spec.yaml | tr \"\\t\\n\" \" \") && echo \"Files: \" $f"
346+
"# seqspec commands to get the technology string, onlist, and read files\n",
347+
"! seqspec index -m rna -t kb -s file spec.yaml\n",
348+
"! seqspec file -m rna -s region -k filename spec.yaml\n",
349+
"! seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \""
341350
],
342351
"metadata": {
343-
"id": "MptvRm20psFR",
344-
"outputId": "eba4bacf-2b85-4f26-fc09-84c2a01380e9",
352+
"id": "UmwOAvSu2hik",
353+
"outputId": "a9851d77-485e-419d-e1a7-8ebdc48cda07",
345354
"colab": {
346355
"base_uri": "https://localhost:8080/"
347356
}
348357
},
349-
"execution_count": 6,
358+
"execution_count": 31,
350359
"outputs": [
351360
{
352361
"output_type": "stream",
353362
"name": "stdout",
354363
"text": [
355-
"Onlist: /content/RNA-737K-arc-v1.txt\n",
356-
"Technology string: 0,0,16:0,16,28:1,0,102\n",
357-
"Files: https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R1_SRR18677638.fastq.gz https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R2_SRR18677638.fastq.gz\n"
364+
"0,0,16:0,16,28:1,0,102\n",
365+
"RNA-737K-arc-v1.txt\n",
366+
"rna_R1_SRR18677638.fastq.gz rna_R2_SRR18677638.fastq.gz "
358367
]
359368
}
360369
]
@@ -368,20 +377,21 @@
368377
"outputs": [],
369378
"source": [
370379
"# standard reference\n",
371-
"! kb ref -i index.idx -g t2g.txt -f1 transcriptome.fa $(gget ref --ftp -w dna,gtf homo_sapiens)\n",
372-
"\n",
373-
"! # seqspec commands to get onlist, technology string, and files\n",
374-
"! w=$(seqspec onlist -m rna -o onlist.txt -s region-type -i barcode spec.yaml)\n",
375-
"! echo \"Onlist: \" $w\n",
376-
"\n",
377-
"! x=$(seqspec index -m rna -t kb -s file spec.yaml)\n",
378-
"! echo \"Technology string: \" $x\n",
379-
"\n",
380-
"! f=$(seqspec file -m rna -s read -f paired -k url spec.yaml | tr \"\\t\\n\" \" \")\n",
381-
"! echo \"Files: \" $f\n",
380+
"! kb ref \\\n",
381+
"-i index.idx \\\n",
382+
"-g t2g.txt \\\n",
383+
"-f1 transcriptome.fa \\\n",
384+
"$(gget ref --ftp -w dna,gtf homo_sapiens)\n",
382385
"\n",
383-
"! # standard quantification\n",
384-
"! kb count --h5ad -t 16 -m 32G -i index.idx -g t2g.txt -o kb_out -x \"$x\" -w \"$w\" \"$f\""
386+
"# standard quantification\n",
387+
"! kb count \\\n",
388+
"--h5ad -t 16 -m 32G \\\n",
389+
"-i index.idx \\\n",
390+
"-g t2g.txt \\\n",
391+
"-o kb_out \\\n",
392+
"-x $(seqspec index -m rna -t kb -s file spec.yaml) \\\n",
393+
"-w $(seqspec file -m rna -s region -k filename spec.yaml) \\\n",
394+
"$(seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \")"
385395
]
386396
}
387397
],

0 commit comments

Comments
 (0)