|
99 | 99 | "\n",
|
100 | 100 | "! curl -S --proto '=https' --tlsv1.2 -LsSf https://github.com/COMBINE-lab/simpleaf/releases/download/v0.17.2/simpleaf-installer.sh | sh > /dev/null 2>&1\n",
|
101 | 101 | "%env ALEVIN_FRY_HOME=\"$HOME/.cargo/bin/alevin-fry\"\n",
|
102 |
| - "! $HOME/.cargo/bin/simpleaf --version" |
| 102 | + "! $HOME/.cargo/bin/simpleaf --version\n", |
| 103 | + "\n", |
| 104 | + "!wget --quiet --show-progress https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64\n", |
| 105 | + "!chmod +x jq-linux-amd64\n", |
| 106 | + "!mv jq-linux-amd64 /usr/bin/jq" |
103 | 107 | ],
|
104 | 108 | "metadata": {
|
105 | 109 | "id": "4EWVWe1hgpAG",
|
106 |
| - "outputId": "f58e5593-e264-421a-eb6a-0482b38714e1", |
| 110 | + "outputId": "b8a9391b-71ec-47c3-b141-0e94e915d651", |
107 | 111 | "colab": {
|
108 | 112 | "base_uri": "https://localhost:8080/"
|
109 | 113 | }
|
110 | 114 | },
|
111 |
| - "execution_count": 1, |
| 115 | + "execution_count": 2, |
112 | 116 | "outputs": [
|
113 | 117 | {
|
114 | 118 | "output_type": "stream",
|
|
130 | 134 | " -h, --help Show this help message and exit\n",
|
131 | 135 | " --list Display list of supported single-cell technologies\n",
|
132 | 136 | "gget version: 0.28.6\n",
|
133 |
| - "2.7.11b.tar.gz [ <=> ] 11.89M 7.43MB/s in 1.6s \n", |
| 137 | + "2.7.11b.tar.gz [ <=> ] 11.89M 7.50MB/s in 1.6s \n", |
134 | 138 | "2.7.11b\n",
|
135 | 139 | "alevin-fry 0.10.0\n",
|
136 | 140 | "env: ALEVIN_FRY_HOME=\"$HOME/.cargo/bin/alevin-fry\"\n",
|
137 |
| - "simpleaf 0.17.2\n" |
| 141 | + "simpleaf 0.17.2\n", |
| 142 | + "jq-linux-amd64 100%[===================>] 2.21M --.-KB/s in 0.02s \n" |
138 | 143 | ]
|
139 | 144 | }
|
140 | 145 | ]
|
|
155 | 160 | ],
|
156 | 161 | "metadata": {
|
157 | 162 | "id": "hoDCIDwhkEKU",
|
158 |
| - "outputId": "0d293a8d-ee0d-4055-e80b-fc5041f87e34", |
| 163 | + "outputId": "b167779c-568a-4682-f537-4fbe2b3f939b", |
159 | 164 | "colab": {
|
160 | 165 | "base_uri": "https://localhost:8080/"
|
161 | 166 | }
|
162 | 167 | },
|
163 |
| - "execution_count": 2, |
| 168 | + "execution_count": 3, |
164 | 169 | "outputs": [
|
165 | 170 | {
|
166 | 171 | "output_type": "stream",
|
167 | 172 | "name": "stdout",
|
168 | 173 | "text": [
|
169 |
| - "\rspec.yaml 0%[ ] 0 --.-KB/s \rspec.yaml 100%[===================>] 14.17K --.-KB/s in 0.001s \n" |
| 174 | + "\rspec.yaml 0%[ ] 0 --.-KB/s \rspec.yaml 100%[===================>] 14.09K --.-KB/s in 0s \n" |
170 | 175 | ]
|
171 | 176 | }
|
172 | 177 | ]
|
173 | 178 | },
|
174 | 179 | {
|
175 | 180 | "cell_type": "code",
|
176 | 181 | "source": [
|
177 |
| - "!seqspec print spec.yaml" |
| 182 | + "! seqspec print spec.yaml" |
178 | 183 | ],
|
179 | 184 | "metadata": {
|
180 | 185 | "id": "RIEiCVzNom8_",
|
181 |
| - "outputId": "14d9e06a-c21f-4ada-8c43-acb82da096ae", |
| 186 | + "outputId": "fe89372f-9236-4d64-88ef-c5960c407993", |
182 | 187 | "colab": {
|
183 | 188 | "base_uri": "https://localhost:8080/"
|
184 | 189 | }
|
185 | 190 | },
|
186 |
| - "execution_count": 3, |
| 191 | + "execution_count": 4, |
187 | 192 | "outputs": [
|
188 | 193 | {
|
189 | 194 | "output_type": "stream",
|
|
215 | 220 | }
|
216 | 221 | ]
|
217 | 222 | },
|
218 |
| - { |
219 |
| - "cell_type": "markdown", |
220 |
| - "source": [ |
221 |
| - "## Single-cell/nuclei RNAseq quantification" |
222 |
| - ], |
223 |
| - "metadata": { |
224 |
| - "id": "zhQsQD42giYi" |
225 |
| - } |
226 |
| - }, |
227 |
| - { |
228 |
| - "cell_type": "markdown", |
229 |
| - "source": [ |
230 |
| - "### `kb-python (kallisto bustools)`" |
231 |
| - ], |
232 |
| - "metadata": { |
233 |
| - "id": "joQ-Vzgagi_9" |
234 |
| - } |
235 |
| - }, |
236 | 223 | {
|
237 | 224 | "cell_type": "code",
|
238 | 225 | "source": [
|
239 |
| - "!seqspec file" |
| 226 | + "! seqspec file -m rna -f json -s file -k all spec.yaml" |
240 | 227 | ],
|
241 | 228 | "metadata": {
|
242 |
| - "id": "E86RJ-9Eq0gE", |
243 |
| - "outputId": "bbb791f9-9301-4415-a653-3016212d43c2", |
| 229 | + "id": "8AUi7mH31BiL", |
| 230 | + "outputId": "010c4942-27e2-4623-b7c1-86504e2ef4df", |
244 | 231 | "colab": {
|
245 | 232 | "base_uri": "https://localhost:8080/"
|
246 | 233 | }
|
247 | 234 | },
|
248 |
| - "execution_count": 13, |
| 235 | + "execution_count": 12, |
249 | 236 | "outputs": [
|
250 | 237 | {
|
251 | 238 | "output_type": "stream",
|
252 | 239 | "name": "stdout",
|
253 | 240 | "text": [
|
254 |
| - "usage: seqspec file [-h] [-o OUT] [-i IDs] -m MODALITY [-s SELECTOR] [-f FORMAT] [-k KEY] yaml\n", |
255 |
| - "\n", |
256 |
| - "List files present in seqspec file.\n", |
257 |
| - "\n", |
258 |
| - "Examples:\n", |
259 |
| - "seqspec file -m rna spec.yaml # List paired read files\n", |
260 |
| - "seqspec file -m rna -f interleaved spec.yaml # List interleaved read files\n", |
261 |
| - "seqspec file -m rna -f list -k url spec.yaml # List urls of all read files\n", |
262 |
| - "seqspec file -m rna -f list -s onlist -k all spec.yaml # List onlist files\n", |
263 |
| - "---\n", |
264 |
| - "\n", |
265 |
| - "positional arguments:\n", |
266 |
| - " yaml Sequencing specification yaml file\n", |
267 |
| - "\n", |
268 |
| - "options:\n", |
269 |
| - " -h, --help show this help message and exit\n", |
270 |
| - " -o OUT Path to output file\n", |
271 |
| - " -i IDs Ids to list\n", |
272 |
| - " -s SELECTOR Selector for ID, [read, region, file, onlist] (default: read)\n", |
273 |
| - " -f FORMAT Format, [paired, interleaved, index, list], default: paired\n", |
274 |
| - " -k KEY Key, [file_id, filename, filetype, filesize, url, urltype, md5, all], default: file_id\n", |
275 |
| - "\n", |
276 |
| - "required arguments:\n", |
277 |
| - " -m MODALITY Modality\n" |
| 241 | + "[\n", |
| 242 | + " {\n", |
| 243 | + " \"file_id\": \"rna_R1_SRR18677638.fastq.gz\",\n", |
| 244 | + " \"filename\": \"rna_R1_SRR18677638.fastq.gz\",\n", |
| 245 | + " \"filetype\": \"fastq\",\n", |
| 246 | + " \"filesize\": 18499436,\n", |
| 247 | + " \"url\": \"https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R1_SRR18677638.fastq.gz\",\n", |
| 248 | + " \"urltype\": \"https\",\n", |
| 249 | + " \"md5\": \"7eb15a70da9b729b5a87e30b6596b641\"\n", |
| 250 | + " },\n", |
| 251 | + " {\n", |
| 252 | + " \"file_id\": \"rna_R2_SRR18677638.fastq.gz\",\n", |
| 253 | + " \"filename\": \"rna_R2_SRR18677638.fastq.gz\",\n", |
| 254 | + " \"filetype\": \"fastq\",\n", |
| 255 | + " \"filesize\": 45812569,\n", |
| 256 | + " \"url\": \"https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R2_SRR18677638.fastq.gz\",\n", |
| 257 | + " \"urltype\": \"https\",\n", |
| 258 | + " \"md5\": \"5e6915770e50f72e462e5b2575089c66\"\n", |
| 259 | + " },\n", |
| 260 | + " {\n", |
| 261 | + " \"file_id\": \"RNA-737K-arc-v1.txt\",\n", |
| 262 | + " \"filename\": \"RNA-737K-arc-v1.txt\",\n", |
| 263 | + " \"filetype\": \"txt\",\n", |
| 264 | + " \"filesize\": 2142553,\n", |
| 265 | + " \"url\": \"https://github.com/pachterlab/qcbc/raw/main/tests/10xMOME/RNA-737K-arc-v1.txt.gz\",\n", |
| 266 | + " \"urltype\": \"https\",\n", |
| 267 | + " \"md5\": \"a88cd21e801ae6f9a7d9a48b67ccf693\"\n", |
| 268 | + " }\n", |
| 269 | + "]\n" |
278 | 270 | ]
|
279 | 271 | }
|
280 | 272 | ]
|
281 | 273 | },
|
282 | 274 | {
|
283 | 275 | "cell_type": "code",
|
284 | 276 | "source": [
|
285 |
| - "!seqspec file -m tag -f list -s onlist -k all spec.yaml" |
| 277 | + "! seqspec file -m rna -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress\n", |
| 278 | + "! seqspec file -m atac -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress\n", |
| 279 | + "! seqspec file -m tag -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress\n", |
| 280 | + "! seqspec file -m protein -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress" |
286 | 281 | ],
|
287 | 282 | "metadata": {
|
288 |
| - "id": "YMlhrWCjrLir", |
289 |
| - "outputId": "93811de4-5995-4105-94ec-b5da54435b35", |
| 283 | + "id": "c1ZjLfnb1EL2", |
| 284 | + "outputId": "d87fb519-e67d-448e-df2b-2ca5b74e7452", |
290 | 285 | "colab": {
|
291 | 286 | "base_uri": "https://localhost:8080/"
|
292 | 287 | }
|
293 | 288 | },
|
294 |
| - "execution_count": 18, |
| 289 | + "execution_count": 13, |
295 | 290 | "outputs": [
|
296 | 291 | {
|
297 | 292 | "output_type": "stream",
|
298 | 293 | "name": "stdout",
|
299 | 294 | "text": [
|
300 |
| - "tag_cell_bc\tRNA-737K-arc-v1.txt\tRNA-737K-arc-v1.txt\ttxt\t2142553\thttps://github.com/pachterlab/qcbc/raw/main/tests/10xMOME/RNA-737K-arc-v1.txt.gz\thttps\ta88cd21e801ae6f9a7d9a48b67ccf693\n", |
301 |
| - "tag_seq\ttag_0419_feature_barcodes.txt\ttag_0419_feature_barcodes.txt\ttxt\t0\thttps://raw.githubusercontent.com/pachterlab/seqspec/devel/examples/specs/dogmaseq-dig/tag_0419_feature_barcodes.txt\thttps\tde44ad6d5c4b9f381a352283a6831112\n" |
| 295 | + "rna_R1_SRR18677638. 100%[===================>] 17.64M --.-KB/s in 0.07s \n", |
| 296 | + "rna_R2_SRR18677638. 100%[===================>] 43.69M 270MB/s in 0.2s \n", |
| 297 | + "RNA-737K-arc-v1.txt 100%[===================>] 2.04M --.-KB/s in 0.03s \n", |
| 298 | + "atac_R1_SRR18677642 100%[===================>] 38.33M 165MB/s in 0.2s \n", |
| 299 | + "atac_R2_SRR18677642 100%[===================>] 20.01M --.-KB/s in 0.1s \n", |
| 300 | + "atac_R3_SRR18677642 100%[===================>] 34.88M --.-KB/s in 0.1s \n", |
| 301 | + "ATA-737K-arc-v1.txt 100%[===================>] 2.35M --.-KB/s in 0.03s \n", |
| 302 | + "tag_R1_SRR18677640. 100%[===================>] 17.20M --.-KB/s in 0.07s \n", |
| 303 | + "tag_R2_SRR18677640. 100%[===================>] 7.13M --.-KB/s in 0.05s \n", |
| 304 | + "RNA-737K-arc-v1.txt 100%[===================>] 2.04M --.-KB/s in 0.03s \n", |
| 305 | + "tag_feature_barcode 100%[===================>] 208 --.-KB/s in 0s \n", |
| 306 | + "protein_R1_SRR18677 100%[===================>] 17.33M --.-KB/s in 0.1s \n", |
| 307 | + "protein_R2_SRR18677 100%[===================>] 8.98M --.-KB/s in 0.05s \n", |
| 308 | + "RNA-737K-arc-v1.txt 100%[===================>] 2.04M --.-KB/s in 0.03s \n", |
| 309 | + "protein_feature_bar 100%[===================>] 4.55K --.-KB/s in 0s \n" |
302 | 310 | ]
|
303 | 311 | }
|
304 | 312 | ]
|
305 | 313 | },
|
306 | 314 | {
|
307 | 315 | "cell_type": "code",
|
308 | 316 | "source": [
|
309 |
| - "!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt\n", |
310 |
| - "!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt\n", |
311 |
| - "!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt\n", |
312 |
| - "!seqspec file -m rna -f list -s onlist -k all spec.yaml | cut -f 6 | curl | zcat > onlist_rna.txt" |
| 317 | + "! gunzip *.txt.gz" |
313 | 318 | ],
|
314 | 319 | "metadata": {
|
315 |
| - "id": "Z3Jv3tKSqSSv", |
316 |
| - "outputId": "b2ff09ba-1023-48b4-f99d-0c07c78f8713", |
317 |
| - "colab": { |
318 |
| - "base_uri": "https://localhost:8080/" |
319 |
| - } |
| 320 | + "id": "SmUey5ga1xnB" |
320 | 321 | },
|
321 |
| - "execution_count": 16, |
322 |
| - "outputs": [ |
323 |
| - { |
324 |
| - "output_type": "stream", |
325 |
| - "name": "stdout", |
326 |
| - "text": [ |
327 |
| - "https://github.com/pachterlab/qcbc/raw/main/tests/10xMOME/RNA-737K-arc-v1.txt.gz\n" |
328 |
| - ] |
329 |
| - } |
330 |
| - ] |
| 322 | + "execution_count": 14, |
| 323 | + "outputs": [] |
| 324 | + }, |
| 325 | + { |
| 326 | + "cell_type": "markdown", |
| 327 | + "source": [ |
| 328 | + "## Single-cell/nuclei RNAseq quantification" |
| 329 | + ], |
| 330 | + "metadata": { |
| 331 | + "id": "zhQsQD42giYi" |
| 332 | + } |
| 333 | + }, |
| 334 | + { |
| 335 | + "cell_type": "markdown", |
| 336 | + "source": [ |
| 337 | + "### `kb-python (kallisto bustools)`" |
| 338 | + ], |
| 339 | + "metadata": { |
| 340 | + "id": "joQ-Vzgagi_9" |
| 341 | + } |
331 | 342 | },
|
332 | 343 | {
|
333 | 344 | "cell_type": "code",
|
334 | 345 | "source": [
|
335 |
| - "! # seqspec commands to get onlist, technology string, and files\n", |
336 |
| - "! w=$(seqspec onlist -m rna -o onlist.txt -s region-type -i barcode spec.yaml) && echo \"Onlist: \" $w\n", |
337 |
| - "\n", |
338 |
| - "! x=$(seqspec index -m rna -t kb -s file spec.yaml) && echo \"Technology string: \" $x\n", |
339 |
| - "\n", |
340 |
| - "! f=$(seqspec file -m rna -s read -f paired -k url spec.yaml | tr \"\\t\\n\" \" \") && echo \"Files: \" $f" |
| 346 | + "# seqspec commands to get onlist, technology string, and file\n", |
| 347 | + "! seqspec index -m rna -t kb -s file spec.yaml\n", |
| 348 | + "! seqspec file -m rna -s region -k filename spec.yaml\n", |
| 349 | + "! seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \"" |
341 | 350 | ],
|
342 | 351 | "metadata": {
|
343 |
| - "id": "MptvRm20psFR", |
344 |
| - "outputId": "eba4bacf-2b85-4f26-fc09-84c2a01380e9", |
| 352 | + "id": "UmwOAvSu2hik", |
| 353 | + "outputId": "a9851d77-485e-419d-e1a7-8ebdc48cda07", |
345 | 354 | "colab": {
|
346 | 355 | "base_uri": "https://localhost:8080/"
|
347 | 356 | }
|
348 | 357 | },
|
349 |
| - "execution_count": 6, |
| 358 | + "execution_count": 31, |
350 | 359 | "outputs": [
|
351 | 360 | {
|
352 | 361 | "output_type": "stream",
|
353 | 362 | "name": "stdout",
|
354 | 363 | "text": [
|
355 |
| - "Onlist: /content/RNA-737K-arc-v1.txt\n", |
356 |
| - "Technology string: 0,0,16:0,16,28:1,0,102\n", |
357 |
| - "Files: https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R1_SRR18677638.fastq.gz https://github.com/pachterlab/seqspec/raw/devel/examples/specs/dogmaseq-dig/fastqs/rna_R2_SRR18677638.fastq.gz\n" |
| 364 | + "0,0,16:0,16,28:1,0,102\n", |
| 365 | + "RNA-737K-arc-v1.txt\n", |
| 366 | + "rna_R1_SRR18677638.fastq.gz rna_R2_SRR18677638.fastq.gz " |
358 | 367 | ]
|
359 | 368 | }
|
360 | 369 | ]
|
|
368 | 377 | "outputs": [],
|
369 | 378 | "source": [
|
370 | 379 | "# standard reference\n",
|
371 |
| - "! kb ref -i index.idx -g t2g.txt -f1 transcriptome.fa $(gget ref --ftp -w dna,gtf homo_sapiens)\n", |
372 |
| - "\n", |
373 |
| - "! # seqspec commands to get onlist, technology string, and files\n", |
374 |
| - "! w=$(seqspec onlist -m rna -o onlist.txt -s region-type -i barcode spec.yaml)\n", |
375 |
| - "! echo \"Onlist: \" $w\n", |
376 |
| - "\n", |
377 |
| - "! x=$(seqspec index -m rna -t kb -s file spec.yaml)\n", |
378 |
| - "! echo \"Technology string: \" $x\n", |
379 |
| - "\n", |
380 |
| - "! f=$(seqspec file -m rna -s read -f paired -k url spec.yaml | tr \"\\t\\n\" \" \")\n", |
381 |
| - "! echo \"Files: \" $f\n", |
| 380 | + "! kb ref \\\n", |
| 381 | + "-i index.idx \\\n", |
| 382 | + "-g t2g.txt \\\n", |
| 383 | + "-f1 transcriptome.fa \\\n", |
| 384 | + "$(gget ref --ftp -w dna,gtf homo_sapiens)\n", |
382 | 385 | "\n",
|
383 |
| - "! # standard quantification\n", |
384 |
| - "! kb count --h5ad -t 16 -m 32G -i index.idx -g t2g.txt -o kb_out -x \"$x\" -w \"$w\" \"$f\"" |
| 386 | + "# standard quantification\n", |
| 387 | + "! kb count \\\n", |
| 388 | + "--h5ad -t 16 -m 32G \\\n", |
| 389 | + "-i index.idx \\\n", |
| 390 | + "-g t2g.txt \\\n", |
| 391 | + "-o kb_out \\\n", |
| 392 | + "-x $(seqspec index -m rna -t kb -s file spec.yaml) \\\n", |
| 393 | + "-w $(seqspec file -m rna -s region -k filename spec.yaml) \\\n", |
| 394 | + "$(seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \")" |
385 | 395 | ]
|
386 | 396 | }
|
387 | 397 | ],
|
|
0 commit comments