|
226 | 226 | "! seqspec file -m rna -f json -s file -k all spec.yaml"
|
227 | 227 | ],
|
228 | 228 | "metadata": {
|
229 |
| - "id": "8AUi7mH31BiL", |
230 |
| - "outputId": "010c4942-27e2-4623-b7c1-86504e2ef4df", |
231 | 229 | "colab": {
|
232 | 230 | "base_uri": "https://localhost:8080/"
|
233 |
| - } |
| 231 | + }, |
| 232 | + "id": "8AUi7mH31BiL", |
| 233 | + "outputId": "010c4942-27e2-4623-b7c1-86504e2ef4df" |
234 | 234 | },
|
235 | 235 | "execution_count": 12,
|
236 | 236 | "outputs": [
|
|
280 | 280 | "! seqspec file -m protein -f json -s file -k all spec.yaml | jq '.[].url' | xargs wget --continue --quiet --show-progress"
|
281 | 281 | ],
|
282 | 282 | "metadata": {
|
283 |
| - "id": "c1ZjLfnb1EL2", |
284 |
| - "outputId": "d87fb519-e67d-448e-df2b-2ca5b74e7452", |
285 | 283 | "colab": {
|
286 | 284 | "base_uri": "https://localhost:8080/"
|
287 |
| - } |
| 285 | + }, |
| 286 | + "id": "c1ZjLfnb1EL2", |
| 287 | + "outputId": "d87fb519-e67d-448e-df2b-2ca5b74e7452" |
288 | 288 | },
|
289 | 289 | "execution_count": 13,
|
290 | 290 | "outputs": [
|
|
349 | 349 | "! seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \""
|
350 | 350 | ],
|
351 | 351 | "metadata": {
|
352 |
| - "id": "UmwOAvSu2hik", |
353 |
| - "outputId": "a9851d77-485e-419d-e1a7-8ebdc48cda07", |
354 | 352 | "colab": {
|
355 | 353 | "base_uri": "https://localhost:8080/"
|
356 |
| - } |
| 354 | + }, |
| 355 | + "id": "UmwOAvSu2hik", |
| 356 | + "outputId": "a9851d77-485e-419d-e1a7-8ebdc48cda07" |
357 | 357 | },
|
358 | 358 | "execution_count": 31,
|
359 | 359 | "outputs": [
|
|
385 | 385 | "\n",
|
386 | 386 | "# standard quantification\n",
|
387 | 387 | "! kb count \\\n",
|
388 |
| - "--h5ad -t 16 -m 32G \\\n", |
| 388 | + "--h5ad \\\n", |
| 389 | + "-t 16 \\\n", |
| 390 | + "-m 32G \\\n", |
389 | 391 | "-i index.idx \\\n",
|
390 | 392 | "-g t2g.txt \\\n",
|
391 | 393 | "-o kb_out \\\n",
|
392 | 394 | "-x $(seqspec index -m rna -t kb -s file spec.yaml) \\\n",
|
393 | 395 | "-w $(seqspec file -m rna -s region -k filename spec.yaml) \\\n",
|
394 | 396 | "$(seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \")"
|
395 | 397 | ]
|
| 398 | + }, |
| 399 | + { |
| 400 | + "cell_type": "code", |
| 401 | + "source": [ |
| 402 | + "# spliced, unspliced, ambiguous reference\n", |
| 403 | + "! kb ref \\\n", |
| 404 | + "--workflow nac \\\n", |
| 405 | + "-i index.idx \\\n", |
| 406 | + "-g t2g.txt \\\n", |
| 407 | + "-f1 spl.fa \\\n", |
| 408 | + "-f2 unspl.fa \\\n", |
| 409 | + "-c1 spl.t2c.txt \\\n", |
| 410 | + "-c2 unspl.t2c.txt \\\n", |
| 411 | + "$(gget ref --ftp -w dna,gtf homo_sapiens)\n", |
| 412 | + "\n", |
| 413 | + "# spliced, unspliced, ambiguous quantification\n", |
| 414 | + "! kb count \\\n", |
| 415 | + "--h5ad \\\n", |
| 416 | + "--workflow=nac \\\n", |
| 417 | + "-t 32 \\\n", |
| 418 | + "-m 64G \\\n", |
| 419 | + "-i index.idx \\\n", |
| 420 | + "-g t2g.txt \\\n", |
| 421 | + "-c1 spl.t2c.txt \\\n", |
| 422 | + "-c2 unspl.t2c.txt \\\n", |
| 423 | + "-o kb_out_nac \\\n", |
| 424 | + "-x $(seqspec index -m rna -t kb -s file spec.yaml) \\\n", |
| 425 | + "-w $(seqspec file -m rna -s region -k filename spec.yaml) \\\n", |
| 426 | + "$(seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \")" |
| 427 | + ], |
| 428 | + "metadata": { |
| 429 | + "id": "TpfdS7oS4bFy" |
| 430 | + }, |
| 431 | + "execution_count": null, |
| 432 | + "outputs": [] |
| 433 | + }, |
| 434 | + { |
| 435 | + "cell_type": "markdown", |
| 436 | + "source": [ |
| 437 | + "### `STARsolo`" |
| 438 | + ], |
| 439 | + "metadata": { |
| 440 | + "id": "ZGYN-d6245Jq" |
| 441 | + } |
| 442 | + }, |
| 443 | + { |
| 444 | + "cell_type": "code", |
| 445 | + "source": [ |
| 446 | + "# download reference\n", |
| 447 | + "## todo\n", |
| 448 | + "\n", |
| 449 | + "# run quantification\n", |
| 450 | + "! STAR \\\n", |
| 451 | + "--soloFeatures Gene \\\n", |
| 452 | + "--genomeDir index \\\n", |
| 453 | + "--soloType Droplet \\\n", |
| 454 | + "--soloCBwhitelist \\\n", |
| 455 | + "$(seqspec file -m rna -s region -k filename spec.yaml) \\\n", |
| 456 | + "$(seqspec index -m rna -t starsolo -s file spec.yaml) \\\n", |
| 457 | + "--readFilesIn $(seqspec file -m rna -s read -f paired -k filename spec.yaml | tr \"\\t\\n\" \" \")" |
| 458 | + ], |
| 459 | + "metadata": { |
| 460 | + "id": "QWN3QgbA4-vo" |
| 461 | + }, |
| 462 | + "execution_count": null, |
| 463 | + "outputs": [] |
| 464 | + }, |
| 465 | + { |
| 466 | + "cell_type": "code", |
| 467 | + "source": [ |
| 468 | + "! seqspec file -m rna -s read -f paired -k filename spec.yaml | awk '{print \"-1 \"$1\" -2 \"$2}'" |
| 469 | + ], |
| 470 | + "metadata": { |
| 471 | + "id": "4BIrMTgH58Tk", |
| 472 | + "outputId": "5454b71b-03fa-4da2-b2a3-d111a576ec1e", |
| 473 | + "colab": { |
| 474 | + "base_uri": "https://localhost:8080/" |
| 475 | + } |
| 476 | + }, |
| 477 | + "execution_count": 33, |
| 478 | + "outputs": [ |
| 479 | + { |
| 480 | + "output_type": "stream", |
| 481 | + "name": "stdout", |
| 482 | + "text": [ |
| 483 | + "-1 rna_R1_SRR18677638.fastq.gz -2 rna_R2_SRR18677638.fastq.gz\n" |
| 484 | + ] |
| 485 | + } |
| 486 | + ] |
| 487 | + }, |
| 488 | + { |
| 489 | + "cell_type": "markdown", |
| 490 | + "source": [ |
| 491 | + "### `simpleaf`" |
| 492 | + ], |
| 493 | + "metadata": { |
| 494 | + "id": "K6D0Gyn45ZpL" |
| 495 | + } |
| 496 | + }, |
| 497 | + { |
| 498 | + "cell_type": "code", |
| 499 | + "source": [ |
| 500 | + "! mkdir -p simpleaf_ref\n", |
| 501 | + "\n", |
| 502 | + "# Download reference genome and gene annotations\n", |
| 503 | + "! wget -qO- https://cf.10xgenomics.com/supp/cell-exp/refdata-gex-GRCh38-2020-A.tar.gz | tar xzf - --strip-components=1 -C ./simpleaf_ref\n", |
| 504 | + "\n", |
| 505 | + "# simpleaf index\n", |
| 506 | + "! simpleaf index \\\n", |
| 507 | + "--output ./out \\\n", |
| 508 | + "--fasta ./simpleaf_ref/fasta/genome.fa \\\n", |
| 509 | + "--gtf ./simpleaf_ref/genes/genes.gtf \\\n", |
| 510 | + "--rlen 91 \\\n", |
| 511 | + "--threads 16 \\\n", |
| 512 | + "--use-piscem # remove this if missing piscem\n", |
| 513 | + "\n", |
| 514 | + "! simpleaf quant \\\n", |
| 515 | + "-r cr-like \\\n", |
| 516 | + "-i simpleaf_ref/ \\\n", |
| 517 | + "-m t2g.txt \\\n", |
| 518 | + "-c $(seqspec index -m rna -t simpleaf -s file spec.yaml) \\\n", |
| 519 | + "-o out/ -x $w \\\n", |
| 520 | + "$(seqspec file -m rna -s read -f paired -k filename spec.yaml | awk '{print \"-1 \"$1\" -2 \"$2}')" |
| 521 | + ], |
| 522 | + "metadata": { |
| 523 | + "id": "n-EtzzQU5cTX" |
| 524 | + }, |
| 525 | + "execution_count": null, |
| 526 | + "outputs": [] |
| 527 | + }, |
| 528 | + { |
| 529 | + "cell_type": "markdown", |
| 530 | + "source": [ |
| 531 | + "## Single-cell/nuclei TAG quantification" |
| 532 | + ], |
| 533 | + "metadata": { |
| 534 | + "id": "OGc3z4a96dxL" |
| 535 | + } |
| 536 | + }, |
| 537 | + { |
| 538 | + "cell_type": "markdown", |
| 539 | + "source": [ |
| 540 | + "### `kb-python` (kallisto bustools)" |
| 541 | + ], |
| 542 | + "metadata": { |
| 543 | + "id": "Ts3Nst9W6eni" |
| 544 | + } |
| 545 | + }, |
| 546 | + { |
| 547 | + "cell_type": "code", |
| 548 | + "source": [ |
| 549 | + "# build alignment reference\n", |
| 550 | + "kb ref \\\n", |
| 551 | + "--workflow kite \\\n", |
| 552 | + "-i index.idx \\\n", |
| 553 | + "-g t2g.txt \\\n", |
| 554 | + "-f1 transcriptome.fa \\\n", |
| 555 | + "tag_feature_barcodes.txt\n", |
| 556 | + "\n", |
| 557 | + "w=$(seqspec onlist -m tag -o onlist.txt -s region-type -i barcode spec.yaml)\n", |
| 558 | + "x=$(seqspec index -m tag -t kb -s file spec.yaml)\n", |
| 559 | + "f=$(seqspec file -m tag -s read -f paired -k url spec.yaml | tr \"\\t\\n\" \" \")\n", |
| 560 | + "\n", |
| 561 | + "# perform alignment, error correction, and counting\n", |
| 562 | + "kb count \\\n", |
| 563 | + "--workflow kite \\\n", |
| 564 | + "-i index.idx \\\n", |
| 565 | + "-g t2g.txt \\\n", |
| 566 | + "-x $x \\\n", |
| 567 | + "-w $w \\\n", |
| 568 | + "-o out --h5ad -t 2 \\\n", |
| 569 | + "$f" |
| 570 | + ], |
| 571 | + "metadata": { |
| 572 | + "id": "xGhxxFDI6ePO" |
| 573 | + }, |
| 574 | + "execution_count": null, |
| 575 | + "outputs": [] |
| 576 | + }, |
| 577 | + { |
| 578 | + "cell_type": "markdown", |
| 579 | + "source": [ |
| 580 | + "## Single-cell/nuclei PROTEIN quantification" |
| 581 | + ], |
| 582 | + "metadata": { |
| 583 | + "id": "S9gJjEVE6ltZ" |
| 584 | + } |
| 585 | + }, |
| 586 | + { |
| 587 | + "cell_type": "markdown", |
| 588 | + "source": [ |
| 589 | + "### `kb-python` (kallisto bustools)" |
| 590 | + ], |
| 591 | + "metadata": { |
| 592 | + "id": "Tfp3h6y06ob7" |
| 593 | + } |
| 594 | + }, |
| 595 | + { |
| 596 | + "cell_type": "code", |
| 597 | + "source": [ |
| 598 | + "# build alignment reference\n", |
| 599 | + "kb ref \\\n", |
| 600 | + "--workflow kite \\\n", |
| 601 | + "-i index.idx \\\n", |
| 602 | + "-g t2g.txt \\\n", |
| 603 | + "-f1 transcriptome.fa \\\n", |
| 604 | + "protein_feature_barcodes.txt\n", |
| 605 | + "\n", |
| 606 | + "w=$(seqspec onlist -m protein -o onlist.txt -s region-type -i barcode spec.yaml)\n", |
| 607 | + "x=$(seqspec index -m protein -t kb -s file spec.yaml)\n", |
| 608 | + "f=$(seqspec file -m protein -s read -f paired -k url spec.yaml | tr \"\\t\\n\" \" \")\n", |
| 609 | + "\n", |
| 610 | + "# perform alignment, error correction, and counting\n", |
| 611 | + "kb count \\\n", |
| 612 | + "--workflow kite \\\n", |
| 613 | + "-i index.idx \\\n", |
| 614 | + "-g t2g.txt \\\n", |
| 615 | + "-x $x \\\n", |
| 616 | + "-w $w \\\n", |
| 617 | + "-o out --h5ad -t 2 \\\n", |
| 618 | + "$f" |
| 619 | + ], |
| 620 | + "metadata": { |
| 621 | + "id": "YCkH0Qq76n9H" |
| 622 | + }, |
| 623 | + "execution_count": null, |
| 624 | + "outputs": [] |
| 625 | + }, |
| 626 | + { |
| 627 | + "cell_type": "markdown", |
| 628 | + "source": [ |
| 629 | + "## Single-cell/nuclei CRISPR quantification" |
| 630 | + ], |
| 631 | + "metadata": { |
| 632 | + "id": "g3mrKO3j6v2E" |
| 633 | + } |
| 634 | + }, |
| 635 | + { |
| 636 | + "cell_type": "markdown", |
| 637 | + "source": [ |
| 638 | + "Note that single-cell CRISPR guide RNAs can be quantified in the same way as the TAG and PROTEIN data above, using the `kb-python` `kite` workflow. Simply supply the guide RNA barcode file as the “feature barcodes” file passed to `kb ref`." |
| 639 | + ], |
| 640 | + "metadata": { |
| 641 | + "id": "X2Da0_y86yPz" |
| 642 | + } |
| 643 | + }, |
| 644 | + { |
| 645 | + "cell_type": "code", |
| 646 | + "source": [], |
| 647 | + "metadata": { |
| 648 | + "id": "kupLer4S6wZW" |
| 649 | + }, |
| 650 | + "execution_count": null, |
| 651 | + "outputs": [] |
| 652 | + }, |
| 653 | + { |
| 654 | + "cell_type": "markdown", |
| 655 | + "source": [ |
| 656 | + "## Single-cell/nuclei ATAC quantification" |
| 657 | + ], |
| 658 | + "metadata": { |
| 659 | + "id": "olhjtMy660Iy" |
| 660 | + } |
| 661 | + }, |
| 662 | + { |
| 663 | + "cell_type": "code", |
| 664 | + "source": [], |
| 665 | + "metadata": { |
| 666 | + "id": "ThaYUsgv60oO" |
| 667 | + }, |
| 668 | + "execution_count": null, |
| 669 | + "outputs": [] |
396 | 670 | }
|
397 | 671 | ],
|
398 | 672 | "metadata": {
|
|
0 commit comments