diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d6ab2bfe6..b17fa63a68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [2044](https://github.com/nf-core/sarek/pull/2044) - Add filtering with `bcftools view -f PASS` following variantcalling step + ### Changed ### Fixed @@ -64,8 +66,10 @@ This patch release includes a bump to Nextflow 25.04.8. ### Parameters -| Params | status | -| ------ | ------ | +| Params | status | +| ---------------------------- | ------ | +| `--filter_vcfs` | New | +| `--bcftools_filter_criteria` | New | ### Developer section diff --git a/README.md b/README.md index f524c47a21..d130c7a767 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Depending on the options and samples provided, the pipeline can currently perfor - `Strelka` - `TIDDIT` - Post-variant calling options, one of: - - `BCFtools concat` for germline vcfs and/or `BCFtools norm` for all vcfs (_experimental Feature_) + - `BCFtools concat` for germline vcfs and/or `BCFtools view` (default: filter by `PASS`), `BCFtools norm` for all vcfs - `Varlociraptor` for all vcfs - Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`) - Summarise and represent QC (`MultiQC`) diff --git a/conf/modules/freebayes.config b/conf/modules/freebayes.config index eac7bab279..cb2da8f81d 100644 --- a/conf/modules/freebayes.config +++ b/conf/modules/freebayes.config @@ -62,6 +62,14 @@ process { ] } + withName: 'TABIX_VC_FREEBAYES_FILT' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + // PAIR_VARIANT_CALLING if (params.tools && params.tools.split(',').contains('freebayes')) { withName: '.*:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_FREEBAYES:FREEBAYES' { diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config index 034d3e0b30..b1f1b1bc50 100644 --- a/conf/modules/post_variant_calling.config +++ b/conf/modules/post_variant_calling.config @@ -16,34 +16,29 @@ process { - withName: 'GERMLINE_VCFS_CONCAT' { - ext.args = { "-a" } - publishDir = [ enabled: false ] + withName: 'FILTER_VCFS' { + ext.args = { [ params.bcftools_filter_criteria, + "--output-type z --write-index=tbi" + ].join(" ").trim() } + ext.prefix = { vcf.baseName - '.vcf' + '.bcftools_filtered' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/filtered/${meta.id}/" }, + pattern: "*{.tbi,.vcf.gz}" + ] } withName: 'ADD_INFO_TO_VCF' { publishDir = [ enabled: false ] } - withName: 'GERMLINE_VCFS_CONCAT_SORT' { - ext.prefix = { "${meta.id}.germline" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }, - pattern: "*vcf.gz" - ] - } - - withName: 'VCFS_NORM_SORT' { - ext.prefix = { "${meta.id}.${meta.variantcaller}.norm" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" }, - pattern: "*vcf.gz" - ] + withName: 'TABIX_EXT_VCF' { + ext.prefix = { "${input.baseName}" } + publishDir = [ enabled: false ] } withName: 'VCFS_NORM' { + ext.prefix = { vcf.baseName - '.added_info.vcf' + '.norm' } ext.args = { [ '--multiallelics -both', // split multiallelic sites into biallelic records and both SNPs and indels should be merged separately into two records '--rm-dup all' // output only the first instance of a record which is present multiple times @@ -53,27 +48,29 @@ process { ] } - withName: 'TABIX_EXT_VCF' { - ext.prefix = { "${input.baseName}" } - 
publishDir = [ enabled: false ] - } - - withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT' { - ext.prefix = { "${meta.id}.germline" } + withName: 'VCFS_NORM_SORT' { + ext.prefix = { vcf.baseName - '.vcf' + '.sorted' } + ext.args = "--output-type z --write-index=tbi" + ext.when = { params.normalize_vcfs } publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }, - pattern: "*.tbi" + path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" }, + pattern: "*{vcf.gz,tbi}" ] } - withName: 'TABIX_VCFS_NORM_SORT' { - ext.prefix = { "${meta.id}.${meta.variantcaller}.norm" } + withName: 'GERMLINE_VCFS_CONCAT' { + ext.args = { "-a --output-type z" } + publishDir = [ enabled: false ] + } + + withName: 'GERMLINE_VCFS_CONCAT_SORT' { + ext.prefix = { "${meta.id}.germline" } + ext.args = "--output-type z --write-index=tbi" publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" }, - pattern: "*.tbi" + path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }, + pattern: "*{vcf.gz,tbi}" ] } - } diff --git a/conf/test_full.config b/conf/test_full.config index 69216b1509..f7d954392c 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -22,6 +22,7 @@ params { split_fastq = 20000000 intervals = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' wes = true + filter_vcfs = true } process { diff --git a/conf/test_full_germline.config b/conf/test_full_germline.config index c959c133a0..9d8146dc15 100644 --- a/conf/test_full_germline.config +++ b/conf/test_full_germline.config @@ -20,4 +20,5 @@ params { // Other params tools = 'cnvkit,deepvariant,freebayes,haplotypecaller,indexcov,manta,strelka,tiddit,snpeff,vep' split_fastq = 50000000 + filter_vcfs = true } diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index 2ff055a6cc..c3cf8b9543 100644 Binary files a/docs/images/sarek_subway.png and 
b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index a49c9a010c..2df0da408f 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -32,14 +32,14 @@ inkscape:pagecheckerboard="false" inkscape:document-units="mm" showgrid="true" - inkscape:zoom="0.86381075" - inkscape:cx="731.64174" - inkscape:cy="664.4974" - inkscape:window-width="1392" - inkscape:window-height="847" - inkscape:window-x="53" - inkscape:window-y="25" - inkscape:window-maximized="0" + inkscape:zoom="0.91934913" + inkscape:cx="686.35514" + inkscape:cy="113.66737" + inkscape:window-width="1512" + inkscape:window-height="916" + inkscape:window-x="0" + inkscape:window-y="38" + inkscape:window-maximized="1" inkscape:current-layer="layer1" width="211mm" fit-margin-top="0" @@ -887,6 +887,16 @@ id="path6731" sodipodi:nodetypes="cscc" />normalize,varlociraptor,concatenate(germline)varlociraptorfilter, normalize,concatenatemsisensor2 + id="tspan7">msisensor2 diff --git a/docs/output.md b/docs/output.md index 0afb507b7c..1ea842c5ea 100644 --- a/docs/output.md +++ b/docs/output.md @@ -59,9 +59,11 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Microsatellite instability (MSI)](#microsatellite-instability-msi) - [MSIsensor2](#msisensor2) - [MSIsensorPro](#msisensorpro) +- [Post variant calling](#post-variant-calling) - [Varlociraptor](#varlociraptor) - - [Concatenation](#concatenation) + - [Filtering](#filtering) - [Normalization](#normalization) + - [Concatenation](#concatenation) - [Variant annotation](#variant-annotation) - [snpEff](#snpeff) - [VEP](#vep) @@ -366,7 +368,7 @@ See the [`input`](usage#input-sample-sheet-configurations) section in the usage ## Variant Calling -The results regarding variant calling are collected in `{outdir}/variantcalling/`. +The results regarding variant calling are collected in `{outdir}/variant_calling/`. 
If some results from a variant caller do not appear here, please check out the `--tools` section in the parameter [documentation](https://nf-co.re/sarek/latest/parameters). (Recalibrated) CRAM files can used as an input to start the variant calling. @@ -383,7 +385,7 @@ For further reading and documentation see the [bcftools manual](https://samtools
Output files for all samples -**Output directory: `{outdir}/variantcalling/bcftools//`** +**Output directory: `{outdir}/variant_calling/bcftools//`** - `.bcftools.vcf.gz` and `.bcftools.vcf.gz.tbi` - VCF with tabix index @@ -397,7 +399,7 @@ For further reading and documentation see the [bcftools manual](https://samtools
Output files for normal samples -**Output directory: `{outdir}/variantcalling/deepvariant//`** +**Output directory: `{outdir}/variant_calling/deepvariant//`** - `.deepvariant.vcf.gz` and `.deepvariant.vcf.gz.tbi` - VCF with tabix index @@ -412,7 +414,7 @@ For further reading and documentation see the [bcftools manual](https://samtools
Output files for all samples -**Output directory: `{outdir}/variantcalling/freebayes/{sample,normalsample_vs_tumorsample}/`** +**Output directory: `{outdir}/variant_calling/freebayes/{sample,normalsample_vs_tumorsample}/`** - `.freebayes.vcf.gz` and `.freebayes.vcf.gz.tbi` - VCF with tabix index @@ -426,7 +428,7 @@ For further reading and documentation see the [bcftools manual](https://samtools
Output files for normal samples -**Output directory: `{outdir}/variantcalling/haplotypecaller//`** +**Output directory: `{outdir}/variant_calling/haplotypecaller//`** - `.haplotypecaller.vcf.gz` and `.haplotypecaller.vcf.gz.tbi` - VCF with tabix index @@ -443,7 +445,7 @@ If the haplotype-called VCF files are not filtered, then Sarek should be run wit
Output files for normal samples -**Output directory: `{outdir}/variantcalling/haplotypecaller//`** +**Output directory: `{outdir}/variant_calling/haplotypecaller//`** - `.haplotypecaller.filtered.vcf.gz` and `.haplotypecaller.filtered.vcf.gz.tbi` - VCF with tabix index @@ -457,12 +459,12 @@ If the haplotype-called VCF files are not filtered, then Sarek should be run wit
Output files from joint germline variant calling -**Output directory: `{outdir}/variantcalling/haplotypecaller//`** +**Output directory: `{outdir}/variant_calling/haplotypecaller//`** - `.haplotypecaller.g.vcf.gz` and `.haplotypecaller.g.vcf.gz.tbi` - gVCF with tabix index -**Output directory: `{outdir}/variantcalling/haplotypecaller/joint_variant_calling/`** +**Output directory: `{outdir}/variant_calling/haplotypecaller/joint_variant_calling/`** - `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` - VCF with tabix index @@ -481,7 +483,7 @@ It is not required, but recommended to have a [panel of normals (PON)](https://g
Output files for tumor-only and tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/mutect2/{sample,tumorsample_vs_normalsample,patient}/`** +**Output directory: `{outdir}/variant_calling/mutect2/{sample,tumorsample_vs_normalsample,patient}/`** Files created: @@ -541,7 +543,7 @@ Files created:
Unfiltered VCF-files for normal samples -**Output directory: `{outdir}/variantcalling/sentieon_dnascope//`** +**Output directory: `{outdir}/variant_calling/sentieon_dnascope//`** - `.dnascope.unfiltered.vcf.gz` and `.dnascope.unfiltered.vcf.gz.tbi` - VCF with tabix index @@ -555,7 +557,7 @@ Unless `dnascope_filter` is listed under `--skip_tools` in the nextflow command,
Filtered VCF-files for normal samples -**Output directory: `{outdir}/variantcalling/sentieon_dnascope//`** +**Output directory: `{outdir}/variant_calling/sentieon_dnascope//`** - `.dnascope.filtered.vcf.gz` and `.dnascope.filtered.vcf.gz.tbi` - VCF with tabix index @@ -569,12 +571,12 @@ In Sentieon's package DNAscope, joint germline variant calling is done by first
Output files from joint germline variant calling -**Output directory: `{outdir}/variantcalling/sentieon_dnascope//`** +**Output directory: `{outdir}/variant_calling/sentieon_dnascope//`** - `.dnascope.g.vcf.gz` and `.dnascope.g.vcf.gz.tbi` - VCF with tabix index -**Output directory: `{outdir}/variantcalling/sentieon_dnascope/joint_variant_calling/`** +**Output directory: `{outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/`** - `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` - VCF with tabix index @@ -588,7 +590,7 @@ In Sentieon's package DNAscope, joint germline variant calling is done by first
Unfiltered VCF-files for normal samples -**Output directory: `{outdir}/variantcalling/sentieon_haplotyper//`** +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper//`** - `.haplotyper.unfiltered.vcf.gz` and `.haplotyper.unfiltered.vcf.gz.tbi` - VCF with tabix index @@ -602,7 +604,7 @@ Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow comman
Filtered VCF-files for normal samples -**Output directory: `{outdir}/variantcalling/sentieon_haplotyper//`** +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper//`** - `.haplotyper.filtered.vcf.gz` and `.haplotyper.filtered.vcf.gz.tbi` - VCF with tabix index @@ -616,12 +618,12 @@ In Sentieon's package DNAseq, joint germline variant calling is done by first ru
Output files from joint germline variant calling -**Output directory: `{outdir}/variantcalling/sentieon_haplotyper//`** +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper//`** - `.haplotyper.g.vcf.gz` and `.haplotyper.g.vcf.gz.tbi` - VCF with tabix index -**Output directory: `{outdir}/variantcalling/sentieon_haplotyper/joint_variant_calling/`** +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/`** - `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` - VCF with tabix index @@ -637,7 +639,7 @@ In Sentieon's package DNAseq, joint germline variant calling is done by first ru
VCF-files for tumor-only and tumor/normal samples -**Output directory: `{outdir}/variantcalling/sentieon_tnscope//`** +**Output directory: `{outdir}/variant_calling/sentieon_tnscope//`** - `.tnscope.vcf.gz` and `.tnscope.vcf.gz.tbi` - VCF with tabix index @@ -652,7 +654,7 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/
Output files for single samples (normal) -**Output directory: `{outdir}/variantcalling/strelka//`** +**Output directory: `{outdir}/variant_calling/strelka//`** - `.strelka.genome.vcf.gz` and `.strelka.genome.vcf.gz.tbi` - genome VCF with tabix index @@ -663,7 +665,7 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/
Output files for tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/strelka//`** +**Output directory: `{outdir}/variant_calling/strelka//`** - `.strelka.somatic_indels.vcf.gz` and `.strelka.somatic_indels.vcf.gz.tbi` - VCF with tabix index with all somatic indels inferred in the tumor sample. @@ -680,7 +682,7 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/ A bam index has 16KB resolution and it is used as a coverage estimate . The output is scaled to around 1. So a long stretch with values of 1.5 would be a heterozygous duplication. This is useful as a quick QC to get coverage values across the genome. -**Output directory: `{outdir}/variantcalling/indexcov/`** +**Output directory: `{outdir}/variant_calling/indexcov/`** In addition to the interactive HTML files, `indexcov` outputs a number of text files: @@ -707,7 +709,7 @@ It is optimized for analysis of germline variation in small sets of individuals
Output files for normal samples -**Output directory: `{outdir}/variantcalling/manta//`** +**Output directory: `{outdir}/variant_calling/manta//`** - `.manta.diploid_sv.vcf.gz` and `.manta.diploid_sv.vcf.gz.tbi` - VCF with tabix index containing SVs and indels scored and genotyped under a diploid model for the sample. @@ -716,7 +718,7 @@ It is optimized for analysis of germline variation in small sets of individuals
Output files for tumor-only samples -**Output directory: `{outdir}/variantcalling/manta//`** +**Output directory: `{outdir}/variant_calling/manta//`** - `.manta.tumor_sv.vcf.gz` and `.manta.tumor_sv.vcf.gz.tbi` - VCF with tabix index containing a subset of the candidateSV.vcf.gz file after removing redundant candidates and small indels less than the minimum scored variant size (50 by default). The SVs are not scored, but include additional details: (1) paired and split read supporting evidence counts for each allele (2) a subset of the filters from the scored tumor-normal model are applied to the single tumor case to improve precision. @@ -725,7 +727,7 @@ It is optimized for analysis of germline variation in small sets of individuals
Output files for tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/manta//`** +**Output directory: `{outdir}/variant_calling/manta//`** - `.manta.diploid_sv.vcf.gz` and `.manta.diploid_sv.vcf.gz.tbi` - VCF with tabix index containing SVs and indels scored and genotyped under a diploid model for the sample. In the case of a tumor/normal subtraction, the scores in this file do not reflect any information from the tumor sample. @@ -740,7 +742,7 @@ It is optimized for analysis of germline variation in small sets of individuals
Output files for normal and tumor-only samples -**Output directory: `{outdir}/variantcalling/tiddit//`** +**Output directory: `{outdir}/variant_calling/tiddit//`** - `.tiddit.vcf.gz` and `.tiddit.vcf.gz.tbi` - VCF with tabix index containing SV calls @@ -752,7 +754,7 @@ It is optimized for analysis of germline variation in small sets of individuals
Output files for tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/tiddit//`** +**Output directory: `{outdir}/variant_calling/tiddit//`** - `.tiddit.normal.vcf.gz` and `.tiddit.normal.vcf.gz.tbi` - VCF with tabix index containing SV calls @@ -778,7 +780,7 @@ This is done internally using the software [AlleleCount](https://github.com/canc
Output files for tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/ascat//`** +**Output directory: `{outdir}/variant_calling/ascat//`** - `.tumour.ASCATprofile.png` - image with information about allele-specific copy number profile @@ -827,7 +829,7 @@ The file `.cnvs.txt` contains all segments predicte
Output files for normal and tumor-only samples -**Output directory: `{outdir}/variantcalling/cnvkit//`** +**Output directory: `{outdir}/variant_calling/cnvkit//`** - `.antitargetcoverage.cnn` - file containing coverage information @@ -852,7 +854,7 @@ The file `.cnvs.txt` contains all segments predicte
Output files for tumor/normal samples -**Output directory: `{outdir}/variantcalling/cnvkit//`** +**Output directory: `{outdir}/variant_calling/cnvkit//`** - `.antitargetcoverage.cnn` - file containing coverage information @@ -887,7 +889,7 @@ It also detects subclonal gains and losses and evaluates the most likely average
Output files for tumor-only and tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/controlfreec/{tumorsample,tumorsample_vs_normalsample}/`** +**Output directory: `{outdir}/variant_calling/controlfreec/{tumorsample,tumorsample_vs_normalsample}/`** - `config.txt` - Configuration file used to run Control-FREEC @@ -935,7 +937,7 @@ An altered distribution of microsatellite length is associated with a missed rep Output files for tumor only samples -**Output directory: `{outdir}/variantcalling/msisensor2//`** +**Output directory: `{outdir}/variant_calling/msisensor2//`** - `` - MSI score output, contains information about the number of somatic sites. @@ -953,7 +955,7 @@ It requires a normal sample for each tumour to differentiate the somatic and ger
Output files for tumor/normal paired samples -**Output directory: `{outdir}/variantcalling/msisensor//`** +**Output directory: `{outdir}/variant_calling/msisensor//`** - `` - MSI score output, contains information about the number of somatic sites. @@ -965,6 +967,10 @@ It requires a normal sample for each tumour to differentiate the somatic and ger - Somatic sites detected.
+## Post Variant Calling + +Optional steps to further filter or fine-tune variant calling results. There are two branches: `Varlociraptor` or `bcftools` (filtering, normalization, and concatenation). + ### Varlociraptor As varlociraptor requires to provide a set of candidate variants to consider it can be run in combination with any variant caller. @@ -972,7 +978,7 @@ As varlociraptor requires to provide a set of candidate variants to consider it
Output files for germline samples -**Output directory: `{outdir}/variantcalling/varlociraptor/{sample}`** +**Output directory: `{outdir}/variant_calling/varlociraptor/{sample}`** - `..germline.varlociraptor.vcf.gz` and `..germline.varlociraptor.vcf.gz.tbi` - Final VCF with tabix index @@ -985,7 +991,7 @@ As varlociraptor requires to provide a set of candidate variants to consider it
Postprocessed VCF files for tumor-normal calling -**Output directory: `{outdir}/variantcalling/varlociraptor/{tumorsample_vs_normalsample}`** +**Output directory: `{outdir}/variant_calling/varlociraptor/{tumorsample_vs_normalsample}`** - `_vs_...somatic.varlociraptor.vcf.gz` and `_vs_...somatic.varlociraptor.vcf.gz.tbi` - Final VCF with tabix index @@ -1002,7 +1008,7 @@ As varlociraptor requires to provide a set of candidate variants to consider it
Output files for tumor only samples -**Output directory: `{outdir}/variantcalling/varlociraptor/{sample}`** +**Output directory: `{outdir}/variant_calling/varlociraptor/{sample}`** - `..tumor_only.varlociraptor.vcf.gz` and `..tumor_only.varlociraptor.vcf.gz.tbi` - Final VCF with tabix index @@ -1012,32 +1018,45 @@ As varlociraptor requires to provide a set of candidate variants to consider it - JSON file containing alignment properties for tumor_only sample cram
-### Concatenation +### Filtering -Germline VCFs from `DeepVariant`, `FreeBayes`, `HaplotypeCaller`, `Haplotyper`, `Manta`, `bcftools mpileup`, `Strelka`, or `Tiddit` are concatenated with `bcftools concat`. The field `SOURCE` is added to the VCF header to report the variant caller. +VCFs from all variant callers can be filtered using `bcftools view`. Filtering is enabled by setting the `--filter_vcfs` parameter. By default, variants are filtered to include only those with `PASS` in the FILTER field. Custom filtering criteria can be specified using the `--bcftools_filter_criteria` parameter (see [bcftools view documentation](https://samtools.github.io/bcftools/bcftools.html#view) for filter syntax).
-Concatenated VCF-files for normal samples +Filtered VCF-files for normal and tumor samples -**Output directory: `{outdir}/variantcalling/concat//`** +**Output directory: `{outdir}/variant_calling/filtered//`** -- `.germline.vcf.gz` and `.germline.vcf.gz.tbi` - - VCF with tabix index +- `..bcftools_filtered.vcf.gz` and `..bcftools_filtered.vcf.gz.tbi` + - VCF with tabix index containing filtered variants
### Normalization -_Experimental Feature_ All VCFs from `DeepVariant`, `FreeBayes`, `HaplotypeCaller`, `Haplotyper`, `Manta`, `bcftools mpileup`, `Strelka`, or `Tiddit` are normalized with `bcftools norm`. The field `SOURCE` is added to the VCF header to report the variant caller. -The concatenized VCFs are not normalized at the moment. +All VCFs are normalized with `bcftools norm`. The field `SOURCE` is added to the VCF header to report the variant caller.
Normalized VCF-files for normal and tumor samples -**Output directory: `{outdir}/variantcalling/normalized//`** +**Output directory: `{outdir}/variant_calling/normalized//`** -- `..norm.vcf.gz` and `..norm.vcf.gz.tbi` - - VCF with tabix index +- `..norm.sorted.vcf.gz` and `..norm.sorted.vcf.gz.tbi` + - VCF with tabix index containing normalized variants + +
+ +### Concatenation + +Germline VCFs from `DeepVariant`, `FreeBayes`, `HaplotypeCaller`, `Haplotyper`, `Manta`, `bcftools mpileup`, `Strelka`, or `Tiddit` are concatenated with `bcftools concat`. The field `SOURCE` is added to the VCF header to report the variant caller. + +
+Concatenated VCF-files for normal samples + +**Output directory: `{outdir}/variant_calling/concat//`** + +- `.germline.vcf.gz` and `.germline.vcf.gz.tbi` + - VCF with tabix index containing concatenated germline variants
diff --git a/modules.json b/modules.json index c3f0573661..295ebac535 100644 --- a/modules.json +++ b/modules.json @@ -50,6 +50,11 @@ "git_sha": "c9c3ef86c1892413b3c86fb38c4e39fd7288512f", "installed_by": ["modules"] }, + "bcftools/view": { + "branch": "master", + "git_sha": "f17049e03697726ace7499d2fe342f892594f6f3", + "installed_by": ["modules"] + }, "bwa/index": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new file mode 100644 index 0000000000..ba863b388f --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf new file mode 100644 index 0000000000..72b31200b5 --- /dev/null +++ b/modules/nf-core/bcftools/view/main.nf @@ -0,0 +1,75 @@ +process BCFTOOLS_VIEW { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data': + 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }" + + input: + tuple val(meta), path(vcf), path(index) + path(regions) + path(targets) + path(samples) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + """ + bcftools view \\ + --output ${prefix}.${extension} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + def stub_index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && stub_index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${stub_index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 0000000000..e9df974206 --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,98 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + ontologies: [] + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 
'file.vcf' + ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/nextflow.config b/nextflow.config index 4a9f078cd9..aa2a7f24c0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,6 +31,7 @@ params { tools = null // No default Variant_Calling or Annotation tools skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default split_fastq = 50000000 // FASTQ files will not be split by default by FASTP + wes = false // Set to true, if data is exome/targeted sequencing data. 
Used to use correct models in various variant callers // Modify fastqs (trim/split) with FASTP trim_fastq = false // No trimming @@ -81,22 +82,25 @@ params { cf_minqual = 0 // ControlFreec default values cf_window = null // by default we are not using this in Control-FREEC cnvkit_reference = null // by default the reference is build from the fasta file - concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files freebayes_filter = 30 // default filter for freebayes (filtering with vcflib/vcffilter) gatk_pcr_indel_model = 'CONSERVATIVE' // default value for GATK HaplotypeCaller ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling - normalize_vcfs = false // by default we don't normalize the vcf-files only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' sentieon_haplotyper_emit_mode = 'variant' // default value for Sentieon haplotyper + + // Post variant calling varlociraptor_chunk_size = 15 // default chunk size for Varlociraptor varlociraptor_scenario_tumor_only = null // uses default scenario in assets/varlociraptor_tumor_only.yte.yaml varlociraptor_scenario_somatic = null // uses default scenario in assets/varlociraptor_somatic.yte.yaml varlociraptor_scenario_germline = null // uses default scenario in assets/varlociraptor_germline.yte.yaml - wes = false // Set to true, if data is exome/targeted sequencing data. 
Used to use correct models in various variant callers + bcftools_filter_criteria = "-f PASS" + concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files + filter_vcfs = false // enables filtering of all VCFs with bcftools view + normalize_vcfs = false // by default we don't normalize the vcf-files // Annotation bcftools_annotations = null // No extra annotation file diff --git a/nextflow_schema.json b/nextflow_schema.json index 8e94e7d27d..adb645c17b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -353,7 +353,7 @@ }, "ascat_ploidy": { "type": "number", - "minimum": 0.0, + "minimum": 0, "fa_icon": "fas fa-bacon", "help_text": "ASCAT: optional argument to override ASCAT optimization and supply psi parameter (expert parameter, do not adapt unless you know what you are doing). See [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf)", "description": "Overwrite ASCAT ploidy." @@ -500,19 +500,39 @@ "type": "string", "default": "CONSERVATIVE", "help_text": "Option for selecting the PCR indel model used by GATK HaplotypeCaller." - }, - "concatenate_vcfs": { + } + } + }, + "post_variant_calling": { + "title": "Post variant calling", + "type": "object", + "description": "Options for tools that run between variant calling and annotation", + "default": "", + "fa_icon": "fas fa-filter", + "properties": { + "filter_vcfs": { "type": "boolean", - "fa_icon": "fas fa-tape", - "description": "Option for concatenating germline vcf-files.", - "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat." + "fa_icon": "fas fa-filter", + "description": "Enable filtering of VCFs with bcftools view" + }, + "bcftools_filter_criteria": { + "type": "string", + "default": "-f PASS", + "fa_icon": "fas fa-question", + "description": "Filter criteria. Uses bcftools view filter options.
To customize, follow instructions here: https://samtools.github.io/bcftools/bcftools.html#view" }, "normalize_vcfs": { "type": "boolean", - "fa_icon": "fas fa-tape", + "fa_icon": "fas fa-angle-double-left", "description": "Option for normalization of vcf-files.", "help_text": "Normalization of all vcf-files from each applied variant-caller using bfctools norm." }, + "concatenate_vcfs": { + "type": "boolean", + "fa_icon": "fas fa-tape", + "description": "Option for concatenating germline vcf-files.", + "help_text": "Enable concatenation of germline vcf-files from each applied variant-caller into one vcf-file using bcftools concat." + }, "varlociraptor_chunk_size": { "type": "integer", "default": 15, @@ -520,17 +540,17 @@ "description": "Number of chunks to split the vcf-files for varlociraptor. Minimum 1, indicates no splitting", "hidden": true }, - "varlociraptor_scenario_germline": { + "varlociraptor_scenario_tumor_only": { "type": "string", - "description": "Yte compatible scenario file for germline samples. Defaults to assets/varlociraptor_germline.yte.yaml" + "description": "Yte compatible scenario file for tumor only samples. Defaults to assets/varlociraptor_tumor_only.yte.yaml" }, "varlociraptor_scenario_somatic": { "type": "string", "description": "Yte compatible scenario file for somatic samples. Defaults to assets/varlociraptor_somatic.yte.yaml" }, - "varlociraptor_scenario_tumor_only": { + "varlociraptor_scenario_germline": { "type": "string", - "description": "Yte compatible scenario file for tumor only samples. Defaults to assets/varlociraptor_tumor_only.yte.yaml" + "description": "Yte compatible scenario file for germline samples.
Defaults to assets/varlociraptor_germline.yte.yaml" } } }, @@ -1236,6 +1256,9 @@ { "$ref": "#/$defs/variant_calling" }, + { + "$ref": "#/$defs/post_variant_calling" + }, { "$ref": "#/$defs/annotation" }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index ff58fea231..b219f5c149 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "InProgress", - "datePublished": "2025-11-04T10:00:26+00:00", - "description": "

\n \n \n \"nf-core/sarek\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/sarek/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3476425-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3476425)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.8-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/sarek)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/sarek)\n[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/sarek** is a workflow designed to detect variants on whole genome or targeted sequencing data. Initially designed for Human, and Mouse, it can work on any species with a reference genome. Sarek can also handle tumour / normal pairs and could include additional relapses.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/sarek/results).\n\nIt's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek).\n\n

\n \n

\n\n## Pipeline summary\n\nDepending on the options and samples provided, the pipeline can currently perform the following:\n\n- Form consensus reads from UMI sequences (`fgbio`)\n- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`)\n- Contamination removal (`BBSplit`, enabled by `--tools bbsplit`)\n- Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`)\n- Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`)\n- _Experimental Feature_: Use GPU-accelerated parabricks implementation as alternative to \"Map Reads to Reference\" + \"Process BAM file\" (`--aligner parabricks`)\n- Summarise alignment statistics (`samtools stats`, `mosdepth`)\n- Variant calling (enabled by `--tools`, see [compatibility](https://nf-co.re/sarek/latest/docs/usage#which-variant-calling-tool-is-implemented-for-which-data-type)):\n - `ASCAT`\n - `CNVkit`\n - `Control-FREEC`\n - `DeepVariant`\n - `freebayes`\n - `GATK HaplotypeCaller`\n - `GATK Mutect2`\n - `indexcov`\n - `Lofreq`\n - `Manta`\n - `mpileup`\n - `MSIsensor2`\n - `MSIsensor-pro`\n - `MuSE`\n - `Sentieon Haplotyper`\n - `Strelka`\n - `TIDDIT`\n- Post-variant calling options, one of:\n - `BCFtools concat` for germline vcfs and/or `BCFtools norm` for all vcfs (_experimental Feature_)\n - `Varlociraptor` for all vcfs\n- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`)\n- Summarise and represent QC (`MultiQC`)\n\n

\n \n

\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\npatient,sample,lane,fastq_1,fastq_2\nID1,S1,L002,ID1_S1_L002_R1_001.fastq.gz,ID1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/sarek \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/sarek/usage) and the [parameter documentation](https://nf-co.re/sarek/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/sarek/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/sarek/output).\n\n## Benchmarking\n\nOn each release, the pipeline is run on 3 full size tests:\n\n- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium\n- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle(NA12878) dataset\n- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available 
[here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). The results are uploaded to Zenodo, evaluated against a truth dataset, and results are made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#).\n\n## Credits\n\nSarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntum\u00f6rbanken)](https://ki.se/forskning/barntumorbanken).\nFriederike Hanssen and Gisela Gabernet at [QBiC](https://www.qbic.uni-tuebingen.de/) later joined and helped with further development.\n\nThe Nextflow DSL2 conversion of the pipeline was lead by Friederike Hanssen and Maxime U Garcia.\n\nMaintenance is now lead by Friederike Hanssen and Maxime U Garcia (now at [Seqera](https://seqera.io))\n\nMain developers:\n\n- [Maxime U Garcia](https://github.com/maxulysse)\n- [Friederike Hanssen](https://github.com/FriederikeHanssen)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Abhinav Sharma](https://github.com/abhi18av)\n- [Adam Talbot](https://github.com/adamrtalbot)\n- [Adrian L\u00e4rkeryd](https://github.com/adrlar)\n- [\u00c0itor Olivares](https://github.com/AitorPeseta)\n- [Alexander Peltzer](https://github.com/apeltzer)\n- [Alison Meynert](https://github.com/ameynert)\n- [Anders Sune Pedersen](https://github.com/asp8200)\n- [arontommi](https://github.com/arontommi)\n- [BarryDigby](https://github.com/BarryDigby)\n- [Bekir Erg\u00fcner](https://github.com/berguner)\n- [bjornnystedt](https://github.com/bjornnystedt)\n- [cgpu](https://github.com/cgpu)\n- [Chela James](https://github.com/chelauk)\n- [David Mas-Ponte](https://github.com/davidmasp)\n- [Edmund Miller](https://github.com/edmundmiller)\n- 
[Famke B\u00e4uerle](https://github.com/famosab)\n- [Francesco Lescai](https://github.com/lescai)\n- [Francisco Mart\u00ednez](https://github.com/nevinwu)\n- [Gavin Mackenzie](https://github.com/GCJMackenzie)\n- [Gisela Gabernet](https://github.com/ggabernet)\n- [Grant Neilson](https://github.com/grantn5)\n- [gulfshores](https://github.com/gulfshores)\n- [Harshil Patel](https://github.com/drpatelh)\n- [Hongwei Ye](https://github.com/YeHW)\n- [James A. Fellows Yates](https://github.com/jfy133)\n- [Jesper Eisfeldt](https://github.com/J35P312)\n- [Johannes Alneberg](https://github.com/alneberg)\n- [Jonas Kjellin](https://github.com/kjellinjonas)\n- [Jos\u00e9 Fern\u00e1ndez Navarro](https://github.com/jfnavarro)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Ken Brewer](https://github.com/kenibrewer)\n- [Lasse Westergaard Folkersen](https://github.com/lassefolkersen)\n- [Lucia Conde](https://github.com/lconde-ucl)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [Malin Larsson](https://github.com/malinlarsson)\n- [Marcel Martin](https://github.com/marcelm)\n- [Nick Smith](https://github.com/nickhsmith)\n- [Nicolas Schcolnicov](https://github.com/nschcolnicov)\n- [Nilesh Tawari](https://github.com/nilesh-tawari)\n- [Nils Homer](https://github.com/nh13)\n- [Olga Botvinnik](https://github.com/olgabot)\n- [Oskar Wacker](https://github.com/WackerO)\n- [pallolason](https://github.com/pallolason)\n- [Paul Cantalupo](https://github.com/pcantalupo)\n- [Phil Ewels](https://github.com/ewels)\n- [Pierre Lindenbaum](https://github.com/lindenb)\n- [Sabrina Krakau](https://github.com/skrakau)\n- [Sam Minot](https://github.com/sminot)\n- [Sebastian-D](https://github.com/Sebastian-D)\n- [Silvia Morini](https://github.com/silviamorins)\n- [Simon Pearce](https://github.com/SPPearce)\n- [Solenne Correard](https://github.com/scorreard)\n- [Susanne Jodoin](https://github.com/SusiJo)\n- [Szilveszter Juhos](https://github.com/szilvajuhos)\n- [Tobias 
Koch](https://github.com/KochTobi)\n- [Winni Kretzschmar](https://github.com/winni2k)\n- [Patricie Skal\u00e1kov\u00e1](https://github.com/Patricie34)\n\n## Acknowledgements\n\n| [![Barntum\u00f6rbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) |\n| :-----------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: |\n| [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) |\n| [![QBiC](docs/images/QBiC_logo.png)](https://www.qbic.uni-tuebingen.de) | [![GHGA](docs/images/GHGA_logo.png)](https://www.ghga.de/) |\n| [![DNGC](docs/images/DNGC_logo.png)](https://eng.ngc.dk/) | |\n\n## Contributions & Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime U Garcia](mailto:maxime.garcia@seqera.io?subject=[GitHub]%20nf-core/sarek), [Friederike Hanssen](mailto:friederike.hanssen@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek)\n\n## Citations\n\nIf you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:\n\n> Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _NAR Genomics and Bioinformatics_ Volume 6, Issue 2, June 
2024, lqae031, [doi: 10.1093/nargab/lqae031](https://doi.org/10.1093/nargab/lqae031).\n\n> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2).\n\nYou can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\n## CHANGELOG\n\n- [CHANGELOG](CHANGELOG.md)\n", + "datePublished": "2025-11-04T12:01:05+00:00", + "description": "

\n \n \n \"nf-core/sarek\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/sarek/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3476425-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3476425)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.8-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/sarek)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/sarek)\n[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/sarek** is a workflow designed to detect variants on whole genome or targeted sequencing data. Initially designed for Human, and Mouse, it can work on any species with a reference genome. Sarek can also handle tumour / normal pairs and could include additional relapses.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/sarek/results).\n\nIt's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek).\n\n

\n \n

\n\n## Pipeline summary\n\nDepending on the options and samples provided, the pipeline can currently perform the following:\n\n- Form consensus reads from UMI sequences (`fgbio`)\n- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`)\n- Contamination removal (`BBSplit`, enabled by `--tools bbsplit`)\n- Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`)\n- Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`)\n- _Experimental Feature_: Use GPU-accelerated parabricks implementation as alternative to \"Map Reads to Reference\" + \"Process BAM file\" (`--aligner parabricks`)\n- Summarise alignment statistics (`samtools stats`, `mosdepth`)\n- Variant calling (enabled by `--tools`, see [compatibility](https://nf-co.re/sarek/latest/docs/usage#which-variant-calling-tool-is-implemented-for-which-data-type)):\n - `ASCAT`\n - `CNVkit`\n - `Control-FREEC`\n - `DeepVariant`\n - `freebayes`\n - `GATK HaplotypeCaller`\n - `GATK Mutect2`\n - `indexcov`\n - `Lofreq`\n - `Manta`\n - `mpileup`\n - `MSIsensor2`\n - `MSIsensor-pro`\n - `MuSE`\n - `Sentieon Haplotyper`\n - `Strelka`\n - `TIDDIT`\n- Post-variant calling options, one of:\n - `BCFtools concat` for germline vcfs and/or `BCFtools view` (default: filter by `PASS`), `BCFtools norm` for all vcfs\n - `Varlociraptor` for all vcfs\n- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`)\n- Summarise and represent QC (`MultiQC`)\n\n

\n \n

\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\npatient,sample,lane,fastq_1,fastq_2\nID1,S1,L002,ID1_S1_L002_R1_001.fastq.gz,ID1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/sarek \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/sarek/usage) and the [parameter documentation](https://nf-co.re/sarek/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/sarek/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/sarek/output).\n\n## Benchmarking\n\nOn each release, the pipeline is run on 3 full size tests:\n\n- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium\n- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle(NA12878) dataset\n- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available 
[here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). The results are uploaded to Zenodo, evaluated against a truth dataset, and results are made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#).\n\n## Credits\n\nSarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntum\u00f6rbanken)](https://ki.se/forskning/barntumorbanken).\nFriederike Hanssen and Gisela Gabernet at [QBiC](https://www.qbic.uni-tuebingen.de/) later joined and helped with further development.\n\nThe Nextflow DSL2 conversion of the pipeline was lead by Friederike Hanssen and Maxime U Garcia.\n\nMaintenance is now lead by Friederike Hanssen and Maxime U Garcia (now at [Seqera](https://seqera.io))\n\nMain developers:\n\n- [Maxime U Garcia](https://github.com/maxulysse)\n- [Friederike Hanssen](https://github.com/FriederikeHanssen)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Abhinav Sharma](https://github.com/abhi18av)\n- [Adam Talbot](https://github.com/adamrtalbot)\n- [Adrian L\u00e4rkeryd](https://github.com/adrlar)\n- [\u00c0itor Olivares](https://github.com/AitorPeseta)\n- [Alexander Peltzer](https://github.com/apeltzer)\n- [Alison Meynert](https://github.com/ameynert)\n- [Anders Sune Pedersen](https://github.com/asp8200)\n- [arontommi](https://github.com/arontommi)\n- [BarryDigby](https://github.com/BarryDigby)\n- [Bekir Erg\u00fcner](https://github.com/berguner)\n- [bjornnystedt](https://github.com/bjornnystedt)\n- [cgpu](https://github.com/cgpu)\n- [Chela James](https://github.com/chelauk)\n- [David Mas-Ponte](https://github.com/davidmasp)\n- [Edmund Miller](https://github.com/edmundmiller)\n- 
[Famke B\u00e4uerle](https://github.com/famosab)\n- [Francesco Lescai](https://github.com/lescai)\n- [Francisco Mart\u00ednez](https://github.com/nevinwu)\n- [Gavin Mackenzie](https://github.com/GCJMackenzie)\n- [Gisela Gabernet](https://github.com/ggabernet)\n- [Grant Neilson](https://github.com/grantn5)\n- [gulfshores](https://github.com/gulfshores)\n- [Harshil Patel](https://github.com/drpatelh)\n- [Hongwei Ye](https://github.com/YeHW)\n- [James A. Fellows Yates](https://github.com/jfy133)\n- [Jesper Eisfeldt](https://github.com/J35P312)\n- [Johannes Alneberg](https://github.com/alneberg)\n- [Jonas Kjellin](https://github.com/kjellinjonas)\n- [Jos\u00e9 Fern\u00e1ndez Navarro](https://github.com/jfnavarro)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Ken Brewer](https://github.com/kenibrewer)\n- [Lasse Westergaard Folkersen](https://github.com/lassefolkersen)\n- [Lucia Conde](https://github.com/lconde-ucl)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [Malin Larsson](https://github.com/malinlarsson)\n- [Marcel Martin](https://github.com/marcelm)\n- [Nick Smith](https://github.com/nickhsmith)\n- [Nicolas Schcolnicov](https://github.com/nschcolnicov)\n- [Nilesh Tawari](https://github.com/nilesh-tawari)\n- [Nils Homer](https://github.com/nh13)\n- [Olga Botvinnik](https://github.com/olgabot)\n- [Oskar Wacker](https://github.com/WackerO)\n- [pallolason](https://github.com/pallolason)\n- [Paul Cantalupo](https://github.com/pcantalupo)\n- [Phil Ewels](https://github.com/ewels)\n- [Pierre Lindenbaum](https://github.com/lindenb)\n- [Sabrina Krakau](https://github.com/skrakau)\n- [Sam Minot](https://github.com/sminot)\n- [Sebastian-D](https://github.com/Sebastian-D)\n- [Silvia Morini](https://github.com/silviamorins)\n- [Simon Pearce](https://github.com/SPPearce)\n- [Solenne Correard](https://github.com/scorreard)\n- [Susanne Jodoin](https://github.com/SusiJo)\n- [Szilveszter Juhos](https://github.com/szilvajuhos)\n- [Tobias 
Koch](https://github.com/KochTobi)\n- [Winni Kretzschmar](https://github.com/winni2k)\n- [Patricie Skal\u00e1kov\u00e1](https://github.com/Patricie34)\n\n## Acknowledgements\n\n| [![Barntum\u00f6rbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) |\n| :-----------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: |\n| [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) |\n| [![QBiC](docs/images/QBiC_logo.png)](https://www.qbic.uni-tuebingen.de) | [![GHGA](docs/images/GHGA_logo.png)](https://www.ghga.de/) |\n| [![DNGC](docs/images/DNGC_logo.png)](https://eng.ngc.dk/) | |\n\n## Contributions & Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime U Garcia](mailto:maxime.garcia@seqera.io?subject=[GitHub]%20nf-core/sarek), [Friederike Hanssen](mailto:friederike.hanssen@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek)\n\n## Citations\n\nIf you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:\n\n> Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _NAR Genomics and Bioinformatics_ Volume 6, Issue 2, June 
2024, lqae031, [doi: 10.1093/nargab/lqae031](https://doi.org/10.1093/nargab/lqae031).\n\n> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2).\n\nYou can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\n## CHANGELOG\n\n- [CHANGELOG](CHANGELOG.md)\n", "hasPart": [ { "@id": "main.nf" @@ -105,7 +105,7 @@ }, "mentions": [ { - "@id": "#03fd6d65-0046-4cc2-a74c-6f11e82eb66a" + "@id": "#db3036e2-2f58-43ab-afdb-7bc3f3741eec" } ], "name": "nf-core/sarek" @@ -137,74 +137,74 @@ "@id": "#max.u.garcia@gmail.com" }, { - "@id": "#jc.fernandez.navarro@gmail.com" + "@id": "https://orcid.org/0000-0003-0603-7907" }, { - "@id": "#malin.larsson@liu.se" + "@id": "#l.conde@ucl.ac.uk" }, { - "@id": "#adr.lar@me.com" + "@id": "https://orcid.org/0000-0001-7409-305X" }, { - "@id": "https://orcid.org/0009-0001-9875-5262" + "@id": "#jc.fernandez.navarro@gmail.com" }, { "@id": "#max.u.garcia@gmail.com" }, { - "@id": "#24893913+SPPearce@users.noreply.github.com" + "@id": "https://orcid.org/0000-0002-5762-6253" }, { - "@id": "https://orcid.org/0000-0002-5762-6253" + "@id": "#24893913+SPPearce@users.noreply.github.com" }, { - "@id": 
"https://orcid.org/0009-0006-2111-4316" + "@id": "#53608000+lescai@users.noreply.github.com" }, { - "@id": "#l.conde@ucl.ac.uk" + "@id": "https://orcid.org/0000-0001-6104-9260" }, { - "@id": "https://orcid.org/0000-0001-7409-305X" + "@id": "#adr.lar@me.com" }, { "@id": "https://orcid.org/0000-0003-3996-0909" }, { - "@id": "#53608000+lescai@users.noreply.github.com" + "@id": "#malin.larsson@liu.se" }, { - "@id": "#heuermh@acm.org" + "@id": "https://orcid.org/0000-0003-1387-0251" }, { - "@id": "https://orcid.org/0000-0003-1387-0251" + "@id": "#yehwhey@gmail.com" }, { - "@id": "https://orcid.org/0000-0003-0603-7907" + "@id": "#heuermh@acm.org" }, { - "@id": "https://orcid.org/0000-0003-3966-8481" + "@id": "https://orcid.org/0009-0001-9875-5262" }, { - "@id": "https://orcid.org/0000-0003-3996-0909" + "@id": "https://orcid.org/0000-0001-6280-4643" }, { - "@id": "https://orcid.org/0000-0002-6503-2180" + "@id": "https://orcid.org/0000-0003-3966-8481" }, { - "@id": "https://orcid.org/0000-0001-6104-9260" + "@id": "https://orcid.org/0000-0003-3996-0909" }, { - "@id": "https://orcid.org/0009-0007-7860-1155" + "@id": "https://orcid.org/0009-0006-2111-4316" }, { - "@id": "#yehwhey@gmail.com" + "@id": "https://orcid.org/0009-0007-7860-1155" }, { - "@id": "https://orcid.org/0000-0001-6280-4643" + "@id": "https://orcid.org/0000-0002-6503-2180" } ], "dateCreated": "", - "dateModified": "2025-11-04T11:00:26Z", + "dateModified": "2025-11-04T13:01:05Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -226,49 +226,49 @@ ], "maintainer": [ { - "@id": "#jc.fernandez.navarro@gmail.com" + "@id": "https://orcid.org/0000-0003-0603-7907" }, { - "@id": "#malin.larsson@liu.se" + "@id": "#l.conde@ucl.ac.uk" }, { - "@id": "#max.u.garcia@gmail.com" + "@id": "https://orcid.org/0000-0001-7409-305X" }, { - "@id": "#24893913+SPPearce@users.noreply.github.com" + "@id": "#jc.fernandez.navarro@gmail.com" }, { - "@id": 
"#l.conde@ucl.ac.uk" + "@id": "#max.u.garcia@gmail.com" }, { - "@id": "https://orcid.org/0000-0001-7409-305X" + "@id": "#24893913+SPPearce@users.noreply.github.com" }, { - "@id": "https://orcid.org/0000-0003-1387-0251" + "@id": "https://orcid.org/0000-0001-6104-9260" }, { - "@id": "https://orcid.org/0000-0003-0603-7907" + "@id": "#malin.larsson@liu.se" }, { - "@id": "https://orcid.org/0000-0003-3966-8481" + "@id": "https://orcid.org/0000-0003-1387-0251" }, { - "@id": "https://orcid.org/0000-0003-3996-0909" + "@id": "#yehwhey@gmail.com" }, { - "@id": "https://orcid.org/0000-0002-6503-2180" + "@id": "https://orcid.org/0000-0001-6280-4643" }, { - "@id": "https://orcid.org/0000-0001-6104-9260" + "@id": "https://orcid.org/0000-0003-3966-8481" }, { - "@id": "https://orcid.org/0009-0007-7860-1155" + "@id": "https://orcid.org/0000-0003-3996-0909" }, { - "@id": "#yehwhey@gmail.com" + "@id": "https://orcid.org/0009-0007-7860-1155" }, { - "@id": "https://orcid.org/0000-0001-6280-4643" + "@id": "https://orcid.org/0000-0002-6503-2180" } ], "name": [ @@ -301,11 +301,11 @@ "version": "!>=25.04.8" }, { - "@id": "#03fd6d65-0046-4cc2-a74c-6f11e82eb66a", + "@id": "#db3036e2-2f58-43ab-afdb-7bc3f3741eec", "@type": "TestSuite", "instance": [ { - "@id": "#b00b8f37-100d-4ff4-a2ae-53a3c2e8052f" + "@id": "#c2f32a15-d575-4283-8545-adc567119385" } ], "mainEntity": { @@ -314,7 +314,7 @@ "name": "Test suite for nf-core/sarek" }, { - "@id": "#b00b8f37-100d-4ff4-a2ae-53a3c2e8052f", + "@id": "#c2f32a15-d575-4283-8545-adc567119385", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/sarek", "resource": "repos/nf-core/sarek/actions/workflows/nf-test.yml", @@ -459,34 +459,28 @@ "name": "Maxime U Garcia" }, { - "@id": "#jc.fernandez.navarro@gmail.com", - "@type": "Person", - "email": "jc.fernandez.navarro@gmail.com", - "name": "Jos\u00e9 Fern\u00e1ndez Navarro" - }, - { - "@id": "#malin.larsson@liu.se", + "@id": "https://orcid.org/0000-0003-0603-7907", "@type": "Person", - 
"email": "malin.larsson@liu.se", - "name": "Malin Larsson" + "email": "sabrina.krakau.qbic@gmail.com", + "name": "Sabrina Krakau" }, { - "@id": "#adr.lar@me.com", + "@id": "#l.conde@ucl.ac.uk", "@type": "Person", - "email": "adr.lar@me.com", - "name": "Adrian Larkeryd" + "email": "l.conde@ucl.ac.uk", + "name": "Lucia Conde" }, { - "@id": "https://orcid.org/0009-0001-9875-5262", + "@id": "https://orcid.org/0000-0001-7409-305X", "@type": "Person", - "email": "friederike.hanssen@seqera.io", - "name": "Friederike Hanssen" + "email": "david.mas.p@gmail.com", + "name": "David Mas-Ponte" }, { - "@id": "#24893913+SPPearce@users.noreply.github.com", + "@id": "#jc.fernandez.navarro@gmail.com", "@type": "Person", - "email": "24893913+SPPearce@users.noreply.github.com", - "name": "Simon Pearce" + "email": "jc.fernandez.navarro@gmail.com", + "name": "Jos\u00e9 Fern\u00e1ndez Navarro" }, { "@id": "https://orcid.org/0000-0002-5762-6253", @@ -495,22 +489,28 @@ "name": "\u00d6mer An" }, { - "@id": "https://orcid.org/0009-0006-2111-4316", + "@id": "#24893913+SPPearce@users.noreply.github.com", "@type": "Person", - "email": "smith@in.tum.de", - "name": "Smith Nicholas" + "email": "24893913+SPPearce@users.noreply.github.com", + "name": "Simon Pearce" }, { - "@id": "#l.conde@ucl.ac.uk", + "@id": "#53608000+lescai@users.noreply.github.com", "@type": "Person", - "email": "l.conde@ucl.ac.uk", - "name": "Lucia Conde" + "email": "53608000+lescai@users.noreply.github.com", + "name": "Francesco L" }, { - "@id": "https://orcid.org/0000-0001-7409-305X", + "@id": "https://orcid.org/0000-0001-6104-9260", "@type": "Person", - "email": "david.mas.p@gmail.com", - "name": "David Mas-Ponte" + "email": "mirp.julia@gmail.com", + "name": "J\u00falia Mir Pedrol" + }, + { + "@id": "#adr.lar@me.com", + "@type": "Person", + "email": "adr.lar@me.com", + "name": "Adrian Larkeryd" }, { "@id": "https://orcid.org/0000-0003-3996-0909", @@ -519,10 +519,22 @@ "name": "Chela James" }, { - "@id": 
"#53608000+lescai@users.noreply.github.com", + "@id": "#malin.larsson@liu.se", "@type": "Person", - "email": "53608000+lescai@users.noreply.github.com", - "name": "Francesco L" + "email": "malin.larsson@liu.se", + "name": "Malin Larsson" + }, + { + "@id": "https://orcid.org/0000-0003-1387-0251", + "@type": "Person", + "email": "45968370+famosab@users.noreply.github.com", + "name": "Famke B\u00e4uerle" + }, + { + "@id": "#yehwhey@gmail.com", + "@type": "Person", + "email": "yehwhey@gmail.com", + "name": "Hongwei Ye" }, { "@id": "#heuermh@acm.org", @@ -531,16 +543,16 @@ "name": "Michael L Heuer" }, { - "@id": "https://orcid.org/0000-0003-1387-0251", + "@id": "https://orcid.org/0009-0001-9875-5262", "@type": "Person", - "email": "45968370+famosab@users.noreply.github.com", - "name": "Famke B\u00e4uerle" + "email": "Friederike.hanssen@qbic.uni-tuebingen.de", + "name": "Friederike Hanssen" }, { - "@id": "https://orcid.org/0000-0003-0603-7907", + "@id": "https://orcid.org/0000-0001-6280-4643", "@type": "Person", - "email": "sabrina.krakau.qbic@gmail.com", - "name": "Sabrina Krakau" + "email": "szilveszter.juhos@scilifelab.se", + "name": "Szilveszter Juhos" }, { "@id": "https://orcid.org/0000-0003-3966-8481", @@ -549,16 +561,10 @@ "name": "Paul Cantalupo" }, { - "@id": "https://orcid.org/0000-0002-6503-2180", - "@type": "Person", - "email": "apeltzer@users.noreply.github.com", - "name": "Alexander Peltzer" - }, - { - "@id": "https://orcid.org/0000-0001-6104-9260", + "@id": "https://orcid.org/0009-0006-2111-4316", "@type": "Person", - "email": "mirp.julia@gmail.com", - "name": "J\u00falia Mir Pedrol" + "email": "smith@in.tum.de", + "name": "Smith Nicholas" }, { "@id": "https://orcid.org/0009-0007-7860-1155", @@ -567,16 +573,10 @@ "name": "Nils Homer" }, { - "@id": "#yehwhey@gmail.com", - "@type": "Person", - "email": "yehwhey@gmail.com", - "name": "Hongwei Ye" - }, - { - "@id": "https://orcid.org/0000-0001-6280-4643", + "@id": "https://orcid.org/0000-0002-6503-2180", 
"@type": "Person", - "email": "szilveszter.juhos@scilifelab.se", - "name": "Szilveszter Juhos" + "email": "apeltzer@users.noreply.github.com", + "name": "Alexander Peltzer" } ] } \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_deepvariant/main.nf b/subworkflows/local/bam_variant_calling_deepvariant/main.nf index 567050b505..49fa2b686d 100644 --- a/subworkflows/local/bam_variant_calling_deepvariant/main.nf +++ b/subworkflows/local/bam_variant_calling_deepvariant/main.nf @@ -48,6 +48,13 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { MERGE_DEEPVARIANT_GVCF(gvcf_to_merge, dict) MERGE_DEEPVARIANT_VCF(vcf_to_merge, dict) + // Figuring out if there is one or more tbi(s) from the same sample + tbi_out = DEEPVARIANT_RUNDEEPVARIANT.out.vcf_index.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + // Mix intervals and no_intervals channels together gvcf = Channel.empty().mix(MERGE_DEEPVARIANT_GVCF.out.vcf, gvcf_out.no_intervals) // add variantcaller to meta map and remove no longer necessary field: num_intervals @@ -58,6 +65,10 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { // add variantcaller to meta map and remove no longer necessary field: num_intervals .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepvariant' ], vcf ] } + tbi = Channel.empty().mix(MERGE_DEEPVARIANT_VCF.out.tbi, tbi_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepvariant' ], tbi ] } + versions = versions.mix(DEEPVARIANT_RUNDEEPVARIANT.out.versions) versions = versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions) versions = versions.mix(MERGE_DEEPVARIANT_VCF.out.versions) @@ -65,6 +76,7 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { emit: gvcf vcf + tbi versions } diff --git
a/subworkflows/local/bam_variant_calling_freebayes/main.nf b/subworkflows/local/bam_variant_calling_freebayes/main.nf index 92d2e1f0e2..b176ed7bb3 100644 --- a/subworkflows/local/bam_variant_calling_freebayes/main.nf +++ b/subworkflows/local/bam_variant_calling_freebayes/main.nf @@ -4,11 +4,12 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort' -include { FREEBAYES } from '../../../modules/nf-core/freebayes' -include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../modules/nf-core/gatk4/mergevcfs' -include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/tabix/tabix' -include { VCFLIB_VCFFILTER } from '../../../modules/nf-core/vcflib/vcffilter' +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort' +include { FREEBAYES } from '../../../modules/nf-core/freebayes' +include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../modules/nf-core/gatk4/mergevcfs' +include { TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/tabix/tabix' +include { TABIX_TABIX as TABIX_VC_FREEBAYES_FILT } from '../../../modules/nf-core/tabix/tabix' +include { VCFLIB_VCFFILTER } from '../../../modules/nf-core/vcflib/vcffilter' workflow BAM_VARIANT_CALLING_FREEBAYES { take: @@ -60,16 +61,21 @@ workflow BAM_VARIANT_CALLING_FREEBAYES { vcf_filtered = VCFLIB_VCFFILTER.out.vcf + // Index the filtered VCFs + TABIX_VC_FREEBAYES_FILT(vcf_filtered) + versions = versions.mix(BCFTOOLS_SORT.out.versions) versions = versions.mix(FREEBAYES.out.versions) versions = versions.mix(MERGE_FREEBAYES.out.versions) versions = versions.mix(TABIX_VC_FREEBAYES.out.versions) + versions = versions.mix(TABIX_VC_FREEBAYES_FILT.out.versions) versions = versions.mix(VCFLIB_VCFFILTER.out.versions) emit: vcf_unfiltered = ch_vcf // channel: [ meta, vcf, tbi ] // Use the QUAL filtered vcfs for the next steps - vcf = 
vcf_filtered // channel: [ meta, vcf ] + vcf = vcf_filtered // channel: [ meta, vcf ] + tbi = TABIX_VC_FREEBAYES_FILT.out.tbi // channel: [ meta, tbi ] versions } diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 00cd9da046..8694a29d9e 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -68,6 +68,14 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_sentieon_haplotyper = Channel.empty() vcf_strelka = Channel.empty() vcf_tiddit = Channel.empty() + tbi_deepvariant = Channel.empty() + tbi_freebayes = Channel.empty() + tbi_haplotypecaller = Channel.empty() + tbi_manta = Channel.empty() + tbi_sentieon_dnascope = Channel.empty() + tbi_sentieon_haplotyper = Channel.empty() + tbi_strelka = Channel.empty() + tbi_tiddit = Channel.empty() // BCFTOOLS MPILEUP if (tools && tools.split(',').contains('mpileup')) { @@ -105,6 +113,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) vcf_deepvariant = BAM_VARIANT_CALLING_DEEPVARIANT.out.vcf + tbi_deepvariant = BAM_VARIANT_CALLING_DEEPVARIANT.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.versions) } @@ -121,6 +130,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + tbi_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } @@ -189,6 +199,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) vcf_manta = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.vcf + tbi_manta = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) } @@ -223,7 +234,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.versions) vcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf - vcf_tbi_sentieon_dnascope = 
BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf_tbi + tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf_tbi gvcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf gvcf_tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf_tbi @@ -281,7 +292,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.versions) vcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf - vcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf_tbi + tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf_tbi gvcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf gvcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf_tbi @@ -338,6 +349,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) vcf_strelka = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.vcf + tbi_strelka = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_STRELKA.out.versions) } @@ -351,6 +363,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) vcf_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.vcf + tbi_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) } @@ -366,6 +379,17 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_tiddit ) + tbi_all = Channel.empty().mix( + tbi_deepvariant, + tbi_freebayes, + tbi_sentieon_dnascope, + tbi_haplotypecaller, + tbi_manta, + tbi_sentieon_haplotyper, + tbi_strelka, + tbi_tiddit + ) + emit: gvcf_sentieon_dnascope gvcf_sentieon_haplotyper @@ -380,6 +404,15 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_sentieon_dnascope vcf_sentieon_haplotyper vcf_tiddit + tbi_all + tbi_deepvariant + tbi_freebayes + tbi_haplotypecaller + tbi_manta + tbi_sentieon_dnascope + tbi_sentieon_haplotyper + tbi_strelka + tbi_tiddit versions } diff --git 
a/subworkflows/local/bam_variant_calling_germline_manta/main.nf b/subworkflows/local/bam_variant_calling_germline_manta/main.nf index d27a999a68..0fa4ac6bfd 100644 --- a/subworkflows/local/bam_variant_calling_germline_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_manta/main.nf @@ -27,18 +27,21 @@ workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { MANTA_GERMLINE(cram_intervals, fasta, fasta_fai, []) - small_indels_vcf = MANTA_GERMLINE.out.candidate_small_indels_vcf - sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf - diploid_sv_vcf = MANTA_GERMLINE.out.diploid_sv_vcf + small_indels_vcf = MANTA_GERMLINE.out.candidate_small_indels_vcf + sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf + diploid_sv_vcf = MANTA_GERMLINE.out.diploid_sv_vcf + diploid_sv_vcf_tbi = MANTA_GERMLINE.out.diploid_sv_vcf_tbi // Only diploid SV should get annotated // add variantcaller to meta map vcf = diploid_sv_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + tbi = diploid_sv_vcf_tbi.map{ meta, tbi -> [ meta + [ variantcaller:'manta' ], tbi ] } versions = versions.mix(MANTA_GERMLINE.out.versions) emit: vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_mpileup/main.nf b/subworkflows/local/bam_variant_calling_mpileup/main.nf index 54149e214c..26e7f4df2e 100644 --- a/subworkflows/local/bam_variant_calling_mpileup/main.nf +++ b/subworkflows/local/bam_variant_calling_mpileup/main.nf @@ -37,6 +37,12 @@ workflow BAM_VARIANT_CALLING_MPILEUP { no_intervals: it[0].num_intervals <= 1 } + // Figuring out if there is one or more tbi(s) from the same sample + tbi_mpileup = BCFTOOLS_MPILEUP.out.tbi.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + // Figuring out if there is one or more mpileup(s) from the same sample mpileup_samtools = SAMTOOLS_MPILEUP.out.mpileup.branch { intervals: it[0].num_intervals > 1 @@ -58,6 +64,9 @@ workflow BAM_VARIANT_CALLING_MPILEUP { vcf = MERGE_BCFTOOLS_MPILEUP.out.vcf 
.mix(vcf_mpileup.no_intervals) .map { meta, vcf -> [meta - meta.subMap('num_intervals') + [variantcaller: 'bcftools'], vcf] } + tbi = MERGE_BCFTOOLS_MPILEUP.out.tbi + .mix(tbi_mpileup.no_intervals) + .map { meta, tbi -> [meta - meta.subMap('num_intervals') + [variantcaller: 'bcftools'], tbi] } versions = versions.mix(SAMTOOLS_MPILEUP.out.versions) versions = versions.mix(BCFTOOLS_MPILEUP.out.versions) @@ -67,5 +76,6 @@ workflow BAM_VARIANT_CALLING_MPILEUP { emit: mpileup vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_single_strelka/main.nf b/subworkflows/local/bam_variant_calling_single_strelka/main.nf index ab6b3373c3..b9bf584eb6 100644 --- a/subworkflows/local/bam_variant_calling_single_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_single_strelka/main.nf @@ -34,7 +34,14 @@ workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { } // Figuring out if there is one or more vcf(s) from the same sample - vcf = STRELKA_SINGLE.out.vcf.branch{ + vcf_out = STRELKA_SINGLE.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_out = STRELKA_SINGLE.out.vcf_tbi.branch{ // Use meta.num_intervals to asses number of intervals intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -42,23 +49,28 @@ workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { // Only when using intervals genome_vcf_to_merge = genome_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + vcf_to_merge = vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() MERGE_STRELKA(vcf_to_merge, dict) MERGE_STRELKA_GENOME(genome_vcf_to_merge, dict) // Mix intervals and no_intervals channels together // Only strelka
variant vcf should get annotated - vcf = Channel.empty().mix(MERGE_STRELKA.out.vcf, vcf.no_intervals) + vcf = Channel.empty().mix(MERGE_STRELKA.out.vcf, vcf_out.no_intervals) // add variantcaller to meta map and remove no longer necessary field: num_intervals .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + tbi = Channel.empty().mix(MERGE_STRELKA.out.tbi, tbi_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], tbi ] } + versions = versions.mix(MERGE_STRELKA.out.versions) versions = versions.mix(MERGE_STRELKA_GENOME.out.versions) versions = versions.mix(STRELKA_SINGLE.out.versions) emit: vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf index 356ce7c2fa..d105a87101 100644 --- a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf +++ b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf @@ -22,6 +22,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { ploidy = TIDDIT_SV.out.ploidy vcf = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [ meta + [ variantcaller: 'tiddit'], gz ] } + tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [ meta + [ variantcaller: 'tiddit'], tbi ] } versions = versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) versions = versions.mix(TIDDIT_SV.out.versions) @@ -29,6 +30,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { emit: ploidy vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index 8ac9bf2cb9..0e7250d975 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -64,6 +64,13 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { vcf_strelka = 
Channel.empty() vcf_tiddit = Channel.empty() vcf_tnscope = Channel.empty() + tbi_freebayes = Channel.empty() + tbi_manta = Channel.empty() + tbi_muse = Channel.empty() + tbi_mutect2 = Channel.empty() + tbi_strelka = Channel.empty() + tbi_tiddit = Channel.empty() + tbi_tnscope = Channel.empty() if (tools && tools.split(',').contains('ascat')) { BAM_VARIANT_CALLING_SOMATIC_ASCAT( @@ -142,6 +149,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + tbi_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } @@ -155,6 +163,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_manta = BAM_VARIANT_CALLING_SOMATIC_MANTA.out.vcf + tbi_manta = BAM_VARIANT_CALLING_SOMATIC_MANTA.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.versions) } @@ -188,6 +197,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_strelka = BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.vcf + tbi_strelka = BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.versions) } @@ -209,6 +219,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.vcf + tbi_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUSE.out.versions) } @@ -234,6 +245,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.vcf_filtered + tbi_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.index_filtered versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.versions) } @@ -255,6 +267,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_tnscope = BAM_VARIANT_CALLING_SOMATIC_TNSCOPE.out.vcf + tbi_tnscope = BAM_VARIANT_CALLING_SOMATIC_TNSCOPE.out.index versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_TNSCOPE.out.versions) } @@ -268,6 +281,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { ) vcf_tiddit = 
BAM_VARIANT_CALLING_SOMATIC_TIDDIT.out.vcf + tbi_tiddit = BAM_VARIANT_CALLING_SOMATIC_TIDDIT.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_TIDDIT.out.versions) } @@ -282,6 +296,17 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { vcf_tnscope, ) + tbi_all = Channel.empty() + .mix( + tbi_freebayes, + tbi_manta, + tbi_muse, + tbi_mutect2, + tbi_strelka, + tbi_tiddit, + tbi_tnscope, + ) + emit: out_indexcov out_msisensorpro @@ -293,5 +318,13 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { vcf_strelka vcf_tiddit vcf_tnscope + tbi_all + tbi_freebayes + tbi_manta + tbi_muse + tbi_mutect2 + tbi_strelka + tbi_tiddit + tbi_tnscope versions } diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf index f6720c5406..499fdd09ad 100644 --- a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf @@ -30,11 +30,14 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { candidate_small_indels_vcf_tbi = MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf + diploid_sv_vcf_tbi = MANTA_SOMATIC.out.diploid_sv_vcf_tbi somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf + somatic_sv_vcf_tbi = MANTA_SOMATIC.out.somatic_sv_vcf_tbi // Only diploid and somatic SV should get annotated // add variantcaller to meta map vcf = Channel.empty().mix(diploid_sv_vcf, somatic_sv_vcf).map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + tbi = Channel.empty().mix(diploid_sv_vcf_tbi, somatic_sv_vcf_tbi).map{ meta, tbi -> [ meta + [ variantcaller:'manta' ], tbi ] } versions = versions.mix(MANTA_SOMATIC.out.versions) @@ -42,6 +45,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { candidate_small_indels_vcf candidate_small_indels_vcf_tbi vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf 
b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf index e968463a85..b293ba5dc8 100644 --- a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf @@ -40,11 +40,13 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { // add variantcaller to meta map vcf = MUSE_SUMP.out.vcf.map { meta, vcf -> [meta + [variantcaller: 'muse'], vcf] } + tbi = MUSE_SUMP.out.tbi.map { meta, tbi -> [meta + [variantcaller: 'muse'], tbi] } versions = versions.mix(MUSE_CALL.out.versions) versions = versions.mix(MUSE_SUMP.out.versions) emit: vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf index 02c729f93e..cd2c373766 100644 --- a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf @@ -47,17 +47,36 @@ workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { MERGE_STRELKA_INDELS(vcf_indels_to_merge, dict) MERGE_STRELKA_SNVS(vcf_snvs_to_merge, dict) + // Figuring out if there is one or more tbi(s) from the same sample + tbi_indels = STRELKA_SOMATIC.out.vcf_indels_tbi.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_snvs = STRELKA_SOMATIC.out.vcf_snvs_tbi.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + // Mix intervals and no_intervals channels together vcf = Channel.empty().mix(MERGE_STRELKA_INDELS.out.vcf, MERGE_STRELKA_SNVS.out.vcf, vcf_indels.no_intervals, vcf_snvs.no_intervals) // add variantcaller to meta map and remove no longer necessary field: num_intervals .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + tbi =
Channel.empty().mix(MERGE_STRELKA_INDELS.out.tbi, MERGE_STRELKA_SNVS.out.tbi, tbi_indels.no_intervals, tbi_snvs.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], tbi ] } + versions = versions.mix(MERGE_STRELKA_SNVS.out.versions) versions = versions.mix(MERGE_STRELKA_INDELS.out.versions) versions = versions.mix(STRELKA_SOMATIC.out.versions) emit: vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf index 8c17df041b..f768c42791 100644 --- a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf @@ -24,7 +24,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_TIDDIT { SVDB_MERGE(TIDDIT_NORMAL.out.vcf.join(TIDDIT_TUMOR.out.vcf, failOnDuplicate: true, failOnMismatch: true).map{ meta, vcf_normal, vcf_tumor -> [ meta, [vcf_normal, vcf_tumor] ] }, false, true) - vcf = SVDB_MERGE.out.vcf + vcf = SVDB_MERGE.out.vcf.map{ meta, vcf -> [ meta + [ variantcaller:'tiddit' ], vcf ] } + tbi = SVDB_MERGE.out.tbi.map{ meta, tbi -> [ meta + [ variantcaller:'tiddit' ], tbi ] } versions = versions.mix(TIDDIT_NORMAL.out.versions) versions = versions.mix(TIDDIT_TUMOR.out.versions) @@ -33,4 +34,5 @@ workflow BAM_VARIANT_CALLING_SOMATIC_TIDDIT { emit: versions vcf + tbi } diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index c3f418abef..3fa6ca2354 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -57,6 +57,15 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { vcf_tiddit = Channel.empty() vcf_tnscope = Channel.empty() + // Initialize empty TBI channels + tbi_freebayes = Channel.empty() + tbi_lofreq = 
Channel.empty() + tbi_manta = Channel.empty() + tbi_mpileup = Channel.empty() + tbi_mutect2 = Channel.empty() + tbi_tiddit = Channel.empty() + tbi_tnscope = Channel.empty() + // MPILEUP if (tools && tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec')) { BAM_VARIANT_CALLING_MPILEUP( @@ -66,6 +75,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { intervals, ) vcf_mpileup = BAM_VARIANT_CALLING_MPILEUP.out.vcf + tbi_mpileup = BAM_VARIANT_CALLING_MPILEUP.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) } @@ -109,6 +119,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { ) vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + tbi_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } @@ -142,6 +153,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { ) vcf_mutect2 = BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.vcf_filtered + tbi_mutect2 = BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.index_filtered versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.versions) } @@ -155,6 +167,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { dict, ) vcf_lofreq = BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.vcf + tbi_lofreq = BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.versions) } @@ -168,6 +181,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { ) vcf_manta = BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.vcf + tbi_manta = BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.versions) } @@ -180,6 +194,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { ) vcf_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.vcf + tbi_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) } @@ -198,6 +213,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { ) vcf_tnscope = 
BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE.out.vcf + tbi_tnscope = BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE.out.tbi versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE.out.versions) } @@ -212,9 +228,21 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { vcf_tnscope, ) + tbi_all = Channel.empty() + .mix( + tbi_freebayes, + tbi_lofreq, + tbi_manta, + tbi_mutect2, + tbi_mpileup, + tbi_tiddit, + tbi_tnscope, + ) + emit: out_msisensor2 vcf_all + tbi_all vcf_freebayes vcf_lofreq vcf_manta @@ -222,5 +250,12 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { vcf_mutect2 vcf_tiddit vcf_tnscope + tbi_freebayes + tbi_lofreq + tbi_manta + tbi_mpileup + tbi_mutect2 + tbi_tiddit + tbi_tnscope versions } diff --git a/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf index b619c1f796..31fa01a1dd 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf @@ -40,12 +40,14 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ { // Mix intervals and no_intervals channels together // Remove unnecessary metadata - vcf = Channel.empty().mix(MERGE_LOFREQ.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'lofreq' ], vcf ] } + vcf = Channel.empty().mix(MERGE_LOFREQ.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'lofreq' ], vcf ] } + tbi = Channel.empty().mix(MERGE_LOFREQ.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'lofreq' ], tbi ] } versions = versions.mix(MERGE_LOFREQ.out.versions) versions = versions.mix(LOFREQ.out.versions) emit: vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf index 38f5d4366c..1c01ee6de8 100644 --- 
a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf @@ -30,15 +30,18 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA { small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf + tumor_sv_vcf_tbi = MANTA_TUMORONLY.out.tumor_sv_vcf_tbi // Only tumor sv should get annotated // add variantcaller to meta map vcf = tumor_sv_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + tbi = tumor_sv_vcf_tbi.map{ meta, tbi -> [ meta + [ variantcaller:'manta' ], tbi ] } versions = versions.mix(MANTA_TUMORONLY.out.versions) emit: vcf + tbi versions } diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf index fa20983d7f..a842c78eec 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -149,6 +149,9 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { vcf_filtered = FILTERMUTECTCALLS.out.vcf.map { meta, vcf_ -> [meta + [variantcaller: 'mutect2'], vcf_] } + tbi_mutect2 = FILTERMUTECTCALLS.out.tbi + .map{ meta, tbi -> [ meta + [ variantcaller:'mutect2' ], tbi ] } + versions = versions.mix(MERGE_MUTECT2.out.versions) versions = versions.mix(CALCULATECONTAMINATION.out.versions) versions = versions.mix(FILTERMUTECTCALLS.out.versions) @@ -159,14 +162,19 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { versions = versions.mix(MUTECT2.out.versions) emit: - vcf // channel: [ meta, vcf ] - stats // channel: [ meta, stats ] - vcf_filtered // channel: [ meta, vcf ] - index_filtered = FILTERMUTECTCALLS.out.tbi // channel: [ meta, tbi ] - stats_filtered = FILTERMUTECTCALLS.out.stats // channel: [ meta, stats ] - artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ meta, 
artifactprior ] - pileup_table // channel: [ meta, table ] - contamination_table = calculatecontamination_out_cont // channel: [ meta, contamination ] - segmentation_table = calculatecontamination_out_seg // channel: [ meta, segmentation ] - versions // channel: [ versions.yml ] + vcf // channel: [ meta, vcf ] + stats // channel: [ meta, stats ] + + vcf_filtered // channel: [ meta, vcf ] + index_filtered = tbi_mutect2 // channel: [ meta, tbi ] + stats_filtered = FILTERMUTECTCALLS.out.stats // channel: [ meta, stats ] + + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ meta, artifactprior ] + + pileup_table // channel: [ meta, table ] + + contamination_table = calculatecontamination_out_cont // channel: [ meta, contamination ] + segmentation_table = calculatecontamination_out_seg // channel: [ meta, segmentation ] + + versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf index a5aeaa8ca1..462ef7cba9 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf @@ -70,6 +70,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE { emit: vcf // channel: [ meta, vcf ] + tbi = index // channel: [ meta, tbi ] index // channel: [ meta, index ] versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/post_variantcalling/main.nf b/subworkflows/local/post_variantcalling/main.nf index 29d07211c1..45c679a5e4 100644 --- a/subworkflows/local/post_variantcalling/main.nf +++ b/subworkflows/local/post_variantcalling/main.nf @@ -1,7 +1,7 @@ // // POST VARIANT CALLING: processes run on variantcalled but not annotated VCFs // - +include { BCFTOOLS_VIEW as FILTER_VCFS } from '../../../modules/nf-core/bcftools/view' include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline' include { NORMALIZE_VCFS } from 
'../vcf_normalization' include { VCF_VARLOCIRAPTOR_SINGLE as VCF_VARLOCIRAPTOR_GERMLINE } from '../vcf_varlociraptor_single' @@ -13,13 +13,17 @@ workflow POST_VARIANTCALLING { tools cram_germline germline_vcfs + germline_tbis cram_tumor_only tumor_only_vcfs + tumor_only_tbis cram_somatic somatic_vcfs + somatic_tbis fasta fai concatenate_vcfs + filter_vcfs normalize_vcfs varlociraptor_chunk_size // integer: [mandatory] [default: 15] number of chunks to split BCF files when preprocessing and calling variants varlociraptor_scenario_germline @@ -56,14 +60,32 @@ workflow POST_VARIANTCALLING { tbis = tbis.mix(VCF_VARLOCIRAPTOR_TUMOR_ONLY.out.tbi) versions = versions.mix(VCF_VARLOCIRAPTOR_TUMOR_ONLY.out.versions) - } else if (concatenate_vcfs || normalize_vcfs) { + } else if (filter_vcfs || normalize_vcfs || concatenate_vcfs ) { + + all_vcfs = Channel.empty().mix(germline_vcfs, tumor_only_vcfs, somatic_vcfs) + all_tbis = Channel.empty().mix(germline_tbis, tumor_only_tbis, somatic_tbis) + + // 1. Filter by PASS and custom fields + // 2. Normalize + // 3. 
Aggregate variants (Union, intersection, or n-1) + if(filter_vcfs) { + + // Join VCFs with their corresponding TBIs before filtering + FILTER_VCFS( all_vcfs.join(all_tbis, failOnDuplicate: true, failOnMismatch: true), [], [], []) + + all_vcfs = FILTER_VCFS.out.vcf + all_tbis = FILTER_VCFS.out.tbi + versions = versions.mix(FILTER_VCFS.out.versions) + } if (normalize_vcfs) { - NORMALIZE_VCFS(germline_vcfs, tumor_only_vcfs, somatic_vcfs, fasta) + all_vcfs.dump(pretty: true) + + NORMALIZE_VCFS(all_vcfs, fasta) - vcfs = vcfs.mix(NORMALIZE_VCFS.out.vcfs) // [meta, vcf] - tbis = tbis.mix(NORMALIZE_VCFS.out.tbis) // [meta, tbi] + all_vcfs = NORMALIZE_VCFS.out.vcfs // [meta, vcf] + all_tbis = NORMALIZE_VCFS.out.tbis // [meta, tbi] versions = versions.mix(NORMALIZE_VCFS.out.versions) } diff --git a/subworkflows/local/vcf_concatenate_germline/main.nf b/subworkflows/local/vcf_concatenate_germline/main.nf index 26fd49e285..94e7b5e716 100644 --- a/subworkflows/local/vcf_concatenate_germline/main.nf +++ b/subworkflows/local/vcf_concatenate_germline/main.nf @@ -25,17 +25,15 @@ workflow CONCATENATE_GERMLINE_VCFS { GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis) GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf) - TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) // Gather versions of all tools used versions = versions.mix(ADD_INFO_TO_VCF.out.versions) versions = versions.mix(TABIX_EXT_VCF.out.versions) versions = versions.mix(GERMLINE_VCFS_CONCAT.out.versions) versions = versions.mix(GERMLINE_VCFS_CONCAT_SORT.out.versions) - versions = versions.mix(TABIX_GERMLINE_VCFS_CONCAT_SORT.out.versions) emit: vcfs = GERMLINE_VCFS_CONCAT_SORT.out.vcf // concatenated vcfs - tbis = TABIX_GERMLINE_VCFS_CONCAT_SORT.out.tbi // matching tbis + tbis = GERMLINE_VCFS_CONCAT_SORT.out.tbi // matching tbis versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/vcf_normalization/main.nf b/subworkflows/local/vcf_normalization/main.nf index e041f2009c..a772fb6b40 100644 
--- a/subworkflows/local/vcf_normalization/main.nf +++ b/subworkflows/local/vcf_normalization/main.nf @@ -5,21 +5,16 @@ include { ADD_INFO_TO_VCF } from '../../../modules/local/add include { BCFTOOLS_NORM as VCFS_NORM } from '../../../modules/nf-core/bcftools/norm' include { BCFTOOLS_SORT as VCFS_NORM_SORT } from '../../../modules/nf-core/bcftools/sort' include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix' -include { TABIX_TABIX as TABIX_VCFS_NORM_SORT } from '../../../modules/nf-core/tabix/tabix' // Workflow to normalize, compress, and index VCF files workflow NORMALIZE_VCFS { take: - germline_vcfs - tumor_only_vcfs - somatic_vcfs + vcfs fasta main: versions = Channel.empty() - vcfs = germline_vcfs.mix(tumor_only_vcfs, somatic_vcfs) - // Add additional information to VCF files ADD_INFO_TO_VCF(vcfs) @@ -32,18 +27,14 @@ workflow NORMALIZE_VCFS { // Sort the normalized VCF files VCFS_NORM_SORT(VCFS_NORM.out.vcf) - // Index the sorted normalized VCF files - TABIX_VCFS_NORM_SORT(VCFS_NORM_SORT.out.vcf) - // Gather versions of all tools used versions = versions.mix(ADD_INFO_TO_VCF.out.versions) versions = versions.mix(TABIX_EXT_VCF.out.versions) - versions = versions.mix(TABIX_VCFS_NORM_SORT.out.versions) versions = versions.mix(VCFS_NORM.out.versions) versions = versions.mix(VCFS_NORM_SORT.out.versions) emit: vcfs = VCFS_NORM_SORT.out.vcf // normalized vcfs - tbis = TABIX_VCFS_NORM_SORT.out.tbi // matching tbis + tbis = VCFS_NORM_SORT.out.tbi // matching tbis versions // Channel: [versions.yml] } diff --git a/tests/postprocess_concatenation.nf.test.snap b/tests/postprocess_concatenation.nf.test.snap index f44a4b668b..d0b948796c 100644 --- a/tests/postprocess_concatenation.nf.test.snap +++ b/tests/postprocess_concatenation.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test --concatenate_vcfs --tools freebayes,strelka": { "content": [ - 55, + 53, { "ADD_INFO_TO_VCF": { "gawk": "5.3.0" @@ -45,10 +45,10 @@ "TABIX_EXT_VCF": { "tabix": 
1.21 }, - "TABIX_GERMLINE_VCFS_CONCAT_SORT": { + "TABIX_VC_FREEBAYES": { "tabix": 1.21 }, - "TABIX_VC_FREEBAYES": { + "TABIX_VC_FREEBAYES_FILT": { "tabix": 1.21 }, "VCFLIB_VCFFILTER": { @@ -208,10 +208,12 @@ "variant_calling/freebayes", "variant_calling/freebayes/testN", "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", "variant_calling/freebayes/testT", "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", "variant_calling/strelka", @@ -281,6 +283,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.0" }, - "timestamp": "2025-11-01T22:02:32.575016" + "timestamp": "2025-11-03T11:06:26.54547" } } \ No newline at end of file diff --git a/tests/postprocess_concatenation_normalization.nf.test b/tests/postprocess_concatenation_normalization.nf.test index 1a61c03fb0..3551d79d3f 100644 --- a/tests/postprocess_concatenation_normalization.nf.test +++ b/tests/postprocess_concatenation_normalization.nf.test @@ -19,6 +19,19 @@ nextflow_pipeline { ], include_freebayes_unfiltered: true, no_conda: true + ], + [ + name: "-profile test --filter_vcfs --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + normalize_vcfs: true, + concatenate_vcfs: true, + filter_vcfs: true, + tools: 'freebayes,strelka' + ], + include_freebayes_unfiltered: true, + no_conda: true ] ] diff --git a/tests/postprocess_concatenation_normalization.nf.test.snap b/tests/postprocess_concatenation_normalization.nf.test.snap index c0a773bafb..9d4d53f818 100644 --- 
a/tests/postprocess_concatenation_normalization.nf.test.snap +++ b/tests/postprocess_concatenation_normalization.nf.test.snap @@ -1,7 +1,7 @@ { - "-profile test --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka": { + "-profile test --filter_vcfs --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka": { "content": [ - 75, + 73, { "ADD_INFO_TO_VCF": { "gawk": "5.3.0" @@ -15,6 +15,9 @@ "CREATE_INTERVALS_BED": { "gawk": "5.3.0" }, + "FILTER_VCFS": { + "bcftools": 1.22 + }, "FREEBAYES": { "freebayes": "1.3.10" }, @@ -45,15 +48,334 @@ "TABIX_EXT_VCF": { "tabix": 1.21 }, - "TABIX_GERMLINE_VCFS_CONCAT_SORT": { + "TABIX_VC_FREEBAYES": { "tabix": 1.21 }, - "TABIX_VCFS_NORM_SORT": { + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + 
"multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", + "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + "reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + 
"reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/concat", + "variant_calling/concat/testN", + "variant_calling/concat/testN/testN.germline.vcf.gz", + "variant_calling/concat/testN/testN.germline.vcf.gz.tbi", + "variant_calling/concat/testT", + "variant_calling/concat/testT/testT.germline.vcf.gz", + "variant_calling/concat/testT/testT.germline.vcf.gz.tbi", + "variant_calling/filtered", + "variant_calling/filtered/testN", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + 
"variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/testN", + "variant_calling/normalized/testN/testN.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT", + "variant_calling/normalized/testT/testT.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + 
"variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + "mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + "testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + "testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + 
"testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + "testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.freebayes.filtered.bcftools_filtered.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "testN.strelka.variants.bcftools_filtered.vcf.gz:md5,35a541d45222013a9887bbe1678f9444", + "testT.freebayes.filtered.bcftools_filtered.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "testT.strelka.variants.bcftools_filtered.vcf.gz:md5,46757c1dd7a5c1f62b39d91281016521", + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "testN.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz:md5,c7d56b86abee34770566ea490a806772", + "testT.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "testT.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz:md5,4449c1d5d7aecee9ca710ed4650633ea", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T10:48:21.505593" + }, + "-profile test --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka": { + "content": [ + 69, + { + 
"ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GERMLINE_VCFS_CONCAT": { + "bcftools": 1.21 + }, + "GERMLINE_VCFS_CONCAT_SORT": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "tabix": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "tabix": 1.21 + }, + "TABIX_EXT_VCF": { "tabix": 1.21 }, "TABIX_VC_FREEBAYES": { "tabix": 1.21 }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, "VCFLIB_VCFFILTER": { "vcflib": "1.0.14" }, @@ -217,23 +539,25 @@ "variant_calling/freebayes", "variant_calling/freebayes/testN", "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", "variant_calling/freebayes/testT", "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", "variant_calling/normalized", "variant_calling/normalized/testN", - "variant_calling/normalized/testN/testN.freebayes.norm.vcf.gz", - "variant_calling/normalized/testN/testN.freebayes.norm.vcf.gz.tbi", - "variant_calling/normalized/testN/testN.strelka.norm.vcf.gz", - "variant_calling/normalized/testN/testN.strelka.norm.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz.tbi", + 
"variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz.tbi", "variant_calling/normalized/testT", - "variant_calling/normalized/testT/testT.freebayes.norm.vcf.gz", - "variant_calling/normalized/testT/testT.freebayes.norm.vcf.gz.tbi", - "variant_calling/normalized/testT/testT.strelka.norm.vcf.gz", - "variant_calling/normalized/testT/testT.strelka.norm.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz.tbi", "variant_calling/strelka", "variant_calling/strelka/testN", "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", @@ -291,10 +615,10 @@ [ "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", - "testN.freebayes.norm.vcf.gz:md5,18992a755b06d72374bb072cc8af86f9", - "testN.strelka.norm.vcf.gz:md5,ba9aabddec39a8bcbdb38c46f7a26515", - "testT.freebayes.norm.vcf.gz:md5,574d3d9e73986d07839d5e720c3ea929", - "testT.strelka.norm.vcf.gz:md5,2dd9f7f6dac9c10afcf01f148a486799", + "testN.freebayes.filtered.norm.sorted.vcf.gz:md5,18992a755b06d72374bb072cc8af86f9", + "testN.strelka.variants.norm.sorted.vcf.gz:md5,ba9aabddec39a8bcbdb38c46f7a26515", + "testT.freebayes.filtered.norm.sorted.vcf.gz:md5,574d3d9e73986d07839d5e720c3ea929", + "testT.strelka.variants.norm.sorted.vcf.gz:md5,2dd9f7f6dac9c10afcf01f148a486799", "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", @@ -305,6 +629,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.0" }, - "timestamp": 
"2025-11-02T13:43:59.34705" + "timestamp": "2025-11-03T10:44:17.917186" } } \ No newline at end of file diff --git a/tests/postprocess_filtering.nf.test b/tests/postprocess_filtering.nf.test new file mode 100644 index 0000000000..5c49bbf945 --- /dev/null +++ b/tests/postprocess_filtering.nf.test @@ -0,0 +1,28 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + tag "pipeline_sarek" + + def test_scenario = [ + [ + name: "-profile test --filter_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + filter_vcfs: true, + tools: 'freebayes,strelka' + ], + include_freebayes_unfiltered: true, + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.get_test(scenario)) + } +} diff --git a/tests/postprocess_filtering.nf.test.snap b/tests/postprocess_filtering.nf.test.snap new file mode 100644 index 0000000000..8895a6e04d --- /dev/null +++ b/tests/postprocess_filtering.nf.test.snap @@ -0,0 +1,286 @@ +{ + "-profile test --filter_vcfs --tools freebayes,strelka": { + "content": [ + 41, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTER_VCFS": { + "bcftools": 1.22 + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "tabix": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + 
"multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", + "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + "reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + 
"reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/filtered", + "variant_calling/filtered/testN", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + 
"variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + 
"mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + "testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + "testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + "testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM 
files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.freebayes.filtered.bcftools_filtered.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "testN.strelka.variants.bcftools_filtered.vcf.gz:md5,35a541d45222013a9887bbe1678f9444", + "testT.freebayes.filtered.bcftools_filtered.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "testT.strelka.variants.bcftools_filtered.vcf.gz:md5,46757c1dd7a5c1f62b39d91281016521", + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T11:32:58.31191" + } +} \ No newline at end of file diff --git a/tests/postprocess_normalization.nf.test.snap b/tests/postprocess_normalization.nf.test.snap index 71fc34c64f..8f7cf314d6 100644 --- a/tests/postprocess_normalization.nf.test.snap +++ b/tests/postprocess_normalization.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test --normalize_vcfs --tools freebayes,strelka": { "content": [ - 55, + 53, { "ADD_INFO_TO_VCF": { "gawk": "5.3.0" @@ -39,10 +39,10 @@ "TABIX_EXT_VCF": { "tabix": 1.21 }, - "TABIX_VCFS_NORM_SORT": { + "TABIX_VC_FREEBAYES": { "tabix": 1.21 }, - "TABIX_VC_FREEBAYES": { + "TABIX_VC_FREEBAYES_FILT": { "tabix": 1.21 }, "VCFLIB_VCFFILTER": { @@ -60,7 +60,6 @@ }, [ "csv", - "csv/variantcalled.csv", "multiqc", "multiqc/multiqc_data", 
"multiqc/multiqc_data/bcftools-stats-subtypes.txt", @@ -201,23 +200,25 @@ "variant_calling/freebayes", "variant_calling/freebayes/testN", "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", "variant_calling/freebayes/testT", "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", "variant_calling/normalized", "variant_calling/normalized/testN", - "variant_calling/normalized/testN/testN.freebayes.norm.vcf.gz", - "variant_calling/normalized/testN/testN.freebayes.norm.vcf.gz.tbi", - "variant_calling/normalized/testN/testN.strelka.norm.vcf.gz", - "variant_calling/normalized/testN/testN.strelka.norm.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz.tbi", "variant_calling/normalized/testT", - "variant_calling/normalized/testT/testT.freebayes.norm.vcf.gz", - "variant_calling/normalized/testT/testT.freebayes.norm.vcf.gz.tbi", - "variant_calling/normalized/testT/testT.strelka.norm.vcf.gz", - "variant_calling/normalized/testT/testT.strelka.norm.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz.tbi", 
"variant_calling/strelka", "variant_calling/strelka/testN", "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", @@ -275,10 +276,10 @@ [ "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", - "testN.freebayes.norm.vcf.gz:md5,18992a755b06d72374bb072cc8af86f9", - "testN.strelka.norm.vcf.gz:md5,ba9aabddec39a8bcbdb38c46f7a26515", - "testT.freebayes.norm.vcf.gz:md5,574d3d9e73986d07839d5e720c3ea929", - "testT.strelka.norm.vcf.gz:md5,2dd9f7f6dac9c10afcf01f148a486799", + "testN.freebayes.filtered.norm.sorted.vcf.gz:md5,18992a755b06d72374bb072cc8af86f9", + "testN.strelka.variants.norm.sorted.vcf.gz:md5,ba9aabddec39a8bcbdb38c46f7a26515", + "testT.freebayes.filtered.norm.sorted.vcf.gz:md5,574d3d9e73986d07839d5e720c3ea929", + "testT.strelka.variants.norm.sorted.vcf.gz:md5,2dd9f7f6dac9c10afcf01f148a486799", "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", @@ -289,6 +290,6 @@ "nf-test": "0.9.2", "nextflow": "25.10.0" }, - "timestamp": "2025-11-02T13:53:49.66037" + "timestamp": "2025-11-03T11:48:18.652411" } } \ No newline at end of file diff --git a/tests/variant_calling_freebayes.nf.test.snap b/tests/variant_calling_freebayes.nf.test.snap index 7dbb9073c6..38407f7467 100644 --- a/tests/variant_calling_freebayes.nf.test.snap +++ b/tests/variant_calling_freebayes.nf.test.snap @@ -1,396 +1,7 @@ { - "-profile test --tools freebayes --no_intervals --wes --input fastq_pair.csv": { - "content": [ - 39, - { - "BCFTOOLS_SORT": { - "bcftools": 1.21 - }, - "BCFTOOLS_STATS": { - "bcftools": 1.21 - }, - "BWAMEM1_INDEX": { - "bwa": "0.7.18-r1243-dirty" - }, - "BWAMEM1_MEM": { - "bwa": "0.7.18-r1243-dirty", - "samtools": 1.21 - }, - "FASTQC": { - "fastqc": "0.12.1" - }, - "FREEBAYES": { - "freebayes": "1.3.10" - }, - "GATK4_APPLYBQSR": { - 
"gatk4": "4.6.1.0" - }, - "GATK4_BASERECALIBRATOR": { - "gatk4": "4.6.1.0" - }, - "GATK4_MARKDUPLICATES": { - "gatk4": "4.6.1.0", - "samtools": 1.21 - }, - "INDEX_CRAM": { - "samtools": 1.21 - }, - "MOSDEPTH": { - "mosdepth": "0.3.10" - }, - "SAMTOOLS_STATS": { - "samtools": 1.21 - }, - "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { - "tabix": 1.21 - }, - "TABIX_VC_FREEBAYES": { - "tabix": 1.21 - }, - "VCFLIB_VCFFILTER": { - "vcflib": "1.0.14" - }, - "VCFTOOLS_TSTV_COUNT": { - "vcftools": "0.1.16" - } - }, - [ - "csv", - "csv/markduplicates.csv", - "csv/markduplicates_no_table.csv", - "csv/recalibrated.csv", - "csv/variantcalled.csv", - "multiqc", - "multiqc/multiqc_data", - "multiqc/multiqc_data/bcftools-stats-subtypes.txt", - "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", - "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", - "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", - "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", - "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", - "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", - "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", - "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", - "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", - "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", - "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", - "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", - "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", - "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", - "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", - "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", - "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", - "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", - 
"multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", - "multiqc/multiqc_data/gatk_base_recalibrator.txt", - "multiqc/multiqc_data/llms-full.txt", - "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", - "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", - "multiqc/multiqc_data/mosdepth_cov_dist.txt", - "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", - "multiqc/multiqc_data/mosdepth_perchrom.txt", - "multiqc/multiqc_data/multiqc.log", - "multiqc/multiqc_data/multiqc.parquet", - "multiqc/multiqc_data/multiqc_bcftools_stats.txt", - "multiqc/multiqc_data/multiqc_citations.txt", - "multiqc/multiqc_data/multiqc_data.json", - "multiqc/multiqc_data/multiqc_fastqc.txt", - "multiqc/multiqc_data/multiqc_general_stats.txt", - "multiqc/multiqc_data/multiqc_picard_dups.txt", - "multiqc/multiqc_data/multiqc_samtools_stats.txt", - "multiqc/multiqc_data/multiqc_software_versions.txt", - "multiqc/multiqc_data/multiqc_sources.txt", - "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", - "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", - "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", - "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", - "multiqc/multiqc_data/picard_deduplication.txt", - "multiqc/multiqc_data/samtools-stats-dp.txt", - "multiqc/multiqc_data/samtools_alignment_plot.txt", - "multiqc/multiqc_data/vcftools_tstv_by_count.txt", - "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", - "multiqc/multiqc_plots", - "multiqc/multiqc_plots/pdf", - "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", - "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", - "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", - "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", - "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", - "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", - 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", - "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", - "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", - "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", - "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", - "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", - "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", - "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", - "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", - "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", - "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", - "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", - "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", - "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", - "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", - "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", - "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", - "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", - "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", - "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", - "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", - "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", - "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", - "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", - "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", - "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", - "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", - "multiqc/multiqc_plots/png", - 
"multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", - "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", - "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", - "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", - "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", - "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", - "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", - "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", - "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", - "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", - "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", - "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", - "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", - "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", - "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", - "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", - "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", - "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", - "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", - "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", - "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", - "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", - "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", - "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", - "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", - "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", - "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", - 
"multiqc/multiqc_plots/png/picard_deduplication-pct.png", - "multiqc/multiqc_plots/png/samtools-stats-dp.png", - "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", - "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", - "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", - "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", - "multiqc/multiqc_plots/svg", - "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", - "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", - "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", - "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", - "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", - "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", - "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", - "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", - "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", - "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", - "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", - "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", - "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", - "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", - "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", - "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", - 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", - "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", - "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", - "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", - "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", - "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", - "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", - "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", - "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", - "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", - "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", - "multiqc/multiqc_report.html", - "no_intervals.bed", - "no_intervals.bed.gz", - "no_intervals.bed.gz.tbi", - "pipeline_info", - "pipeline_info/nf_core_sarek_software_mqc_versions.yml", - "preprocessing", - "preprocessing/markduplicates", - "preprocessing/markduplicates/test", - "preprocessing/markduplicates/test/test.md.cram", - "preprocessing/markduplicates/test/test.md.cram.crai", - "preprocessing/markduplicates/test2", - "preprocessing/markduplicates/test2/test2.md.cram", - "preprocessing/markduplicates/test2/test2.md.cram.crai", - "preprocessing/recal_table", - "preprocessing/recal_table/test", - "preprocessing/recal_table/test/test.recal.table", - "preprocessing/recal_table/test2", - "preprocessing/recal_table/test2/test2.recal.table", - "preprocessing/recalibrated", - "preprocessing/recalibrated/test", - "preprocessing/recalibrated/test/test.recal.cram", - "preprocessing/recalibrated/test/test.recal.cram.crai", - "preprocessing/recalibrated/test2", - "preprocessing/recalibrated/test2/test2.recal.cram", - "preprocessing/recalibrated/test2/test2.recal.cram.crai", - "reference", - "reports", - "reports/bcftools", - "reports/bcftools/freebayes", - "reports/bcftools/freebayes/test", - "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", - 
"reports/bcftools/freebayes/test2_vs_test", - "reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.bcftools_stats.txt", - "reports/fastqc", - "reports/fastqc/test-test_L1", - "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", - "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", - "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", - "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", - "reports/fastqc/test2-test_L1", - "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", - "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", - "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", - "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", - "reports/markduplicates", - "reports/markduplicates/test", - "reports/markduplicates/test/test.md.cram.metrics", - "reports/markduplicates/test2", - "reports/markduplicates/test2/test2.md.cram.metrics", - "reports/mosdepth", - "reports/mosdepth/test", - "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.md.mosdepth.summary.txt", - "reports/mosdepth/test/test.md.per-base.bed.gz", - "reports/mosdepth/test/test.md.per-base.bed.gz.csi", - "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.recal.mosdepth.summary.txt", - "reports/mosdepth/test/test.recal.per-base.bed.gz", - "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", - "reports/mosdepth/test2", - "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", - "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", - "reports/mosdepth/test2/test2.md.per-base.bed.gz", - "reports/mosdepth/test2/test2.md.per-base.bed.gz.csi", - "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", - "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", - "reports/mosdepth/test2/test2.recal.per-base.bed.gz", - "reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi", - "reports/samtools", - "reports/samtools/test", - 
"reports/samtools/test/test.md.cram.stats", - "reports/samtools/test/test.recal.cram.stats", - "reports/samtools/test2", - "reports/samtools/test2/test2.md.cram.stats", - "reports/samtools/test2/test2.recal.cram.stats", - "reports/vcftools", - "reports/vcftools/freebayes", - "reports/vcftools/freebayes/test", - "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", - "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", - "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", - "reports/vcftools/freebayes/test2_vs_test", - "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.FILTER.summary", - "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.count", - "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.qual", - "variant_calling", - "variant_calling/freebayes", - "variant_calling/freebayes/test", - "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", - "variant_calling/freebayes/test/test.freebayes.vcf.gz", - "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", - "variant_calling/freebayes/test2_vs_test", - "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz", - "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz", - "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi" - ], - [ - "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", - "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", - "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", - "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", - "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", - "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", - "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", - 
"fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", - "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", - "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", - "mosdepth-coverage-per-contig-single.txt:md5,5e1e538631fd3e6e45158f9edf33ee8d", - "mosdepth-cumcoverage-dist-id.txt:md5,34725df04f598cc54a81ec32f8c7ae41", - "mosdepth_perchrom.txt:md5,5e1e538631fd3e6e45158f9edf33ee8d", - "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", - "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", - "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "samtools-stats-dp.txt:md5,7a0481b59cdd57cc0b8bc9b5641614c6", - "samtools_alignment_plot.txt:md5,8e6178a26fe2a4fc4f45fac3175ba6c6", - "test.freebayes.filtered.bcftools_stats.txt:md5,dde124ceaf6f109cd274b837b950096b", - "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,6f942caec2f4f02c69e1216226fa44a9", - "test.md.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", - "test.md.mosdepth.summary.txt:md5,0010c2396a3173c7cf4983abe2eb6a4c", - "test.md.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", - "test.md.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", - "test.recal.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", - "test.recal.mosdepth.summary.txt:md5,0010c2396a3173c7cf4983abe2eb6a4c", - "test.recal.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", - "test.recal.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", - "test2.md.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", - "test2.md.mosdepth.summary.txt:md5,d5e4084de2ea2a0a7b60b2d71c804d4b", - 
"test2.md.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", - "test2.md.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", - "test2.recal.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", - "test2.recal.mosdepth.summary.txt:md5,d5e4084de2ea2a0a7b60b2d71c804d4b", - "test2.recal.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", - "test2.recal.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", - "test.freebayes.filtered.FILTER.summary:md5,87e753ba2ad969475fb55661852f75e0", - "test.freebayes.filtered.TsTv.count:md5,845f64e5bb4224af98f3a47294cd5483", - "test2_vs_test.freebayes.filtered.FILTER.summary:md5,126e83dcd37b82420f7c5d7b235479f1", - "test2_vs_test.freebayes.filtered.TsTv.count:md5,28919c7d29c998681391d2027af3e0f9" - ], - "No BAM files", - [ - "test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", - "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", - "test.recal.cram:md5,59ecc5c82c7af1283eea7507c590c831", - "test2.recal.cram:md5,bac87cf9290577fd9a4def63e046031f" - ], - [ - [ - "test.freebayes.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=732, phased=false, phasedAutodetect=false]" - ], - [ - "test2_vs_test.freebayes.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1447, phased=false, phasedAutodetect=false]" - ] - ], - [ - [ - "test.freebayes.filtered.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=107, phased=false, phasedAutodetect=false]" - ], - [ - "test2_vs_test.freebayes.filtered.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=137, phased=false, phasedAutodetect=false]" - ] - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-09-30T22:48:44.433126443" - }, "-profile test,tools_tumoronly --tools freebayes --input recalibrated_tumoronly.csv": { "content": [ - 18, + 19, { "BCFTOOLS_SORT": { "bcftools": 1.21 @@ -422,6 +33,9 @@ "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { "tabix": 1.21 }, + 
"TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, "VCFLIB_VCFFILTER": { "vcflib": "1.0.14" }, @@ -547,6 +161,7 @@ "variant_calling/freebayes", "variant_calling/freebayes/sample2", "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz", "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi" ], @@ -581,14 +196,14 @@ ] ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2025-09-30T22:50:24.463341446" + "timestamp": "2025-11-04T12:14:50.842752" }, "-profile test --tools freebayes --wes --nucleotides_per_second 20": { "content": [ - 32, + 33, { "BCFTOOLS_SORT": { "bcftools": 1.21 @@ -646,6 +261,9 @@ "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { "tabix": 1.21 }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, "VCFLIB_VCFFILTER": { "vcflib": "1.0.14" }, @@ -863,86 +481,280 @@ "reports/mosdepth/test/test.recal.regions.bed.gz", "reports/mosdepth/test/test.recal.regions.bed.gz.csi", "reports/samtools", - "reports/samtools/test", - "reports/samtools/test/test.md.cram.stats", - "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + 
"variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,c9dd01d00f91c6483202dcde61aa1f67", + "mosdepth-cumcoverage-dist-id.txt:md5,00c547f15d022eb446ec6367739c81b8", + "mosdepth_perchrom.txt:md5,c9dd01d00f91c6483202dcde61aa1f67", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,300b95526a211b05f18aaffd037dacd3", + "samtools_alignment_plot.txt:md5,5053f650b8612fe5e8527b0bca8ab905", + "test.freebayes.filtered.bcftools_stats.txt:md5,5ad7fc8a51dd1dc0f827a46acbed01c4", + "test.md.mosdepth.global.dist.txt:md5,531a83245143e7975f18e1988c876138", + "test.md.mosdepth.region.dist.txt:md5,d25723bdd3fec6b17d2462abfa097b9e", + 
"test.md.mosdepth.summary.txt:md5,87be70cd1237d7af9aa40d8cd8b3a817", + "test.md.per-base.bed.gz:md5,c53d26b767b6e75b3e502438a77f89b2", + "test.md.per-base.bed.gz.csi:md5,c3066b00781e14a9db5fc0bf0d47d777", + "test.md.regions.bed.gz:md5,f96fa1cdae548eb7e54ce6a481d928b9", + "test.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.recal.mosdepth.global.dist.txt:md5,a3e6c8f6d4b5e909d0527be83a93fbae", + "test.recal.mosdepth.region.dist.txt:md5,d25723bdd3fec6b17d2462abfa097b9e", + "test.recal.mosdepth.summary.txt:md5,ca5424a709268a61200a2dc2865f1a14", + "test.recal.per-base.bed.gz:md5,8aaf9cb3dd5c9643e77aba91293fc39d", + "test.recal.per-base.bed.gz.csi:md5,d8038c7d544abd5d6335f2541de4e769", + "test.recal.regions.bed.gz:md5,f96fa1cdae548eb7e54ce6a481d928b9", + "test.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.freebayes.filtered.FILTER.summary:md5,259470a9823d503d5639946162b2ed19", + "test.freebayes.filtered.TsTv.count:md5,162253eb6c406300678985b3ac7dc868" + ], + "No BAM files", + [ + "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", + "test.recal.cram:md5,e0b8589bcc82aecad1fbbc256e85a6ee" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=475, phased=false, phasedAutodetect=false]" + ] + ], + [ + "test.freebayes.filtered.vcf.gz:md5,2062214d7062a3cf37ea4c1d123c1a43" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-04T11:54:42.53531" + }, + "-profile test --tools freebayes --no_intervals --input recalibrated_tumoronly.csv": { + "content": [ + 14, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES": { + 
"tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + 
"multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/sample2", + "reports/bcftools/freebayes/sample2/sample2.freebayes.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", "reports/vcftools", 
"reports/vcftools/freebayes", - "reports/vcftools/freebayes/test", - "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", - "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", - "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/sample2", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.qual", "variant_calling", "variant_calling/freebayes", - "variant_calling/freebayes/test", - "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", - "variant_calling/freebayes/test/test.freebayes.vcf.gz", - "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi" + "variant_calling/freebayes/sample2", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi" ], [ - "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", - "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", - "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", - "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", - "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", - "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", - "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", - "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", - "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", - "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", - 
"mosdepth-coverage-per-contig-single.txt:md5,c9dd01d00f91c6483202dcde61aa1f67", - "mosdepth-cumcoverage-dist-id.txt:md5,00c547f15d022eb446ec6367739c81b8", - "mosdepth_perchrom.txt:md5,c9dd01d00f91c6483202dcde61aa1f67", - "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", - "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", - "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "samtools-stats-dp.txt:md5,300b95526a211b05f18aaffd037dacd3", - "samtools_alignment_plot.txt:md5,5053f650b8612fe5e8527b0bca8ab905", - "test.freebayes.filtered.bcftools_stats.txt:md5,5ad7fc8a51dd1dc0f827a46acbed01c4", - "test.md.mosdepth.global.dist.txt:md5,531a83245143e7975f18e1988c876138", - "test.md.mosdepth.region.dist.txt:md5,d25723bdd3fec6b17d2462abfa097b9e", - "test.md.mosdepth.summary.txt:md5,87be70cd1237d7af9aa40d8cd8b3a817", - "test.md.per-base.bed.gz:md5,c53d26b767b6e75b3e502438a77f89b2", - "test.md.per-base.bed.gz.csi:md5,c3066b00781e14a9db5fc0bf0d47d777", - "test.md.regions.bed.gz:md5,f96fa1cdae548eb7e54ce6a481d928b9", - "test.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", - "test.recal.mosdepth.global.dist.txt:md5,a3e6c8f6d4b5e909d0527be83a93fbae", - "test.recal.mosdepth.region.dist.txt:md5,d25723bdd3fec6b17d2462abfa097b9e", - "test.recal.mosdepth.summary.txt:md5,ca5424a709268a61200a2dc2865f1a14", - "test.recal.per-base.bed.gz:md5,8aaf9cb3dd5c9643e77aba91293fc39d", - "test.recal.per-base.bed.gz.csi:md5,d8038c7d544abd5d6335f2541de4e769", - "test.recal.regions.bed.gz:md5,f96fa1cdae548eb7e54ce6a481d928b9", - "test.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", - "test.freebayes.filtered.FILTER.summary:md5,259470a9823d503d5639946162b2ed19", - 
"test.freebayes.filtered.TsTv.count:md5,162253eb6c406300678985b3ac7dc868" + "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", + "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.freebayes.filtered.bcftools_stats.txt:md5,b83440f3699a3258015d0ce60eea0d46", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.freebayes.filtered.FILTER.summary:md5,fac42829b9b347413acbfc9c0a17fef0", + "sample2.freebayes.filtered.TsTv.count:md5,f864a8b1d6bab1d5f877b60dde36863f" ], "No BAM files", - [ - "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", - "test.recal.cram:md5,e0b8589bcc82aecad1fbbc256e85a6ee" - ], + "No CRAM files", [ [ - "test.freebayes.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=475, phased=false, phasedAutodetect=false]" + "sample2.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=15485, phased=false, phasedAutodetect=false]" ] ], [ - "test.freebayes.filtered.vcf.gz:md5,2062214d7062a3cf37ea4c1d123c1a43" + "sample2.freebayes.filtered.vcf.gz:md5,e68ce412428016fcc1f09735a91cdef" ] ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2025-09-30T22:41:07.450629772" + "timestamp": "2025-11-04T12:22:29.186457" }, "-profile test --tools freebayes --no_intervals": { "content": [ - 23, + 24, { "BCFTOOLS_SORT": { "bcftools": 1.21 @@ -988,6 +800,9 @@ "TABIX_VC_FREEBAYES": { 
"tabix": 1.21 }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, "VCFLIB_VCFFILTER": { "vcflib": "1.0.14" }, @@ -1217,6 +1032,7 @@ "variant_calling/freebayes", "variant_calling/freebayes/test", "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/test/test.freebayes.vcf.gz", "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi" ], @@ -1277,9 +1093,9 @@ }, "timestamp": "2025-09-30T22:43:46.993750433" }, - "-profile test --tools freebayes --no_intervals --input recalibrated_tumoronly.csv": { + "-profile test --tools freebayes --wes --nucleotides_per_second 20 --input fastq_pair.csv": { "content": [ - 13, + 56, { "BCFTOOLS_SORT": { "bcftools": 1.21 @@ -1287,10 +1103,42 @@ "BCFTOOLS_STATS": { "bcftools": 1.21 }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, "FREEBAYES": { "freebayes": "1.3.10" }, - "GATK4_CREATESEQUENCEDICTIONARY": { + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_GATHERBQSRREPORTS": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MERGE_CRAM": { + "samtools": 1.21 + }, + "MERGE_FREEBAYES": { "gatk4": "4.6.1.0" }, "MOSDEPTH": { @@ -1302,7 +1150,10 @@ "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { "tabix": 1.21 }, - "TABIX_VC_FREEBAYES": { + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { "tabix": 1.21 }, "VCFLIB_VCFFILTER": { @@ -1314,6 +1165,9 @@ }, [ "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", "csv/variantcalled.csv", "multiqc", "multiqc/multiqc_data", @@ -1324,6 +1178,20 @@ 
"multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", "multiqc/multiqc_data/llms-full.txt", "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", @@ -1335,10 +1203,17 @@ "multiqc/multiqc_data/multiqc_bcftools_stats.txt", "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", "multiqc/multiqc_data/multiqc_samtools_stats.txt", "multiqc/multiqc_data/multiqc_software_versions.txt", "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + 
"multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", "multiqc/multiqc_data/samtools-stats-dp.txt", "multiqc/multiqc_data/samtools_alignment_plot.txt", "multiqc/multiqc_data/vcftools_tstv_by_count.txt", @@ -1354,9 +1229,25 @@ "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", @@ -1372,9 +1263,25 @@ "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", "multiqc/multiqc_plots/png/samtools-stats-dp.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", @@ -1390,85 +1297,226 @@ 
"multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", "multiqc/multiqc_report.html", - "no_intervals.bed", - "no_intervals.bed.gz", - "no_intervals.bed.gz.tbi", 
"pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", "reference", - "reference/dict", "reports", "reports/bcftools", "reports/bcftools/freebayes", - "reports/bcftools/freebayes/sample2", - "reports/bcftools/freebayes/sample2/sample2.freebayes.filtered.bcftools_stats.txt", - "reports/mosdepth", - "reports/mosdepth/sample2", - "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", - "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", - "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", - "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/test2_vs_test", + "reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + 
"reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.per-base.bed.gz", + "reports/mosdepth/test2/test2.md.per-base.bed.gz.csi", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + 
"reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", "reports/samtools", - "reports/samtools/sample2", - "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats", "reports/vcftools", "reports/vcftools/freebayes", - "reports/vcftools/freebayes/sample2", - "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.FILTER.summary", - "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.count", - "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/test2_vs_test", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.qual", "variant_calling", "variant_calling/freebayes", - "variant_calling/freebayes/sample2", - "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz", - "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz", - "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi" + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + 
"variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi" ], [ - "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", - "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", - "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", - "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", - "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", - "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", - "sample2.freebayes.filtered.bcftools_stats.txt:md5,b83440f3699a3258015d0ce60eea0d46", - "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", - "sample2.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", - "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", - "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", - "sample2.freebayes.filtered.FILTER.summary:md5,fac42829b9b347413acbfc9c0a17fef0", - "sample2.freebayes.filtered.TsTv.count:md5,f864a8b1d6bab1d5f877b60dde36863f" + "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", + "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", + "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", + "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", + 
"fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", + "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", + "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", + "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", + "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", + "mosdepth-coverage-per-contig-single.txt:md5,751cdbc07ca147c86d83a0232c40ef9e", + "mosdepth-cumcoverage-dist-id.txt:md5,d730579d62465113709f6f061629c687", + "mosdepth_perchrom.txt:md5,751cdbc07ca147c86d83a0232c40ef9e", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,ff7906db0da2f9c9ba5ce85c34694124", + "samtools_alignment_plot.txt:md5,89d0a6e7076223e9feadbecd794948d5", + "test.freebayes.filtered.bcftools_stats.txt:md5,b313075b3e9854e0b561dd8d754bf097", + "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,17df64b64f9235afa93aae6bbe08e283", + "test.md.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.md.mosdepth.region.dist.txt:md5,835fdc6fa52cc33e6fb76c0c20a8a6c3", + "test.md.mosdepth.summary.txt:md5,dcc9ab2bf3248903e02d8da87e678977", + "test.md.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", + "test.md.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", + "test.md.regions.bed.gz:md5,99cc80b920ba574e7d9ef8f59f54f7c6", + "test.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.recal.mosdepth.global.dist.txt:md5,0b3162def977123809598639f7698121", + 
"test.recal.mosdepth.region.dist.txt:md5,835fdc6fa52cc33e6fb76c0c20a8a6c3", + "test.recal.mosdepth.summary.txt:md5,a8455eb2947de529abfa62b303986e0f", + "test.recal.per-base.bed.gz:md5,c075ccd2b847c7c04061a39717faeb30", + "test.recal.per-base.bed.gz.csi:md5,4816eeb9af254ca40177b08cf11b98d2", + "test.recal.regions.bed.gz:md5,99cc80b920ba574e7d9ef8f59f54f7c6", + "test.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test2.md.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.md.mosdepth.region.dist.txt:md5,3211135329e4077bd9bf0ba488e14371", + "test2.md.mosdepth.summary.txt:md5,ce0eb6d33c6d0dc720fbc6d1811abef8", + "test2.md.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", + "test2.md.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", + "test2.md.regions.bed.gz:md5,0bb2549180165a99680ba3e453ea312f", + "test2.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test2.recal.mosdepth.global.dist.txt:md5,a1ef7e662ce993da4668e804952014ce", + "test2.recal.mosdepth.region.dist.txt:md5,3211135329e4077bd9bf0ba488e14371", + "test2.recal.mosdepth.summary.txt:md5,70ad653c0c98baeeaf5085f1209a7bdb", + "test2.recal.per-base.bed.gz:md5,e992ef845ec91a3612297952a23ba579", + "test2.recal.per-base.bed.gz.csi:md5,8072f447199c60f24b01eede8b557333", + "test2.recal.regions.bed.gz:md5,0bb2549180165a99680ba3e453ea312f", + "test2.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.freebayes.filtered.FILTER.summary:md5,449597c35ada505b4cb2530d5260e9d5", + "test.freebayes.filtered.TsTv.count:md5,162253eb6c406300678985b3ac7dc868", + "test2_vs_test.freebayes.filtered.FILTER.summary:md5,0ae7467f2311c1382173d70d8d7efb0b", + "test2_vs_test.freebayes.filtered.TsTv.count:md5,9dc940f98dae9c0b49c9468a491836d4" ], "No BAM files", - "No CRAM files", + [ + "test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.recal.cram:md5,6a28675f6e294b3822952968d86a4868", + 
"test2.recal.cram:md5,cf197ddea4e392bfdccc817787fc4eb4" + ], [ [ - "sample2.freebayes.vcf.gz", - "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=15485, phased=false, phasedAutodetect=false]" + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=474, phased=false, phasedAutodetect=false]" + ], + [ + "test2_vs_test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=966, phased=false, phasedAutodetect=false]" ] ], [ - "sample2.freebayes.filtered.vcf.gz:md5,e68ce412428016fcc1f09735a91cdef" + "test.freebayes.filtered.vcf.gz:md5,c4793897a38d6781dc512a52d9046be5", + "test2_vs_test.freebayes.filtered.vcf.gz:md5,d1d0916fc56a666bd7637792047b82f8" ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.04.7" }, - "timestamp": "2025-09-30T22:52:03.48645421" + "timestamp": "2025-09-30T22:46:09.786733451" }, - "-profile test --tools freebayes --wes --nucleotides_per_second 20 --input fastq_pair.csv": { + "-profile test --tools freebayes --no_intervals --wes --input fastq_pair.csv": { "content": [ - 54, + 41, { "BCFTOOLS_SORT": { "bcftools": 1.21 @@ -1483,9 +1531,6 @@ "bwa": "0.7.18-r1243-dirty", "samtools": 1.21 }, - "CREATE_INTERVALS_BED": { - "gawk": "5.3.0" - }, "FASTQC": { "fastqc": "0.12.1" }, @@ -1498,9 +1543,6 @@ "GATK4_BASERECALIBRATOR": { "gatk4": "4.6.1.0" }, - "GATK4_GATHERBQSRREPORTS": { - "gatk4": "4.6.1.0" - }, "GATK4_MARKDUPLICATES": { "gatk4": "4.6.1.0", "samtools": 1.21 @@ -1508,12 +1550,6 @@ "INDEX_CRAM": { "samtools": 1.21 }, - "MERGE_CRAM": { - "samtools": 1.21 - }, - "MERGE_FREEBAYES": { - "gatk4": "4.6.1.0" - }, "MOSDEPTH": { "mosdepth": "0.3.10" }, @@ -1523,7 +1559,10 @@ "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { "tabix": 1.21 }, - "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { "tabix": 1.21 }, "VCFLIB_VCFFILTER": { @@ -1692,6 +1731,9 @@ "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", 
"multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", "pipeline_info", "pipeline_info/nf_core_sarek_software_mqc_versions.yml", "preprocessing", @@ -1741,34 +1783,22 @@ "reports/mosdepth", "reports/mosdepth/test", "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", "reports/mosdepth/test/test.md.mosdepth.summary.txt", "reports/mosdepth/test/test.md.per-base.bed.gz", "reports/mosdepth/test/test.md.per-base.bed.gz.csi", - "reports/mosdepth/test/test.md.regions.bed.gz", - "reports/mosdepth/test/test.md.regions.bed.gz.csi", "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", - "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", "reports/mosdepth/test/test.recal.mosdepth.summary.txt", "reports/mosdepth/test/test.recal.per-base.bed.gz", "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", - "reports/mosdepth/test/test.recal.regions.bed.gz", - "reports/mosdepth/test/test.recal.regions.bed.gz.csi", "reports/mosdepth/test2", "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", - "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", "reports/mosdepth/test2/test2.md.per-base.bed.gz", "reports/mosdepth/test2/test2.md.per-base.bed.gz.csi", - "reports/mosdepth/test2/test2.md.regions.bed.gz", - "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", - "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", "reports/mosdepth/test2/test2.recal.per-base.bed.gz", "reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi", - "reports/mosdepth/test2/test2.recal.regions.bed.gz", - "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", "reports/samtools", "reports/samtools/test", 
"reports/samtools/test/test.md.cram.stats", @@ -1790,10 +1820,12 @@ "variant_calling/freebayes", "variant_calling/freebayes/test", "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/test/test.freebayes.vcf.gz", "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", "variant_calling/freebayes/test2_vs_test", "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz.tbi", "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz", "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi" ], @@ -1808,78 +1840,72 @@ "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", - "mosdepth-coverage-per-contig-single.txt:md5,751cdbc07ca147c86d83a0232c40ef9e", - "mosdepth-cumcoverage-dist-id.txt:md5,d730579d62465113709f6f061629c687", - "mosdepth_perchrom.txt:md5,751cdbc07ca147c86d83a0232c40ef9e", + "mosdepth-coverage-per-contig-single.txt:md5,5e1e538631fd3e6e45158f9edf33ee8d", + "mosdepth-cumcoverage-dist-id.txt:md5,34725df04f598cc54a81ec32f8c7ae41", + "mosdepth_perchrom.txt:md5,5e1e538631fd3e6e45158f9edf33ee8d", "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "samtools-stats-dp.txt:md5,ff7906db0da2f9c9ba5ce85c34694124", - 
"samtools_alignment_plot.txt:md5,89d0a6e7076223e9feadbecd794948d5", - "test.freebayes.filtered.bcftools_stats.txt:md5,b313075b3e9854e0b561dd8d754bf097", - "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,17df64b64f9235afa93aae6bbe08e283", + "samtools-stats-dp.txt:md5,7a0481b59cdd57cc0b8bc9b5641614c6", + "samtools_alignment_plot.txt:md5,8e6178a26fe2a4fc4f45fac3175ba6c6", + "test.freebayes.filtered.bcftools_stats.txt:md5,dde124ceaf6f109cd274b837b950096b", + "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,6f942caec2f4f02c69e1216226fa44a9", "test.md.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", - "test.md.mosdepth.region.dist.txt:md5,835fdc6fa52cc33e6fb76c0c20a8a6c3", - "test.md.mosdepth.summary.txt:md5,dcc9ab2bf3248903e02d8da87e678977", + "test.md.mosdepth.summary.txt:md5,0010c2396a3173c7cf4983abe2eb6a4c", "test.md.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", "test.md.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", - "test.md.regions.bed.gz:md5,99cc80b920ba574e7d9ef8f59f54f7c6", - "test.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", - "test.recal.mosdepth.global.dist.txt:md5,0b3162def977123809598639f7698121", - "test.recal.mosdepth.region.dist.txt:md5,835fdc6fa52cc33e6fb76c0c20a8a6c3", - "test.recal.mosdepth.summary.txt:md5,a8455eb2947de529abfa62b303986e0f", - "test.recal.per-base.bed.gz:md5,c075ccd2b847c7c04061a39717faeb30", - "test.recal.per-base.bed.gz.csi:md5,4816eeb9af254ca40177b08cf11b98d2", - "test.recal.regions.bed.gz:md5,99cc80b920ba574e7d9ef8f59f54f7c6", - "test.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.recal.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.recal.mosdepth.summary.txt:md5,0010c2396a3173c7cf4983abe2eb6a4c", + "test.recal.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", + "test.recal.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", "test2.md.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", - 
"test2.md.mosdepth.region.dist.txt:md5,3211135329e4077bd9bf0ba488e14371", - "test2.md.mosdepth.summary.txt:md5,ce0eb6d33c6d0dc720fbc6d1811abef8", + "test2.md.mosdepth.summary.txt:md5,d5e4084de2ea2a0a7b60b2d71c804d4b", "test2.md.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", "test2.md.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", - "test2.md.regions.bed.gz:md5,0bb2549180165a99680ba3e453ea312f", - "test2.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", - "test2.recal.mosdepth.global.dist.txt:md5,a1ef7e662ce993da4668e804952014ce", - "test2.recal.mosdepth.region.dist.txt:md5,3211135329e4077bd9bf0ba488e14371", - "test2.recal.mosdepth.summary.txt:md5,70ad653c0c98baeeaf5085f1209a7bdb", - "test2.recal.per-base.bed.gz:md5,e992ef845ec91a3612297952a23ba579", - "test2.recal.per-base.bed.gz.csi:md5,8072f447199c60f24b01eede8b557333", - "test2.recal.regions.bed.gz:md5,0bb2549180165a99680ba3e453ea312f", - "test2.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", - "test.freebayes.filtered.FILTER.summary:md5,449597c35ada505b4cb2530d5260e9d5", - "test.freebayes.filtered.TsTv.count:md5,162253eb6c406300678985b3ac7dc868", - "test2_vs_test.freebayes.filtered.FILTER.summary:md5,0ae7467f2311c1382173d70d8d7efb0b", - "test2_vs_test.freebayes.filtered.TsTv.count:md5,9dc940f98dae9c0b49c9468a491836d4" + "test2.recal.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.recal.mosdepth.summary.txt:md5,d5e4084de2ea2a0a7b60b2d71c804d4b", + "test2.recal.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", + "test2.recal.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", + "test.freebayes.filtered.FILTER.summary:md5,87e753ba2ad969475fb55661852f75e0", + "test.freebayes.filtered.TsTv.count:md5,845f64e5bb4224af98f3a47294cd5483", + "test2_vs_test.freebayes.filtered.FILTER.summary:md5,126e83dcd37b82420f7c5d7b235479f1", + "test2_vs_test.freebayes.filtered.TsTv.count:md5,28919c7d29c998681391d2027af3e0f9" ], "No BAM files", [ 
"test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", - "test.recal.cram:md5,6a28675f6e294b3822952968d86a4868", - "test2.recal.cram:md5,cf197ddea4e392bfdccc817787fc4eb4" + "test.recal.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.recal.cram:md5,bac87cf9290577fd9a4def63e046031f" ], [ [ "test.freebayes.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=474, phased=false, phasedAutodetect=false]" + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=732, phased=false, phasedAutodetect=false]" ], [ "test2_vs_test.freebayes.vcf.gz", - "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=966, phased=false, phasedAutodetect=false]" + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1447, phased=false, phasedAutodetect=false]" ] ], [ - "test.freebayes.filtered.vcf.gz:md5,c4793897a38d6781dc512a52d9046be5", - "test2_vs_test.freebayes.filtered.vcf.gz:md5,d1d0916fc56a666bd7637792047b82f8" + [ + "test.freebayes.filtered.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=107, phased=false, phasedAutodetect=false]" + ], + [ + "test2_vs_test.freebayes.filtered.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=137, phased=false, phasedAutodetect=false]" + ] ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.04.7" }, - "timestamp": "2025-09-30T22:46:09.786733451" + "timestamp": "2025-09-30T22:48:44.433126443" } } \ No newline at end of file diff --git a/workflows/sarek/main.nf b/workflows/sarek/main.nf index 3b2dbff401..7168a80ae6 100644 --- a/workflows/sarek/main.nf +++ b/workflows/sarek/main.nf @@ -515,13 +515,17 @@ workflow SAREK { tools, cram_variant_calling_status_normal, BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, + BAM_VARIANT_CALLING_GERMLINE_ALL.out.tbi_all, cram_variant_calling_tumor_only, BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all, + BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.tbi_all, cram_variant_calling_pair, 
BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all, + BAM_VARIANT_CALLING_SOMATIC_ALL.out.tbi_all, fasta, fasta_fai, params.concatenate_vcfs, + params.filter_vcfs, params.normalize_vcfs, params.varlociraptor_chunk_size, varlociraptor_scenario_germline,