Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update STITCH #7510

Merged
merged 12 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions modules/nf-core/stitch/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- conda-forge
- bioconda
dependencies:
- bioconda::htslib=1.18
- bioconda::r-stitch=1.6.10
- conda-forge::r-base=4.3.1
- conda-forge::rsync=3.2.7
- bioconda::htslib=1.21
- bioconda::r-stitch=1.7.3
- conda-forge::r-base=4.4.2
- conda-forge::rsync=3.4.1
53 changes: 40 additions & 13 deletions modules/nf-core/stitch/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ process STITCH {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-stitch:1.6.10--r43h06b5641_0':
'biocontainers/r-stitch:1.6.10--r43h06b5641_0' }"
'https://depot.galaxyproject.org/singularity/r-stitch:1.7.3--r44h64f727c_0':
'biocontainers/r-stitch:1.7.3--r44h64f727c_0' }"

input:
tuple val(meta), path(collected_crams), path(collected_crais), path(cramlist)
tuple val(meta2), path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), val(chromosome_name), val(K), val(nGen)
tuple val(meta3), path(fasta), path(fasta_fai)
val seed
tuple val(meta), path(collected_crams), path(collected_crais), path(cramlist), path(samplename), path(posfile), path(input, stageAs: "input"), path(rdata, stageAs: "RData_in"), val(chromosome_name), val(K), val(nGen)
tuple val(meta2), path(fasta), path(fasta_fai)
val(seed)

output:
tuple val(meta), path("input", type: "dir") , emit: input
Expand All @@ -30,6 +29,7 @@ process STITCH {
def args2 = task.ext.args2 ?: ""
def generate_input_only = args2.contains( "--generateInputOnly TRUE" )
def bgen_output = args2.contains( "--output_format bgen" )
def suffix = bgen_output ? "bgen" : "vcf.gz"
def reads_ext = collected_crams ? collected_crams.extension.unique() : []
def rsync_cmd = rdata ? "rsync -rL ${rdata}/ RData" : ""
def stitch_cmd = seed ? "Rscript <(cat \$(which STITCH.R) | tail -n +2 | cat <(echo 'set.seed(${seed})') -)" : "STITCH.R"
Expand All @@ -38,6 +38,7 @@ process STITCH {
def reference_cmd = fasta ? "--reference ${fasta}" : ""
def regenerate_input_cmd = input && rdata && !cramlist ? "--regenerateInput FALSE --originalRegionName ${chromosome_name}" : ""
def rsync_version_cmd = rdata ? "rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //; s/ .*\$//')" : ""
def samplename_cmd = samplename ? "--sampleNames_file ${samplename}" : ""
"""
${rsync_cmd} ${args}

Expand All @@ -52,6 +53,8 @@ process STITCH {
${bamlist_cmd} \\
${reference_cmd} \\
${regenerate_input_cmd} \\
${samplename_cmd} \\
--output_filename ${prefix}.${suffix} \\
${args2}

cat <<-END_VERSIONS > versions.yml
Expand All @@ -64,17 +67,41 @@ process STITCH {

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ""
def _args = task.ext.args ?: ""
def args2 = task.ext.args2 ?: ""
def nb_samples = collected_crams.size()
def generate_input_only = args2.contains( "--generateInputOnly TRUE" )
def generate_plots_cmd = !generate_input_only ? "mkdir plots" : ""
def generate_vcf_cmd = !generate_input_only ? "touch ${prefix}.vcf.gz" : ""
def bgen_output = args2.contains( "--output_format bgen" )
def generate_plots_cmd = !generate_input_only
def generate_file_cmd = !generate_input_only ? bgen_output ? "touch ${prefix}.bgen" : "echo '' | gzip > ${prefix}.vcf.gz" : ""
def rsync_version_cmd = rdata ? "rsync: \$(rsync --version | head -n1 | sed 's/^rsync version //; s/ .*\$//')" : ""
"""
touch input
touch RData
${generate_plots_cmd}
${generate_vcf_cmd}
mkdir -p input
for i in {1..$nb_samples}
do
touch "input/sample.\$i.input.${chromosome_name}.RData"
done

${generate_file_cmd}

mkdir -p RData
touch "RData/EM.all.${chromosome_name}.RData"
touch "RData/end.${chromosome_name}.RData"
touch "RData/sampleNames.${chromosome_name}.RData"
touch "RData/start.${chromosome_name}.RData"
touch "RData/startEM.${chromosome_name}.RData"

if [ "${generate_plots_cmd}" == true ]
then
mkdir -p plots
touch "plots/alphaMat.${chromosome_name}.all.s.1.png"
touch "plots/alphaMat.${chromosome_name}.normalized.s.1.png"
touch "plots/hapSum.${chromosome_name}.s.1.png"
touch "plots/hapSum_log.${chromosome_name}.s.1.png"
touch "plots/metricsForPostImputationQC.${chromosome_name}.sample.jpg"
touch "plots/metricsForPostImputationQCChromosomeWide.${chromosome_name}.sample.jpg"
touch "plots/r2.${chromosome_name}.goodonly.jpg"
fi

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
13 changes: 6 additions & 7 deletions modules/nf-core/stitch/meta.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
name: "stitch"
description: "STITCH is an R program for reference panel free, read aware, low coverage
sequencing genotype imputation. STITCH runs on a set of samples with sequencing
Expand Down Expand Up @@ -40,11 +39,11 @@ input:
description: |
Text file with the path to the cram files to use in imputation, one per line. Since the cram files are staged to the working directory for the process, this file should just contain the file names without any pre-pending path.
pattern: "*.txt"
- - meta2:
type: map
description: |
Groovy Map containing information about the set of positions to run the imputation over
e.g. `[ id:'test' ]`
- samplename:
type: file
description: (Optional) File with the list of sample names to impute, in the same order as in the bamlist.
One sample name per line.
pattern: "*.{txt}"
- posfile:
type: file
description: |
Expand Down Expand Up @@ -73,7 +72,7 @@ input:
description: Number of generations since founding of the population to use for
imputation. Refer to the documentation for the `--nGen` argument of STITCH
for more information.
- - meta3:
- - meta2:
type: map
description: |
Groovy Map containing information about the reference genome used
Expand Down
Loading
Loading