diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fea0044..c09836ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#501](https://github.com/nf-core/funcscan/pull/501) Fixed issue with BAKTA HMMs not being staged correctly (reported by Yusuke Hioki, fix by @jfy133) + ### `Dependencies` +| Tool | Previous Version | New Version | +| ----- | ---------------- | ----------- | +| Bakta | 1.10.4 | 1.11.4 | + ### `Deprecated` ## v3.0.0 - French Chocolatine - [2025-10-04] diff --git a/conf/modules.config b/conf/modules.config index 2812093b..61f71854 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -207,7 +207,6 @@ process { params.annotation_bakta_gap ? '' : '--skip-gap', params.annotation_bakta_ori ? '' : '--skip-ori', params.annotation_bakta_activate_plot ? '' : '--skip-plot', - params.annotation_bakta_hmms ? '--hmms ${params.annotation_bakta_hmms}' : '', ].join(' ').trim() } diff --git a/modules.json b/modules.json index eac420fd..2bff465f 100644 --- a/modules.json +++ b/modules.json @@ -62,12 +62,12 @@ }, "bakta/bakta": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "72c983560c9b9c2a02ff636451a5e5008f7d020b", "installed_by": ["modules"] }, "bakta/baktadbdownload": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "72c983560c9b9c2a02ff636451a5e5008f7d020b", "installed_by": ["modules"] }, "deeparg/downloaddata": { diff --git a/modules/nf-core/bakta/bakta/environment.yml b/modules/nf-core/bakta/bakta/environment.yml index c1b616a4..0640aa34 100644 --- a/modules/nf-core/bakta/bakta/environment.yml +++ b/modules/nf-core/bakta/bakta/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::bakta=1.10.4 + - bioconda::bakta=1.11.4 diff --git a/modules/nf-core/bakta/bakta/main.nf b/modules/nf-core/bakta/bakta/main.nf index 4d192e45..7de3cdc1 100644 --- a/modules/nf-core/bakta/bakta/main.nf +++ b/modules/nf-core/bakta/bakta/main.nf @@ -1,48 +1,55 @@ process BAKTA_BAKTA { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bakta:1.10.4--pyhdfd78af_0' : - 'biocontainers/bakta:1.10.4--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/bakta:1.11.4--pyhdfd78af_0' + : 'biocontainers/bakta:1.11.4--pyhdfd78af_0'}" input: tuple val(meta), path(fasta) path db path proteins path prodigal_tf + path regions + path hmms output: - tuple val(meta), path("${prefix}.embl") , emit: embl - tuple val(meta), path("${prefix}.faa") , emit: faa - tuple val(meta), path("${prefix}.ffn") , emit: ffn - tuple val(meta), path("${prefix}.fna") , emit: fna - tuple val(meta), path("${prefix}.gbff") , emit: gbff - tuple val(meta), path("${prefix}.gff3") , emit: gff + tuple val(meta), path("${prefix}.embl"), emit: embl + tuple val(meta), path("${prefix}.faa"), emit: faa + tuple val(meta), path("${prefix}.ffn"), emit: ffn + tuple val(meta), path("${prefix}.fna"), emit: fna + tuple val(meta), path("${prefix}.gbff"), emit: gbff + tuple val(meta), path("${prefix}.gff3"), emit: gff tuple val(meta), path("${prefix}.hypotheticals.tsv"), emit: hypotheticals_tsv tuple val(meta), path("${prefix}.hypotheticals.faa"), emit: hypotheticals_faa - tuple val(meta), path("${prefix}.tsv") , emit: tsv - tuple val(meta), path("${prefix}.txt") , emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.tsv"), emit: tsv + tuple val(meta), path("${prefix}.txt"), emit: txt + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" def proteins_opt = proteins ? "--proteins ${proteins[0]}" : "" - def prodigal_tf = prodigal_tf ? "--prodigal-tf ${prodigal_tf[0]}" : "" + def prodigal_tf_opt = prodigal_tf ? "--prodigal-tf ${prodigal_tf[0]}" : "" + def regions_opt = regions ? "--regions ${regions}" : "" + def hmms_opt = hmms ? "--hmms ${hmms}" : "" + """ bakta \\ - $fasta \\ - $args \\ - --threads $task.cpus \\ - --prefix $prefix \\ - $proteins_opt \\ - $prodigal_tf \\ - --db $db + ${fasta} \\ + ${args} \\ + --threads ${task.cpus} \\ + --prefix ${prefix} \\ + ${proteins_opt} \\ + ${prodigal_tf_opt} \\ + ${regions_opt} \\ + ${hmms_opt} \\ + --db ${db} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bakta/bakta/meta.yml b/modules/nf-core/bakta/bakta/meta.yml index 7d734f28..1944fbb1 100644 --- a/modules/nf-core/bakta/bakta/meta.yml +++ b/modules/nf-core/bakta/bakta/meta.yml @@ -23,19 +23,42 @@ input: type: file description: | FASTA file to be annotated. Has to contain at least a non-empty string dummy value. - - - db: - type: file - description: | - Path to the Bakta database. Must have amrfinderplus database directory already installed within it (in a directory called 'amrfinderplus-db/'). - - - proteins: - type: file - description: FASTA/GenBank file of trusted proteins to first annotate from (optional) - - - prodigal_tf: - type: file - description: Training file to use for Prodigal (optional) + pattern: "*.{fa,fas,fna,fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - db: + type: file + description: | + Path to the Bakta database directory. Must have amrfinderplus database directory already installed within it (in a directory called 'amrfinderplus-db/'). + ontologies: + - edam: http://edamontology.org/data_1049 ## Directory name + - proteins: + type: file + description: FASTA/GenBank file of trusted proteins to first annotate from (optional) + pattern: "*.{fa,fas,fna,fasta,faa}" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - prodigal_tf: + type: file + description: Training file to use for Prodigal for CDS prediction(optional) + pattern: "*.{tf,trn}" + ontologies: + - edam: http://edamontology.org/format_2333 # Binary format + - regions: + type: file + description: GFF3 or GenBank file of pre-annotated regions (optional) + pattern: "*.{gbff,gff3}" + ontologies: + - edam: http://edamontology.org/format_2206 # Sequence feature table format (text) + - hmms: + type: file + description: HMM database file for custom annotation (optional) + pattern: "*.hmm" + ontologies: + - edam: http://edamontology.org/format_1370 # HMMER database format output: - - embl: - - meta: + embl: + - - meta: type: map description: | Groovy Map containing sample information @@ -44,8 +67,10 @@ output: type: file description: annotations & sequences in (multi) EMBL format pattern: "*.embl" - - faa: - - meta: + ontologies: + - edam: http://edamontology.org/format_1927 # EMBL format + faa: + - - meta: type: map description: | Groovy Map containing sample information @@ -54,8 +79,10 @@ output: type: file description: CDS/sORF amino acid sequences as FASTA pattern: "*.faa" - - ffn: - - meta: + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + ffn: + - - meta: type: map description: | Groovy Map containing sample information @@ -64,8 +91,11 @@ output: type: file description: feature nucleotide sequences as FASTA pattern: "*.ffn" - - fna: - - meta: + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + + fna: + - - meta: type: map description: | Groovy Map containing sample information @@ -74,8 +104,10 @@ output: type: file description: replicon/contig DNA sequences as FASTA pattern: "*.fna" - - gbff: - - meta: + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + gbff: + - - meta: type: map description: | Groovy Map containing sample information @@ -84,8 +116,10 @@ output: type: file description: annotations & sequences in (multi) GenBank format pattern: "*.gbff" - - gff: - - meta: + ontologies: + - edam: http://edamontology.org/format_1936 # GenBank format + gff: + - - meta: type: map description: | Groovy Map containing sample information @@ -94,19 +128,23 @@ output: type: file description: annotations & sequences in GFF3 format pattern: "*.gff3" - - hypotheticals_tsv: - - meta: + ontologies: + - edam: http://edamontology.org/format_1975 # GFF3 format + hypotheticals_tsv: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - ${prefix}.hypotheticals.tsv: type: file - description: additional information on hypothetical protein CDS as simple human - readable tab separated values + description: additional information on hypothetical protein CDS as simple + human readable tab separated values pattern: "*.hypotheticals.tsv" - - hypotheticals_faa: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + hypotheticals_faa: + - - meta: type: map description: | Groovy Map containing sample information @@ -115,8 +153,10 @@ output: type: file description: hypothetical protein CDS amino acid sequences as FASTA pattern: "*.hypotheticals.faa" - - tsv: - - meta: + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + tsv: + - - meta: type: map description: | Groovy Map containing sample information @@ -125,8 +165,10 @@ output: type: file description: annotations as simple human readable tab separated values pattern: "*.tsv" - - txt: - - meta: + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + txt: + - - meta: type: map description: | Groovy Map containing sample information @@ -135,11 +177,15 @@ output: type: file description: genome statistics and annotation summary pattern: "*.txt" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@rpetit3" - "@oschwengers" diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test b/modules/nf-core/bakta/bakta/tests/main.nf.test index 3c1f8f82..29a5b39d 100644 --- a/modules/nf-core/bakta/bakta/tests/main.nf.test +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test @@ -33,6 +33,8 @@ nextflow_process { input[1] = BAKTA_BAKTADBDOWNLOAD.out.db input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -67,6 +69,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/bakta/bakta/tests/main.nf.test.snap b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap index cee06343..5bc75a3e 100644 --- a/modules/nf-core/bakta/bakta/tests/main.nf.test.snap +++ b/modules/nf-core/bakta/bakta/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "versions": { "content": [ [ - "versions.yml:md5,c40bd66294f6eb4520f194325ef24f24" + "versions.yml:md5,39d76e345cee9a020d2109d1891c3c0b" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-25T11:59:09.981137" + "timestamp": "2025-10-22T11:18:34.505077069" }, "Bakta - stub": { "content": [ @@ -31,7 +31,7 @@ ] ], "10": [ - "versions.yml:md5,c40bd66294f6eb4520f194325ef24f24" + "versions.yml:md5,39d76e345cee9a020d2109d1891c3c0b" ], "2": [ [ @@ -178,14 +178,14 @@ ] ], "versions": [ - "versions.yml:md5,c40bd66294f6eb4520f194325ef24f24" + "versions.yml:md5,39d76e345cee9a020d2109d1891c3c0b" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-25T11:09:05.864545" + "timestamp": "2025-10-22T11:11:23.220239457" } } \ No newline at end of file diff --git a/modules/nf-core/bakta/bakta/tests/tags.yml b/modules/nf-core/bakta/bakta/tests/tags.yml deleted file mode 100644 index ecb08c45..00000000 --- a/modules/nf-core/bakta/bakta/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bakta/bakta: - - "modules/nf-core/bakta/bakta/**" diff --git a/modules/nf-core/bakta/baktadbdownload/environment.yml b/modules/nf-core/bakta/baktadbdownload/environment.yml index c1b616a4..0640aa34 100644 --- a/modules/nf-core/bakta/baktadbdownload/environment.yml +++ b/modules/nf-core/bakta/baktadbdownload/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::bakta=1.10.4 + - bioconda::bakta=1.11.4 diff --git a/modules/nf-core/bakta/baktadbdownload/main.nf b/modules/nf-core/bakta/baktadbdownload/main.nf index cc2f445e..36a172b0 100644 --- a/modules/nf-core/bakta/baktadbdownload/main.nf +++ b/modules/nf-core/bakta/baktadbdownload/main.nf @@ -2,13 +2,13 @@ process BAKTA_BAKTADBDOWNLOAD { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bakta:1.10.4--pyhdfd78af_0' : - 'biocontainers/bakta:1.10.4--pyhdfd78af_0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/bakta:1.11.4--pyhdfd78af_0' + : 'biocontainers/bakta:1.11.4--pyhdfd78af_0'}" output: - path "db*" , emit: db - path "versions.yml" , emit: versions + path "db*", emit: db + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -18,7 +18,7 @@ process BAKTA_BAKTADBDOWNLOAD { """ bakta_db \\ download \\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -31,7 +31,7 @@ process BAKTA_BAKTADBDOWNLOAD { """ echo "bakta_db \\ download \\ - $args" + ${args}" mkdir db diff --git a/modules/nf-core/bakta/baktadbdownload/meta.yml b/modules/nf-core/bakta/baktadbdownload/meta.yml index a0a3a455..103d38ff 100644 --- a/modules/nf-core/bakta/baktadbdownload/meta.yml +++ b/modules/nf-core/bakta/baktadbdownload/meta.yml @@ -17,16 +17,18 @@ tools: licence: ["GPL v3"] identifier: biotools:bakta output: - - db: - - db*: - type: directory - description: BAKTA database directory - pattern: "db*/" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + db: + - db*: + type: directory + description: BAKTA database directory + pattern: "db*/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@jfy133" - "@jasmezz" diff --git a/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap index ef6aabe7..0d6e2aac 100644 --- a/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap +++ b/modules/nf-core/bakta/baktadbdownload/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "Bakta database download": { "content": [ [ - "versions.yml:md5,29d6ec77dc88492b2c53141e6541c289" + "versions.yml:md5,e6f280a0deb9981dbf7f1d6270ab0908" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-25T12:30:51.853371" + "timestamp": "2025-10-22T11:28:20.601164502" }, "Bakta database download - stub": { "content": [ @@ -17,13 +17,13 @@ [ ], - "versions.yml:md5,29d6ec77dc88492b2c53141e6541c289" + "versions.yml:md5,e6f280a0deb9981dbf7f1d6270ab0908" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.7" }, - "timestamp": "2025-01-25T12:31:08.390845" + "timestamp": "2025-10-22T11:28:26.466451602" } } \ No newline at end of file diff --git a/modules/nf-core/bakta/baktadbdownload/tests/tags.yml b/modules/nf-core/bakta/baktadbdownload/tests/tags.yml deleted file mode 100644 index c469fa48..00000000 --- a/modules/nf-core/bakta/baktadbdownload/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bakta/baktadbdownload: - - "modules/nf-core/bakta/baktadbdownload/**" diff --git a/subworkflows/local/annotation.nf b/subworkflows/local/annotation.nf index a59fe561..5e576631 100644 --- a/subworkflows/local/annotation.nf +++ b/subworkflows/local/annotation.nf @@ -71,8 +71,7 @@ workflow ANNOTATION { // BAKTA prepare download if (params.annotation_bakta_db) { - ch_bakta_db = Channel - .fromPath(params.annotation_bakta_db, checkIfExists: true) + ch_bakta_db = Channel.fromPath(params.annotation_bakta_db, checkIfExists: true) .first() } else { @@ -81,7 +80,22 @@ workflow ANNOTATION { ch_bakta_db = BAKTA_BAKTADBDOWNLOAD.out.db } - BAKTA_BAKTA(fasta, ch_bakta_db, [], []) + // BAKTA HMM download + if (params.annotation_bakta_hmms) { + ch_bakta_hmm = Channel.fromPath(params.annotation_bakta_hmms, checkIfExists: true).first() + } + else { + ch_bakta_hmm = [] + } + + BAKTA_BAKTA( + fasta, + ch_bakta_db, + [], + [], + [], + ch_bakta_hmm, + ) ch_versions = ch_versions.mix(BAKTA_BAKTA.out.versions) ch_multiqc_files = BAKTA_BAKTA.out.txt.collect { it[1] }.ifEmpty([]) ch_annotation_faa = BAKTA_BAKTA.out.faa