- 
                Notifications
    You must be signed in to change notification settings 
- Fork 928
EBI MGnify - ENA Genome uploader #9177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| --- | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
| channels: | ||
| - conda-forge | ||
| - bioconda | ||
| dependencies: | ||
| - "bioconda::genome-uploader=2.5.0" | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| process GENOMEUPLOADER { | ||
| tag "$meta.id" | ||
| label 'process_single' | ||
|  | ||
| secret secrets.ENA_WEBIN ? "ENA_WEBIN" : "" | ||
| secret secrets.ENA_WEBIN_PASSWORD ? "ENA_WEBIN_PASSWORD" : "" | ||
|  | ||
| conda "${moduleDir}/environment.yml" | ||
| container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
| 'https://depot.galaxyproject.org/singularity/genome-uploader:2.5.0--pyhdfd78af_0': | ||
| 'biocontainers/genome-uploader:2.5.0--pyhdfd78af_0' }" | ||
|  | ||
| input: | ||
| tuple val(meta), path(metadata_tsv) | ||
| path(fastas, stageAs: "genomes/*") | ||
|  | ||
| output: | ||
| tuple val(meta), path("upload_output/*") , emit: upload_output_dir | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe rename to  | ||
| tuple val(meta), path("upload_output/MAG_upload/submission.xml") , emit: submission | ||
| tuple val(meta), path("upload_output/MAG_upload/registered_MAGs_${prefix}.tsv") , emit: registered_mags | ||
| tuple val(meta), path("upload_output/MAG_upload/genome_samples.xml") , emit: genome_samples | ||
| tuple val(meta), path("upload_output/MAG_upload/manifests_${prefix}/*.manifest"), emit: manifests | ||
| path "versions.yml" , emit: versions | ||
|  | ||
| when: | ||
| task.ext.when == null || task.ext.when | ||
|  | ||
| script: | ||
| def args = task.ext.args ?: '' | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| """ | ||
| genome_upload \\ | ||
| $args \\ | ||
| --upload_study "${meta.study_accession}" \\ | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like the use of meta here, but we will need a second opinion on whether that's preferred over additional input params | ||
| --centre_name "${meta.center_name}" \\ | ||
| --genome_info ${metadata_tsv} \\ | ||
| --out upload_output | ||
|  | ||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| genome_upload: \$( genome_upload --version | sed 's/genome_uploader //' ) | ||
| ena-webin-cli: \$( ena-webin-cli -version ) | ||
| END_VERSIONS | ||
| """ | ||
|  | ||
| stub: | ||
| def args = task.ext.args ?: '' | ||
| prefix = task.ext.prefix ?: "${meta.id}" | ||
| """ | ||
| mkdir -p upload_output/MAG_upload/manifests_${prefix} | ||
| touch upload_output/MAG_upload/submission.xml | ||
| touch upload_output/MAG_upload/registered_MAGs_${prefix}.tsv | ||
| touch upload_output/MAG_upload/genome_samples.xml | ||
| touch upload_output/MAG_upload/manifests_${prefix}/test_mag.manifest | ||
|  | ||
| cat <<-END_VERSIONS > versions.yml | ||
| "${task.process}": | ||
| genome_upload: \$( genome_upload --version | sed 's/genome_uploader //' ) | ||
| ena-webin-cli: \$( ena-webin-cli -version ) | ||
| END_VERSIONS | ||
| """ | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,109 @@ | ||||||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||||||
| name: genomeuploader | ||||||
| description: Upload genome bins and MAGs in FASTA format to ENA (European Nucleotide Archive) | ||||||
| keywords: | ||||||
| - archiving | ||||||
| - ena | ||||||
| - mags | ||||||
| - bins | ||||||
| - upload | ||||||
| tools: | ||||||
| - genomeuploader: | ||||||
| description: Python script to upload bins and MAGs in fasta format to ENA (European Nucleotide Archive). | ||||||
| homepage: https://github.com/EBI-Metagenomics/genome_uploader | ||||||
| documentation: https://github.com/EBI-Metagenomics/genome_uploader | ||||||
| tool_dev_url: https://github.com/EBI-Metagenomics/genome_uploader | ||||||
| licence: ["Apache-2.0"] | ||||||
| identifier: "" | ||||||
|  | ||||||
| input: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information, the study_accession to upload the data, and the center name | ||||||
| e.g. `[ id:'test', study_accession:'ERP159782', center_name:'nf-core' ]` | ||||||
| - metadata_tsv: | ||||||
| type: file | ||||||
| description: TSV file containing metadata for genomes/bins to upload | ||||||
| pattern: "*.tsv" | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_3475 # TSV | ||||||
| - fastas: | ||||||
| type: file | ||||||
| description: FASTA files containing genome/bin sequences to upload | ||||||
| pattern: "*.{fasta,fa,fna,fasta.gz,fa.gz,fna.gz}" | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_1929 # FASTA | ||||||
| - edam: http://edamontology.org/format_3989 # GZIP format | ||||||
|  | ||||||
| output: | ||||||
| upload_output_dir: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'test', study_accession:'ERP159782', center_name:'nf-core' ]` | ||||||
| - upload_output/*: | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| type: directory | ||||||
| description: Directory containing all upload outputs | ||||||
| pattern: "upload_output/*" | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| ontologies: [] | ||||||
| submission: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'test', study_accession:'ERP159782', center_name:'nf-core' ]` | ||||||
| - upload_output/MAG_upload/submission.xml: | ||||||
| type: file | ||||||
| description: ENA submission XML file | ||||||
| pattern: "upload_output/MAG_upload/submission.xml" | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_2332 # XML | ||||||
| registered_mags: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'test', study_accession:'ERP159782', center_name:'nf-core' ]` | ||||||
| - upload_output/MAG_upload/registered_MAGs_${prefix}.tsv: | ||||||
| type: file | ||||||
| description: TSV file mapping genome names to ENA accession numbers | ||||||
| pattern: "upload_output/MAG_upload/registered_MAGs_*.tsv" | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_3475 # TSV | ||||||
| genome_samples: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'test', study_accession:'ERP159782', center_name:'nf-core' ]` | ||||||
| - upload_output/MAG_upload/genome_samples.xml: | ||||||
| type: file | ||||||
| description: ENA genome samples XML file | ||||||
| pattern: "upload_output/MAG_upload/genome_samples.xml" | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_2332 # XML | ||||||
| manifests: | ||||||
| - - meta: | ||||||
| type: map | ||||||
| description: | | ||||||
| Groovy Map containing sample information | ||||||
| e.g. `[ id:'test', study_accession:'ERP159782', center_name:'nf-core' ]` | ||||||
| - upload_output/MAG_upload/manifests_${prefix}/*.manifest: | ||||||
| type: file | ||||||
| description: ENA manifest files for genome upload | ||||||
| pattern: "upload_output/MAG_upload/manifests_*/*.manifest" | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| ontologies: [] | ||||||
| versions: | ||||||
| - versions.yml: | ||||||
| type: file | ||||||
| description: File containing software versions | ||||||
| pattern: "versions.yml" | ||||||
| ontologies: | ||||||
| - edam: http://edamontology.org/format_3750 # YAML | ||||||
|  | ||||||
| authors: | ||||||
| - "@mberacochea" | ||||||
| maintainers: | ||||||
| - "@mberacochea" | ||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,100 @@ | ||||||||||||||||||||||
| nextflow_process { | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| name "Test Process GENOMEUPLOADER" | ||||||||||||||||||||||
| script "../main.nf" | ||||||||||||||||||||||
| config "./nextflow.config" | ||||||||||||||||||||||
| process "GENOMEUPLOADER" | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| tag "modules" | ||||||||||||||||||||||
| tag "modules_nfcore" | ||||||||||||||||||||||
| tag "genomeuploader" | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| test("genome - fasta - gz") { | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| when { | ||||||||||||||||||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||||||||||||||||||
| process { | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| // This module takes a TSV as input, which lists the paths of the genomes/bins to upload. | ||||||||||||||||||||||
| // That is why there is a second input that accepts all the FASTA files (MAGs and bins) to upload; they are staged under genomes/, | ||||||||||||||||||||||
| // and that is why genome_path is genomes/<name> in the TSV | ||||||||||||||||||||||
| def metadata_content = [ | ||||||||||||||||||||||
| ["genome_name", "genome_path", "accessions", "assembly_software", "binning_software", "binning_parameters", "stats_generation_software", "completeness", "contamination", "genome_coverage", "metagenome", "co-assembly", "broad_environment", "local_environment", "environmental_medium", "rRNA_presence", "NCBI_lineage"].join("\t"), | ||||||||||||||||||||||
| ["test_mag", "genomes/GCA_002688505.1_ASM268850v1_genomic.fna.gz", "ERR4647712", "SPAdes_4.1.0", "nf-core/mag", "default", "CheckM2_1.1.0", "90.0", "1.0", "10.0", "chicken gut metagenome", "False", "chicken gut", "chicken gut mucosa", "chicken gut mucosa", "True", "d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus_crispatus"].join("\t") | ||||||||||||||||||||||
| ].join("\\n") | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| def metadata_file = file('genomes_metadata.tsv') | ||||||||||||||||||||||
| metadata_file.text = metadata_content | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| input[0] = [ | ||||||||||||||||||||||
| [ | ||||||||||||||||||||||
| id: 'test', | ||||||||||||||||||||||
| study_accession: 'ERP159782', | ||||||||||||||||||||||
| center_name: 'nf-core' | ||||||||||||||||||||||
| ], | ||||||||||||||||||||||
| metadata_file | ||||||||||||||||||||||
| ] | ||||||||||||||||||||||
| input[1] = file('https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz', checkIfExists: true) | ||||||||||||||||||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This file will probably need to be deposited in nf-core test-datasets as well | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| then { | ||||||||||||||||||||||
| assertAll( | ||||||||||||||||||||||
| { assert process.success }, | ||||||||||||||||||||||
| { assert snapshot( | ||||||||||||||||||||||
| process.out.submission, | ||||||||||||||||||||||
| process.out.versions, | ||||||||||||||||||||||
| // Check registered_MAGs contains expected genome name (starts with test_mag) | ||||||||||||||||||||||
| file(process.out.registered_mags.get(0).get(1)).readLines()[0].split('\t')[0].startsWith('test_mag'), | ||||||||||||||||||||||
| // Check genome_samples.xml contains expected elements | ||||||||||||||||||||||
| file(process.out.genome_samples.get(0).get(1)).readLines().any { it.contains('<SAMPLE') }, | ||||||||||||||||||||||
| file(process.out.genome_samples.get(0).get(1)).readLines().any { it.contains('alias="test_mag"') }, | ||||||||||||||||||||||
| // Check manifest file exists and has content | ||||||||||||||||||||||
| file(process.out.manifests.get(0).get(1)).readLines().size() > 0, | ||||||||||||||||||||||
| file(process.out.manifests.get(0).get(1)).readLines().any { it.contains('STUDY') } | ||||||||||||||||||||||
| ).match() } | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| test("genome - fasta - gz -stub") { | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| options "-stub" | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| when { | ||||||||||||||||||||||
| process { | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| // This module takes a TSV as input, which lists the paths of the genomes/bins to upload. | ||||||||||||||||||||||
| // That is why there is a second input that accepts all the FASTA files (MAGs and bins) to upload; they are staged under genomes/, | ||||||||||||||||||||||
| // and that is why genome_path is genomes/<name> in the TSV | ||||||||||||||||||||||
| def metadata_content = [ | ||||||||||||||||||||||
| ["genome_name", "genome_path", "accessions", "assembly_software", "binning_software", "binning_parameters", "stats_generation_software", "completeness", "contamination", "genome_coverage", "metagenome", "co-assembly", "broad_environment", "local_environment", "environmental_medium", "rRNA_presence", "NCBI_lineage"].join("\t"), | ||||||||||||||||||||||
| ["test_mag", "genomes/GCA_002688505.1_ASM268850v1_genomic.fna.gz", "ERR4647712", "SPAdes_4.1.0", "nf-core/mag", "default", "CheckM2_1.1.0", "90.0", "1.0", "10.0", "chicken gut metagenome", "False", "chicken gut", "chicken gut mucosa", "chicken gut mucosa", "True", "d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus_crispatus"].join("\t") | ||||||||||||||||||||||
| ].join("\\n") | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| def metadata_file = file('genomes_metadata.tsv') | ||||||||||||||||||||||
| metadata_file.text = metadata_content | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| input[0] = [ | ||||||||||||||||||||||
| [ | ||||||||||||||||||||||
| id: 'test', | ||||||||||||||||||||||
| study_accession: 'ERP159782', | ||||||||||||||||||||||
| center_name: 'nf-core' | ||||||||||||||||||||||
| ], | ||||||||||||||||||||||
| metadata_file | ||||||||||||||||||||||
| ] | ||||||||||||||||||||||
| input[1] = file('https://github.com/nf-core/test-datasets/raw/refs/heads/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz', checkIfExists: true) | ||||||||||||||||||||||
| """ | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| then { | ||||||||||||||||||||||
| assertAll( | ||||||||||||||||||||||
| { assert process.success }, | ||||||||||||||||||||||
| { assert snapshot(process.out).match() } | ||||||||||||||||||||||
| 
      Comment on lines
    
      +94
     to 
      +95
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|  | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| { | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't see the stub test in the snapshot | ||
| "GCA_002688505.1_ASM268850v1 - fasta - gz": { | ||
| "content": [ | ||
| [ | ||
| [ | ||
| { | ||
| "id": "test", | ||
| "study_accession": "ERP159782", | ||
| "center_name": "nf-core" | ||
| }, | ||
| "submission.xml:md5,705c441ca687726152f8540dab4bb322" | ||
| ] | ||
| ], | ||
| [ | ||
| "versions.yml:md5,36d7baf63c1face41d2fc0edd6263944" | ||
| ], | ||
| true, | ||
| true, | ||
| false, | ||
| true, | ||
| true | ||
| ], | ||
| "meta": { | ||
| "nf-test": "0.9.2", | ||
| "nextflow": "25.04.7" | ||
| }, | ||
| "timestamp": "2025-10-08T18:33:31.006282" | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,5 @@ | ||||||
| process { | ||||||
| withName: 'GENOMEUPLOADER' { | ||||||
| ext.args = '--mags' | ||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
        Suggested change
       
 | ||||||
| } | ||||||
| } | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would rename this to `fasta`.