diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index bf00b84..ba50f5e 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -36,7 +36,7 @@ jobs: echo "password: ${{ secrets.ENA_PASSWORD }}" >> .secrets.yml - name: Test submission in --draft mode run: | - ena-upload-cli --action add --draft --dev --center ${{ secrets.ENA_CENTER }} --data example_data/ENA_TEST1.R1.fastq.gz example_data/ENA_TEST2.R1.fastq.gz example_data/ENA_TEST2.R2.fastq.gz --checklist ERC000033 --secret .secret.yml --xlsx example_tables/ENA_excel_example_ERC000033.xlsx + ena-upload-cli --action add --draft --dev --center TEST --data example_data/ENA_TEST1.R1.fastq.gz example_data/ENA_TEST2.R1.fastq.gz example_data/ENA_TEST2.R2.fastq.gz --checklist ERC000033 --secret .secret.yml --xlsx example_tables/ENA_excel_example_ERC000033.xlsx - name: Run Python to get temp directory run: | echo "TEMP_DIR=$(python -c 'import tempfile; print(tempfile.gettempdir())')" >> $GITHUB_ENV diff --git a/.gitignore b/.gitignore index 4e6ddc8..a8da505 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ build/ ena_upload_cli.egg-info/ __pycache__/ +tests/ena_upload/ +.idea/ diff --git a/README.md b/README.md index dd7722b..552fb6e 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,15 @@ The command line tool will automatically fetch the correct scientific name based | sample_alias_4 | sample_title_2 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_1 | 2020-10-11 | Argentina | | sample_alias_5 | sample_title_3 | 2697049 | Severe acute respiratory syndrome coronavirus 2 | covid-19 | sample_description_2 | 2008-01-24 | Belgium | +#### Custom attributes + +Additional custom attributes (i.e. 
attributes not specified in the ERC checklist) can be added to the sample table by adding columns whose headers are named like `sample_attribute[attribute_name]`; for example `sample_attribute[treatment]`, `sample_attribute[age]`... An example tsv file using custom attributes can be found in [example_tables/ENA_template_samples_xtra_attrs.tsv](/example_tables/ENA_template_samples_xtra_attrs.tsv). The same syntax is also applicable for xlsx input files. + +| alias | ... | sample_attribute[treatment] | sample_attribute[age] +|----------------|----------------|---------------------|------------------------| +| sample_alias_4 | ... | treated | 2 days +| sample_alias_5 | ... | untreated | 2 days + #### Viral submissions If you want to submit viral samples you can use the [ENA virus pathogen](https://www.ebi.ac.uk/ena/browser/view/ERC000033) checklist by adding `ERC000033` to the checklist parameter. Check out our [viral example command](#test-the-tool) as demonstration. Please use the [ENA virus pathogen](https://github.com/ELIXIR-Belgium/ENA-metadata-templates/tree/main/templates/ERC000033) checklist in our template repo to know what is allowed/possible in the `Controlled vocabulary`fields. @@ -116,6 +125,13 @@ If you want to submit viral samples you can use the [ENA virus pathogen](https:/ Please check out the [template](https://github.com/ELIXIR-Belgium/ENA-metadata-templates) of your checklist to discover which attributes are mandatory for the study, experiment and run ENA object. +#### Read info run attributes + +Using `read_type` and `read_label` as column headers in the table of ENA run objects allows you to set information about reads. Multiple values are given as a comma-separated list, without spaces. `read_type` has a controlled vocabulary, which can be found in the [ENA Documentation](https://ena-docs.readthedocs.io/en/latest/submit/reads/webin-cli.html#json-manifest-file-format). 
An example tsv file using these attributes can be found in [example_tables/ENA_template_runs_read_info.tsv](/example_tables/ENA_template_runs_read_info.tsv). The same syntax is also applicable for xlsx input files. + +#### Study and experiment custom attributes + +Similarly to samples, additional custom attributes can be added to the experiment and study tables by adding columns whose headers are named like `experiment_attribute[attribute_name]` and `study_attribute[attribute_name]`, respectively. ### Dev instance @@ -231,7 +247,7 @@ By default the updated tables after submission will have the action `added` in t * **Release submission** ``` - ena-upload-cli --action release --center'your_center_name' --study example_tables/ENA_template_studies_release.tsv --dev --secret .secret.yml + ena-upload-cli --action release --center 'your_center_name' --study example_tables/ENA_template_studies_release.tsv --dev --secret .secret.yml ``` > **Note for Windows users:** Windows, by default, does not support wildcard expansion in command-line arguments. 
diff --git a/ena_upload/ena_upload.py b/ena_upload/ena_upload.py index 5b7dbe1..ef11f0c 100644 --- a/ena_upload/ena_upload.py +++ b/ena_upload/ena_upload.py @@ -214,6 +214,16 @@ def generate_stream(schema, targets, Template, center, tool): :return: stream ''' + # find all columns in targets which column header matches the pattern attribute[(.*)], extract the group + # and return a dict[header] = group + # eg for header run_attribute[sex] => {'run_attribute[sex]': 'sex'} + pattern = re.compile(rf"{schema}_attribute\[(.*)\]") + extra_attributes = {} + for column in targets.columns: + match = re.match(pattern, column) + if match: + extra_attributes[column] = match.group(1) + if schema == 'run': # These attributes are required for rendering # the run xml templates @@ -221,6 +231,11 @@ def generate_stream(schema, targets, Template, center, tool): if 'file_format' in targets: targets.rename(columns={'file_format': 'file_type'}, inplace=True) file_attrib = ['file_name', 'file_type', 'file_checksum'] + if 'read_type' in targets: + file_attrib.append('read_type') + if 'read_label' in targets: + file_attrib.append('read_label') + other_attrib = ['alias', 'experiment_alias'] # Create groups with alias as index run_groups = targets[other_attrib].groupby('alias')['experiment_alias'].first().to_dict() @@ -230,11 +245,14 @@ def generate_stream(schema, targets, Template, center, tool): stream = Template.generate(run_groups=run_groups, file_groups=file_groups, center=center, + extra_attributes=extra_attributes, tool_name=tool['tool_name'], tool_version=tool['tool_version']) else: stream = Template.generate( - df=targets, center=center, tool_name=tool['tool_name'], tool_version=tool['tool_version']) + df=targets, center=center, extra_attributes=extra_attributes, + tool_name=tool['tool_name'], tool_version=tool['tool_version'] + ) return stream @@ -982,7 +1000,7 @@ def main(): if pd.notna(row['scientific_name']) and pd.isna(row['taxon_id']): # retrieve taxon id using scientific name 
taxonID = get_taxon_id(row['scientific_name']) - df.loc[index, 'taxon_id'] = taxonID + df.loc[index, 'taxon_id'] = int(taxonID) elif pd.notna(row['taxon_id']) and pd.isna(row['scientific_name']): # retrieve scientific name using taxon id scientificName = get_scientific_name(row['taxon_id']) diff --git a/ena_upload/templates/ENA_template_READ_TYPE.xml b/ena_upload/templates/ENA_template_READ_TYPE.xml new file mode 100644 index 0000000..ba52ebc --- /dev/null +++ b/ena_upload/templates/ENA_template_READ_TYPE.xml @@ -0,0 +1,9 @@ + +single +paired +cell_barcode +umi_barcode +feature_barcode +sample_barcode +spatial_barcode + \ No newline at end of file diff --git a/ena_upload/templates/ENA_template_experiments.xml b/ena_upload/templates/ENA_template_experiments.xml index 71254e6..3e146e0 100755 --- a/ena_upload/templates/ENA_template_experiments.xml +++ b/ena_upload/templates/ENA_template_experiments.xml @@ -17,62 +17,70 @@ def mandatorytest(row, column, index): xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_6/SRA.experiment.xsd"> - - - ${row.title} + + + ${row.title} + + + + + + + ${row.design_description} - - + + ${row.spot_descriptor} - - - ${row.design_description} + + + + + + ${row.library_name} - - ${row.spot_descriptor} + + - - + + - - - ${row.library_name} - - - - - - - - - - - - - - - - - - ${row.library_construction_protocol} - - - - - - - + + + + + + + + + + + ${row.library_construction_protocol} + + + + + + + + + + + + + ${tag} + ${row[header]} + - - - SUBMISSION_TOOL - ${tool_name} - - - SUBMISSION_TOOL_VERSION - ${tool_version} - - - + + + SUBMISSION_TOOL + ${tool_name} + + + SUBMISSION_TOOL_VERSION + ${tool_version} + + + diff --git a/ena_upload/templates/ENA_template_runs.xml b/ena_upload/templates/ENA_template_runs.xml index 15feae7..3f8d081 100644 --- a/ena_upload/templates/ENA_template_runs.xml +++ b/ena_upload/templates/ENA_template_runs.xml @@ -2,6 +2,9 @@ - + + + + + + 
${rlabel.strip()} + + + + + + + + + + + + + diff --git a/ena_upload/templates/ENA_template_samples_ERC000011.xml b/ena_upload/templates/ENA_template_samples_ERC000011.xml index 96ac3b8..4731e70 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000011.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000011.xml @@ -213,6 +213,14 @@ def mandatorytest(row, column, index): ${row['strain']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000012.xml b/ena_upload/templates/ENA_template_samples_ERC000012.xml index cf940a7..9512c97 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000012.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000012.xml @@ -516,6 +516,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000013.xml b/ena_upload/templates/ENA_template_samples_ERC000013.xml index 94c8617..9ca5147 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000013.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000013.xml @@ -645,6 +645,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000014.xml b/ena_upload/templates/ENA_template_samples_ERC000014.xml index 21d2520..75668c8 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000014.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000014.xml @@ -667,6 +667,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000015.xml b/ena_upload/templates/ENA_template_samples_ERC000015.xml index 785c903..83f5348 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000015.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000015.xml @@ -564,6 +564,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000016.xml b/ena_upload/templates/ENA_template_samples_ERC000016.xml index 089b235..b276c2c 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000016.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000016.xml @@ -565,6 +565,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000017.xml b/ena_upload/templates/ENA_template_samples_ERC000017.xml index c18d09c..96a13cb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000017.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000017.xml @@ -565,6 +565,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000018.xml b/ena_upload/templates/ENA_template_samples_ERC000018.xml index 9e27baf..c8f99dc 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000018.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000018.xml @@ -613,6 +613,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000019.xml b/ena_upload/templates/ENA_template_samples_ERC000019.xml index fbc9d0a..48595bb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000019.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000019.xml @@ -768,6 +768,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + 
${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000020.xml b/ena_upload/templates/ENA_template_samples_ERC000020.xml index 6739699..3823752 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000020.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000020.xml @@ -799,6 +799,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000021.xml b/ena_upload/templates/ENA_template_samples_ERC000021.xml index d9b0d3b..4e758fe 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000021.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000021.xml @@ -806,6 +806,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000022.xml b/ena_upload/templates/ENA_template_samples_ERC000022.xml index de72141..8ba9520 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000022.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000022.xml @@ -708,6 +708,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000023.xml b/ena_upload/templates/ENA_template_samples_ERC000023.xml index 4b146b4..a018865 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000023.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000023.xml @@ -608,6 +608,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000024.xml b/ena_upload/templates/ENA_template_samples_ERC000024.xml index ac73cca..5b8e71d 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000024.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000024.xml @@ -913,6 +913,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000025.xml b/ena_upload/templates/ENA_template_samples_ERC000025.xml index de5b782..2ce7413 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000025.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000025.xml @@ -636,6 +636,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000027.xml b/ena_upload/templates/ENA_template_samples_ERC000027.xml index 2d3727c..7cca27b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000027.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000027.xml @@ -910,6 +910,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000028.xml b/ena_upload/templates/ENA_template_samples_ERC000028.xml index a202000..9ab9ddf 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000028.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000028.xml @@ -153,6 +153,14 @@ def mandatorytest(row, column, index): ${row['strain']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000029.xml b/ena_upload/templates/ENA_template_samples_ERC000029.xml index 033fcb6..0013e1b 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000029.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000029.xml @@ -316,6 +316,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL 
${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000030.xml b/ena_upload/templates/ENA_template_samples_ERC000030.xml index b11c070..3a394cb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000030.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000030.xml @@ -243,6 +243,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000031.xml b/ena_upload/templates/ENA_template_samples_ERC000031.xml index fad5c88..67496f2 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000031.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000031.xml @@ -1423,6 +1423,14 @@ def mandatorytest(row, column, index): ${row['aerospace structure']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000032.xml b/ena_upload/templates/ENA_template_samples_ERC000032.xml index 9b51ac9..5e87477 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000032.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000032.xml @@ -391,6 +391,14 @@ def mandatorytest(row, column, index): ${row['isolation source non-host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000033.xml b/ena_upload/templates/ENA_template_samples_ERC000033.xml index ec01815..bed2ce4 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000033.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000033.xml @@ -253,6 +253,14 @@ def mandatorytest(row, column, index): ${row['isolation source non-host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000034.xml b/ena_upload/templates/ENA_template_samples_ERC000034.xml index fab7686..6810291 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000034.xml +++ 
b/ena_upload/templates/ENA_template_samples_ERC000034.xml @@ -112,6 +112,14 @@ def mandatorytest(row, column, index): ${row['Further Details']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000035.xml b/ena_upload/templates/ENA_template_samples_ERC000035.xml index d897cde..78ec6f1 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000035.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000035.xml @@ -249,6 +249,14 @@ def mandatorytest(row, column, index): ${row['growth condition']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000036.xml b/ena_upload/templates/ENA_template_samples_ERC000036.xml index c0eb3e6..e664dcd 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000036.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000036.xml @@ -183,6 +183,14 @@ def mandatorytest(row, column, index): ${row['nucleic acid amplification']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000037.xml b/ena_upload/templates/ENA_template_samples_ERC000037.xml index dfb641a..c0f1b25 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000037.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000037.xml @@ -636,6 +636,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000038.xml b/ena_upload/templates/ENA_template_samples_ERC000038.xml index b7156cd..7dde69e 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000038.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000038.xml @@ -218,6 +218,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git 
a/ena_upload/templates/ENA_template_samples_ERC000039.xml b/ena_upload/templates/ENA_template_samples_ERC000039.xml index 9629055..a6622cb 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000039.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000039.xml @@ -205,6 +205,14 @@ def mandatorytest(row, column, index): ${row['isolation source non-host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000040.xml b/ena_upload/templates/ENA_template_samples_ERC000040.xml index e45722c..e6b3f0f 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000040.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000040.xml @@ -166,6 +166,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000041.xml b/ena_upload/templates/ENA_template_samples_ERC000041.xml index 944f751..149c363 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000041.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000041.xml @@ -175,6 +175,14 @@ def mandatorytest(row, column, index): ${row['protocol']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000043.xml b/ena_upload/templates/ENA_template_samples_ERC000043.xml index 1d5e8c4..c7b12de 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000043.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000043.xml @@ -161,6 +161,14 @@ def mandatorytest(row, column, index): ${row['growth condition']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000044.xml b/ena_upload/templates/ENA_template_samples_ERC000044.xml index 0c99024..572fb38 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000044.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000044.xml 
@@ -136,6 +136,14 @@ def mandatorytest(row, column, index): ${row['isolation source host-associated']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000045.xml b/ena_upload/templates/ENA_template_samples_ERC000045.xml index 39a10d2..93fc3a5 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000045.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000045.xml @@ -81,6 +81,14 @@ def mandatorytest(row, column, index): ${row['serotype']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000047.xml b/ena_upload/templates/ENA_template_samples_ERC000047.xml index e70f6d8..13160c9 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000047.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000047.xml @@ -384,6 +384,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000048.xml b/ena_upload/templates/ENA_template_samples_ERC000048.xml index 92745cd..82b5a92 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000048.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000048.xml @@ -389,6 +389,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000049.xml b/ena_upload/templates/ENA_template_samples_ERC000049.xml index ff7a7ea..b5ef07a 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000049.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000049.xml @@ -461,6 +461,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000050.xml b/ena_upload/templates/ENA_template_samples_ERC000050.xml index 2c7de87..bab10f0 100644 --- 
a/ena_upload/templates/ENA_template_samples_ERC000050.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000050.xml @@ -354,6 +354,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000051.xml b/ena_upload/templates/ENA_template_samples_ERC000051.xml index 41d59b7..d9b2741 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000051.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000051.xml @@ -129,6 +129,14 @@ def mandatorytest(row, column, index): ${row['patient sex']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000052.xml b/ena_upload/templates/ENA_template_samples_ERC000052.xml index b102fa9..6745c95 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000052.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000052.xml @@ -248,6 +248,14 @@ def mandatorytest(row, column, index): ${row['adapters']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000053.xml b/ena_upload/templates/ENA_template_samples_ERC000053.xml index c3129fa..5e78e73 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000053.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000053.xml @@ -295,6 +295,14 @@ def mandatorytest(row, column, index): mm + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000055.xml b/ena_upload/templates/ENA_template_samples_ERC000055.xml index 17c965e..36d8470 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000055.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000055.xml @@ -1126,6 +1126,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git 
a/ena_upload/templates/ENA_template_samples_ERC000056.xml b/ena_upload/templates/ENA_template_samples_ERC000056.xml index 4095369..91cadb1 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000056.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000056.xml @@ -1781,6 +1781,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000057.xml b/ena_upload/templates/ENA_template_samples_ERC000057.xml index 4625b61..d6b397a 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000057.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000057.xml @@ -738,6 +738,14 @@ def mandatorytest(row, column, index): ${row['chemical administration']} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_samples_ERC000058.xml b/ena_upload/templates/ENA_template_samples_ERC000058.xml index 8ada54e..e35e49a 100644 --- a/ena_upload/templates/ENA_template_samples_ERC000058.xml +++ b/ena_upload/templates/ENA_template_samples_ERC000058.xml @@ -995,6 +995,14 @@ def mandatorytest(row, column, index): % + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/ENA_template_studies.xml b/ena_upload/templates/ENA_template_studies.xml index 374e743..250f051 100755 --- a/ena_upload/templates/ENA_template_studies.xml +++ b/ena_upload/templates/ENA_template_studies.xml @@ -45,6 +45,14 @@ def mandatorytest(row, column, index): + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/ena_upload/templates/jinja_templates/ENA_template_READ_TYPE.xml b/ena_upload/templates/jinja_templates/ENA_template_READ_TYPE.xml new file mode 100644 index 0000000..54d3ec4 --- /dev/null +++ b/ena_upload/templates/jinja_templates/ENA_template_READ_TYPE.xml @@ -0,0 +1,7 @@ + +{%- for value in attributes %} +{%- if value != '' %} 
+{{value}} +{%- endif %} +{%- endfor %} + diff --git a/ena_upload/templates/jinja_templates/ENA_template_samples.xml b/ena_upload/templates/jinja_templates/ENA_template_samples.xml index 8b8e978..a3b5862 100755 --- a/ena_upload/templates/jinja_templates/ENA_template_samples.xml +++ b/ena_upload/templates/jinja_templates/ENA_template_samples.xml @@ -48,6 +48,14 @@ def mandatorytest(row, column, index): {%- endfor %} + + + + ${tag} + ${row[header]} + + + SUBMISSION_TOOL ${tool_name} diff --git a/example_data/ENA_TEST2.I1.fastq.gz b/example_data/ENA_TEST2.I1.fastq.gz new file mode 100644 index 0000000..767ed22 Binary files /dev/null and b/example_data/ENA_TEST2.I1.fastq.gz differ diff --git a/example_tables/ENA_template_runs_read_info.tsv b/example_tables/ENA_template_runs_read_info.tsv new file mode 100644 index 0000000..7ec4b60 --- /dev/null +++ b/example_tables/ENA_template_runs_read_info.tsv @@ -0,0 +1,5 @@ +alias experiment_alias file_name file_type read_type +run_alias_1a experiment_alias_7a ENA_TEST2.R1.fastq.gz fastq paired +run_alias_1a experiment_alias_7a ENA_TEST2.R2.fastq.gz fastq paired +run_alias_1a experiment_alias_7a ENA_TEST2.I1.fastq.gz fastq umi_barcode,cell_barcode +run_alias_3c experiment_alias_9c ENA_TEST1.R1.fastq.gz fastq single diff --git a/example_tables/ENA_template_samples_xtra_attrs.tsv b/example_tables/ENA_template_samples_xtra_attrs.tsv new file mode 100644 index 0000000..f0397c8 --- /dev/null +++ b/example_tables/ENA_template_samples_xtra_attrs.tsv @@ -0,0 +1,3 @@ +alias title scientific_name sample_description collection date geographic location (country and/or sea) sample_attribute[age] sample_attribute[treatment] +sample_alias_4 sample_title_1 homo sapiens sample_description_1 2020-10-11 Argentina 2 days treated +sample_alias_5 sample_title_2 human metagenome sample_description_2 2008-01-24 Belgium 2 days untreated diff --git a/var/xml_converter.py b/var/xml_converter.py old mode 100644 new mode 100755 index 7016b4b..15e0eba --- 
a/var/xml_converter.py +++ b/var/xml_converter.py @@ -34,6 +34,9 @@ def fetching_checklists(): def main(): + is_test = False + export_path_prefix = 'tests/' if is_test else '' + for response_object in fetching_checklists(): checklist = response_object['accession'] print(f"Parsing {checklist}") @@ -74,7 +77,8 @@ def main(): output_from_parsed_template = t.render(attributes=xml_tree) # Saving new xml template file - with open(f"ena_upload/templates/ENA_template_samples_{checklist}.xml", "wb") as fh: + + with open(f"{export_path_prefix}ena_upload/templates/ENA_template_samples_{checklist}.xml", "wb") as fh: fh.write(output_from_parsed_template.encode('utf-8')) diff --git a/var/xsd_converter.py b/var/xsd_converter.py old mode 100644 new mode 100755 index 74b805c..aa199f4 --- a/var/xsd_converter.py +++ b/var/xsd_converter.py @@ -1,3 +1,6 @@ +import argparse +import os + from lxml import etree from jinja2 import Environment, FileSystemLoader import requests @@ -5,6 +8,9 @@ from urllib3.util.retry import Retry import time +from ena_upload.ena_upload import SmartFormatter + + def fetch_object(url): """ Fetch single BrAPI object by path @@ -71,9 +77,14 @@ def findkeys(node, query): for j in node.values(): for x in findkeys(j, query): yield x - + + def main(): - mapping = { "run":["FILE"], "experiment":["LIBRARY_SELECTION", "LIBRARY_SOURCE", "LIBRARY_STRATEGY"], "common":["PLATFORM"]} + # turn to True to export in tests folder + is_test = False + export_path_prefix = 'tests/' if is_test else '' + + mapping = { "run":["FILE", "READ_TYPE"], "experiment":["LIBRARY_SELECTION", "LIBRARY_SOURCE", "LIBRARY_STRATEGY"], "common":["PLATFORM"]} template_names= ["ENA.project", "SRA.common", "SRA.experiment", "SRA.run", "SRA.sample", "SRA.study", "SRA.submission"] for template_name in template_names: @@ -83,7 +94,11 @@ def main(): url = f"https://raw.githubusercontent.com/enasequence/webin-xml/master/src/main/resources/uk/ac/ebi/ena/sra/schema/{template_name}.xsd" response = 
fetch_object(url) - open(f'ena_upload/templates/{template_name}.xsd', 'wb').write(response) + if is_test: + os.makedirs(f'{export_path_prefix}ena_upload/templates', exist_ok=True) + open(f'{export_path_prefix}ena_upload/templates/{template_name}.xsd', 'wb').write(response) + else: + open(f'ena_upload/templates/{template_name}.xsd', 'wb').write(response) if template_name_sm in mapping.keys(): @@ -102,6 +117,9 @@ def main(): if template_block == "FILE": query_dict = (list(findkeys(xsd_dict, 'filetype')))[0] xml_tree = query_dict['simpleType']['restriction']['enumeration'] + elif template_block == "READ_TYPE": + query_dict = (list(findkeys(xsd_dict, 'READ_TYPE')))[0] + xml_tree = query_dict['simpleType']['restriction']['enumeration'] elif template_block == "LIBRARY_SELECTION": query_dict = (list(findkeys(xsd_dict, 'typeLibrarySelection')))[0] xml_tree = query_dict['restriction']['enumeration'] @@ -120,8 +138,7 @@ def main(): else: break - - + print(f"Parsed values: {xml_tree}") # Loading the xml jinja2 template for samples @@ -131,11 +148,10 @@ def main(): output_from_parsed_template = t.render(attributes=xml_tree) # Saving new xml template file - with open(f"ena_upload/templates/ENA_template_{template_block}.xml", "w") as fh: + with open(f"{export_path_prefix}ena_upload/templates/ENA_template_{template_block}.xml", "w") as fh: fh.write(output_from_parsed_template) - if __name__ == "__main__": main()