diff --git a/.github/workflows/yamlfmt-lint.yml b/.github/workflows/yamlfmt-lint.yml
new file mode 100644
index 0000000000..9bde9939ff
--- /dev/null
+++ b/.github/workflows/yamlfmt-lint.yml
@@ -0,0 +1,24 @@
+# Runs yamlfmt in lint mode over the repository; formatter settings and exclusions live in .yamlfmt.yaml.
+name: yamlfmt-lint
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+# The job only checks out and reads the repository, so keep GITHUB_TOKEN read-only.
+permissions:
+  contents: read
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/google/yamlfmt:0.21.0
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Run yamlfmt lint
+        run: yamlfmt -lint .
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index df4e073db1..da1b4ed811 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,36 +1,36 @@
repos:
-- repo: https://github.com/astral-sh/ruff-pre-commit
- # Ruff version.
- rev: v0.14.2
- hooks:
- # Run the linter and fix issues.
- - id: ruff-check
- args: [ --fix ]
- files: ^ena-submission/
- # Run lint but only fix auto-fixable issues.
- - id: ruff-check
- args: [ --fix-only ]
- files: ^preprocessing/nextclade/
- # Run the formatter.
- - id: ruff-format
- files: ^ena-submission/|^preprocessing/nextclade/
-- repo: local
- hooks:
- - id: prettier-values-schema
- name: prettier (values.schema.json)
- entry: npx prettier@3.6.2 --write
- language: system
- files: ^kubernetes/loculus/values\.schema\.json$
- - id: helm-lint
- name: helm lint
- entry: bash
- args:
- - -c
- - |
- set -euo pipefail
- helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml
- helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_e2e_and_dev.yaml
- helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_preview_server.yaml
- language: system
- pass_filenames: false
- files: ^kubernetes/loculus/
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ # Ruff version.
+ rev: v0.14.2
+ hooks:
+ # Run the linter and fix issues.
+ - id: ruff-check
+ args: [--fix]
+ files: ^ena-submission/
+ # Run lint but only fix auto-fixable issues.
+ - id: ruff-check
+ args: [--fix-only]
+ files: ^preprocessing/nextclade/
+ # Run the formatter.
+ - id: ruff-format
+ files: ^ena-submission/|^preprocessing/nextclade/
+ - repo: local
+ hooks:
+ - id: prettier-values-schema
+ name: prettier (values.schema.json)
+ entry: npx prettier@3.6.2 --write
+ language: system
+ files: ^kubernetes/loculus/values\.schema\.json$
+ - id: helm-lint
+ name: helm lint
+ entry: bash
+ args:
+ - -c
+ - |
+ set -euo pipefail
+ helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml
+ helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_e2e_and_dev.yaml
+ helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_preview_server.yaml
+ language: system
+ pass_filenames: false
+ files: ^kubernetes/loculus/
diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml
index 8369992be0..88a0f5e6d8 100644
--- a/.yamlfmt.yaml
+++ b/.yamlfmt.yaml
@@ -2,6 +2,9 @@
formatter:
type: basic
drop_merge_tag: true
+ trim_trailing_whitespace: true
+ retain_line_breaks_single: true
gitignore_excludes: true
exclude:
- kubernetes/loculus/templates
+ - .github
diff --git a/ena-submission/environment.yml b/ena-submission/environment.yml
index 5d3c4910c4..5556e4efbc 100644
--- a/ena-submission/environment.yml
+++ b/ena-submission/environment.yml
@@ -5,7 +5,7 @@ channels:
- nodefaults
dependencies:
# Core Python dependencies
- - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12
+ - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12
- pip=25.2
- uv=0.9.5
# Extra dependencies
diff --git a/ena-submission/test/test_config.yaml b/ena-submission/test/test_config.yaml
index 289f63ff2a..a3759cc452 100644
--- a/ena-submission/test/test_config.yaml
+++ b/ena-submission/test/test_config.yaml
@@ -19,7 +19,7 @@ enaOrganisms:
scientific_name: Orthonairovirus haemorrhagiae
taxon_id: 3052518
organismName: "Crimean-Congo Hemorrhagic Fever Virus"
- segments:
+ segments:
- L
- M
- S
@@ -92,7 +92,7 @@ enaOrganisms:
scientific_name: West Nile virus
taxon_id: 11082
organismName: "West Nile Virus"
- segments:
+ segments:
- main
externalMetadata:
- externalMetadataUpdater: ena
@@ -133,4 +133,4 @@ enaOrganisms:
type: int
- externalMetadataUpdater: ena
name: sraRunAccession
- type: string
\ No newline at end of file
+ type: string
diff --git a/ingest/config/defaults.yaml b/ingest/config/defaults.yaml
index 308dd12396..7296e01b30 100644
--- a/ingest/config/defaults.yaml
+++ b/ingest/config/defaults.yaml
@@ -62,16 +62,16 @@ ncbi_mappings:
- ncbiHostCommonName
- ncbiPurposeOfSampling
- ncbiHostSex
-group_name: insdc_ingest_group # Used only to set the group name, never read
+group_name: insdc_ingest_group # Used only to set the group name, never read
username: insdc_ingest_user
password: insdc_ingest_user
keycloak_client_id: backend-client
subsample_fraction: 1.0
-approve_timeout_min: "25" # Cronjobs run every 30min, make approve stop before it is forced to stop by argocd
+approve_timeout_min: "25" # Cronjobs run every 30min, make approve stop before it is forced to stop by argocd
db_username: postgres
db_password: unsecure
db_url: "jdbc:postgresql://127.0.0.1:5432/loculus"
-batch_chunk_size: 10000 # Batch size for submitting sequences to Loculus backend
+batch_chunk_size: 10000 # Batch size for submitting sequences to Loculus backend
nextclade_dataset_server: https://data.clades.nextstrain.org/v3
time_between_approve_requests_seconds: 60
backend_request_timeout_seconds: 600
diff --git a/ingest/environment.yml b/ingest/environment.yml
index 2ac91266e7..516f2322f5 100644
--- a/ingest/environment.yml
+++ b/ingest/environment.yml
@@ -5,7 +5,7 @@ channels:
- nodefaults
dependencies:
# Core Python dependencies
- - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12
+ - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12
# Extra dependencies
- biopython=1.85
- click=8.3.0
diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml
index 56a41db1ae..5c146fe64e 100644
--- a/kubernetes/loculus/values.yaml
+++ b/kubernetes/loculus/values.yaml
@@ -1391,8 +1391,8 @@ defaultOrganismConfig: &defaultOrganismConfig
segments:
- name: main
references:
- - name: singleReference
- genes: []
+ - name: singleReference
+ genes: []
nextclade_dataset_server: https://data.clades.nextstrain.org/v3
ingest: &ingest
image: ghcr.io/loculus-project/ingest
@@ -1424,9 +1424,9 @@ defaultOrganisms:
segments:
- name: main
references:
- - name: singleReference
- nextclade_dataset_name: nextstrain/ebola/sudan
- genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L]
+ - name: singleReference
+ nextclade_dataset_name: nextstrain/ebola/sudan
+ genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L]
nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output
taxon_id: 186540
scientific_name: "Sudan ebolavirus"
@@ -1539,9 +1539,9 @@ defaultOrganisms:
segments:
- name: main
references:
- - name: singleReference
- nextclade_dataset_name: nextstrain/wnv/all-lineages
- genes: [capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5]
+ - name: singleReference
+ nextclade_dataset_name: nextstrain/wnv/all-lineages
+ genes: [capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5]
nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output
taxon_id: 11082
scientific_name: "West Nile virus"
@@ -1638,7 +1638,7 @@ defaultOrganisms:
required: true
type: string
lineageSystem: pangoLineage
- options:
+ options:
- name: A
- name: A.1
- name: A.1.1
@@ -1750,7 +1750,7 @@ defaultOrganisms:
required: true
type: string
lineageSystem: alternativeLineage
- options:
+ options:
- name: A
- name: A.1
- name: A.1.1
@@ -1806,8 +1806,8 @@ defaultOrganisms:
segments:
- name: main
references:
- - name: singleReference
- genes: []
+ - name: singleReference
+ genes: []
referenceGenomes:
- name: main
references:
@@ -1880,19 +1880,19 @@ defaultOrganisms:
segments:
- name: L
references:
- - nextclade_dataset_name: community/pathoplexus/cchfv/L
- name: singleReference
- genes: [RdRp]
+ - nextclade_dataset_name: community/pathoplexus/cchfv/L
+ name: singleReference
+ genes: [RdRp]
- name: M
references:
- - nextclade_dataset_name: community/pathoplexus/cchfv/M
- name: singleReference
- genes: [GPC]
+ - nextclade_dataset_name: community/pathoplexus/cchfv/M
+ name: singleReference
+ genes: [GPC]
- name: S
references:
- - nextclade_dataset_name: community/pathoplexus/cchfv/S
- name: singleReference
- genes: [NP]
+ - nextclade_dataset_name: community/pathoplexus/cchfv/S
+ name: singleReference
+ genes: [NP]
ingest:
<<: *ingest
configFile:
@@ -2017,32 +2017,32 @@ defaultOrganisms:
segments:
- name: L
references:
- - nextclade_dataset_name: cchfv/L
- name: singleReference
- genes: [RdRp]
+ - nextclade_dataset_name: cchfv/L
+ name: singleReference
+ genes: [RdRp]
- name: M
references:
- - nextclade_dataset_name: cchfv/M-MH396653
- name: MH396653
- genes: [GPC]
- - nextclade_dataset_name: cchfv/M-OR047158
- name: OR047158
- genes: [GPC]
+ - nextclade_dataset_name: cchfv/M-MH396653
+ name: MH396653
+ genes: [GPC]
+ - nextclade_dataset_name: cchfv/M-OR047158
+ name: OR047158
+ genes: [GPC]
- name: S
references:
- - nextclade_dataset_name: cchfv/S-1and6
- name: 1and6
- genes: [NP]
- - nextclade_dataset_name: cchfv/S-2to5
- name: 2to5
- genes: [NP]
+ - nextclade_dataset_name: cchfv/S-1and6
+ name: 1and6
+ genes: [NP]
+ - nextclade_dataset_name: cchfv/S-2to5
+ name: 2to5
+ genes: [NP]
ingest:
<<: *ingest
configFile:
taxon_id: 3052518
segment_identification:
method: "align"
- nextclade_dataset_name: community/pathoplexus/cchfv #TODO: allow multiple ref in ingest grouping algo
+ nextclade_dataset_name: community/pathoplexus/cchfv # TODO: allow multiple references in the ingest grouping algorithm
enaDeposition:
singleReference:
configFile:
@@ -2191,23 +2191,23 @@ defaultOrganisms:
segments:
- name: main
references:
- - name: CV-A16
- nextclade_dataset_name: enpen/enterovirus/cv-a16
- accepted_dataset_matches: ["community/hodcroftlab/enterovirus/cva16", "community/hodcroftlab/enterovirus/enterovirus/linked/CV-A16"]
- genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
- - name: CV-A10
- nextclade_dataset_name: enpen/enterovirus/cv-a10
- accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/CV-A10"]
- genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
- - name: EV-A71
- nextclade_dataset_name: enpen/enterovirus/ev-a71
- accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-A71"]
- genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
- - name: EV-D68
- accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-D68"]
- genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
- nextclade_dataset_name: enpen/enterovirus/ev-d68
- nextclade_dataset_server: https://data.clades.nextstrain.org/v3
+ - name: CV-A16
+ nextclade_dataset_name: enpen/enterovirus/cv-a16
+ accepted_dataset_matches: ["community/hodcroftlab/enterovirus/cva16", "community/hodcroftlab/enterovirus/enterovirus/linked/CV-A16"]
+ genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
+ - name: CV-A10
+ nextclade_dataset_name: enpen/enterovirus/cv-a10
+ accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/CV-A10"]
+ genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
+ - name: EV-A71
+ nextclade_dataset_name: enpen/enterovirus/ev-a71
+ accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-A71"]
+ genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
+ - name: EV-D68
+ accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-D68"]
+ genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"]
+ nextclade_dataset_name: enpen/enterovirus/ev-d68
+ nextclade_dataset_server: https://data.clades.nextstrain.org/v3
nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/evs-datasets/data_output
ingest:
<<: *ingest
@@ -2445,7 +2445,7 @@ enforceHTTPS: true
registrationTermsMessage: >
You must agree to the terms of use.
-enaDeposition:
+enaDeposition:
submitToEnaProduction: false
enaDbName: Loculus
enaUniqueSuffix: Loculus
diff --git a/kubernetes/loculus/values_e2e_and_dev.yaml b/kubernetes/loculus/values_e2e_and_dev.yaml
index f7fce7b84d..032af7efee 100644
--- a/kubernetes/loculus/values_e2e_and_dev.yaml
+++ b/kubernetes/loculus/values_e2e_and_dev.yaml
@@ -14,4 +14,4 @@ auth:
verifyEmail: false
host: localhost:3000
siloImport:
- pollIntervalSeconds: 5
\ No newline at end of file
+ pollIntervalSeconds: 5
diff --git a/kubernetes/loculus/values_preview_server.yaml b/kubernetes/loculus/values_preview_server.yaml
index a7acaa3523..c6a1ea97d2 100644
--- a/kubernetes/loculus/values_preview_server.yaml
+++ b/kubernetes/loculus/values_preview_server.yaml
@@ -140,4 +140,4 @@ defaultResources:
cpu: "20m"
limits:
memory: "1Gi"
- cpu: "20m"
\ No newline at end of file
+ cpu: "20m"
diff --git a/preprocessing/dummy/lineage-alternative.yaml b/preprocessing/dummy/lineage-alternative.yaml
index b2b34e1fa8..8f7008394d 100644
--- a/preprocessing/dummy/lineage-alternative.yaml
+++ b/preprocessing/dummy/lineage-alternative.yaml
@@ -4,30 +4,30 @@ A:
A.1:
aliases: []
parents:
- - A
+ - A
A.2:
aliases: []
parents:
- - A
+ - A
A.1.1:
aliases: []
parents:
- - A.1
- - A.2
+ - A.1
+ - A.2
B:
aliases: []
parents: []
B.1:
aliases: []
parents:
- - B
+ - B
B.1.1:
aliases:
- - C
+ - C
parents:
- - B.1
+ - B.1
C.1:
aliases:
- - B.1.1.1
+ - B.1.1.1
parents:
- - C
+ - C
diff --git a/preprocessing/dummy/lineage.yaml b/preprocessing/dummy/lineage.yaml
index 0c533e1a6c..52236c2360 100644
--- a/preprocessing/dummy/lineage.yaml
+++ b/preprocessing/dummy/lineage.yaml
@@ -4,13 +4,13 @@ A:
A.1:
aliases: []
parents:
- - A
+ - A
A.1.1:
aliases:
- - B
+ - B
parents:
- - A.1
+ - A.1
A.2:
aliases: []
parents:
- - A
+ - A
diff --git a/preprocessing/nextclade/tests/embl_required_metadata.yaml b/preprocessing/nextclade/tests/embl_required_metadata.yaml
index 919e8f94e9..18641f04a9 100644
--- a/preprocessing/nextclade/tests/embl_required_metadata.yaml
+++ b/preprocessing/nextclade/tests/embl_required_metadata.yaml
@@ -20,4 +20,4 @@ processing_spec:
type: date
function: parse_and_assert_past_date
inputs:
- date: sampleCollectionDate
\ No newline at end of file
+ date: sampleCollectionDate
diff --git a/preprocessing/nextclade/tests/multi_reference_config.yaml b/preprocessing/nextclade/tests/multi_reference_config.yaml
index 5784f1e403..f2f2aae0b9 100644
--- a/preprocessing/nextclade/tests/multi_reference_config.yaml
+++ b/preprocessing/nextclade/tests/multi_reference_config.yaml
@@ -7,14 +7,14 @@ nextclade_dataset_server: TEST
segments:
- name: main
references:
- - name: ebola-sudan
- nextclade_dataset_name: ebola-dataset/ebola-sudan
- accepted_dataset_matches: [ebola-sudan]
- genes: [NPEbolaSudan, VP35EbolaSudan] # Names of genes in nextclade dataset gff3 file
- - name: ebola-zaire
- nextclade_dataset_name: ebola-dataset/ebola-zaire
- accepted_dataset_matches: [ebola-zaire]
- genes: [VP24EbolaZaire, LEbolaZaire]
+ - name: ebola-sudan
+ nextclade_dataset_name: ebola-dataset/ebola-sudan
+ accepted_dataset_matches: [ebola-sudan]
+ genes: [NPEbolaSudan, VP35EbolaSudan] # Names of genes in nextclade dataset gff3 file
+ - name: ebola-zaire
+ nextclade_dataset_name: ebola-dataset/ebola-zaire
+ accepted_dataset_matches: [ebola-zaire]
+ genes: [VP24EbolaZaire, LEbolaZaire]
organism: multi-ebola-test
processing_spec:
subtype:
diff --git a/preprocessing/nextclade/tests/multi_segment_config.yaml b/preprocessing/nextclade/tests/multi_segment_config.yaml
index 2a9fa7a5b9..cfded28c62 100644
--- a/preprocessing/nextclade/tests/multi_segment_config.yaml
+++ b/preprocessing/nextclade/tests/multi_segment_config.yaml
@@ -6,12 +6,12 @@ segment_classification_method: "minimizer"
segments:
- name: ebola-sudan
references:
- - nextclade_dataset_name: ebola-dataset/ebola-sudan
- genes: [NPEbolaSudan, VP35EbolaSudan]
+ - nextclade_dataset_name: ebola-dataset/ebola-sudan
+ genes: [NPEbolaSudan, VP35EbolaSudan]
- name: ebola-zaire
references:
- - nextclade_dataset_name: ebola-dataset/ebola-zaire
- genes: [VP24EbolaZaire, LEbolaZaire]
+ - nextclade_dataset_name: ebola-dataset/ebola-zaire
+ genes: [VP24EbolaZaire, LEbolaZaire]
organism: multi-ebola-test
processing_spec:
totalInsertedNucs_ebola-zaire:
diff --git a/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml b/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml
index fea2a6e783..b5f69f9702 100644
--- a/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml
+++ b/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml
@@ -8,20 +8,20 @@ diamond_dmnd_url: FAKE_URL
segments:
- name: L
references:
- - name: singleReference
- nextclade_dataset_name: CCHF/L
- accepted_dataset_matches: [L]
- genes: [RdRp]
+ - name: singleReference
+ nextclade_dataset_name: CCHF/L
+ accepted_dataset_matches: [L]
+ genes: [RdRp]
- name: S
references:
- - name: 1and6
- nextclade_dataset_name: CCHF/S-1and6
- accepted_dataset_matches: [S-1and6]
- genes: [NP]
- - name: 2to5
- nextclade_dataset_name: CCHF/S-2to5
- accepted_dataset_matches: [S-2to5]
- genes: [NP]
+ - name: 1and6
+ nextclade_dataset_name: CCHF/S-1and6
+ accepted_dataset_matches: [S-1and6]
+ genes: [NP]
+ - name: 2to5
+ nextclade_dataset_name: CCHF/S-2to5
+ accepted_dataset_matches: [S-2to5]
+ genes: [NP]
organism: multi-reference-cchf-test
processing_spec:
subtype_S:
diff --git a/preprocessing/nextclade/tests/no_alignment_config.yaml b/preprocessing/nextclade/tests/no_alignment_config.yaml
index 569a649a5f..da2eb5c58c 100644
--- a/preprocessing/nextclade/tests/no_alignment_config.yaml
+++ b/preprocessing/nextclade/tests/no_alignment_config.yaml
@@ -87,7 +87,7 @@ processing_spec:
extracted_regex_field:
function: extract_regex
args:
- pattern: "^EPI_ISL_(?P<id>[0-9]+)?$" # Note the optional group to allow for testing of missing values
+ pattern: "^EPI_ISL_(?P<id>[0-9]+)?$" # Note the optional group to allow for testing of missing values
capture_group: id
inputs:
regex_field: regex_field
diff --git a/preprocessing/nextclade/tests/single_segment_config.yaml b/preprocessing/nextclade/tests/single_segment_config.yaml
index 408f0beb41..3c8f31bcc4 100644
--- a/preprocessing/nextclade/tests/single_segment_config.yaml
+++ b/preprocessing/nextclade/tests/single_segment_config.yaml
@@ -8,8 +8,8 @@ db_name: "Loculus"
segments:
- name: main
references:
- - nextclade_dataset_name: ebola-sudan
- genes: [NPEbolaSudan, VP35EbolaSudan]
+ - nextclade_dataset_name: ebola-sudan
+ genes: [NPEbolaSudan, VP35EbolaSudan]
organism: ebola-sudan-test
processing_spec:
completeness: