diff --git a/.github/workflows/yamlfmt-lint.yml b/.github/workflows/yamlfmt-lint.yml new file mode 100644 index 0000000000..9bde9939ff --- /dev/null +++ b/.github/workflows/yamlfmt-lint.yml @@ -0,0 +1,19 @@ +name: yamlfmt-lint +on: + pull_request: + push: + branches: + - main + workflow_dispatch: + +jobs: + lint: + runs-on: ubuntu-latest + container: + image: ghcr.io/google/yamlfmt:0.21.0 + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Run yamlfmt lint + run: yamlfmt -lint . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index df4e073db1..da1b4ed811 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,36 +1,36 @@ repos: -- repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.14.2 - hooks: - # Run the linter and fix issues. - - id: ruff-check - args: [ --fix ] - files: ^ena-submission/ - # Run lint but only fix auto-fixable issues. - - id: ruff-check - args: [ --fix-only ] - files: ^preprocessing/nextclade/ - # Run the formatter. - - id: ruff-format - files: ^ena-submission/|^preprocessing/nextclade/ -- repo: local - hooks: - - id: prettier-values-schema - name: prettier (values.schema.json) - entry: npx prettier@3.6.2 --write - language: system - files: ^kubernetes/loculus/values\.schema\.json$ - - id: helm-lint - name: helm lint - entry: bash - args: - - -c - - | - set -euo pipefail - helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml - helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_e2e_and_dev.yaml - helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_preview_server.yaml - language: system - pass_filenames: false - files: ^kubernetes/loculus/ + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.14.2 + hooks: + # Run the linter and fix issues. 
+ - id: ruff-check + args: [--fix] + files: ^ena-submission/ + # Run lint but only fix auto-fixable issues. + - id: ruff-check + args: [--fix-only] + files: ^preprocessing/nextclade/ + # Run the formatter. + - id: ruff-format + files: ^ena-submission/|^preprocessing/nextclade/ + - repo: local + hooks: + - id: prettier-values-schema + name: prettier (values.schema.json) + entry: npx prettier@3.6.2 --write + language: system + files: ^kubernetes/loculus/values\.schema\.json$ + - id: helm-lint + name: helm lint + entry: bash + args: + - -c + - | + set -euo pipefail + helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml + helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_e2e_and_dev.yaml + helm lint kubernetes/loculus -f kubernetes/loculus/values.yaml -f kubernetes/loculus/values_preview_server.yaml + language: system + pass_filenames: false + files: ^kubernetes/loculus/ diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml index 8369992be0..88a0f5e6d8 100644 --- a/.yamlfmt.yaml +++ b/.yamlfmt.yaml @@ -2,6 +2,9 @@ formatter: type: basic drop_merge_tag: true + trim_trailing_whitespace: true + retain_line_breaks_single: true gitignore_excludes: true exclude: - kubernetes/loculus/templates + - .github diff --git a/ena-submission/environment.yml b/ena-submission/environment.yml index 5d3c4910c4..5556e4efbc 100644 --- a/ena-submission/environment.yml +++ b/ena-submission/environment.yml @@ -5,7 +5,7 @@ channels: - nodefaults dependencies: # Core Python dependencies - - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12 + - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12 - pip=25.2 - uv=0.9.5 # Extra dependencies diff --git a/ena-submission/test/test_config.yaml b/ena-submission/test/test_config.yaml index 289f63ff2a..a3759cc452 100644 --- a/ena-submission/test/test_config.yaml +++ b/ena-submission/test/test_config.yaml @@ -19,7 +19,7 @@ enaOrganisms: scientific_name: 
Orthonairovirus haemorrhagiae taxon_id: 3052518 organismName: "Crimean-Congo Hemorrhagic Fever Virus" - segments: + segments: - L - M - S @@ -92,7 +92,7 @@ enaOrganisms: scientific_name: West Nile virus taxon_id: 11082 organismName: "West Nile Virus" - segments: + segments: - main externalMetadata: - externalMetadataUpdater: ena @@ -133,4 +133,4 @@ enaOrganisms: type: int - externalMetadataUpdater: ena name: sraRunAccession - type: string \ No newline at end of file + type: string diff --git a/ingest/config/defaults.yaml b/ingest/config/defaults.yaml index 308dd12396..7296e01b30 100644 --- a/ingest/config/defaults.yaml +++ b/ingest/config/defaults.yaml @@ -62,16 +62,16 @@ ncbi_mappings: - ncbiHostCommonName - ncbiPurposeOfSampling - ncbiHostSex -group_name: insdc_ingest_group # Used only to set the group name, never read +group_name: insdc_ingest_group # Used only to set the group name, never read username: insdc_ingest_user password: insdc_ingest_user keycloak_client_id: backend-client subsample_fraction: 1.0 -approve_timeout_min: "25" # Cronjobs run every 30min, make approve stop before it is forced to stop by argocd +approve_timeout_min: "25" # Cronjobs run every 30min, make approve stop before it is forced to stop by argocd db_username: postgres db_password: unsecure db_url: "jdbc:postgresql://127.0.0.1:5432/loculus" -batch_chunk_size: 10000 # Batch size for submitting sequences to Loculus backend +batch_chunk_size: 10000 # Batch size for submitting sequences to Loculus backend nextclade_dataset_server: https://data.clades.nextstrain.org/v3 time_between_approve_requests_seconds: 60 backend_request_timeout_seconds: 600 diff --git a/ingest/environment.yml b/ingest/environment.yml index 2ac91266e7..516f2322f5 100644 --- a/ingest/environment.yml +++ b/ingest/environment.yml @@ -5,7 +5,7 @@ channels: - nodefaults dependencies: # Core Python dependencies - - python=3.12.9 # Pinned to 3.12.x series, check for patch updates within 3.12 + - python=3.12.9 # Pinned to 
3.12.x series, check for patch updates within 3.12 # Extra dependencies - biopython=1.85 - click=8.3.0 diff --git a/kubernetes/loculus/values.yaml b/kubernetes/loculus/values.yaml index 56a41db1ae..5c146fe64e 100644 --- a/kubernetes/loculus/values.yaml +++ b/kubernetes/loculus/values.yaml @@ -1391,8 +1391,8 @@ defaultOrganismConfig: &defaultOrganismConfig segments: - name: main references: - - name: singleReference - genes: [] + - name: singleReference + genes: [] nextclade_dataset_server: https://data.clades.nextstrain.org/v3 ingest: &ingest image: ghcr.io/loculus-project/ingest @@ -1424,9 +1424,9 @@ defaultOrganisms: segments: - name: main references: - - name: singleReference - nextclade_dataset_name: nextstrain/ebola/sudan - genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L] + - name: singleReference + nextclade_dataset_name: nextstrain/ebola/sudan + genes: [NP, VP35, VP40, GP, sGP, ssGP, VP30, VP24, L] nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/ebola/data_output taxon_id: 186540 scientific_name: "Sudan ebolavirus" @@ -1539,9 +1539,9 @@ defaultOrganisms: segments: - name: main references: - - name: singleReference - nextclade_dataset_name: nextstrain/wnv/all-lineages - genes: [capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5] + - name: singleReference + nextclade_dataset_name: nextstrain/wnv/all-lineages + genes: [capsid, prM, env, NS1, NS2A, NS2B, NS3, NS4A, 2K, NS4B, NS5] nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/wnv/data_output taxon_id: 11082 scientific_name: "West Nile virus" @@ -1638,7 +1638,7 @@ defaultOrganisms: required: true type: string lineageSystem: pangoLineage - options: + options: - name: A - name: A.1 - name: A.1.1 @@ -1750,7 +1750,7 @@ defaultOrganisms: required: true type: string lineageSystem: alternativeLineage - options: + options: - name: A - name: A.1 - name: A.1.1 @@ -1806,8 +1806,8 @@ defaultOrganisms: segments: - name: main references: 
- - name: singleReference - genes: [] + - name: singleReference + genes: [] referenceGenomes: - name: main references: @@ -1880,19 +1880,19 @@ defaultOrganisms: segments: - name: L references: - - nextclade_dataset_name: community/pathoplexus/cchfv/L - name: singleReference - genes: [RdRp] + - nextclade_dataset_name: community/pathoplexus/cchfv/L + name: singleReference + genes: [RdRp] - name: M references: - - nextclade_dataset_name: community/pathoplexus/cchfv/M - name: singleReference - genes: [GPC] + - nextclade_dataset_name: community/pathoplexus/cchfv/M + name: singleReference + genes: [GPC] - name: S references: - - nextclade_dataset_name: community/pathoplexus/cchfv/S - name: singleReference - genes: [NP] + - nextclade_dataset_name: community/pathoplexus/cchfv/S + name: singleReference + genes: [NP] ingest: <<: *ingest configFile: @@ -2017,32 +2017,32 @@ defaultOrganisms: segments: - name: L references: - - nextclade_dataset_name: cchfv/L - name: singleReference - genes: [RdRp] + - nextclade_dataset_name: cchfv/L + name: singleReference + genes: [RdRp] - name: M references: - - nextclade_dataset_name: cchfv/M-MH396653 - name: MH396653 - genes: [GPC] - - nextclade_dataset_name: cchfv/M-OR047158 - name: OR047158 - genes: [GPC] + - nextclade_dataset_name: cchfv/M-MH396653 + name: MH396653 + genes: [GPC] + - nextclade_dataset_name: cchfv/M-OR047158 + name: OR047158 + genes: [GPC] - name: S references: - - nextclade_dataset_name: cchfv/S-1and6 - name: 1and6 - genes: [NP] - - nextclade_dataset_name: cchfv/S-2to5 - name: 2to5 - genes: [NP] + - nextclade_dataset_name: cchfv/S-1and6 + name: 1and6 + genes: [NP] + - nextclade_dataset_name: cchfv/S-2to5 + name: 2to5 + genes: [NP] ingest: <<: *ingest configFile: taxon_id: 3052518 segment_identification: method: "align" - nextclade_dataset_name: community/pathoplexus/cchfv #TODO: allow multiple ref in ingest grouping algo + nextclade_dataset_name: community/pathoplexus/cchfv #TODO: allow multiple ref in ingest grouping 
algo enaDeposition: singleReference: configFile: @@ -2191,23 +2191,23 @@ defaultOrganisms: segments: - name: main references: - - name: CV-A16 - nextclade_dataset_name: enpen/enterovirus/cv-a16 - accepted_dataset_matches: ["community/hodcroftlab/enterovirus/cva16", "community/hodcroftlab/enterovirus/enterovirus/linked/CV-A16"] - genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] - - name: CV-A10 - nextclade_dataset_name: enpen/enterovirus/cv-a10 - accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/CV-A10"] - genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] - - name: EV-A71 - nextclade_dataset_name: enpen/enterovirus/ev-a71 - accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-A71"] - genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] - - name: EV-D68 - accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-D68"] - genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] - nextclade_dataset_name: enpen/enterovirus/ev-d68 - nextclade_dataset_server: https://data.clades.nextstrain.org/v3 + - name: CV-A16 + nextclade_dataset_name: enpen/enterovirus/cv-a16 + accepted_dataset_matches: ["community/hodcroftlab/enterovirus/cva16", "community/hodcroftlab/enterovirus/enterovirus/linked/CV-A16"] + genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] + - name: CV-A10 + nextclade_dataset_name: enpen/enterovirus/cv-a10 + accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/CV-A10"] + genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] + - name: EV-A71 + nextclade_dataset_name: enpen/enterovirus/ev-a71 + accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-A71"] + genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] + - name: EV-D68 + 
accepted_dataset_matches: ["community/hodcroftlab/enterovirus/enterovirus/linked/EV-D68"] + genes: ["VP4", "VP2", "VP3", "VP1", "2A", "2B", "2C", "3A", "3B", "3C", "3D"] + nextclade_dataset_name: enpen/enterovirus/ev-d68 + nextclade_dataset_server: https://data.clades.nextstrain.org/v3 nextclade_dataset_server: https://raw.githubusercontent.com/nextstrain/nextclade_data/evs-datasets/data_output ingest: <<: *ingest @@ -2445,7 +2445,7 @@ enforceHTTPS: true registrationTermsMessage: > You must agree to the terms of use. -enaDeposition: +enaDeposition: submitToEnaProduction: false enaDbName: Loculus enaUniqueSuffix: Loculus diff --git a/kubernetes/loculus/values_e2e_and_dev.yaml b/kubernetes/loculus/values_e2e_and_dev.yaml index f7fce7b84d..032af7efee 100644 --- a/kubernetes/loculus/values_e2e_and_dev.yaml +++ b/kubernetes/loculus/values_e2e_and_dev.yaml @@ -14,4 +14,4 @@ auth: verifyEmail: false host: localhost:3000 siloImport: - pollIntervalSeconds: 5 \ No newline at end of file + pollIntervalSeconds: 5 diff --git a/kubernetes/loculus/values_preview_server.yaml b/kubernetes/loculus/values_preview_server.yaml index a7acaa3523..c6a1ea97d2 100644 --- a/kubernetes/loculus/values_preview_server.yaml +++ b/kubernetes/loculus/values_preview_server.yaml @@ -140,4 +140,4 @@ defaultResources: cpu: "20m" limits: memory: "1Gi" - cpu: "20m" \ No newline at end of file + cpu: "20m" diff --git a/preprocessing/dummy/lineage-alternative.yaml b/preprocessing/dummy/lineage-alternative.yaml index b2b34e1fa8..8f7008394d 100644 --- a/preprocessing/dummy/lineage-alternative.yaml +++ b/preprocessing/dummy/lineage-alternative.yaml @@ -4,30 +4,30 @@ A: A.1: aliases: [] parents: - - A + - A A.2: aliases: [] parents: - - A + - A A.1.1: aliases: [] parents: - - A.1 - - A.2 + - A.1 + - A.2 B: aliases: [] parents: [] B.1: aliases: [] parents: - - B + - B B.1.1: aliases: - - C + - C parents: - - B.1 + - B.1 C.1: aliases: - - B.1.1.1 + - B.1.1.1 parents: - - C + - C diff --git 
a/preprocessing/dummy/lineage.yaml b/preprocessing/dummy/lineage.yaml index 0c533e1a6c..52236c2360 100644 --- a/preprocessing/dummy/lineage.yaml +++ b/preprocessing/dummy/lineage.yaml @@ -4,13 +4,13 @@ A: A.1: aliases: [] parents: - - A + - A A.1.1: aliases: - - B + - B parents: - - A.1 + - A.1 A.2: aliases: [] parents: - - A + - A diff --git a/preprocessing/nextclade/tests/embl_required_metadata.yaml b/preprocessing/nextclade/tests/embl_required_metadata.yaml index 919e8f94e9..18641f04a9 100644 --- a/preprocessing/nextclade/tests/embl_required_metadata.yaml +++ b/preprocessing/nextclade/tests/embl_required_metadata.yaml @@ -20,4 +20,4 @@ processing_spec: type: date function: parse_and_assert_past_date inputs: - date: sampleCollectionDate \ No newline at end of file + date: sampleCollectionDate diff --git a/preprocessing/nextclade/tests/multi_reference_config.yaml b/preprocessing/nextclade/tests/multi_reference_config.yaml index 5784f1e403..f2f2aae0b9 100644 --- a/preprocessing/nextclade/tests/multi_reference_config.yaml +++ b/preprocessing/nextclade/tests/multi_reference_config.yaml @@ -7,14 +7,14 @@ nextclade_dataset_server: TEST segments: - name: main references: - - name: ebola-sudan - nextclade_dataset_name: ebola-dataset/ebola-sudan - accepted_dataset_matches: [ebola-sudan] - genes: [NPEbolaSudan, VP35EbolaSudan] # Names of genes in nextclade dataset gff3 file - - name: ebola-zaire - nextclade_dataset_name: ebola-dataset/ebola-zaire - accepted_dataset_matches: [ebola-zaire] - genes: [VP24EbolaZaire, LEbolaZaire] + - name: ebola-sudan + nextclade_dataset_name: ebola-dataset/ebola-sudan + accepted_dataset_matches: [ebola-sudan] + genes: [NPEbolaSudan, VP35EbolaSudan] # Names of genes in nextclade dataset gff3 file + - name: ebola-zaire + nextclade_dataset_name: ebola-dataset/ebola-zaire + accepted_dataset_matches: [ebola-zaire] + genes: [VP24EbolaZaire, LEbolaZaire] organism: multi-ebola-test processing_spec: subtype: diff --git 
a/preprocessing/nextclade/tests/multi_segment_config.yaml b/preprocessing/nextclade/tests/multi_segment_config.yaml index 2a9fa7a5b9..cfded28c62 100644 --- a/preprocessing/nextclade/tests/multi_segment_config.yaml +++ b/preprocessing/nextclade/tests/multi_segment_config.yaml @@ -6,12 +6,12 @@ segment_classification_method: "minimizer" segments: - name: ebola-sudan references: - - nextclade_dataset_name: ebola-dataset/ebola-sudan - genes: [NPEbolaSudan, VP35EbolaSudan] + - nextclade_dataset_name: ebola-dataset/ebola-sudan + genes: [NPEbolaSudan, VP35EbolaSudan] - name: ebola-zaire references: - - nextclade_dataset_name: ebola-dataset/ebola-zaire - genes: [VP24EbolaZaire, LEbolaZaire] + - nextclade_dataset_name: ebola-dataset/ebola-zaire + genes: [VP24EbolaZaire, LEbolaZaire] organism: multi-ebola-test processing_spec: totalInsertedNucs_ebola-zaire: diff --git a/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml b/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml index fea2a6e783..b5f69f9702 100644 --- a/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml +++ b/preprocessing/nextclade/tests/multi_segment_multi_reference.yaml @@ -8,20 +8,20 @@ diamond_dmnd_url: FAKE_URL segments: - name: L references: - - name: singleReference - nextclade_dataset_name: CCHF/L - accepted_dataset_matches: [L] - genes: [RdRp] + - name: singleReference + nextclade_dataset_name: CCHF/L + accepted_dataset_matches: [L] + genes: [RdRp] - name: S references: - - name: 1and6 - nextclade_dataset_name: CCHF/S-1and6 - accepted_dataset_matches: [S-1and6] - genes: [NP] - - name: 2to5 - nextclade_dataset_name: CCHF/S-2to5 - accepted_dataset_matches: [S-2to5] - genes: [NP] + - name: 1and6 + nextclade_dataset_name: CCHF/S-1and6 + accepted_dataset_matches: [S-1and6] + genes: [NP] + - name: 2to5 + nextclade_dataset_name: CCHF/S-2to5 + accepted_dataset_matches: [S-2to5] + genes: [NP] organism: multi-reference-cchf-test processing_spec: subtype_S: diff --git 
a/preprocessing/nextclade/tests/no_alignment_config.yaml b/preprocessing/nextclade/tests/no_alignment_config.yaml index 569a649a5f..da2eb5c58c 100644 --- a/preprocessing/nextclade/tests/no_alignment_config.yaml +++ b/preprocessing/nextclade/tests/no_alignment_config.yaml @@ -87,7 +87,7 @@ processing_spec: extracted_regex_field: function: extract_regex args: - pattern: "^EPI_ISL_(?P<id>[0-9]+)?$" # Note the optional group to allow for testing of missing values + pattern: "^EPI_ISL_(?P<id>[0-9]+)?$" # Note the optional group to allow for testing of missing values capture_group: id inputs: regex_field: regex_field diff --git a/preprocessing/nextclade/tests/single_segment_config.yaml b/preprocessing/nextclade/tests/single_segment_config.yaml index 408f0beb41..3c8f31bcc4 100644 --- a/preprocessing/nextclade/tests/single_segment_config.yaml +++ b/preprocessing/nextclade/tests/single_segment_config.yaml @@ -8,8 +8,8 @@ db_name: "Loculus" segments: - name: main references: - - nextclade_dataset_name: ebola-sudan - genes: [NPEbolaSudan, VP35EbolaSudan] + - nextclade_dataset_name: ebola-sudan + genes: [NPEbolaSudan, VP35EbolaSudan] organism: ebola-sudan-test processing_spec: completeness: