From 6c7aeb21eca4b132c238569c7dc483343c7a1b92 Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:42:26 +0100
Subject: [PATCH 1/7] chore(ci): log basic details of downloaded ncbi datasets

It doesn't hurt to log the md5 checksums, file sizes, number of sequences,
average sequence length, etc. of each downloaded dataset.
---
 .github/workflows/datasets-mirror.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/datasets-mirror.yml b/.github/workflows/datasets-mirror.yml
index 17f207b1d7..c387de6c37 100644
--- a/.github/workflows/datasets-mirror.yml
+++ b/.github/workflows/datasets-mirror.yml
@@ -24,13 +24,17 @@ jobs:
       - uses: mamba-org/setup-micromamba@v2
         with:
           environment-name: datasets
-          create-args: ncbi-datasets-cli s3cmd
-      - name: Download NCBI Dataset
+          create-args: ncbi-datasets-cli s3cmd seqkit
+      - name: Download NCBI Dataset and create tzst archive
         shell: bash -l {0}
         run: |
           datasets download virus genome taxon ${{ inputs.taxon_id }} --no-progressbar --filename ${{ inputs.taxon_id }}.zip
           unzip -o ${{ inputs.taxon_id }}.zip
           tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset
+          cat md5sum.txt
+          find ncbi_dataset -type f -exec stat -c "%s %n" {} \;
+          stat -c "%s %n" ${{ inputs.taxon_id }}.zip ${{ inputs.taxon_id }}.tar.zst
+          seqkit stats ncbi_dataset/data/genomic.fna
       - name: Create S3cmd config
         run: |
           cat <<EOF > ~/.s3cfg

From 3cc853ce2fac77fc4b07969e01b2ccd5a50bbe3e Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:44:29 +0100
Subject: [PATCH 2/7] Run prio 2 with 1hr offset from 2-hourly prio 1 to not
 coincide

---
 .github/workflows/datasets-mirror-priority-2.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/datasets-mirror-priority-2.yml b/.github/workflows/datasets-mirror-priority-2.yml
index 54c89375e2..cbf59cb175 100644
--- a/.github/workflows/datasets-mirror-priority-2.yml
+++ b/.github/workflows/datasets-mirror-priority-2.yml
@@ -2,7 +2,7 @@ name: Mirror NCBI Datasets (Priority 2 - Daily)
 on:
   workflow_dispatch:  # Allows manual triggering to re-run all priority-2 taxa
   schedule:
-    - cron: '12 2 * * *' # Runs daily at 02:12 UTC
+    - cron: '42 2 * * *' # Runs daily at 02:12 UTC
 jobs:
   mirror:
     strategy:

From c0148439f9365b6e61459d69c13f58170aac8c93 Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:44:29 +0100
Subject: [PATCH 3/7] Run prio 2 with 1hr offset from 2-hourly prio 1 to not
 coincide; add bioconda channel

---
 .github/workflows/datasets-mirror-priority-2.yml | 2 +-
 .github/workflows/datasets-mirror.yml            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/datasets-mirror-priority-2.yml b/.github/workflows/datasets-mirror-priority-2.yml
index cbf59cb175..fdd1376cd8 100644
--- a/.github/workflows/datasets-mirror-priority-2.yml
+++ b/.github/workflows/datasets-mirror-priority-2.yml
@@ -2,7 +2,7 @@ name: Mirror NCBI Datasets (Priority 2 - Daily)
 on:
   workflow_dispatch:  # Allows manual triggering to re-run all priority-2 taxa
   schedule:
-    - cron: '42 2 * * *' # Runs daily at 02:12 UTC
+    - cron: '12 3 * * *' # Runs daily at 03:12 UTC
 jobs:
   mirror:
     strategy:
diff --git a/.github/workflows/datasets-mirror.yml b/.github/workflows/datasets-mirror.yml
index c387de6c37..0ae25fc662 100644
--- a/.github/workflows/datasets-mirror.yml
+++ b/.github/workflows/datasets-mirror.yml
@@ -24,7 +24,7 @@ jobs:
       - uses: mamba-org/setup-micromamba@v2
         with:
           environment-name: datasets
-          create-args: ncbi-datasets-cli s3cmd seqkit
+          create-args: -c conda-forge -c bioconda ncbi-datasets-cli s3cmd seqkit
       - name: Download NCBI Dataset and create tzst archive
         shell: bash -l {0}
         run: |

From 37ecba54aea6a76c630c4e0098c2a693746d7938 Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:49:32 +0100
Subject: [PATCH 4/7] Nicer log: blank-line separators, ls -lh for file sizes

---
 .github/workflows/datasets-mirror.yml | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/datasets-mirror.yml b/.github/workflows/datasets-mirror.yml
index 0ae25fc662..37d08b2080 100644
--- a/.github/workflows/datasets-mirror.yml
+++ b/.github/workflows/datasets-mirror.yml
@@ -29,11 +29,10 @@ jobs:
         shell: bash -l {0}
         run: |
           datasets download virus genome taxon ${{ inputs.taxon_id }} --no-progressbar --filename ${{ inputs.taxon_id }}.zip
-          unzip -o ${{ inputs.taxon_id }}.zip
-          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset
-          cat md5sum.txt
-          find ncbi_dataset -type f -exec stat -c "%s %n" {} \;
-          stat -c "%s %n" ${{ inputs.taxon_id }}.zip ${{ inputs.taxon_id }}.tar.zst
+          unzip -o ${{ inputs.taxon_id }}.zip; echo
+          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset; echo
+          cat md5sum.txt; echo
+          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*; echo
           seqkit stats ncbi_dataset/data/genomic.fna
       - name: Create S3cmd config
         run: |

From 6c53e2ec0a78b06bbc28f4d84de44c9a287e4d0a Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:52:12 +0100
Subject: [PATCH 5/7] Use echo "" for blank-line log separators

---
 .github/workflows/datasets-mirror.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/datasets-mirror.yml b/.github/workflows/datasets-mirror.yml
index 37d08b2080..4fbed94800 100644
--- a/.github/workflows/datasets-mirror.yml
+++ b/.github/workflows/datasets-mirror.yml
@@ -29,10 +29,10 @@ jobs:
         shell: bash -l {0}
         run: |
           datasets download virus genome taxon ${{ inputs.taxon_id }} --no-progressbar --filename ${{ inputs.taxon_id }}.zip
-          unzip -o ${{ inputs.taxon_id }}.zip; echo
-          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset; echo
-          cat md5sum.txt; echo
-          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*; echo
+          unzip -o ${{ inputs.taxon_id }}.zip; echo ""
+          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset; echo ""
+          cat md5sum.txt; echo ""
+          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*; echo ""
           seqkit stats ncbi_dataset/data/genomic.fna
       - name: Create S3cmd config
         run: |

From 64ff17cc1f88e80f802a66ab76ef250d28d160a7 Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:54:12 +0100
Subject: [PATCH 6/7] Use printf for blank-line log separators

---
 .github/workflows/datasets-mirror.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/datasets-mirror.yml b/.github/workflows/datasets-mirror.yml
index 4fbed94800..6f69d7998d 100644
--- a/.github/workflows/datasets-mirror.yml
+++ b/.github/workflows/datasets-mirror.yml
@@ -29,10 +29,10 @@ jobs:
         shell: bash -l {0}
         run: |
           datasets download virus genome taxon ${{ inputs.taxon_id }} --no-progressbar --filename ${{ inputs.taxon_id }}.zip
-          unzip -o ${{ inputs.taxon_id }}.zip; echo ""
-          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset; echo ""
-          cat md5sum.txt; echo ""
-          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*; echo ""
+          unzip -o ${{ inputs.taxon_id }}.zip; printf "\n"
+          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset; printf "\n"
+          cat md5sum.txt; printf "\n"
+          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*; printf "\n"
           seqkit stats ncbi_dataset/data/genomic.fna
       - name: Create S3cmd config
         run: |

From 477eb02d8bd8128a78d216added655ca93cde7c8 Mon Sep 17 00:00:00 2001
From: Cornelius Roemer <cornelius.roemer@gmail.com>
Date: Sun, 23 Nov 2025 14:55:49 +0100
Subject: [PATCH 7/7] Drop blank-line log separators

---
 .github/workflows/datasets-mirror.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/datasets-mirror.yml b/.github/workflows/datasets-mirror.yml
index 6f69d7998d..fc9be6b73d 100644
--- a/.github/workflows/datasets-mirror.yml
+++ b/.github/workflows/datasets-mirror.yml
@@ -29,10 +29,10 @@ jobs:
         shell: bash -l {0}
         run: |
           datasets download virus genome taxon ${{ inputs.taxon_id }} --no-progressbar --filename ${{ inputs.taxon_id }}.zip
-          unzip -o ${{ inputs.taxon_id }}.zip; printf "\n"
-          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset; printf "\n"
-          cat md5sum.txt; printf "\n"
-          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*; printf "\n"
+          unzip -o ${{ inputs.taxon_id }}.zip
+          tar -I 'zstd -T0 -18' -cvf ${{ inputs.taxon_id }}.tar.zst ncbi_dataset
+          cat md5sum.txt
+          ls -lh ${{ inputs.taxon_id }}.* ncbi_dataset/data/*
           seqkit stats ncbi_dataset/data/genomic.fna
       - name: Create S3cmd config
         run: |