virtool
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yaml
.github/workflows/ci.yml renamed to .github/workflows/ci.yaml
Lines changed: 37 additions & 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 46 additions & 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 1 addition & 0 deletions
diff --git a/fixtures.py b/fixtures.py
Lines changed: 0 additions & 1 deletion
diff --git a/python/workflow_pathoscope/rust.pyi b/python/workflow_pathoscope/rust.pyi
Lines changed: 8 additions & 9 deletions
diff --git a/src/candidates.rs b/src/candidates.rs
Lines changed: 22 additions & 101 deletions
diff --git a/src/coverage.rs b/src/coverage.rs
Lines changed: 1 addition & 1 deletion
diff --git a/src/em.rs b/src/em.rs
Lines changed: 1 addition & 1 deletion
@@ -15,6 +15,7 @@ env:
 
 jobs:
   commitlint:
+    name: Commitlint
     runs-on: ubuntu-24.04
     steps:
       - name: Checkout
@@ -23,6 +24,42 @@ jobs:
           fetch-depth: 0
       - name: commitlint
         uses: wagoid/commitlint-github-action@v5
+  ruff-format:
+    name: Ruff / Format
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install mise
+        uses: jdx/mise-action@v2
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Set up Python
+        run: uv python install
+      - name: Install dependencies
+        run: uv sync
+      - name: Check formatting
+        run: uv run ruff format --check .
+  ruff-lint:
+    name: Ruff / Lint
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install mise
+        uses: jdx/mise-action@v2
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Set up Python
+        run: uv python install
+      - name: Install dependencies
+        run: uv sync
+      - name: Check linting
+        run: uv run ruff check .
   build:
     name: Build
     runs-on: ubuntu-24.04
 
@@ -16,6 +16,7 @@ rustc-hash = "2.0"
 thiserror = "1.0"
 log = "0.4"
 env_logger = "0.11"
+rayon = "1.8"
 
 [dependencies.pyo3]
 version = "^0.22.0"
 
@@ -8,7 +8,6 @@
 def intermediate():
     """A namespace for storing intermediate values."""
     return SimpleNamespace(
-        isolate_high_scores={},
         to_otus=set(),
     )
 
 
@@ -1,7 +1,12 @@
 def run_eliminate_subtraction(
-    isolate_sam_path: str, subtraction_sam_path: str, output_sam_path: str
-) -> None:
-    """Eliminate subtraction reads from isolate reads using Rust."""
+    isolate_sam_path: str,
+    subtraction_sam_path: str,
+    output_sam_path: str,
+    input_fastq_path: str,
+    output_fastq_path: str,
+    proc: int,
+) -> int:
+    """Eliminate subtraction reads from BAM and filter FASTQ file. Returns number of eliminated reads."""
 
 class PathoscopeResults:
     best_hit_initial_reads: list[float]
@@ -25,12 +30,6 @@ def run_expectation_maximization(
 ) -> PathoscopeResults:
     """Run Pathoscope expectation maximization algorithm using Rust on SAM/BAM files."""
 
-def parse_isolate_scores(
-    alignment_path: str,
-    p_score_cutoff: float,
-) -> dict[str, float]:
-    """Parse isolate alignment file (SAM or BAM) and extract high scores for each read."""
-
 def find_candidate_otus(
     alignment_path: str,
     p_score_cutoff: float,
 
@@ -3,14 +3,31 @@ use log::info;
 use pyo3::prelude::*;
 use pyo3::exceptions::PyIOError;
 
+const AS_TAG_PREFIX: &str = "AS:i:";
 
-
+/// Extract AS:i alignment score from SAM optional fields
+/// 
+/// # Arguments
+/// * `fields` - SAM fields starting from the optional fields (field 11+)
+/// 
+/// # Returns
+/// Option containing the AS:i score as f64, None if not found or invalid
+fn extract_as_score(fields: &[&str]) -> Option<f64> {
+    for field in fields {
+        if let Some(stripped) = field.strip_prefix(AS_TAG_PREFIX) {
+            if let Ok(score) = stripped.parse::<i32>() {
+                return Some(score as f64);
+            }
+        }
+    }
+    None
+}
 
 /// Parse a single SAM line and extract candidate OTU information
 /// 
 /// This function processes one SAM line and determines if the read meets the score cutoff.
 /// Used for testing and by the streaming functions.
-/// 
+///  
 /// # Arguments
 /// * `line` - A SAM format line as string
 /// * `p_score_cutoff` - Minimum score threshold (AS:i score + read length)
@@ -44,19 +61,8 @@ pub fn parse_sam_line(line: &str, p_score_cutoff: f64) -> Option<String> {
         return None;
     }
 
-    // Find AS:i score in the optional fields (starting from field 11)
-    let mut as_score: Option<f64> = None;
-    for field in &fields[11..] {
-        if let Some(stripped) = field.strip_prefix("AS:i:") {
-            if let Ok(score) = stripped.parse::<i32>() {
-                as_score = Some(score as f64);
-                break;
-            }
-        }
-    }
-
-    // Skip if no AS score found
-    if let Some(as_score) = as_score {
+    // Extract AS:i score from optional fields (starting from field 11)
+    if let Some(as_score) = extract_as_score(&fields[11..]) {
         // Calculate total score (AS score + read length)
         let total_score = as_score + seq_len;
 
@@ -69,31 +75,6 @@ pub fn parse_sam_line(line: &str, p_score_cutoff: f64) -> Option<String> {
     None
 }
 
-/// Extract candidate OTU reference IDs from SAM text data
-/// 
-/// This function parses SAM format data directly from text without using rust-htslib.
-/// Used for testing and can be called by other functions that need SAM text parsing.
-/// 
-/// # Arguments
-/// * `sam_text` - Raw SAM format data as string
-/// * `p_score_cutoff` - Minimum score threshold (AS:i score + read length)
-/// 
-/// # Returns
-/// HashSet of reference IDs that have reads meeting the score cutoff
-pub fn extract_candidates_from_sam_text(
-    sam_text: &str,
-    p_score_cutoff: f64,
-) -> HashSet<String> {
-    let mut candidate_otus = HashSet::new();
-
-    for line in sam_text.lines() {
-        if let Some(ref_name) = parse_sam_line(line, p_score_cutoff) {
-            candidate_otus.insert(ref_name);
-        }
-    }
-
-    candidate_otus
-}
 
 /// Extract candidate OTU reference IDs by running bowtie2 directly with streaming
 /// 
@@ -119,7 +100,7 @@ pub fn find_candidate_otus_with_bowtie2(
     use std::process::{Command, Stdio};
     use std::io::{BufRead, BufReader};
 
-    info!("running bowtie2 directly from rust: index={}, reads={:?}, cutoff={}", 
+    info!("running bowtie2: index={}, reads={:?}, cutoff={}", 
           bowtie_index_path, read_paths, p_score_cutoff);
     py.allow_threads(|| {
         let mut cmd = Command::new("bowtie2");
@@ -236,65 +217,5 @@ mod tests {
         assert_eq!(result, None);
     }
 
-    #[test]
-    fn test_extract_candidates_from_sam_text_basic() {
-        let sam_data = "@HD\tVN:1.0\tSO:unsorted
-@SQ\tSN:ref1\tLN:1000
-@SQ\tSN:ref2\tLN:2000
-read1\t0\tref1\t100\t255\t50M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t*\tAS:i:45
-read2\t0\tref2\t200\t255\t30M\t*\t0\t0\tTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\t*\tAS:i:25
-read3\t0\tref1\t300\t255\t40M\t*\t0\t0\tCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\t*\tAS:i:2";
-
-        let result = extract_candidates_from_sam_text(sam_data, 0.01);
-
-        // Should find 2 unique references since scores are:
-        // read1: AS:i:45 + 50 = 95.0
-        // read2: AS:i:25 + 30 = 55.0  
-        // read3: AS:i:2 + 40 = 42.0
-        assert_eq!(result.len(), 2, "Should find 2 unique references");
-        assert!(result.contains("ref1"), "Should contain ref1");
-        assert!(result.contains("ref2"), "Should contain ref2");
-    }
-
-    #[test]
-    fn test_extract_candidates_from_sam_text_with_cutoff() {
-        let sam_data = "@HD\tVN:1.0\tSO:unsorted
-read1\t0\tref1\t100\t255\t50M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t*\tAS:i:45
-read2\t0\tref2\t200\t255\t30M\t*\t0\t0\tTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\t*\tAS:i:25
-read3\t0\tref1\t300\t255\t40M\t*\t0\t0\tCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\t*\tAS:i:2";
-
-        let result = extract_candidates_from_sam_text(sam_data, 50.0);
-
-        // Only read1 (95.0) and read2 (55.0) should pass, read3 (42.0) should be filtered
-        assert_eq!(result.len(), 2, "Should find 2 references with high cutoff");
-        assert!(result.contains("ref1"), "Should contain ref1");
-        assert!(result.contains("ref2"), "Should contain ref2");
-    }
-
-    #[test]
-    fn test_extract_candidates_from_sam_text_very_high_cutoff() {
-        let sam_data = "@HD\tVN:1.0\tSO:unsorted
-read1\t0\tref1\t100\t255\t50M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t*\tAS:i:45";
-
-        let result = extract_candidates_from_sam_text(sam_data, 100.0);
-
-        // No reads should pass this cutoff (read1 is 95.0)
-        assert_eq!(result.len(), 0, "Should find no references with very high cutoff");
-    }
-
-    #[test]
-    fn test_extract_candidates_from_sam_text_deduplication() {
-        let sam_data = "@HD\tVN:1.0\tSO:unsorted
-read1\t0\tref1\t100\t255\t50M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\t*\tAS:i:45
-read2\t0\tref1\t200\t255\t30M\t*\t0\t0\tTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\t*\tAS:i:25";
-
-        let result = extract_candidates_from_sam_text(sam_data, 0.01);
-
-        // Even if multiple reads map to ref1, it should only appear once in the set
-        assert_eq!(result.len(), 1, "Should deduplicate reference names");
-        assert!(result.contains("ref1"), "Should contain ref1");
-        let ref1_count = result.iter().filter(|&r| r == "ref1").count();
-        assert_eq!(ref1_count, 1, "Each reference should appear only once in the result set");
-    }
 
 }
@@ -1,4 +1,4 @@
-use crate::parse_sam::MinimalAlignment;
+use crate::sam::MinimalAlignment;
 use crate::em::find_updated_score;
 use crate::{UniqueReads, MultiMappingReads};
 use rustc_hash::FxHashMap;
 
@@ -632,7 +632,7 @@ mod tests {
 
     #[test]
     fn test_em_integration_with_real_sam_data() {
-        use crate::build_matrix;
+        use crate::matrix::build_matrix;
 
         // Use real SAM data with multi-mapping reads to test the full pipeline
         let sam_path = "example/rust/test_em_with_multimapping.sam";
Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,6 @@`
`8`	`8`	`def intermediate():`
`9`	`9`	`"""A namespace for storing intermediate values."""`
`10`	`10`	`return SimpleNamespace(`
`11`		`- isolate_high_scores={},`
`12`	`11`	`to_otus=set(),`
`13`	`12`	`)`
`14`	`13`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-use crate::parse_sam::MinimalAlignment;`
	`1`	`+use crate::sam::MinimalAlignment;`
`2`	`2`	`use crate::em::find_updated_score;`
`3`	`3`	`use crate::{UniqueReads, MultiMappingReads};`
`4`	`4`	`use rustc_hash::FxHashMap;`