virtool
diff --git a/Dockerfile b/Dockerfile
Lines changed: 1 addition & 8 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/python/workflow_pathoscope/utils.py b/python/workflow_pathoscope/utils.py
Lines changed: 20 additions & 83 deletions
diff --git a/src/em.rs b/src/em.rs
Lines changed: 3 additions & 3 deletions
diff --git a/src/lib.rs b/src/lib.rs
Lines changed: 5 additions & 5 deletions
diff --git a/src/logging.rs b/src/logging.rs
Lines changed: 1 addition & 1 deletion
diff --git a/src/matrix.rs b/src/matrix.rs
Lines changed: 2 additions & 2 deletions
diff --git a/src/stream_processor.rs b/src/stream_processor.rs
Lines changed: 2 additions & 2 deletions
diff --git a/src/subtraction.rs b/src/subtraction.rs
Lines changed: 4 additions & 4 deletions
@@ -1,14 +1,8 @@
 FROM python:3.13-bookworm AS deps
 WORKDIR /app
 COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/bowtie2/2.5.4/bowtie* /usr/local/bin/
-COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/hmmer/3.2.1 /opt/hmmer
-COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/fastqc/0.11.9 /opt/fastqc
 COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/pigz/2.8/pigz /usr/local/bin/
 COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/samtools/1.22.1/bin/samtools /usr/local/bin/
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends default-jre && \
-    rm -rf /var/lib/apt/lists/* && \
-    apt-get clean
 
 FROM python:3.13-bookworm AS uv
 WORKDIR /app
@@ -29,8 +23,7 @@ RUN uv run maturin develop --release
 FROM deps AS base
 WORKDIR /app
 ENV VIRTUAL_ENV=/app/.venv \
-    PATH="/app/.venv/bin:/opt/fastqc:/opt/hmmer/bin:${PATH}"
-RUN chmod ugo+x /opt/fastqc/fastqc
+    PATH="/app/.venv/bin:${PATH}"
 COPY --from=uv /app/.venv /app/.venv
 COPY --from=uv /app/python /app/python
 COPY fixtures.py workflow.py VERSION* ./
 
@@ -37,7 +37,7 @@ features = ["pyo3/extension-module"]
 asyncio_mode = "auto"
 
 [tool.uv.sources]
-virtool = { git = "https://github.com/virtool/virtool", tag = "31.11.2" }
+virtool = { git = "https://github.com/virtool/virtool", tag = "31.11.3" }
 
 [build-system]
 requires = ["maturin>=1.0,<2.0"]
 
@@ -1,85 +1,34 @@
 import csv
-from functools import cached_property
+import json
 from pathlib import Path
-from typing import Any
 
 from workflow_pathoscope.rust import run_expectation_maximization, PathoscopeResults
 
 
-class SamLine:
-    def __init__(self, line: str):
-        self._line = line
+def write_isolate_fasta(
+    otu_ids: set[str],
+    json_path: Path,
+    target_path: Path,
+) -> dict[str, int]:
+    """Generate a FASTA file for all the isolates of the OTUs specified by ``otu_ids``.
 
-    def __str__(self) -> str:
-        return self.line
-
-    @property
-    def line(self) -> str:
-        """The SAM line used to create the object."""
-        return self._line
-
-    @property
-    def read_id(self) -> str:
-        """The ID of the mapped read."""
-        return self.fields[0]
-
-    @cached_property
-    def read_length(self) -> int:
-        """The length of the mapped read."""
-        return len(self.fields[9])
-
-    @cached_property
-    def fields(self) -> list[Any]:
-        """The SAM fields"""
-        return self.line.split("\t")
-
-    @cached_property
-    def position(self) -> int:
-        """The position of the read on the reference."""
-        return int(self.fields[3])
-
-    @cached_property
-    def score(self) -> float:
-        """The Pathoscope score for the alignment."""
-        return find_sam_align_score(self.fields)
-
-    @cached_property
-    def bitwise_flag(self) -> int:
-        """The SAM bitwise flag."""
-        return int(self.fields[1])
-
-    @cached_property
-    def unmapped(self) -> bool:
-        """The read is unmapped.
-
-        This value is derived from the bitwise flag (0x4: segment unmapped).
-        """
-        return self.bitwise_flag & 4 == 4
-
-    @cached_property
-    def ref_id(self) -> str:
-        """The ID of the mapped reference sequence."""
-        return self.fields[2]
-
-
-def find_sam_align_score(fields: list[Any]) -> float:
-    """Find the Bowtie2 alignment score for the given split line (``fields``).
-
-    Searches the SAM fields for the ``AS:i`` substring and extracts the Bowtie2-specific
-    alignment score. This will not work for other aligners.
-
-    :param fields: a SAM line that has been split on "\t"
-    :return: the alignment score
+    :param otu_ids: the set of OTU IDs whose isolate sequences should be written
+    :param json_path: the path to the reference index JSON file
+    :param target_path: the path to write the FASTA file to
+    :return: a dictionary of the lengths of all written sequences keyed by their IDs
 
     """
-    read_length = float(len(fields[9]))
+    lengths = {}
 
-    for field in fields:
-        if field.startswith("AS:i:"):
-            a_score = int(field[5:])
-            return a_score + read_length
+    with open(json_path) as f_json, open(target_path, "w") as f_target:
+        for otu in json.load(f_json):
+            if otu["_id"] in otu_ids:
+                for isolate in otu["isolates"]:
+                    for sequence in isolate["sequences"]:
+                        f_target.write(f">{sequence['_id']}\n{sequence['sequence']}\n")
+                        lengths[sequence["_id"]] = len(sequence["sequence"])
 
-    raise ValueError("Could not find alignment score")
+    return lengths
 
 
 def write_report(
@@ -208,15 +157,3 @@ def run_pathoscope(
         p_score_cutoff,
         ref_lengths,
     )
-
-
-# Backward compatibility alias - DEPRECATED
-def run_pathoscope_sam(
-    sam_path: Path, p_score_cutoff: float, ref_lengths: dict[str, int]
-):
-    """
-    Deprecated: Use run_pathoscope instead.
-
-    This function is kept for backward compatibility.
-    """
-    return run_pathoscope(sam_path, p_score_cutoff, ref_lengths)
@@ -306,17 +306,17 @@ fn check_convergence(
 ) -> bool {
     // Log convergence progress
     if iteration == 0 || iteration % 10 == 9 || cutoff <= epsilon {
-        info!("EM iteration {}: convergence delta = {:.2e}", iteration + 1, cutoff);
+        info!("em iteration {}: convergence delta = {:.2e}", iteration + 1, cutoff);
     }
 
     if cutoff <= epsilon || nu_length == 1 {
-        info!("EM converged after {} iterations (delta: {:.2e})", iteration + 1, cutoff);
+        info!("em converged after {} iterations (delta: {:.2e})", iteration + 1, cutoff);
         return true;
     }
 
     // Detect potential divergence
     if iteration > 10 && cutoff > 1e-2 {
-        info!("EM may be diverging at iteration {} (delta: {:.2e})", iteration + 1, cutoff);
+        info!("em may be diverging at iteration {} (delta: {:.2e})", iteration + 1, cutoff);
     }
 
     false
 
@@ -157,14 +157,14 @@ pub fn run_eliminate_subtraction(
     subtraction_sam_path: String,
     output_sam_path: String,
 ) -> PyResult<()> {
-    info!("Starting subtraction elimination from Python: isolate={}, subtraction={}", 
+    info!("starting subtraction elimination from Python: isolate={}, subtraction={}",
           isolate_sam_path, subtraction_sam_path);
 
     // Call the pure Rust function and map errors to PyResult
     let result = eliminate_subtraction(&isolate_sam_path, &subtraction_sam_path, &output_sam_path)
         .map_err(|e| PyErr::new::<PyIOError, _>(e.to_string()))?;
 
-    info!("Subtraction elimination completed successfully");
+    info!("subtraction elimination completed successfully");
     Ok(result)
 }
 
@@ -178,7 +178,7 @@ pub fn parse_isolate_scores(
 ) -> PyResult<HashMap<String, f64>> {
     use rust_htslib::{bam, bam::Read};
 
-    info!("Parsing isolate scores from {} with cutoff {}", alignment_path, p_score_cutoff);
+    info!("parsing isolate scores from {} with cutoff {}", alignment_path, p_score_cutoff);
 
     let mut reader = bam::Reader::from_path(&alignment_path)
         .map_err(|e| PyErr::new::<PyIOError, _>(format!("Failed to open alignment file '{}': {}", alignment_path, e)))?;
@@ -216,7 +216,7 @@ pub fn parse_isolate_scores(
         }
     }
 
-    info!("Parsed {} isolate scores", isolate_high_scores.len());
+    info!("parsed {} isolate scores", isolate_high_scores.len());
     Ok(isolate_high_scores)
 }
 
@@ -228,7 +228,7 @@ pub fn run_expectation_maximization(
     p_score_cutoff: f64,
     ref_lengths: HashMap<String, usize>,
 ) -> PyResult<PathoscopeResults> {
-    info!("Starting EM algorithm: file={}, cutoff={}", alignment_path, p_score_cutoff);
+    info!("starting em algorithm: file={}, cutoff={}", alignment_path, p_score_cutoff);
     run_expectation_maximization_streaming(_py, alignment_path, p_score_cutoff, ref_lengths, 10000)
 }
 
 
@@ -198,7 +198,7 @@ pub fn init_logging(_py: Python, log_level: Option<String>) -> PyResult<()> {
     });
 
     // Test that logging works
-    log::info!("Rust logging initialized with custom Python bridge");
+    log::info!("rust logging initialized");
     Ok(())
 }
 
 
@@ -79,7 +79,7 @@ impl PathoscopeMatrix {
 
         let unique_count = matrix.unique_reads.len();
         let multi_count = matrix.multi_mapping_reads.len();
-        info!("Matrix created: {} unique reads, {} multi-mapping reads, score range [{:.2}, {:.2}]", 
+        info!("matrix created: {} unique reads, {} multi-mapping reads, score range [{:.2}, {:.2}]",
               unique_count, multi_count, min_score, max_score);
 
         matrix
@@ -324,7 +324,7 @@ pub fn build_matrix_with_chunk_size(
 ) -> Result<MatrixResult, String> {
     let p_score_cutoff = p_score_cutoff.unwrap_or(0.01);
 
-    info!("Building matrix from '{}' with score cutoff {} and chunk size {}", 
+    info!("building matrix from '{}' with score cutoff {} and chunk size {}",
           alignment_path, p_score_cutoff, chunk_size);
 
     // Open reader for streaming
 
@@ -49,7 +49,7 @@ pub fn extract_candidate_otus_from_sam_file<P: AsRef<Path>>(
 ) -> Result<HashSet<String>, StreamProcessorError> {
     let path_str = sam_path.as_ref().to_string_lossy().to_string();
 
-    info!("Extracting candidate OTUs from {} with score cutoff {}", path_str, p_score_cutoff);
+    info!("extracting candidate otus from {} with score cutoff {}", path_str, p_score_cutoff);
 
     let mut reader = bam::Reader::from_path(&sam_path)
         .map_err(|e| StreamProcessorError::FileOpen {
@@ -58,7 +58,7 @@ pub fn extract_candidate_otus_from_sam_file<P: AsRef<Path>>(
         })?;
 
     let result = extract_candidate_otus_from_reader(&mut reader, p_score_cutoff)?;
-    info!("Found {} candidate OTUs", result.len());
+    info!("found {} candidate otus", result.len());
     Ok(result)
 }
 
 
@@ -129,7 +129,7 @@ impl SubtractionProcessor {
 
 /// Parse subtraction SAM file using parse_sam module and return scores for each read
 pub fn parse_subtraction_sam(path: &str) -> Result<HashMap<String, f32>, BamProcessingError> {
-    info!("Parsing subtraction SAM file: {}", path);
+    info!("parsing subtraction SAM file: {}", path);
 
     let sam_lines = parse_sam(path, None)
         .map_err(BamProcessingError::SamParse)?;
@@ -142,7 +142,7 @@ pub fn parse_subtraction_sam(path: &str) -> Result<HashMap<String, f32>, BamProc
         }
     }
 
-    info!("Parsed {} subtraction scores from {}", high_scores.len(), path);
+    info!("parsed {} subtraction scores from {}", high_scores.len(), path);
     Ok(high_scores)
 }
 
@@ -155,7 +155,7 @@ pub fn eliminate_subtraction(
     subtraction_sam_path: &str,
     output_sam_path: &str,
 ) -> Result<(), BamProcessingError> {
-    info!("Starting subtraction elimination: isolate={}, subtraction={}, output={}", 
+    info!("starting subtraction elimination: isolate={}, subtraction={}, output={}",
           isolate_sam_path, subtraction_sam_path, output_sam_path);
 
     // Parse subtraction scores
@@ -165,7 +165,7 @@ pub fn eliminate_subtraction(
     // Process isolate file
     let subtracted_ids = process_isolate_file(isolate_sam_path, output_sam_path, &processor)?;
 
-    info!("Subtraction complete: {} reads eliminated", subtracted_ids.len());
+    info!("subtraction complete: {} reads eliminated", subtracted_ids.len());
 
     // Write subtracted IDs file
     write_subtracted_ids_file(output_sam_path, &subtracted_ids)?;
Original file line number	Diff line number	Diff line change
`@@ -306,17 +306,17 @@ fn check_convergence(`
`306`	`306`	`) -> bool {`
`307`	`307`	`// Log convergence progress`
`308`	`308`	`if iteration == 0 \|\| iteration % 10 == 9 \|\| cutoff <= epsilon {`
`309`		`- info!("EM iteration {}: convergence delta = {:.2e}", iteration + 1, cutoff);`
	`309`	`+ info!("em iteration {}: convergence delta = {:.2e}", iteration + 1, cutoff);`
`310`	`310`	`}`
`311`	`311`
`312`	`312`	`if cutoff <= epsilon \|\| nu_length == 1 {`
`313`		`- info!("EM converged after {} iterations (delta: {:.2e})", iteration + 1, cutoff);`
	`313`	`+ info!("em converged after {} iterations (delta: {:.2e})", iteration + 1, cutoff);`
`314`	`314`	`return true;`
`315`	`315`	`}`
`316`	`316`
`317`	`317`	`// Detect potential divergence`
`318`	`318`	`if iteration > 10 && cutoff > 1e-2 {`
`319`		`- info!("EM may be diverging at iteration {} (delta: {:.2e})", iteration + 1, cutoff);`
	`319`	`+ info!("em may be diverging at iteration {} (delta: {:.2e})", iteration + 1, cutoff);`
`320`	`320`	`}`
`321`	`321`
`322`	`322`	`false`
Original file line number	Diff line number	Diff line change
`@@ -198,7 +198,7 @@ pub fn init_logging(_py: Python, log_level: Option<String>) -> PyResult<()> {`
`198`	`198`	`});`
`199`	`199`
`200`	`200`	`// Test that logging works`
`201`		`- log::info!("Rust logging initialized with custom Python bridge");`
	`201`	`+ log::info!("rust logging initialized");`
`202`	`202`	`Ok(())`
`203`	`203`	`}`
`204`	`204`