Commit 25cb88a

fix: resolve write_isolate_fasta error
- Fix `write_isolate_fasta` error by porting from `virtool-workflow`.
- Remove unneeded assets from Dockerfile.
- Remove unused Python code.
- Make logging consistent.
- Update `virtool`.

1 parent b10da52 · commit 25cb88a

14 files changed (+58, -294 lines)

Dockerfile

Lines changed: 1 addition & 8 deletions
@@ -1,14 +1,8 @@
 FROM python:3.13-bookworm AS deps
 WORKDIR /app
 COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/bowtie2/2.5.4/bowtie* /usr/local/bin/
-COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/hmmer/3.2.1 /opt/hmmer
-COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/fastqc/0.11.9 /opt/fastqc
 COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/pigz/2.8/pigz /usr/local/bin/
 COPY --from=ghcr.io/virtool/tools:1.1.0 /tools/samtools/1.22.1/bin/samtools /usr/local/bin/
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends default-jre && \
-    rm -rf /var/lib/apt/lists/* && \
-    apt-get clean

 FROM python:3.13-bookworm AS uv
 WORKDIR /app
@@ -29,8 +23,7 @@ RUN uv run maturin develop --release
 FROM deps AS base
 WORKDIR /app
 ENV VIRTUAL_ENV=/app/.venv \
-    PATH="/app/.venv/bin:/opt/fastqc:/opt/hmmer/bin:${PATH}"
-RUN chmod ugo+x /opt/fastqc/fastqc
+    PATH="/app/.venv/bin:${PATH}"
 COPY --from=uv /app/.venv /app/.venv
 COPY --from=uv /app/python /app/python
 COPY fixtures.py workflow.py VERSION* ./

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ features = ["pyo3/extension-module"]
 asyncio_mode = "auto"

 [tool.uv.sources]
-virtool = { git = "https://github.com/virtool/virtool", tag = "31.11.2" }
+virtool = { git = "https://github.com/virtool/virtool", tag = "31.11.3" }

 [build-system]
 requires = ["maturin>=1.0,<2.0"]

python/workflow_pathoscope/utils.py

Lines changed: 20 additions & 83 deletions
@@ -1,85 +1,34 @@
 import csv
-from functools import cached_property
+import json
 from pathlib import Path
-from typing import Any

 from workflow_pathoscope.rust import run_expectation_maximization, PathoscopeResults


-class SamLine:
-    def __init__(self, line: str):
-        self._line = line
+def write_isolate_fasta(
+    otu_ids: set[str],
+    json_path: Path,
+    target_path: Path,
+) -> dict[str, int]:
+    """Generate a FASTA file for all the isolates of the OTUs specified by ``otu_ids``.

-    def __str__(self) -> str:
-        return self.line
-
-    @property
-    def line(self) -> str:
-        """The SAM line used to create the object."""
-        return self._line
-
-    @property
-    def read_id(self) -> str:
-        """The ID of the mapped read."""
-        return self.fields[0]
-
-    @cached_property
-    def read_length(self) -> int:
-        """The length of the mapped read."""
-        return len(self.fields[9])
-
-    @cached_property
-    def fields(self) -> list[Any]:
-        """The SAM fields"""
-        return self.line.split("\t")
-
-    @cached_property
-    def position(self) -> int:
-        """The position of the read on the reference."""
-        return int(self.fields[3])
-
-    @cached_property
-    def score(self) -> float:
-        """The Pathoscope score for the alignment."""
-        return find_sam_align_score(self.fields)
-
-    @cached_property
-    def bitwise_flag(self) -> int:
-        """The SAM bitwise flag."""
-        return int(self.fields[1])
-
-    @cached_property
-    def unmapped(self) -> bool:
-        """The read is unmapped.
-
-        This value is derived from the bitwise flag (0x4: segment unmapped).
-        """
-        return self.bitwise_flag & 4 == 4
-
-    @cached_property
-    def ref_id(self) -> str:
-        """The ID of the mapped reference sequence."""
-        return self.fields[2]
-
-
-def find_sam_align_score(fields: list[Any]) -> float:
-    """Find the Bowtie2 alignment score for the given split line (``fields``).
-
-    Searches the SAM fields for the ``AS:i`` substring and extracts the Bowtie2-specific
-    alignment score. This will not work for other aligners.
-
-    :param fields: a SAM line that has been split on "\t"
-    :return: the alignment score
+    :param otu_ids: the list of OTU IDs for which to generate and index
+    :param json_path: the path to the reference index json file
+    :param target_path: the path to write the fasta file to
+    :return: a dictionary of the lengths of all sequences keyed by their IDs

     """
-    read_length = float(len(fields[9]))
+    lengths = {}

-    for field in fields:
-        if field.startswith("AS:i:"):
-            a_score = int(field[5:])
-            return a_score + read_length
+    with open(json_path) as f_json, open(target_path, "w") as f_target:
+        for otu in json.load(f_json):
+            if otu["_id"] in otu_ids:
+                for isolate in otu["isolates"]:
+                    for sequence in isolate["sequences"]:
+                        f_target.write(f">{sequence['_id']}\n{sequence['sequence']}\n")
+                        lengths[sequence["_id"]] = len(sequence["sequence"])

-    raise ValueError("Could not find alignment score")
+    return lengths


 def write_report(
@@ -208,15 +157,3 @@ def run_pathoscope(
         p_score_cutoff,
         ref_lengths,
     )
-
-
-# Backward compatibility alias - DEPRECATED
-def run_pathoscope_sam(
-    sam_path: Path, p_score_cutoff: float, ref_lengths: dict[str, int]
-):
-    """
-    Deprecated: Use run_pathoscope instead.
-
-    This function is kept for backward compatibility.
-    """
-    return run_pathoscope(sam_path, p_score_cutoff, ref_lengths)
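For reference, a minimal sketch of exercising the ported write_isolate_fasta against a toy reference index, assuming the module is importable as workflow_pathoscope.utils per the file path above. The file names, OTU ID, and sequence are made up for illustration:

    import json
    from pathlib import Path

    from workflow_pathoscope.utils import write_isolate_fasta

    # Toy reference index: one OTU with a single isolate and sequence.
    reference = [
        {
            "_id": "otu_1",
            "isolates": [
                {"sequences": [{"_id": "seq_1", "sequence": "ATGCATGC"}]},
            ],
        },
    ]

    json_path = Path("reference.json")
    json_path.write_text(json.dumps(reference))

    # Only OTUs whose IDs appear in the set are written to the FASTA file.
    lengths = write_isolate_fasta({"otu_1"}, json_path, Path("isolates.fa"))
    print(lengths)  # {'seq_1': 8}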

src/em.rs

Lines changed: 3 additions & 3 deletions
@@ -306,17 +306,17 @@ fn check_convergence(
 ) -> bool {
     // Log convergence progress
     if iteration == 0 || iteration % 10 == 9 || cutoff <= epsilon {
-        info!("EM iteration {}: convergence delta = {:.2e}", iteration + 1, cutoff);
+        info!("em iteration {}: convergence delta = {:.2e}", iteration + 1, cutoff);
     }

     if cutoff <= epsilon || nu_length == 1 {
-        info!("EM converged after {} iterations (delta: {:.2e})", iteration + 1, cutoff);
+        info!("em converged after {} iterations (delta: {:.2e})", iteration + 1, cutoff);
         return true;
     }

     // Detect potential divergence
     if iteration > 10 && cutoff > 1e-2 {
-        info!("EM may be diverging at iteration {} (delta: {:.2e})", iteration + 1, cutoff);
+        info!("em may be diverging at iteration {} (delta: {:.2e})", iteration + 1, cutoff);
     }

     false
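Read as pseudocode, the check above boils down to the following. This is only an illustrative Python rendering of the Rust logic shown in the hunk; cutoff is the per-iteration delta and the function name is mine:

    def has_converged(iteration: int, cutoff: float, epsilon: float, nu_length: int) -> bool:
        # Converged once the per-iteration delta drops to epsilon or below,
        # or when nu_length is 1.
        if cutoff <= epsilon or nu_length == 1:
            return True

        # Late iterations with a still-large delta are flagged as possible divergence.
        if iteration > 10 and cutoff > 1e-2:
            print(f"em may be diverging at iteration {iteration + 1} (delta: {cutoff:.2e})")

        return False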

src/lib.rs

Lines changed: 5 additions & 5 deletions
@@ -157,14 +157,14 @@ pub fn run_eliminate_subtraction(
     subtraction_sam_path: String,
     output_sam_path: String,
 ) -> PyResult<()> {
-    info!("Starting subtraction elimination from Python: isolate={}, subtraction={}",
+    info!("starting subtraction elimination from Python: isolate={}, subtraction={}",
         isolate_sam_path, subtraction_sam_path);

     // Call the pure Rust function and map errors to PyResult
     let result = eliminate_subtraction(&isolate_sam_path, &subtraction_sam_path, &output_sam_path)
         .map_err(|e| PyErr::new::<PyIOError, _>(e.to_string()))?;

-    info!("Subtraction elimination completed successfully");
+    info!("subtraction elimination completed successfully");
     Ok(result)
 }

@@ -178,7 +178,7 @@ pub fn parse_isolate_scores(
 ) -> PyResult<HashMap<String, f64>> {
     use rust_htslib::{bam, bam::Read};

-    info!("Parsing isolate scores from {} with cutoff {}", alignment_path, p_score_cutoff);
+    info!("parsing isolate scores from {} with cutoff {}", alignment_path, p_score_cutoff);

     let mut reader = bam::Reader::from_path(&alignment_path)
         .map_err(|e| PyErr::new::<PyIOError, _>(format!("Failed to open alignment file '{}': {}", alignment_path, e)))?;
@@ -216,7 +216,7 @@ pub fn parse_isolate_scores(
         }
     }

-    info!("Parsed {} isolate scores", isolate_high_scores.len());
+    info!("parsed {} isolate scores", isolate_high_scores.len());
     Ok(isolate_high_scores)
 }

@@ -228,7 +228,7 @@ pub fn run_expectation_maximization(
     p_score_cutoff: f64,
     ref_lengths: HashMap<String, usize>,
 ) -> PyResult<PathoscopeResults> {
-    info!("Starting EM algorithm: file={}, cutoff={}", alignment_path, p_score_cutoff);
+    info!("starting em algorithm: file={}, cutoff={}", alignment_path, p_score_cutoff);
     run_expectation_maximization_streaming(_py, alignment_path, p_score_cutoff, ref_lengths, 10000)
 }
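For orientation, run_expectation_maximization is the function that utils.py imports from workflow_pathoscope.rust (see the Python diff above). A hedged sketch of calling it from Python; the SAM path and cutoff are placeholders, and the assumption that the lengths returned by write_isolate_fasta can be passed as ref_lengths is mine:

    from pathlib import Path

    from workflow_pathoscope.rust import run_expectation_maximization
    from workflow_pathoscope.utils import write_isolate_fasta

    # Sequence lengths keyed by sequence ID; assumed here to serve as ref_lengths.
    ref_lengths = write_isolate_fasta(
        {"otu_1"}, Path("reference.json"), Path("isolates.fa")
    )

    # Placeholder alignment path and score cutoff.
    results = run_expectation_maximization("to_isolates.sam", 0.01, ref_lengths)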

src/logging.rs

Lines changed: 1 addition & 1 deletion
@@ -198,7 +198,7 @@ pub fn init_logging(_py: Python, log_level: Option<String>) -> PyResult<()> {
     });

     // Test that logging works
-    log::info!("Rust logging initialized with custom Python bridge");
+    log::info!("rust logging initialized");
     Ok(())
 }

src/matrix.rs

Lines changed: 2 additions & 2 deletions
@@ -79,7 +79,7 @@ impl PathoscopeMatrix {

         let unique_count = matrix.unique_reads.len();
         let multi_count = matrix.multi_mapping_reads.len();
-        info!("Matrix created: {} unique reads, {} multi-mapping reads, score range [{:.2}, {:.2}]",
+        info!("matrix created: {} unique reads, {} multi-mapping reads, score range [{:.2}, {:.2}]",
             unique_count, multi_count, min_score, max_score);

         matrix
@@ -324,7 +324,7 @@ pub fn build_matrix_with_chunk_size(
 ) -> Result<MatrixResult, String> {
     let p_score_cutoff = p_score_cutoff.unwrap_or(0.01);

-    info!("Building matrix from '{}' with score cutoff {} and chunk size {}",
+    info!("building matrix from '{}' with score cutoff {} and chunk size {}",
         alignment_path, p_score_cutoff, chunk_size);

     // Open reader for streaming

src/stream_processor.rs

Lines changed: 2 additions & 2 deletions
@@ -49,7 +49,7 @@ pub fn extract_candidate_otus_from_sam_file<P: AsRef<Path>>(
 ) -> Result<HashSet<String>, StreamProcessorError> {
     let path_str = sam_path.as_ref().to_string_lossy().to_string();

-    info!("Extracting candidate OTUs from {} with score cutoff {}", path_str, p_score_cutoff);
+    info!("extracting candidate otus from {} with score cutoff {}", path_str, p_score_cutoff);

     let mut reader = bam::Reader::from_path(&sam_path)
         .map_err(|e| StreamProcessorError::FileOpen {
@@ -58,7 +58,7 @@ pub fn extract_candidate_otus_from_sam_file<P: AsRef<Path>>(
         })?;

     let result = extract_candidate_otus_from_reader(&mut reader, p_score_cutoff)?;
-    info!("Found {} candidate OTUs", result.len());
+    info!("found {} candidate otus", result.len());
     Ok(result)
 }

src/subtraction.rs

Lines changed: 4 additions & 4 deletions
@@ -129,7 +129,7 @@ impl SubtractionProcessor {

 /// Parse subtraction SAM file using parse_sam module and return scores for each read
 pub fn parse_subtraction_sam(path: &str) -> Result<HashMap<String, f32>, BamProcessingError> {
-    info!("Parsing subtraction SAM file: {}", path);
+    info!("parsing subtraction SAM file: {}", path);

     let sam_lines = parse_sam(path, None)
         .map_err(BamProcessingError::SamParse)?;
@@ -142,7 +142,7 @@ pub fn parse_subtraction_sam(path: &str) -> Result<HashMap<String, f32>, BamProcessingError> {
         }
     }

-    info!("Parsed {} subtraction scores from {}", high_scores.len(), path);
+    info!("parsed {} subtraction scores from {}", high_scores.len(), path);
     Ok(high_scores)
 }

@@ -155,7 +155,7 @@ pub fn eliminate_subtraction(
     subtraction_sam_path: &str,
     output_sam_path: &str,
 ) -> Result<(), BamProcessingError> {
-    info!("Starting subtraction elimination: isolate={}, subtraction={}, output={}",
+    info!("starting subtraction elimination: isolate={}, subtraction={}, output={}",
         isolate_sam_path, subtraction_sam_path, output_sam_path);

     // Parse subtraction scores
@@ -165,7 +165,7 @@ pub fn eliminate_subtraction(
     // Process isolate file
     let subtracted_ids = process_isolate_file(isolate_sam_path, output_sam_path, &processor)?;

-    info!("Subtraction complete: {} reads eliminated", subtracted_ids.len());
+    info!("subtraction complete: {} reads eliminated", subtracted_ids.len());

     // Write subtracted IDs file
     write_subtracted_ids_file(output_sam_path, &subtracted_ids)?;
