Skip to content

Commit ff3626e

Browse files
authored
fix: prevent failures due to bad sam formatting
- Remove more --no-hd --no-sq headers. - Use rust-htslib to write SAM output.
1 parent c4ffabc commit ff3626e

File tree

7 files changed

+2137
-40
lines changed

7 files changed

+2137
-40
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ Run only Python/e2e tests:
4242
mise run test:python
4343
```
4444

45+
Pass arguments to pytest:
46+
47+
```bash
48+
mise run test:python -- -vv
49+
mise run test:python -- --snapshot-update
50+
mise run test:python -- tests/test_workflow.py::test_map_isolates
51+
```
52+
4553
Run clippy for linting:
4654

4755
```bash

example/rust/test_isolates_minimal.sam

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
@SQ SN:ref1 LN:1000
33
@SQ SN:ref2 LN:2000
44
@PG ID:test PN:test VN:1.0
5-
# Test comment line
5+
@CO Test comment line
66
read_eliminate 0 ref1 100 60 10M * 0 0 AAAAAAAAAA IIIIIIIIII AS:i:50
77
read_keep 0 ref1 200 60 10M * 0 0 CCCCCCCCCC IIIIIIIIII AS:i:80
88
read_equal 0 ref2 300 60 10M * 0 0 GGGGGGGGGG IIIIIIIIII AS:i:40

src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,8 +333,8 @@ mod tests {
333333
"Program header should be preserved"
334334
);
335335
assert_eq!(
336-
output_lines[4], "# Test comment line",
337-
"Comment lines should be preserved"
336+
output_lines[4], "@CO\tTest comment line",
337+
"Comment lines should be preserved in proper SAM format"
338338
);
339339

340340
// Verify correct reads are kept

src/subtraction.rs

Lines changed: 73 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use crate::parse_sam::parse_sam;
2+
use rust_htslib::{bam, bam::Read};
3+
use rust_htslib::bam::Format;
24
use std::collections::{HashMap, HashSet};
35
use std::fs::File;
4-
use std::io::{BufRead, BufReader, Write};
6+
use std::io::Write;
57
use std::path::Path;
68
use thiserror::Error;
79

@@ -16,6 +18,12 @@ pub enum SubtractionError {
1618
#[error("Failed to parse SAM file: {0}")]
1719
SamParse(String),
1820

21+
#[error("Failed to read BAM/SAM record: {source}")]
22+
BamRead { source: rust_htslib::errors::Error },
23+
24+
#[error("Failed to write BAM/SAM record: {source}")]
25+
BamWrite { source: rust_htslib::errors::Error },
26+
1927
#[error("Insufficient fields in SAM line")]
2028
InsufficientFields,
2129

@@ -162,46 +170,78 @@ pub fn eliminate_subtraction(
162170
Ok(())
163171
}
164172

165-
/// Process the isolate SAM file and write filtered output
173+
/// Process the isolate SAM file and write filtered output using rust-htslib
166174
fn process_isolate_file(
167175
input_path: &str,
168176
output_path: &str,
169177
processor: &SubtractionProcessor,
170178
) -> Result<HashSet<String>, SubtractionError> {
171-
// Open input file
172-
let file = File::open(input_path)
173-
.map_err(|e| SubtractionError::FileOpen {
174-
path: input_path.to_string(),
175-
source: e,
176-
})?;
177-
let lines = BufReader::new(file).lines();
178-
179-
// Create output file
180-
let mut sam_file = File::create(output_path)
181-
.map_err(|e| SubtractionError::FileCreate {
182-
path: output_path.to_string(),
183-
source: e,
184-
})?;
179+
// Open input file with rust-htslib reader
180+
let mut reader = bam::Reader::from_path(input_path)
181+
.map_err(|e| SubtractionError::BamRead { source: e })?;
182+
183+
// Get the header from input to preserve SQ lines and other headers
184+
let header_view = reader.header().clone();
185+
186+
// Create a new Header from the HeaderView
187+
let header = bam::Header::from_template(&header_view);
188+
189+
// Create output writer with the same header
190+
let mut writer = bam::Writer::from_path(output_path, &header, Format::Sam)
191+
.map_err(|e| SubtractionError::BamWrite { source: e })?;
185192

186193
let mut subtracted_read_ids = HashSet::new();
187194

188-
// Process each line
189-
for line in lines {
190-
let l = line.map_err(|e| SubtractionError::LineRead { source: e })?;
191-
192-
// Use processor to determine action
193-
match processor.process_sam_line(&l)? {
194-
Some(ProcessResult::Keep(output_line)) => {
195-
writeln!(&mut sam_file, "{}", output_line)
196-
.map_err(|e| SubtractionError::WriteOutput { source: e })?;
197-
}
198-
Some(ProcessResult::Eliminate(read_id)) => {
199-
subtracted_read_ids.insert(read_id);
200-
}
201-
None => {
202-
// Skip this line (empty, unmapped, malformed)
203-
continue;
204-
}
195+
// Process each record
196+
for result in reader.records() {
197+
let record = result.map_err(|e| SubtractionError::BamRead { source: e })?;
198+
199+
// Skip unmapped reads
200+
if record.is_unmapped() {
201+
continue;
202+
}
203+
204+
// Get read ID
205+
let read_id = std::str::from_utf8(record.qname())
206+
.map_err(|_| SubtractionError::InsufficientFields)?
207+
.to_string();
208+
209+
// Get reference name
210+
let ref_name = if record.tid() >= 0 {
211+
std::str::from_utf8(header_view.tid2name(record.tid() as u32))
212+
.unwrap_or("*")
213+
} else {
214+
"*"
215+
};
216+
217+
// Skip if reference is unmapped
218+
if ref_name == "*" {
219+
continue;
220+
}
221+
222+
// Calculate alignment score (AS score + read length)
223+
let read_length = record.seq_len() as f32;
224+
let as_score = match record.aux(b"AS") {
225+
Ok(aux) => match aux {
226+
rust_htslib::bam::record::Aux::I32(score) => score as f32,
227+
rust_htslib::bam::record::Aux::I8(score) => score as f32,
228+
rust_htslib::bam::record::Aux::I16(score) => score as f32,
229+
rust_htslib::bam::record::Aux::U8(score) => score as f32,
230+
rust_htslib::bam::record::Aux::U16(score) => score as f32,
231+
rust_htslib::bam::record::Aux::U32(score) => score as f32,
232+
_ => 0.0,
233+
},
234+
Err(_) => 0.0,
235+
};
236+
let isolate_score = as_score + read_length;
237+
238+
// Check if this read should be eliminated
239+
if processor.should_eliminate(&read_id, isolate_score) {
240+
subtracted_read_ids.insert(read_id);
241+
} else {
242+
// Write the record to output
243+
writer.write(&record)
244+
.map_err(|e| SubtractionError::BamWrite { source: e })?;
205245
}
206246
}
207247

0 commit comments

Comments
 (0)