Skip to content

Commit 8c273c4

Browse files
authored
perf: improve performance of isolate mapping
1 parent ff3626e commit 8c273c4

File tree

4 files changed

+106
-691
lines changed

4 files changed

+106
-691
lines changed

src/lib.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pub struct PathoscopeResults {
5050
/// pyo3 interface
5151
fn rust(_py: Python, m: &PyModule) -> PyResult<()> {
5252
m.add_class::<PathoscopeResults>()?;
53+
m.add_function(wrap_pyfunction!(parse_isolate_scores, m)?)?;
5354
m.add_function(wrap_pyfunction!(run_expectation_maximization, m)?)?;
5455
m.add_function(wrap_pyfunction!(run_eliminate_subtraction, m)?)?;
5556
m.add_function(wrap_pyfunction!(calculate_coverage_from_em_results, m)?)?;
@@ -99,6 +100,65 @@ pub fn run_eliminate_subtraction(
99100
}
100101

101102

103+
#[pyfunction]
104+
/// Parse isolate SAM file and extract high scores for each read
105+
pub fn parse_isolate_scores(
106+
_py: Python,
107+
sam_path: String,
108+
p_score_cutoff: f64,
109+
) -> PyResult<HashMap<String, f64>> {
110+
use rust_htslib::{bam, bam::Read};
111+
112+
let mut reader = bam::Reader::from_path(&sam_path)
113+
.map_err(|e| PyErr::new::<PyIOError, _>(format!("Failed to open SAM file '{}': {}", sam_path, e)))?;
114+
115+
let mut isolate_high_scores: HashMap<String, f64> = HashMap::new();
116+
117+
for result in reader.records() {
118+
let record = result.map_err(|e| PyErr::new::<PyIOError, _>(format!("Error reading record: {}", e)))?;
119+
120+
// Skip unmapped reads
121+
if record.is_unmapped() {
122+
continue;
123+
}
124+
125+
// Get read ID (qname)
126+
let read_id = std::str::from_utf8(record.qname())
127+
.map_err(|e| PyErr::new::<PyIOError, _>(format!("Invalid UTF-8 in read ID: {}", e)))?
128+
.to_string();
129+
130+
// Get read length
131+
let read_length = record.seq_len();
132+
133+
// Get alignment score from AS:i: auxiliary field
134+
let as_score = match record.aux(b"AS") {
135+
Ok(aux) => match aux {
136+
rust_htslib::bam::record::Aux::I32(score) => score as f64,
137+
rust_htslib::bam::record::Aux::I8(score) => score as f64,
138+
rust_htslib::bam::record::Aux::I16(score) => score as f64,
139+
rust_htslib::bam::record::Aux::U8(score) => score as f64,
140+
rust_htslib::bam::record::Aux::U16(score) => score as f64,
141+
rust_htslib::bam::record::Aux::U32(score) => score as f64,
142+
_ => continue, // Skip records with unexpected AS type
143+
},
144+
Err(_) => continue, // Skip records without AS field
145+
};
146+
147+
// Calculate total score (AS score + read length, matching original logic)
148+
let total_score = as_score + read_length as f64;
149+
150+
// Apply score cutoff
151+
if total_score >= p_score_cutoff {
152+
// Track highest score for this read
153+
if total_score > *isolate_high_scores.get(&read_id).unwrap_or(&0.0) {
154+
isolate_high_scores.insert(read_id, total_score);
155+
}
156+
}
157+
}
158+
159+
Ok(isolate_high_scores)
160+
}
161+
102162
#[pyfunction]
103163
/// Entry point for expectation_maximization
104164
pub fn run_expectation_maximization(

0 commit comments

Comments
 (0)