@@ -50,6 +50,7 @@ pub struct PathoscopeResults {
5050/// pyo3 interface
5151fn rust ( _py : Python , m : & PyModule ) -> PyResult < ( ) > {
5252 m. add_class :: < PathoscopeResults > ( ) ?;
53+ m. add_function ( wrap_pyfunction ! ( parse_isolate_scores, m) ?) ?;
5354 m. add_function ( wrap_pyfunction ! ( run_expectation_maximization, m) ?) ?;
5455 m. add_function ( wrap_pyfunction ! ( run_eliminate_subtraction, m) ?) ?;
5556 m. add_function ( wrap_pyfunction ! ( calculate_coverage_from_em_results, m) ?) ?;
@@ -99,6 +100,65 @@ pub fn run_eliminate_subtraction(
99100}
100101
101102
103+ #[ pyfunction]
104+ /// Parse isolate SAM file and extract high scores for each read
105+ pub fn parse_isolate_scores (
106+ _py : Python ,
107+ sam_path : String ,
108+ p_score_cutoff : f64 ,
109+ ) -> PyResult < HashMap < String , f64 > > {
110+ use rust_htslib:: { bam, bam:: Read } ;
111+
112+ let mut reader = bam:: Reader :: from_path ( & sam_path)
113+ . map_err ( |e| PyErr :: new :: < PyIOError , _ > ( format ! ( "Failed to open SAM file '{}': {}" , sam_path, e) ) ) ?;
114+
115+ let mut isolate_high_scores: HashMap < String , f64 > = HashMap :: new ( ) ;
116+
117+ for result in reader. records ( ) {
118+ let record = result. map_err ( |e| PyErr :: new :: < PyIOError , _ > ( format ! ( "Error reading record: {}" , e) ) ) ?;
119+
120+ // Skip unmapped reads
121+ if record. is_unmapped ( ) {
122+ continue ;
123+ }
124+
125+ // Get read ID (qname)
126+ let read_id = std:: str:: from_utf8 ( record. qname ( ) )
127+ . map_err ( |e| PyErr :: new :: < PyIOError , _ > ( format ! ( "Invalid UTF-8 in read ID: {}" , e) ) ) ?
128+ . to_string ( ) ;
129+
130+ // Get read length
131+ let read_length = record. seq_len ( ) ;
132+
133+ // Get alignment score from AS:i: auxiliary field
134+ let as_score = match record. aux ( b"AS" ) {
135+ Ok ( aux) => match aux {
136+ rust_htslib:: bam:: record:: Aux :: I32 ( score) => score as f64 ,
137+ rust_htslib:: bam:: record:: Aux :: I8 ( score) => score as f64 ,
138+ rust_htslib:: bam:: record:: Aux :: I16 ( score) => score as f64 ,
139+ rust_htslib:: bam:: record:: Aux :: U8 ( score) => score as f64 ,
140+ rust_htslib:: bam:: record:: Aux :: U16 ( score) => score as f64 ,
141+ rust_htslib:: bam:: record:: Aux :: U32 ( score) => score as f64 ,
142+ _ => continue , // Skip records with unexpected AS type
143+ } ,
144+ Err ( _) => continue , // Skip records without AS field
145+ } ;
146+
147+ // Calculate total score (AS score + read length, matching original logic)
148+ let total_score = as_score + read_length as f64 ;
149+
150+ // Apply score cutoff
151+ if total_score >= p_score_cutoff {
152+ // Track highest score for this read
153+ if total_score > * isolate_high_scores. get ( & read_id) . unwrap_or ( & 0.0 ) {
154+ isolate_high_scores. insert ( read_id, total_score) ;
155+ }
156+ }
157+ }
158+
159+ Ok ( isolate_high_scores)
160+ }
161+
102162#[ pyfunction]
103163/// Entry point for expectation_maximization
104164pub fn run_expectation_maximization (
0 commit comments