11use crate :: parse_sam:: parse_sam;
2+ use rust_htslib:: { bam, bam:: Read } ;
3+ use rust_htslib:: bam:: Format ;
24use std:: collections:: { HashMap , HashSet } ;
35use std:: fs:: File ;
4- use std:: io:: { BufRead , BufReader , Write } ;
6+ use std:: io:: Write ;
57use std:: path:: Path ;
68use thiserror:: Error ;
79
@@ -16,6 +18,12 @@ pub enum SubtractionError {
1618 #[ error( "Failed to parse SAM file: {0}" ) ]
1719 SamParse ( String ) ,
1820
21+ #[ error( "Failed to read BAM/SAM record: {source}" ) ]
22+ BamRead { source : rust_htslib:: errors:: Error } ,
23+
24+ #[ error( "Failed to write BAM/SAM record: {source}" ) ]
25+ BamWrite { source : rust_htslib:: errors:: Error } ,
26+
1927 #[ error( "Insufficient fields in SAM line" ) ]
2028 InsufficientFields ,
2129
@@ -162,46 +170,78 @@ pub fn eliminate_subtraction(
162170 Ok ( ( ) )
163171}
164172
165- /// Process the isolate SAM file and write filtered output
173+ /// Process the isolate SAM file and write filtered output using rust-htslib
166174fn process_isolate_file (
167175 input_path : & str ,
168176 output_path : & str ,
169177 processor : & SubtractionProcessor ,
170178) -> Result < HashSet < String > , SubtractionError > {
171- // Open input file
172- let file = File :: open ( input_path)
173- . map_err ( |e| SubtractionError :: FileOpen {
174- path : input_path. to_string ( ) ,
175- source : e,
176- } ) ?;
177- let lines = BufReader :: new ( file) . lines ( ) ;
178-
179- // Create output file
180- let mut sam_file = File :: create ( output_path)
181- . map_err ( |e| SubtractionError :: FileCreate {
182- path : output_path. to_string ( ) ,
183- source : e,
184- } ) ?;
179+ // Open input file with rust-htslib reader
180+ let mut reader = bam:: Reader :: from_path ( input_path)
181+ . map_err ( |e| SubtractionError :: BamRead { source : e } ) ?;
182+
183+ // Get the header from input to preserve SQ lines and other headers
184+ let header_view = reader. header ( ) . clone ( ) ;
185+
186+ // Create a new Header from the HeaderView
187+ let header = bam:: Header :: from_template ( & header_view) ;
188+
189+ // Create output writer with the same header
190+ let mut writer = bam:: Writer :: from_path ( output_path, & header, Format :: Sam )
191+ . map_err ( |e| SubtractionError :: BamWrite { source : e } ) ?;
185192
186193 let mut subtracted_read_ids = HashSet :: new ( ) ;
187194
188- // Process each line
189- for line in lines {
190- let l = line. map_err ( |e| SubtractionError :: LineRead { source : e } ) ?;
191-
192- // Use processor to determine action
193- match processor. process_sam_line ( & l) ? {
194- Some ( ProcessResult :: Keep ( output_line) ) => {
195- writeln ! ( & mut sam_file, "{}" , output_line)
196- . map_err ( |e| SubtractionError :: WriteOutput { source : e } ) ?;
197- }
198- Some ( ProcessResult :: Eliminate ( read_id) ) => {
199- subtracted_read_ids. insert ( read_id) ;
200- }
201- None => {
202- // Skip this line (empty, unmapped, malformed)
203- continue ;
204- }
195+ // Process each record
196+ for result in reader. records ( ) {
197+ let record = result. map_err ( |e| SubtractionError :: BamRead { source : e } ) ?;
198+
199+ // Skip unmapped reads
200+ if record. is_unmapped ( ) {
201+ continue ;
202+ }
203+
204+ // Get read ID
205+ let read_id = std:: str:: from_utf8 ( record. qname ( ) )
206+ . map_err ( |_| SubtractionError :: InsufficientFields ) ?
207+ . to_string ( ) ;
208+
209+ // Get reference name
210+ let ref_name = if record. tid ( ) >= 0 {
211+ std:: str:: from_utf8 ( header_view. tid2name ( record. tid ( ) as u32 ) )
212+ . unwrap_or ( "*" )
213+ } else {
214+ "*"
215+ } ;
216+
217+ // Skip if reference is unmapped
218+ if ref_name == "*" {
219+ continue ;
220+ }
221+
222+ // Calculate alignment score (AS score + read length)
223+ let read_length = record. seq_len ( ) as f32 ;
224+ let as_score = match record. aux ( b"AS" ) {
225+ Ok ( aux) => match aux {
226+ rust_htslib:: bam:: record:: Aux :: I32 ( score) => score as f32 ,
227+ rust_htslib:: bam:: record:: Aux :: I8 ( score) => score as f32 ,
228+ rust_htslib:: bam:: record:: Aux :: I16 ( score) => score as f32 ,
229+ rust_htslib:: bam:: record:: Aux :: U8 ( score) => score as f32 ,
230+ rust_htslib:: bam:: record:: Aux :: U16 ( score) => score as f32 ,
231+ rust_htslib:: bam:: record:: Aux :: U32 ( score) => score as f32 ,
232+ _ => 0.0 ,
233+ } ,
234+ Err ( _) => 0.0 ,
235+ } ;
236+ let isolate_score = as_score + read_length;
237+
238+ // Check if this read should be eliminated
239+ if processor. should_eliminate ( & read_id, isolate_score) {
240+ subtracted_read_ids. insert ( read_id) ;
241+ } else {
242+ // Write the record to output
243+ writer. write ( & record)
244+ . map_err ( |e| SubtractionError :: BamWrite { source : e } ) ?;
205245 }
206246 }
207247
0 commit comments