Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/transcribe-whisper-local/src/service/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ where
Ok(chunk) => Some(hypr_whisper_local::SimpleAudioChunk {
samples: chunk.samples,
meta: Some(serde_json::json!({ "source": source_name })),
start_timestamp_ms: Some(chunk.start_timestamp_ms),
end_timestamp_ms: Some(chunk.end_timestamp_ms),
}),
})
})
Expand Down
22 changes: 22 additions & 0 deletions crates/whisper-local/src/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,16 @@ pub struct TranscriptionTask<S, T> {
/// A piece of audio that the transcription stream can consume.
///
/// Implementors must be `Send + 'static`.
pub trait AudioChunk: Send + 'static {
/// Returns the chunk's audio samples as a borrowed slice of `f32` values.
fn samples(&self) -> &[f32];
/// Returns optional JSON metadata attached to this chunk, or `None` if there is none.
fn meta(&self) -> Option<serde_json::Value>;
/// Returns the chunk's start timestamp in milliseconds (per the `_ms` suffix), if known.
// NOTE(review): the reference point of this timestamp (e.g. start of the
// whole recording) is not visible from this file — confirm with the producer.
fn start_timestamp_ms(&self) -> Option<usize>;
/// Returns the chunk's end timestamp in milliseconds (per the `_ms` suffix), if known.
fn end_timestamp_ms(&self) -> Option<usize>;
}

/// A plain, owned audio chunk with public fields.
///
/// `Default` yields an empty sample buffer with no metadata and no timestamps.
#[derive(Default)]
pub struct SimpleAudioChunk {
/// Owned audio samples for this chunk.
pub samples: Vec<f32>,
/// Optional JSON metadata carried alongside the samples.
pub meta: Option<serde_json::Value>,
/// Start timestamp of the chunk in milliseconds, when the producer supplies one.
pub start_timestamp_ms: Option<usize>,
/// End timestamp of the chunk in milliseconds, when the producer supplies one.
pub end_timestamp_ms: Option<usize>,
}

impl AudioChunk for SimpleAudioChunk {
Expand All @@ -36,6 +40,14 @@ impl AudioChunk for SimpleAudioChunk {
/// Returns a clone of the stored metadata (`None` when no metadata is set).
fn meta(&self) -> Option<serde_json::Value> {
self.meta.clone()
}

/// Returns the stored start timestamp (milliseconds), or `None` if it was not set.
fn start_timestamp_ms(&self) -> Option<usize> {
self.start_timestamp_ms
}

/// Returns the stored end timestamp (milliseconds), or `None` if it was not set.
fn end_timestamp_ms(&self) -> Option<usize> {
self.end_timestamp_ms
}
}

pub struct AudioChunkStream<S>(pub S);
Expand Down Expand Up @@ -116,6 +128,7 @@ where
&samples,
&mut this.current_segment_task,
None,
(None, None),
) {
Poll::Ready(result) => return Poll::Ready(result),
Poll::Pending => continue,
Expand Down Expand Up @@ -156,11 +169,14 @@ where
let meta = chunk.meta();
let samples = chunk.samples();

let timestamps = (chunk.start_timestamp_ms(), chunk.end_timestamp_ms());

match process_transcription(
&mut this.whisper,
samples,
&mut this.current_segment_task,
meta,
timestamps,
) {
Poll::Ready(result) => return Poll::Ready(result),
Poll::Pending => continue,
Expand All @@ -178,6 +194,7 @@ fn process_transcription<'a>(
samples: &'a [f32],
current_segment_task: &'a mut Option<Pin<Box<dyn Stream<Item = Segment> + Send>>>,
meta: Option<serde_json::Value>,
timestamps: (Option<usize>, Option<usize>),
) -> Poll<Option<Segment>> {
if !samples.is_empty() {
match whisper.transcribe(samples) {
Expand All @@ -190,6 +207,11 @@ fn process_transcription<'a>(
Ok(mut segments) => {
for segment in &mut segments {
segment.meta = meta.clone();

if let (Some(start_ms), Some(end_ms)) = timestamps {
segment.start = start_ms as f64 / 1000.0;
segment.end = end_ms as f64 / 1000.0;
}
Comment on lines +211 to +214
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Bug: Overwrites per-segment timing with chunk bounds; offset instead of replacing

This sets every segment’s start/end to the same chunk-level [start, end], discarding per-segment timing from Whisper and making all segments identical in time. Instead, add the chunk start offset to each segment’s existing times and optionally clamp to the chunk end.

Apply this diff to offset/clamp rather than overwrite:

-                    if let (Some(start_ms), Some(end_ms)) = timestamps {
-                        segment.start = start_ms as f64 / 1000.0;
-                        segment.end = end_ms as f64 / 1000.0;
-                    }
+                    // Add chunk start offset; keep model-provided intra-chunk timing.
+                    if let (Some(start_ms), _) = timestamps {
+                        let offset_s = start_ms as f64 / 1000.0;
+                        segment.start += offset_s;
+                        segment.end += offset_s;
+                    }
+                    // Optionally clamp segment end to the chunk end if provided.
+                    if let (_, Some(end_ms)) = timestamps {
+                        let end_limit_s = end_ms as f64 / 1000.0;
+                        if segment.end > end_limit_s {
+                            segment.end = end_limit_s;
+                        }
+                    }

To double-check expectations, confirm how Segment.start/end are used downstream and whether they currently assume absolute or relative times:


🏁 Script executed:

#!/bin/bash
# Locate Segment definition and usages of start/end to infer expected semantics.
rg -n -C3 -P 'struct\s+Segment\b' --type=rust
rg -n -C3 -P '\bSegment\b' --type=rust | rg -n -C1 -P '\bstart\b|\bend\b'

Length of output: 3201


Bug: Preserve per-segment timing by offsetting, not overwriting

The current code at crates/whisper-local/src/stream.rs (lines 211–214) replaces every segment’s times with the enclosing chunk’s bounds, discarding the model’s intra-chunk timestamps. Instead, you should add the chunk start offset to each segment’s existing start/end and clamp the end to the chunk boundary if provided.

• Location: crates/whisper-local/src/stream.rs:211–214

Apply this patch:

-                    if let (Some(start_ms), Some(end_ms)) = timestamps {
-                        segment.start = start_ms as f64 / 1000.0;
-                        segment.end = end_ms as f64 / 1000.0;
-                    }
+                    // Add chunk start offset; keep model-provided intra-chunk timing.
+                    if let (Some(start_ms), _) = timestamps {
+                        let offset_s = start_ms as f64 / 1000.0;
+                        segment.start += offset_s;
+                        segment.end += offset_s;
+                    }
+                    // Clamp segment end to the chunk end if provided.
+                    if let (_, Some(end_ms)) = timestamps {
+                        let end_limit_s = end_ms as f64 / 1000.0;
+                        if segment.end > end_limit_s {
+                            segment.end = end_limit_s;
+                        }
+                    }

This ensures each segment retains its relative timing from Whisper while mapping to absolute positions in the full audio.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if let (Some(start_ms), Some(end_ms)) = timestamps {
segment.start = start_ms as f64 / 1000.0;
segment.end = end_ms as f64 / 1000.0;
}
// Add chunk start offset; keep model-provided intra-chunk timing.
if let (Some(start_ms), _) = timestamps {
let offset_s = start_ms as f64 / 1000.0;
segment.start += offset_s;
segment.end += offset_s;
}
// Clamp segment end to the chunk end if provided.
if let (_, Some(end_ms)) = timestamps {
let end_limit_s = end_ms as f64 / 1000.0;
if segment.end > end_limit_s {
segment.end = end_limit_s;
}
}
🤖 Prompt for AI Agents
In crates/whisper-local/src/stream.rs around lines 211 to 214, the code
currently overwrites each segment’s start/end with the chunk bounds; instead,
add the chunk start offset to the segment’s existing start/end (preserving the
model’s intra-chunk timing), convert ms->seconds consistently, and if an
enclosing chunk end is provided clamp the adjusted segment.end to that chunk end
rather than overwriting it; ensure you only apply the offset when the chunk
provides a start timestamp (Some) and leave segments unchanged when it does not.

}

*current_segment_task = Some(Box::pin(futures_util::stream::iter(segments)));
Expand Down
Loading