From cf576e529b9beea7318e7aaa633d8707f93f4c08 Mon Sep 17 00:00:00 2001 From: Jeronymous Date: Sun, 25 Feb 2024 16:43:22 +0100 Subject: [PATCH] fixes #64 : fix inconsistency between segments when there are empty text --- whisper_timestamped/transcribe.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/whisper_timestamped/transcribe.py b/whisper_timestamped/transcribe.py index bc0eb02..68b9fe0 100644 --- a/whisper_timestamped/transcribe.py +++ b/whisper_timestamped/transcribe.py @@ -905,6 +905,9 @@ def filter_tokens(tokens): assert len(segment_logprobs) == len(segment_tokens), f"Inconsistent number of segments: logprobs ({len(segment_logprobs)}) != tokens ({len(segment_tokens)})" whisper_segments = transcription["segments"] + # See issue 64: some segments may have empty text + if any(not s["text"] for s in whisper_segments): + whisper_segments = [s for s in whisper_segments if s["text"]] l1 = len(whisper_segments) l2 = len(timestamped_word_segments) if l1 != l2 and l1 != 0: