Lower priority of early stopping task, cleanup

argmaxinc · ZachNagengast · Dec 21, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
commit ac834058843ea9a36f910fe7e517fe33feba6ee4
diff --git a/Sources/WhisperKit/Core/TextDecoder.swift b/Sources/WhisperKit/Core/TextDecoder.swift
@@ -213,7 +213,7 @@ public extension TextDecoding {
             throw WhisperError.tokenizerUnavailable()
         }
 
-        var prefilledDecoderInputs = decoderInputs
+        let prefilledDecoderInputs = decoderInputs
 
         // Setup prefill tokens based on task and language
         var prefillTokens: [Int] = [tokenizer.specialTokens.startOfTranscriptToken] // SOT
@@ -828,7 +828,7 @@ open class TextDecoder: TextDecoding, WhisperMLModel {
 
                 // Call the callback if it is provided on a background thread
                 if let callback = callback {
-                    Task.detached { [weak self] in
+                    Task(priority: .utility) { [weak self] in
                         guard let self = self else { return }
                         let shouldContinue = callback(result)
                         if let shouldContinue = shouldContinue, !shouldContinue, !isPrefill {

diff --git a/Tests/WhisperKitTests/UnitTests.swift b/Tests/WhisperKitTests/UnitTests.swift
@@ -743,46 +743,6 @@ final class UnitTests: XCTestCase {
         )
     }
 
-    func testDecodingEarlyStopping() async throws {
-        let earlyStopTokenCount = 10
-        let options = DecodingOptions()
-        let continuationCallback: TranscriptionCallback = { (progress: TranscriptionProgress) -> Bool? in
-            // Stop after only 10 tokens (full test audio contains 16)
-            progress.tokens.count <= earlyStopTokenCount
-        }
-
-        let result = try await XCTUnwrapAsync(
-            await transcribe(with: .tiny, options: options, callback: continuationCallback).first!,
-            "Failed to transcribe"
-        )
-
-        XCTAssertNotNil(result)
-        let tokenCountWithEarlyStop = result.segments.flatMap { $0.tokens }.count
-        let decodingTimePerTokenWithEarlyStop = result.timings.decodingLoop / Double(tokenCountWithEarlyStop)
-
-        // Work done in the callback should not block the decoding loop
-        let continuationCallbackWithWait: TranscriptionCallback = { (progress: TranscriptionProgress) -> Bool? in
-            Thread.sleep(forTimeInterval: 2)
-            return false
-        }
-
-        let resultWithWait = try await XCTUnwrapAsync(
-            await transcribe(with: .tiny, options: options, callback: continuationCallbackWithWait).first!,
-            "Failed to transcribe"
-        )
-
-        XCTAssertNotNil(resultWithWait)
-        let tokenCountWithWait = resultWithWait.segments.flatMap { $0.tokens }.count
-        let decodingTimePerTokenWithWait = resultWithWait.timings.decodingLoop / Double(tokenCountWithWait)
-        Logging.debug("Decoding loop without wait: \(result.timings.decodingLoop), with wait: \(resultWithWait.timings.decodingLoop)")
-
-        // Assert that the decoding predictions per token are not slower with the waiting
-        XCTAssertEqual(decodingTimePerTokenWithWait, decodingTimePerTokenWithEarlyStop, accuracy: decodingTimePerTokenWithEarlyStop, "Decoding predictions per token should not be significantly slower with waiting")
-
-        // Assert that more tokens are returned in the callback with waiting
-        XCTAssertGreaterThan(tokenCountWithWait, tokenCountWithEarlyStop, "More tokens should be returned in the callback with waiting")
-    }
-
     // MARK: - Tokenizer Tests
 
     func testDecoderTokenizer() async throws {
@@ -1300,6 +1260,62 @@ final class UnitTests: XCTestCase {
         await fulfillment(of: [modelStateExpectation, segmentDiscoveryExpectation, transcriptionStateExpectation], timeout: 1)
     }
 
+    func testCallbackWithEarlyStopping() async throws {
+        let computeOptions = ModelComputeOptions(
+            melCompute: .cpuOnly,
+            audioEncoderCompute: .cpuOnly,
+            textDecoderCompute: .cpuOnly,
+            prefillCompute: .cpuOnly
+        )
+
+        let config = try WhisperKitConfig(
+            modelFolder: tinyModelPath(),
+            computeOptions: computeOptions,
+            verbose: true,
+            logLevel: .debug,
+            load: false
+        )
+        let whisperKit = try await WhisperKit(config)
+
+        try await whisperKit.loadModels()
+        let audioFilePath = try XCTUnwrap(
+            Bundle.current.path(forResource: "jfk", ofType: "wav"),
+            "Audio file not found"
+        )
+
+        let earlyStopTokenCount = 10
+        let continuationCallback: TranscriptionCallback = { (progress: TranscriptionProgress) -> Bool? in
+            // Keep decoding while at most 10 tokens exist, then stop (the full transcription is asserted below to contain 30 tokens)
+            progress.tokens.count <= earlyStopTokenCount
+        }
+
+        let result = try await whisperKit.transcribe(audioPath: audioFilePath, callback: continuationCallback).first!
+
+        XCTAssertNotNil(result)
+        let tokenCountWithEarlyStop = result.segments.flatMap { $0.tokens }.count
+        let decodingTimePerTokenWithEarlyStop = result.timings.decodingLoop / Double(tokenCountWithEarlyStop)
+
+        // Work done in the callback should not block the decoding loop
+        let continuationCallbackWithWait: TranscriptionCallback = { (progress: TranscriptionProgress) -> Bool? in
+            Thread.sleep(forTimeInterval: 5)
+            return false
+        }
+
+        let resultWithWait = try await whisperKit.transcribe(audioPath: audioFilePath, callback: continuationCallbackWithWait).first!
+
+        XCTAssertNotNil(resultWithWait)
+        let tokenCountWithWait = resultWithWait.segments.flatMap { $0.tokens }.count
+        let decodingTimePerTokenWithWait = resultWithWait.timings.decodingLoop / Double(tokenCountWithWait)
+        Logging.debug("Decoding loop without wait: \(result.timings.decodingLoop), with wait: \(resultWithWait.timings.decodingLoop)")
+
+        // Assert that the decoding predictions per token are not slower with the waiting
+        XCTAssertEqual(decodingTimePerTokenWithWait, decodingTimePerTokenWithEarlyStop, accuracy: decodingTimePerTokenWithEarlyStop, "Decoding predictions per token should not be significantly slower with waiting")
+
+        // Assert that the slow callback did not stop decoding early: the result has the full token count, which exceeds the early-stop run
+        XCTAssertEqual(tokenCountWithWait, 30, "Token count should be equal to full audio file with 5 seconds of wait")
+        XCTAssertGreaterThan(tokenCountWithWait, tokenCountWithEarlyStop, "More tokens should be returned in the callback with waiting")
+    }
+
     // MARK: - Utils Tests
 
     func testFillIndexesWithValue() throws {