From 8a3c3dd96ff78c88a8f9f4d67f7f96215cac973b Mon Sep 17 00:00:00 2001 From: Ugo Di Profio Date: Mon, 4 Mar 2024 16:06:39 -0500 Subject: [PATCH 1/4] skip tests for models that are not downloaded, but assume that openai_whisper-tiny and openai_whisper-large-v3 are downloaded. --- Tests/WhisperKitTests/UnitTests.swift | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/Tests/WhisperKitTests/UnitTests.swift b/Tests/WhisperKitTests/UnitTests.swift index 96f02e9..9b45b39 100644 --- a/Tests/WhisperKitTests/UnitTests.swift +++ b/Tests/WhisperKitTests/UnitTests.swift @@ -827,6 +827,24 @@ final class UnitTests: XCTestCase { XCTAssertEqual(mergedAlignmentTiming[i].probability, expectedWordTimings[i].probability, "Probability at index \(i) does not match") } } + + func testGitLFSPointerFile() { + // Assumption: + // 1 - the openai_whisper-tiny is downloaded locally. This means that the proxyFile is an actual data file. + // 2 - the openai_whisper-large-v3_turbo is not downloaded locally. This means that the proxyFile is a pointer file. + let proxyFile = "AudioEncoder.mlmodelc/coremldata.bin" + + // First, we check that a data file is not considered a Git LFS pointer file. + var filePath = URL(filePath: tinyModelPath()).appending(path: proxyFile) + var isPointerFile = isGitLFSPointerFile(url: filePath) + XCTAssertEqual(isPointerFile, false, "Assuming whisper-tiny was downloaded, \(proxyFile) should not be a git-lfs pointer file.") + + // Second, we check that a pointer file is recognized as one. 
+ let modelDir = largev3TurboModelPath() + filePath = URL(filePath: modelDir).appending(path: proxyFile) + isPointerFile = isGitLFSPointerFile(url: filePath) + XCTAssertEqual(isPointerFile, true, "Assuming whisper-large-v3_turbo was not downloaded, \(proxyFile) should be a git-lfs pointer file.") + } } // MARK: Helpers @@ -904,6 +922,15 @@ extension XCTestCase { return modelPath } + func largev3TurboModelPath() -> String { + let modelDir = "whisperkit-coreml/openai_whisper-large-v3_turbo" + guard let modelPath = Bundle.module.urls(forResourcesWithExtension: "mlmodelc", subdirectory: modelDir)?.first?.deletingLastPathComponent().path else { + print("Failed to load model, ensure \"Models/\(modelDir)\" exists via Makefile command: `make download-models`") + return "" + } + return modelPath + } + func allModelPaths() -> [String] { let fileManager = FileManager.default var modelPaths: [String] = [] @@ -921,6 +948,13 @@ extension XCTestCase { for folderURL in directoryContents { let resourceValues = try folderURL.resourceValues(forKeys: Set(resourceKeys)) if resourceValues.isDirectory == true { + // Check if the directory contains actual data files, or if it contains pointer files. + // As a proxy, use the MelSpectrogram.mlmodelc/coremldata.bin file. 
+ let proxyFileToCheck = folderURL.appendingPathComponent("MelSpectrogram.mlmodelc/coremldata.bin") + if isGitLFSPointerFile(url: proxyFileToCheck) { + continue + } + // Check if the directory name contains the quantization pattern // Only test large quantized models let dirName = folderURL.lastPathComponent @@ -935,6 +969,25 @@ extension XCTestCase { return modelPaths } + + // Function to check if the beginning of the file matches a Git LFS pointer pattern + func isGitLFSPointerFile(url: URL) -> Bool { + do { + let fileHandle = try FileHandle(forReadingFrom: url) + // Read the first few bytes of the file to get enough for the Git LFS pointer signature + let data = fileHandle.readData(ofLength: 512) // Read first 512 bytes + fileHandle.closeFile() + + if let string = String(data: data, encoding: .utf8), + string.starts(with: "version https://git-lfs.github.com/") { + return true + } + } catch { + print("Failed to read file: \(error)") + } + + return false + } func trackForMemoryLeaks(on instance: AnyObject, file: StaticString = #filePath, line: UInt = #line) { addTeardownBlock { [weak instance] in From b59351497b5fa355dd1e5bb0d352b5d4cc4b4a19 Mon Sep 17 00:00:00 2001 From: Ugo Di Profio Date: Mon, 4 Mar 2024 17:18:56 -0500 Subject: [PATCH 2/4] fix typo. --- Examples/WhisperAX/WhisperAX/Views/ContentView.swift | 4 ++-- .../WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift | 4 ++-- Sources/WhisperKit/Core/Models.swift | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift index 0df7640..944b0e2 100644 --- a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift +++ b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift @@ -751,7 +751,7 @@ struct ContentView: View { try await whisperKit.loadModels() await MainActor.run { - availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? 
["english"] + availableLanguages = whisperKit.tokenizer?.languages.map { $0.key }.sorted() ?? ["english"] loadingProgressValue = 1.0 modelState = whisperKit.modelState } @@ -920,7 +920,7 @@ struct ContentView: View { func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? { guard let whisperKit = whisperKit else { return nil } - let languageCode = whisperKit.tokenizer?.langauges[selectedLanguage] ?? "en" + let languageCode = whisperKit.tokenizer?.languages[selectedLanguage] ?? "en" let task: DecodingTask = selectedTask == "transcribe" ? .transcribe : .translate let seekClip = [lastConfirmedSegmentEndSeconds] diff --git a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift index 5e20678..d661215 100644 --- a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift +++ b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift @@ -409,7 +409,7 @@ struct WhisperAXWatchView: View { try await whisperKit.loadModels() await MainActor.run { - availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"] + availableLanguages = whisperKit.tokenizer?.languages.map { $0.key }.sorted() ?? ["english"] loadingProgressValue = 1.0 modelState = whisperKit.modelState } @@ -491,7 +491,7 @@ struct WhisperAXWatchView: View { func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? { guard let whisperKit = whisperKit else { return nil } - let languageCode = whisperKit.tokenizer?.langauges[selectedLanguage] ?? "en" + let languageCode = whisperKit.tokenizer?.languages[selectedLanguage] ?? "en" let task: DecodingTask = selectedTask == "transcribe" ? 
.transcribe : .translate let seekClip = [lastConfirmedSegmentEndSeconds] diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index 2048089..68d1824 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -919,7 +919,7 @@ public extension Tokenizer { return false } - var langauges: [String: String] { [ + var languages: [String: String] { [ "english": "en", "chinese": "zh", "german": "de", From 34e26bc3b5d01621ad4680aa86dd6b27a0516a6f Mon Sep 17 00:00:00 2001 From: Ugo Di Profio Date: Mon, 4 Mar 2024 18:08:14 -0500 Subject: [PATCH 3/4] Revert to before the typo fix --- Examples/WhisperAX/WhisperAX/Views/ContentView.swift | 4 ++-- .../WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift | 4 ++-- Sources/WhisperKit/Core/Models.swift | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift index 944b0e2..0df7640 100644 --- a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift +++ b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift @@ -751,7 +751,7 @@ struct ContentView: View { try await whisperKit.loadModels() await MainActor.run { - availableLanguages = whisperKit.tokenizer?.languages.map { $0.key }.sorted() ?? ["english"] + availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"] loadingProgressValue = 1.0 modelState = whisperKit.modelState } @@ -920,7 +920,7 @@ struct ContentView: View { func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? { guard let whisperKit = whisperKit else { return nil } - let languageCode = whisperKit.tokenizer?.languages[selectedLanguage] ?? "en" + let languageCode = whisperKit.tokenizer?.langauges[selectedLanguage] ?? "en" let task: DecodingTask = selectedTask == "transcribe" ? 
.transcribe : .translate let seekClip = [lastConfirmedSegmentEndSeconds] diff --git a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift index d661215..5e20678 100644 --- a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift +++ b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift @@ -409,7 +409,7 @@ struct WhisperAXWatchView: View { try await whisperKit.loadModels() await MainActor.run { - availableLanguages = whisperKit.tokenizer?.languages.map { $0.key }.sorted() ?? ["english"] + availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"] loadingProgressValue = 1.0 modelState = whisperKit.modelState } @@ -491,7 +491,7 @@ struct WhisperAXWatchView: View { func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? { guard let whisperKit = whisperKit else { return nil } - let languageCode = whisperKit.tokenizer?.languages[selectedLanguage] ?? "en" + let languageCode = whisperKit.tokenizer?.langauges[selectedLanguage] ?? "en" let task: DecodingTask = selectedTask == "transcribe" ? .transcribe : .translate let seekClip = [lastConfirmedSegmentEndSeconds] diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index 68d1824..2048089 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -919,7 +919,7 @@ public extension Tokenizer { return false } - var languages: [String: String] { [ + var langauges: [String: String] { [ "english": "en", "chinese": "zh", "german": "de", From db0c388d3cbb625488a115185c093fdb02d46f0b Mon Sep 17 00:00:00 2001 From: Ugo Di Profio Date: Fri, 8 Mar 2024 21:16:03 -0500 Subject: [PATCH 4/4] Revert "Revert to before the typo fix" This reverts commit 34e26bc3b5d01621ad4680aa86dd6b27a0516a6f. 
--- Examples/WhisperAX/WhisperAX/Views/ContentView.swift | 4 ++-- .../WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift | 4 ++-- Sources/WhisperKit/Core/Models.swift | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift index 92fe7fb..e38cdab 100644 --- a/Examples/WhisperAX/WhisperAX/Views/ContentView.swift +++ b/Examples/WhisperAX/WhisperAX/Views/ContentView.swift @@ -813,7 +813,7 @@ struct ContentView: View { localModels.append(model) } - availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"] + availableLanguages = whisperKit.tokenizer?.languages.map { $0.key }.sorted() ?? ["english"] loadingProgressValue = 1.0 modelState = whisperKit.modelState } @@ -1009,7 +1009,7 @@ struct ContentView: View { func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? { guard let whisperKit = whisperKit else { return nil } - let languageCode = whisperKit.tokenizer?.langauges[selectedLanguage] ?? "en" + let languageCode = whisperKit.tokenizer?.languages[selectedLanguage] ?? "en" let task: DecodingTask = selectedTask == "transcribe" ? .transcribe : .translate let seekClip = [lastConfirmedSegmentEndSeconds] diff --git a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift index 2809c83..5536477 100644 --- a/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift +++ b/Examples/WhisperAX/WhisperAXWatchApp/WhisperAXExampleView.swift @@ -409,7 +409,7 @@ struct WhisperAXWatchView: View { try await whisperKit.loadModels() await MainActor.run { - availableLanguages = whisperKit.tokenizer?.langauges.map { $0.key }.sorted() ?? ["english"] + availableLanguages = whisperKit.tokenizer?.languages.map { $0.key }.sorted() ?? 
["english"] loadingProgressValue = 1.0 modelState = whisperKit.modelState } @@ -491,7 +491,7 @@ struct WhisperAXWatchView: View { func transcribeAudioSamples(_ samples: [Float]) async throws -> TranscriptionResult? { guard let whisperKit = whisperKit else { return nil } - let languageCode = whisperKit.tokenizer?.langauges[selectedLanguage] ?? "en" + let languageCode = whisperKit.tokenizer?.languages[selectedLanguage] ?? "en" let task: DecodingTask = selectedTask == "transcribe" ? .transcribe : .translate let seekClip = [lastConfirmedSegmentEndSeconds] diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index 50e9228..ff862a7 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -931,7 +931,7 @@ public extension Tokenizer { return false } - var langauges: [String: String] { [ + var languages: [String: String] { [ "english": "en", "chinese": "zh", "german": "de",