From 66e9984cbf7dff42094d2ca56105c3754c4456c6 Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Thu, 9 Jan 2025 13:33:45 -0800 Subject: [PATCH 1/4] Add repo and token option to regression test matrix --- Sources/WhisperKit/Core/Configurations.swift | 4 ++ Sources/WhisperKit/Core/WhisperKit.swift | 5 +- .../WhisperKitTests/RegressionTestUtils.swift | 3 + Tests/WhisperKitTests/RegressionTests.swift | 63 ++++++++++++------- 4 files changed, 51 insertions(+), 24 deletions(-) diff --git a/Sources/WhisperKit/Core/Configurations.swift b/Sources/WhisperKit/Core/Configurations.swift index 1547b1f2..65d899e2 100644 --- a/Sources/WhisperKit/Core/Configurations.swift +++ b/Sources/WhisperKit/Core/Configurations.swift @@ -12,6 +12,8 @@ open class WhisperKitConfig { public var downloadBase: URL? /// Repository for downloading models public var modelRepo: String? + /// Token for downloading models from repo (if required) + public var modelToken: String? /// Folder to store models public var modelFolder: String? @@ -47,6 +49,7 @@ open class WhisperKitConfig { public init(model: String? = nil, downloadBase: URL? = nil, modelRepo: String? = nil, + modelToken: String? = nil, modelFolder: String? = nil, tokenizerFolder: URL? = nil, computeOptions: ModelComputeOptions? = nil, @@ -67,6 +70,7 @@ open class WhisperKitConfig { self.model = model self.downloadBase = downloadBase self.modelRepo = modelRepo + self.modelToken = modelToken self.modelFolder = modelFolder self.tokenizerFolder = tokenizerFolder self.computeOptions = computeOptions diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 6cccf017..73b77a3d 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -68,6 +68,7 @@ open class WhisperKit { model: config.model, downloadBase: config.downloadBase, modelRepo: config.modelRepo, + modelToken: config.modelToken, modelFolder: config.modelFolder, download: config.download ) @@ -295,6 +296,7 @@ open class WhisperKit { model: String?, downloadBase: URL? = nil, modelRepo: String?, + modelToken: String? = nil, modelFolder: String?, download: Bool ) async throws { @@ -312,7 +314,8 @@ open class WhisperKit { variant: modelVariant, downloadBase: downloadBase, useBackgroundSession: useBackgroundDownloadSession, - from: repo + from: repo, + token: modelToken ) } catch { // Handle errors related to model downloading diff --git a/Tests/WhisperKitTests/RegressionTestUtils.swift b/Tests/WhisperKitTests/RegressionTestUtils.swift index 07a25db4..378e7450 100644 --- a/Tests/WhisperKitTests/RegressionTestUtils.swift +++ b/Tests/WhisperKitTests/RegressionTestUtils.swift @@ -54,6 +54,7 @@ class TestInfo: JSONCodable { let datasetDir: String let datasetRepo: String let model: String + let modelRepo: String let modelSizeMB: Double let date: String let timeElapsedInSeconds: TimeInterval @@ -69,6 +70,7 @@ class TestInfo: JSONCodable { datasetDir: String, datasetRepo: String, model: String, + modelRepo: String, modelSizeMB: Double, date: String, timeElapsedInSeconds: TimeInterval, @@ -83,6 +85,7 @@ class TestInfo: JSONCodable { self.datasetDir = datasetDir self.datasetRepo = datasetRepo self.model = model + self.modelRepo = modelRepo self.modelSizeMB = modelSizeMB self.date = date self.timeElapsedInSeconds = timeElapsedInSeconds diff --git a/Tests/WhisperKitTests/RegressionTests.swift b/Tests/WhisperKitTests/RegressionTests.swift index 5f5c095e..0b849680 100644 --- a/Tests/WhisperKitTests/RegressionTests.swift +++ b/Tests/WhisperKitTests/RegressionTests.swift @@ -13,12 +13,13 @@ import WatchKit #endif @available(macOS 13, iOS 16, watchOS 10, visionOS 1, *) -final class RegressionTests: XCTestCase { +class RegressionTests: XCTestCase { var audioFileURLs: [URL]? var remoteFileURLs: [URL]? var metadataURL: URL? var testWERURLs: [URL]? var modelsToTest: [String] = [] + var modelReposToTest: [String] = [] var modelsTested: [String] = [] var optionsToTest: [DecodingOptions] = [DecodingOptions()] @@ -26,6 +27,7 @@ final class RegressionTests: XCTestCase { let dataset: String let modelComputeOptions: ModelComputeOptions var model: String + var modelRepo: String let decodingOptions: DecodingOptions } @@ -34,6 +36,7 @@ final class RegressionTests: XCTestCase { var datasets = ["librispeech-10mins", "earnings22-10mins"] let debugDataset = ["earnings22-10mins"] let debugModels = ["tiny"] + let debugRepos = ["argmaxinc/whisperkit-coreml"] var computeOptions: [ModelComputeOptions] = [ ModelComputeOptions(audioEncoderCompute: .cpuAndNeuralEngine, textDecoderCompute: .cpuAndNeuralEngine), @@ -71,7 +74,7 @@ final class RegressionTests: XCTestCase { Logging.debug("Max memory before warning: \(maxMemory)") } - func testEnvConfigurations(defaultModels: [String]? = nil) { + func testEnvConfigurations(defaultModels: [String]? = nil, defaultRepos: [String]? = nil) { if let modelSizeEnv = ProcessInfo.processInfo.environment["MODEL_NAME"], !modelSizeEnv.isEmpty { modelsToTest = [modelSizeEnv] Logging.debug("Model size: \(modelSizeEnv)") @@ -81,6 +84,7 @@ final class RegressionTests: XCTestCase { } } else { modelsToTest = defaultModels ?? debugModels + modelReposToTest = defaultRepos ?? debugRepos Logging.debug("Model size not set by env") } } @@ -116,7 +120,7 @@ final class RegressionTests: XCTestCase { // MARK: - Test Pipeline - private func runRegressionTests(with testMatrix: [TestConfig]) async throws { + public func runRegressionTests(with testMatrix: [TestConfig]) async throws { var failureInfo: [String: String] = [:] var attachments: [String: String] = [:] let device = getCurrentDevice() @@ -159,8 +163,7 @@ final class RegressionTests: XCTestCase { // Create WhisperKit instance with checks for memory usage let whisperKit = try await createWithMemoryCheck( - model: config.model, - computeOptions: config.modelComputeOptions, + testConfig: config, verbose: true, logLevel: .debug ) @@ -295,6 +298,7 @@ final class RegressionTests: XCTestCase { datasetDir: config.dataset, datasetRepo: datasetRepo, model: config.model, + modelRepo: config.modelRepo, modelSizeMB: modelSizeMB ?? -1, date: startTime.formatted(Date.ISO8601FormatStyle().dateSeparator(.dash)), timeElapsedInSeconds: Date().timeIntervalSince(startTime), @@ -432,20 +436,23 @@ final class RegressionTests: XCTestCase { } } - private func getTestMatrix() -> [TestConfig] { + public func getTestMatrix() -> [TestConfig] { var regressionTestConfigMatrix: [TestConfig] = [] for dataset in datasets { for computeOption in computeOptions { for options in optionsToTest { - for model in modelsToTest { - regressionTestConfigMatrix.append( - TestConfig( - dataset: dataset, - modelComputeOptions: computeOption, - model: model, - decodingOptions: options + for repo in modelReposToTest { + for model in modelsToTest { + regressionTestConfigMatrix.append( + TestConfig( + dataset: dataset, + modelComputeOptions: computeOption, + model: model, + modelRepo: repo, + decodingOptions: options + ) ) - ) + } } } } @@ -609,18 +616,14 @@ final class RegressionTests: XCTestCase { let modelSize = try fileManager.allocatedSizeOfDirectory(at: folder) return Double(modelSize / (1024 * 1024)) // Convert to MB } - - func createWithMemoryCheck( - model: String, - computeOptions: ModelComputeOptions, - verbose: Bool, - logLevel: Logging.LogLevel - ) async throws -> WhisperKit { + + public func initWhisperKitTask(testConfig config: TestConfig, verbose: Bool, logLevel: Logging.LogLevel) -> Task { // Create the initialization task let initializationTask = Task { () -> WhisperKit in let whisperKit = try await WhisperKit(WhisperKitConfig( - model: model, - computeOptions: computeOptions, + model: config.model, + modelRepo: config.modelRepo, + computeOptions: config.modelComputeOptions, verbose: verbose, logLevel: logLevel, prewarm: true, @@ -629,6 +632,20 @@ final class RegressionTests: XCTestCase { try Task.checkCancellation() return whisperKit } + return initializationTask + } + + func createWithMemoryCheck( + testConfig: TestConfig, + verbose: Bool, + logLevel: Logging.LogLevel + ) async throws -> WhisperKit { + // Create the initialization task + let initializationTask = initWhisperKitTask( + testConfig: testConfig, + verbose: verbose, + logLevel: logLevel + ) // Start the memory monitoring task let monitorTask = Task { From 499c8d7c8a37dedce54c7ed69aa410abbf5fbf72 Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Wed, 15 Jan 2025 16:15:29 -0800 Subject: [PATCH 2/4] Add default Debug.xcconfig file --- Examples/WhisperAX/Debug.xcconfig | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Examples/WhisperAX/Debug.xcconfig diff --git a/Examples/WhisperAX/Debug.xcconfig b/Examples/WhisperAX/Debug.xcconfig new file mode 100644 index 00000000..be6bbdde --- /dev/null +++ b/Examples/WhisperAX/Debug.xcconfig @@ -0,0 +1,2 @@ +// Run `make setup` to add your team here +DEVELOPMENT_TEAM= From ee586810e475bed7be09ac3e212b221256ee9e55 Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Wed, 15 Jan 2025 18:05:51 -0800 Subject: [PATCH 3/4] Update fastlane to run on repo from benchmark config --- .../xcshareddata/xcschemes/WhisperAX.xcscheme | 5 +++++ Tests/WhisperKitTests/RegressionTestUtils.swift | 3 +++ Tests/WhisperKitTests/RegressionTests.swift | 17 +++++++++++++++++ fastlane/Fastfile | 16 ++++++++++++---- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme b/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme index 236ed0eb..48d9731a 100644 --- a/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme +++ b/Examples/WhisperAX/WhisperAX.xcodeproj/xcshareddata/xcschemes/WhisperAX.xcscheme @@ -79,6 +79,11 @@ value = "$(MODEL_NAME)" isEnabled = "YES"> + + String? { + // Add token here or override + return nil + } + func testEnvConfigurations(defaultModels: [String]? = nil, defaultRepos: [String]? = nil) { if let modelSizeEnv = ProcessInfo.processInfo.environment["MODEL_NAME"], !modelSizeEnv.isEmpty { modelsToTest = [modelSizeEnv] Logging.debug("Model size: \(modelSizeEnv)") + + if let repoEnv = ProcessInfo.processInfo.environment["MODEL_REPO"] { + modelReposToTest = [repoEnv] + Logging.debug("Using repo: \(repoEnv)") + } + XCTAssertTrue(modelsToTest.count > 0, "Invalid model size: \(modelSizeEnv)") + if modelSizeEnv == "crash_test" { fatalError("Crash test triggered") } @@ -172,6 +185,8 @@ class RegressionTests: XCTestCase { config.model = modelFile modelsTested.append(modelFile) modelsTested = Array(Set(modelsTested)) + modelReposTested.append(config.modelRepo) + modelReposTested = Array(Set(modelReposTested)) } for audioFilePath in audioFilePaths { @@ -562,6 +577,7 @@ class RegressionTests: XCTestCase { osType: osDetails.osType, osVersion: osDetails.osVersion, modelsTested: modelsTested, + modelReposTested: modelReposTested, failureInfo: failureInfo, attachments: attachments ) @@ -623,6 +639,7 @@ class RegressionTests: XCTestCase { let whisperKit = try await WhisperKit(WhisperKitConfig( model: config.model, modelRepo: config.modelRepo, + modelToken: Self.getModelToken(), computeOptions: config.modelComputeOptions, verbose: verbose, logLevel: logLevel, diff --git a/fastlane/Fastfile b/fastlane/Fastfile index 6325ecba..1059bc29 100644 --- a/fastlane/Fastfile +++ b/fastlane/Fastfile @@ -23,7 +23,7 @@ BASE_BENCHMARK_PATH = "#{WORKING_DIR}/benchmark_data".freeze BASE_UPLOAD_PATH = "#{WORKING_DIR}/upload_folder".freeze XCRESULT_PATH = File.expand_path("#{BASE_BENCHMARK_PATH}/#{COMMIT_TIMESTAMP}_#{COMMIT_HASH}/") BENCHMARK_REPO = 'argmaxinc/whisperkit-evals-dataset'.freeze -BENCHMARK_CONFIGS = { +BENCHMARK_CONFIGS ||= { full: { test_identifier: 'WhisperAXTests/RegressionTests/testModelPerformance', name: 'full', @@ -50,12 +50,14 @@ BENCHMARK_CONFIGS = { 'openai_whisper-large-v3-v20240930_turbo', 'openai_whisper-large-v3-v20240930_626MB', 'openai_whisper-large-v3-v20240930_turbo_632MB' - ] + ], + repo: 'argmaxinc/whisperkit-coreml' }, debug: { test_identifier: 'WhisperAXTests/RegressionTests/testModelPerformanceWithDebugConfig', name: 'debug', - models: ['tiny', 'crash_test', 'unknown_model', 'small.en'] + models: ['tiny', 'crash_test', 'unknown_model', 'small.en'], + repo: 'argmaxinc/whisperkit-coreml' } }.freeze @@ -200,7 +202,9 @@ end def run_benchmark(devices, config) summaries = [] - BENCHMARK_CONFIGS[config][:models].each do |model| + config_data = BENCHMARK_CONFIGS[config] + + config_data[:models].each do |model| begin # Sanitize device name for use in file path devices_to_test = devices.map { |device_info| device_info[:name] }.compact @@ -228,8 +232,12 @@ def run_benchmark(devices, config) UI.message "Running in #{BENCHMARK_CONFIGS[config][:name]} mode" UI.message "Running benchmark for model: #{model}" + UI.message 'Using Hugging Face:' + UI.message " • Repository: #{config_data[:repo]}" + xcargs = [ "MODEL_NAME=#{model}", + "MODEL_REPO=#{config_data[:repo]}", '-allowProvisioningUpdates', '-allowProvisioningDeviceRegistration' ].join(' ') From 4f82b35e4a197cb04be47f3be8c321c033caeab4 Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Wed, 15 Jan 2025 18:14:44 -0800 Subject: [PATCH 4/4] Formatting --- Sources/WhisperKit/Core/WhisperKit.swift | 3 +-- Tests/WhisperKitTests/RegressionTests.swift | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 73b77a3d..26b89613 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -72,7 +72,6 @@ open class WhisperKit { modelFolder: config.modelFolder, download: config.download ) - if let prewarm = config.prewarm, prewarm { Logging.info("Prewarming models...") @@ -314,7 +313,7 @@ open class WhisperKit { variant: modelVariant, downloadBase: downloadBase, useBackgroundSession: useBackgroundDownloadSession, - from: repo, + from: repo, token: modelToken ) } catch { diff --git a/Tests/WhisperKitTests/RegressionTests.swift b/Tests/WhisperKitTests/RegressionTests.swift index f6cef153..33047c3b 100644 --- a/Tests/WhisperKitTests/RegressionTests.swift +++ b/Tests/WhisperKitTests/RegressionTests.swift @@ -84,7 +84,7 @@ class RegressionTests: XCTestCase { if let modelSizeEnv = ProcessInfo.processInfo.environment["MODEL_NAME"], !modelSizeEnv.isEmpty { modelsToTest = [modelSizeEnv] Logging.debug("Model size: \(modelSizeEnv)") - + if let repoEnv = ProcessInfo.processInfo.environment["MODEL_REPO"] { modelReposToTest = [repoEnv] Logging.debug("Using repo: \(repoEnv)") @@ -632,7 +632,7 @@ class RegressionTests: XCTestCase { let modelSize = try fileManager.allocatedSizeOfDirectory(at: folder) return Double(modelSize / (1024 * 1024)) // Convert to MB } - + public func initWhisperKitTask(testConfig config: TestConfig, verbose: Bool, logLevel: Logging.LogLevel) -> Task { // Create the initialization task let initializationTask = Task { () -> WhisperKit in