Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions app/MeetingTranscriber/Sources/AppSettings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,62 @@ final class AppSettings {
}
}

// MARK: - Experimental: Diarization Tuning

/// Defaults mirroring `OfflineDiarizerConfig.Clustering.community` and `Embedding.community`.
/// Source of truth for both `resetDiarizerTuning()` and tests.
enum DiarizerTuningDefaults {
    // Clustering knobs. Types are inferred from the literals (`Double`).
    static let clusterThreshold = 0.6
    static let warmStartFa = 0.07
    static let warmStartFb = 0.8

    // Embedding knobs.
    static let minSegmentDurationSeconds = 1.0
    static let excludeOverlap = true
}

/// Euclidean distance threshold for unit-normalized embeddings (FluidAudio: clustering.threshold).
var clusterThreshold: Double {
    didSet {
        // Write-through: `loadDiarizerTuning(from:)` reads this same key back.
        defaults.set(clusterThreshold, forKey: "diarizerClusterThreshold")
    }
}

/// VBx warm-start Fa parameter — controls precision (FluidAudio: clustering.warmStartFa).
var warmStartFa: Double {
    didSet {
        // Write-through: `loadDiarizerTuning(from:)` reads this same key back.
        defaults.set(warmStartFa, forKey: "diarizerWarmStartFa")
    }
}

/// VBx warm-start Fb parameter — controls recall (FluidAudio: clustering.warmStartFb).
var warmStartFb: Double {
    didSet {
        // Write-through: `loadDiarizerTuning(from:)` reads this same key back.
        defaults.set(warmStartFb, forKey: "diarizerWarmStartFb")
    }
}

/// Skip embeddings for segments shorter than this duration (FluidAudio: embedding.minSegmentDurationSeconds).
var minSegmentDurationSeconds: Double {
    didSet {
        // Note the stored key is shorter than the property name ("…Duration", no "Seconds").
        defaults.set(minSegmentDurationSeconds, forKey: "diarizerMinSegmentDuration")
    }
}

/// Mask out frames where multiple speakers overlap during embedding extraction
/// (FluidAudio: embedding.excludeOverlap).
var excludeOverlap: Bool {
    didSet {
        // Write-through: `loadDiarizerTuning(from:)` reads this same key back.
        defaults.set(excludeOverlap, forKey: "diarizerExcludeOverlap")
    }
}

/// Reset all 5 experimental diarization tuning knobs to their FluidAudio community defaults.
func resetDiarizerTuning() {
    // Plain property assignments: each one runs that property's `didSet`,
    // so the restored value is also written back to `defaults`.
    typealias Fallback = DiarizerTuningDefaults

    clusterThreshold = Fallback.clusterThreshold
    warmStartFa = Fallback.warmStartFa
    warmStartFb = Fallback.warmStartFb
    minSegmentDurationSeconds = Fallback.minSegmentDurationSeconds
    excludeOverlap = Fallback.excludeOverlap
}

/// True when all 5 tuning knobs are at their default values.
var diarizerTuningIsAllDefaults: Bool {
    // The Double knobs are compared with a tolerance instead of `==`.
    // Values that come back from stepped controls may be rebuilt from
    // `step * n` and land one ULP off the literal default
    // (0.05 * 12 == 0.6000000000000001), which would leave the UI reporting
    // "non-default" while displaying the default value.
    // NOTE(review): exact Slider arithmetic is not visible here; the tolerance
    // is defensive and far below the smallest slider step (0.01).
    let tolerance = 1e-9

    /// True when `value` is within `tolerance` of `reference`. NaN never matches.
    func matches(_ value: Double, _ reference: Double) -> Bool {
        abs(value - reference) <= tolerance
    }

    return matches(clusterThreshold, DiarizerTuningDefaults.clusterThreshold)
        && matches(warmStartFa, DiarizerTuningDefaults.warmStartFa)
        && matches(warmStartFb, DiarizerTuningDefaults.warmStartFb)
        && matches(minSegmentDurationSeconds, DiarizerTuningDefaults.minSegmentDurationSeconds)
        && excludeOverlap == DiarizerTuningDefaults.excludeOverlap
}

// MARK: - Protocol Generation

var protocolProvider: ProtocolProvider {
Expand Down Expand Up @@ -349,6 +405,13 @@ final class AppSettings {
.flatMap(DiarizerMode.init(rawValue:))) ?? .offline
numSpeakers = defaults.object(forKey: "numSpeakers") as? Int ?? 0

let tuning = Self.loadDiarizerTuning(from: defaults)
clusterThreshold = tuning.clusterThreshold
warmStartFa = tuning.warmStartFa
warmStartFb = tuning.warmStartFb
minSegmentDurationSeconds = tuning.minSegmentDuration
excludeOverlap = tuning.excludeOverlap

let storedProvider = defaults.string(forKey: "protocolProvider")
.flatMap(ProtocolProvider.init(rawValue:))
#if APPSTORE
Expand Down Expand Up @@ -385,4 +448,30 @@ final class AppSettings {
checkForUpdates = defaults.object(forKey: "checkForUpdates") as? Bool ?? true
includePreReleases = defaults.object(forKey: "includePreReleases") as? Bool ?? false
}

/// Bag of values used during init to read all 5 tuning knobs in one go.
/// Keeps the init body under the lint length budget without duplicating
/// the lookup pattern five times.
private struct LoadedDiarizerTuning {
    // Declaration order defines the memberwise initializer's argument order.
    let clusterThreshold: Double
    let warmStartFa, warmStartFb: Double
    // Named without the "Seconds" suffix used by the matching `AppSettings` property.
    let minSegmentDuration: Double
    let excludeOverlap: Bool
}

private static func loadDiarizerTuning(from defaults: UserDefaults) -> LoadedDiarizerTuning {
    /// Returns the `Double` stored under `key`, or `fallback` when the key is
    /// missing or holds a value that does not cast to `Double`.
    func storedDouble(_ key: String, or fallback: Double) -> Double {
        guard let stored = defaults.object(forKey: key) as? Double else { return fallback }
        return stored
    }

    let threshold = storedDouble("diarizerClusterThreshold", or: DiarizerTuningDefaults.clusterThreshold)
    let fa = storedDouble("diarizerWarmStartFa", or: DiarizerTuningDefaults.warmStartFa)
    let fb = storedDouble("diarizerWarmStartFb", or: DiarizerTuningDefaults.warmStartFb)
    let minDuration = storedDouble(
        "diarizerMinSegmentDuration",
        or: DiarizerTuningDefaults.minSegmentDurationSeconds,
    )

    // Same missing-or-wrong-type fallback rule, for the single Bool knob.
    let overlap: Bool
    if let storedOverlap = defaults.object(forKey: "diarizerExcludeOverlap") as? Bool {
        overlap = storedOverlap
    } else {
        overlap = DiarizerTuningDefaults.excludeOverlap
    }

    return LoadedDiarizerTuning(
        clusterThreshold: threshold,
        warmStartFa: fa,
        warmStartFb: fb,
        minSegmentDuration: minDuration,
        excludeOverlap: overlap,
    )
}
}
13 changes: 12 additions & 1 deletion app/MeetingTranscriber/Sources/AppState.swift
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,18 @@ final class AppState {
func makePipelineQueue() -> PipelineQueue {
let queue = PipelineQueue(
engine: activeTranscriptionEngine,
diarizationFactory: { [self] in FluidDiarizer(mode: settings.diarizerMode) },
diarizationFactory: { [self] in
FluidDiarizer(
mode: settings.diarizerMode,
tuning: OfflineDiarizerTuning(
clusterThreshold: settings.clusterThreshold,
warmStartFa: settings.warmStartFa,
warmStartFb: settings.warmStartFb,
minSegmentDurationSeconds: settings.minSegmentDurationSeconds,
excludeOverlap: settings.excludeOverlap,
),
)
},
protocolGeneratorFactory: { [self] in makeProtocolGenerator() },
outputDir: settings.effectiveOutputDir,
diarizeEnabled: settings.diarize,
Expand Down
61 changes: 55 additions & 6 deletions app/MeetingTranscriber/Sources/FluidDiarizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,38 @@ protocol OfflineDiarizationProcessing {
func process(audioPath: URL) async throws -> DiarizationResult
}

/// User-tunable subset of `OfflineDiarizerConfig` exposed via Settings.
/// Decouples `FluidOfflineProcessor` from `AppSettings`/UserDefaults so the
/// plumbing stays unit-testable.
struct OfflineDiarizerTuning: Equatable {
    // Written to `config.clustering` by `apply(to:)`.
    var clusterThreshold: Double
    var warmStartFa: Double
    var warmStartFb: Double

    // Written to `config.embedding` by `apply(to:)`.
    var minSegmentDurationSeconds: Double
    var excludeOverlap: Bool

    /// Baseline tuning, documented as matching FluidAudio's `Clustering.community`
    /// and `Embedding.community` presets.
    /// The same literals are repeated in `DiarizerTuningDefaults` (AppSettings.swift);
    /// change both together.
    static let defaults = OfflineDiarizerTuning(
        clusterThreshold: 0.6,
        warmStartFa: 0.07,
        warmStartFb: 0.8,
        minSegmentDurationSeconds: 1.0,
        excludeOverlap: true,
    )

    /// Returns `config` with the five tuned fields overwritten by this value's knobs.
    /// No other field is assigned here (assuming `OfflineDiarizerConfig` is a
    /// value type, the caller's instance is left untouched).
    /// - Parameter config: Configuration to start from.
    /// - Returns: The adjusted configuration.
    func apply(to config: OfflineDiarizerConfig) -> OfflineDiarizerConfig {
        var tuned = config

        tuned.clustering.threshold = clusterThreshold
        tuned.clustering.warmStartFa = warmStartFa
        tuned.clustering.warmStartFb = warmStartFb

        tuned.embedding.minSegmentDurationSeconds = minSegmentDurationSeconds
        tuned.embedding.excludeOverlap = excludeOverlap

        return tuned
    }
}

/// CoreML-based speaker diarization using FluidAudio (on-device, no HuggingFace token needed).
class FluidDiarizer: DiarizationProvider {
let mode: DiarizerMode
Expand All @@ -20,9 +52,13 @@ class FluidDiarizer: DiarizationProvider {
true
}

init(mode: DiarizerMode = .offline, offlineProcessor: OfflineDiarizationProcessing? = nil) {
init(
mode: DiarizerMode = .offline,
tuning: OfflineDiarizerTuning = .defaults,
offlineProcessor: OfflineDiarizationProcessing? = nil,
) {
self.mode = mode
self.offlineProcessor = offlineProcessor ?? FluidOfflineProcessor()
self.offlineProcessor = offlineProcessor ?? FluidOfflineProcessor(tuning: tuning)
}

/// Normalize FluidAudio's "Speaker 0" format to "SPEAKER_0".
Expand Down Expand Up @@ -137,16 +173,29 @@ class FluidDiarizer: DiarizationProvider {
struct FluidOfflineProcessor: OfflineDiarizationProcessing {
private var manager: OfflineDiarizerManager?
private var currentNumSpeakers: Int?
private let tuning: OfflineDiarizerTuning

/// Creates a processor that keeps `tuning` for later configuration building.
/// - Parameter tuning: Knob values stored on the processor; defaults to
///   `OfflineDiarizerTuning.defaults`.
init(tuning: OfflineDiarizerTuning = .defaults) {
self.tuning = tuning
}

/// Build the `OfflineDiarizerConfig` from a tuning struct + optional speaker count.
/// Pure helper so unit tests can verify the produced config without standing up
/// the actual CoreML manager.
static func makeConfig(tuning: OfflineDiarizerTuning, numSpeakers: Int?) -> OfflineDiarizerConfig {
    let tuned = tuning.apply(to: OfflineDiarizerConfig())

    // `nil`, zero and negative counts all leave the speaker range unconstrained.
    guard let upperBound = numSpeakers, upperBound > 0 else {
        return tuned
    }
    return tuned.withSpeakers(min: 1, max: upperBound)
}

mutating func prepare(numSpeakers: Int?) async throws {
guard manager == nil || numSpeakers != currentNumSpeakers else { return }

// Explicitly deallocate previous manager to prevent resource conflicts
manager = nil
var config = OfflineDiarizerConfig()
if let n = numSpeakers, n > 0 {
config = config.withSpeakers(min: 1, max: n)
}
let config = Self.makeConfig(tuning: tuning, numSpeakers: numSpeakers)
let newManager = OfflineDiarizerManager(config: config)
try await newManager.prepareModels()
manager = newManager
Expand Down
139 changes: 139 additions & 0 deletions app/MeetingTranscriber/Sources/Settings/SpeakersSettingsView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct SpeakersSettingsView: View {
/// SpeakerMatcher.init reads + decodes speakers.json — must not run
/// per body re-evaluation, so the matcher is created lazily on tap.
@State private var sheetMatcher: SpeakerMatcher?
@State private var experimentalTuningExpanded = false

var body: some View {
// swiftlint:disable:next closure_body_length
Expand Down Expand Up @@ -51,6 +52,10 @@ struct SpeakersSettingsView: View {
Stepper("", value: $settings.numSpeakers, in: 0 ... 10)
.labelsHidden()
}

if settings.diarizerMode == .offline {
experimentalTuningDisclosure
}
}
}
.accessibilityIdentifier("diarizationSection")
Expand Down Expand Up @@ -97,4 +102,138 @@ struct SpeakersSettingsView: View {
}
}
}

// MARK: - Experimental Tuning

/// Collapsible group wrapping the experimental tuning controls; its expansion
/// state is bound to `experimentalTuningExpanded`.
private var experimentalTuningDisclosure: some View {
    DisclosureGroup(
        isExpanded: $experimentalTuningExpanded,
        content: { tuningDisclosureBody },
        label: { tuningDisclosureLabel }
    )
    .accessibilityIdentifier("experimentalTuningDisclosure")
}

/// Disclosure title, followed by a small orange dot while any knob is off its default.
private var tuningDisclosureLabel: some View {
    let isCustomized = !settings.diarizerTuningIsAllDefaults

    return HStack(spacing: 4) {
        Text("Experimental: Diarization Tuning")
        if isCustomized {
            Circle()
                .fill(Color.orange)
                .frame(width: 6, height: 6)
                .accessibilityLabel("Non-default tuning active")
        }
    }
}

/// Expanded content: warning banner, four slider rows, the overlap toggle,
/// and a reset button that is disabled while every knob is at its default.
private var tuningDisclosureBody: some View {
    let overlapHelp = "When enabled, frames with multiple active speakers are masked out during embedding extraction."
    let nothingToReset = settings.diarizerTuningIsAllDefaults

    return VStack(alignment: .leading, spacing: 12) {
        tuningWarningBanner

        TuningSliderRow(knob: .clusterThreshold, value: $settings.clusterThreshold)
        TuningSliderRow(knob: .warmStartFa, value: $settings.warmStartFa)
        TuningSliderRow(knob: .warmStartFb, value: $settings.warmStartFb)
        TuningSliderRow(knob: .minSegmentDuration, value: $settings.minSegmentDurationSeconds)

        HStack {
            Toggle("Exclude overlap", isOn: $settings.excludeOverlap)
            TuningHelpIcon(tooltip: overlapHelp)
            Spacer()
        }

        Button("Reset to defaults") {
            settings.resetDiarizerTuning()
        }
        .disabled(nothingToReset)
    }
    .padding(.top, 4)
}

/// Red caption-sized caution shown at the top of the tuning controls.
private var tuningWarningBanner: some View {
    // Typed as `LocalizedStringKey` so the same `Label` initializer is selected
    // as when the literal is passed inline.
    let warning: LocalizedStringKey =
        "Changing these values may degrade diarization quality. Use with caution and reset if unsure."

    return Label(warning, systemImage: "exclamationmark.triangle.fill")
        .font(.caption)
        .foregroundColor(.red)
        .padding(.vertical, 4)
}
}

// MARK: - Tuning slider helpers

/// Static description of a single experimental tuning knob — keeps the
/// per-knob configuration (range, step, format, label, help text) out of
/// the SwiftUI body so it stays easy to read and lint-friendly.
private struct TuningKnob {
    /// Row label.
    let title: String
    /// Slider bounds.
    let range: ClosedRange<Double>
    /// Slider increment.
    let step: Double
    /// `String(format:)` pattern used to render the current value.
    let format: String
    /// Text appended after the formatted value (may be empty).
    let suffix: String
    /// Tooltip text for the row's help icon.
    let help: String

    // MARK: Clustering knobs

    static let clusterThreshold = TuningKnob(
        title: "Cluster threshold",
        range: 0 ... 1,
        step: 0.05,
        format: "%.2f",
        suffix: "",
        help: "Euclidean distance threshold for clustering speaker embeddings. Lower values split speakers more aggressively.",
    )

    static let warmStartFa = TuningKnob(
        title: "Warm-start Fa",
        range: 0 ... 1,
        step: 0.01,
        format: "%.2f",
        suffix: "",
        help: "VBx warm-start Fa controls clustering precision. Increasing it tightens speaker boundaries.",
    )

    static let warmStartFb = TuningKnob(
        title: "Warm-start Fb",
        range: 0 ... 2,
        step: 0.05,
        format: "%.2f",
        suffix: "",
        help: "VBx warm-start Fb controls clustering recall. Increasing it merges similar speakers more readily.",
    )

    // MARK: Embedding knobs

    static let minSegmentDuration = TuningKnob(
        title: "Min segment duration",
        range: 0 ... 5,
        step: 0.1,
        format: "%.1f",
        suffix: "s",
        help: "Skip embedding extraction for segments shorter than this duration. Larger values trade recall for stability.",
    )
}

/// One tuning row: title, help icon and formatted current value on the first
/// line, with a stepped slider bound to `value` underneath.
private struct TuningSliderRow: View {
    let knob: TuningKnob
    @Binding var value: Double

    var body: some View {
        VStack(alignment: .leading, spacing: 2) {
            header
            Slider(value: $value, in: knob.range, step: knob.step)
        }
    }

    /// Title and help icon on the left, current value readout on the right.
    private var header: some View {
        let formatted = String(format: knob.format, value)

        return HStack {
            Text(knob.title)
            TuningHelpIcon(tooltip: knob.help)
            Spacer()
            Text("\(formatted)\(knob.suffix)")
                .monospacedDigit()
                .foregroundStyle(.secondary)
        }
    }
}

/// Secondary-styled question-mark symbol that carries `tooltip` as its help text.
private struct TuningHelpIcon: View {
    let tooltip: String

    var body: some View {
        let symbol = Image(systemName: "questionmark.circle")

        return symbol
            .foregroundStyle(.secondary)
            .help(tooltip)
    }
}
Loading
Loading