diff --git a/Package.resolved b/Package.resolved index d964957..8488a92 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "f02d1b06da5914ec9757af4da0eaa58ac89d06c25ef4f19477024d63596a814a", + "originHash" : "bd373e20fb0f4ce7e77093a139f872954cd08face9c8f999dd7ce38e1d0f0869", "pins" : [ { "identity" : "dtln-aec-coreml", @@ -10,6 +10,24 @@ "version" : "0.6.0-beta" } }, + { + "identity" : "eventsource", + "kind" : "remoteSourceControl", + "location" : "https://github.com/mattt/EventSource.git", + "state" : { + "revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e", + "version" : "1.4.1" + } + }, + { + "identity" : "fluidaudio", + "kind" : "remoteSourceControl", + "location" : "https://github.com/FluidInference/FluidAudio.git", + "state" : { + "revision" : "9830ce835881c0d0d40f90aabfaae3a6da5bebfb", + "version" : "0.12.4" + } + }, { "identity" : "sparkle", "kind" : "remoteSourceControl", @@ -18,6 +36,96 @@ "revision" : "21d8df80440b1ca3b65fa82e40782f1e5a9e6ba2", "version" : "2.9.0" } + }, + { + "identity" : "swift-asn1", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-asn1.git", + "state" : { + "revision" : "9f542610331815e29cc3821d3b6f488db8715517", + "version" : "1.6.0" + } + }, + { + "identity" : "swift-atomics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-atomics.git", + "state" : { + "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", + "version" : "1.3.0" + } + }, + { + "identity" : "swift-collections", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-collections.git", + "state" : { + "revision" : "8d9834a6189db730f6264db7556a7ffb751e99ee", + "version" : "1.4.0" + } + }, + { + "identity" : "swift-crypto", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-crypto.git", + "state" : { + "revision" : "fa308c07a6fa04a727212d793e761460e41049c3", + "version" : "4.3.0" + } + }, + { + "identity" : 
"swift-huggingface", + "kind" : "remoteSourceControl", + "location" : "https://github.com/huggingface/swift-huggingface.git", + "state" : { + "revision" : "b721959445b617d0bf03910b2b4aced345fd93bf", + "version" : "0.9.0" + } + }, + { + "identity" : "swift-jinja", + "kind" : "remoteSourceControl", + "location" : "https://github.com/huggingface/swift-jinja.git", + "state" : { + "revision" : "f731f03bf746481d4fda07f817c3774390c4d5b9", + "version" : "2.3.2" + } + }, + { + "identity" : "swift-nio", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-nio.git", + "state" : { + "revision" : "b31565862a8f39866af50bc6676160d8dda7de35", + "version" : "2.96.0" + } + }, + { + "identity" : "swift-system", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-system.git", + "state" : { + "revision" : "7c6ad0fc39d0763e0b699210e4124afd5041c5df", + "version" : "1.6.4" + } + }, + { + "identity" : "swift-transformers", + "kind" : "remoteSourceControl", + "location" : "https://github.com/huggingface/swift-transformers", + "state" : { + "revision" : "eed7264ac5e4ec5dfa6165c6e5c5577364344fe4", + "version" : "1.2.0" + } + }, + { + "identity" : "yyjson", + "kind" : "remoteSourceControl", + "location" : "https://github.com/ibireme/yyjson.git", + "state" : { + "revision" : "8b4a38dc994a110abaec8a400615567bd996105f", + "version" : "0.12.0" + } } ], "version" : 3 diff --git a/Package.swift b/Package.swift index 4a2db25..1eacd0e 100644 --- a/Package.swift +++ b/Package.swift @@ -7,6 +7,7 @@ let package = Package( dependencies: [ .package(url: "https://github.com/sparkle-project/Sparkle", from: "2.0.0"), .package(url: "https://github.com/MimicScribe/dtln-aec-coreml.git", from: "0.4.0-beta"), + .package(url: "https://github.com/FluidInference/FluidAudio.git", from: "0.12.4"), ], targets: [ .target( @@ -32,6 +33,7 @@ let package = Package( .product(name: "Sparkle", package: "Sparkle"), .product(name: "DTLNAecCoreML", package: 
"dtln-aec-coreml"), .product(name: "DTLNAec256", package: "dtln-aec-coreml"), + .product(name: "FluidAudio", package: "FluidAudio"), ], path: "Sources", exclude: ["ObjCExceptionCatcher", "Watchdog"], diff --git a/Sources/AudioMonitor.swift b/Sources/AudioMonitor.swift index 9b6689f..108643f 100644 --- a/Sources/AudioMonitor.swift +++ b/Sources/AudioMonitor.swift @@ -593,8 +593,9 @@ final class AudioMonitor { let url = await recorder.stop() savingCount -= 1 isSaving = savingCount > 0 - if url != nil { + if let url { notifyRecordingSaved(appName: appName) + autoTranscribe(recordingDirectory: url) } updateAutoState() } @@ -721,6 +722,25 @@ final class AudioMonitor { return count <= max } + // MARK: - Auto-Transcription + + private func autoTranscribe(recordingDirectory: URL) { + let provider = + UserDefaults.standard.string(forKey: TranscriptionProvider.defaultsKey) ?? "local" + guard provider == "local", LocalTranscriptionService.modelsReady else { return } + Task.detached { + do { + try await LocalTranscriptionService.transcribeAndSave( + recordingDirectory: recordingDirectory) + Log.info(Log.transcription, "auto", "auto-transcription complete") + } catch { + Log.error( + Log.transcription, "auto", + "auto-transcription failed: \(error.localizedDescription)") + } + } + } + // MARK: - Notifications private func notifyRecordingStarted(appName: String) { diff --git a/Sources/LocalTranscriptionService.swift b/Sources/LocalTranscriptionService.swift new file mode 100644 index 0000000..aa1641b --- /dev/null +++ b/Sources/LocalTranscriptionService.swift @@ -0,0 +1,430 @@ +@preconcurrency import AVFoundation +import CoreMedia +import FluidAudio + +/// On-device transcription using FluidAudio (Parakeet TDT v3 ASR + offline diarization). +/// Extracts dual tracks from the recording, transcribes and diarizes each independently, +/// then merges with speaker attribution via temporal overlap matching. 
+enum LocalTranscriptionService { + + /// Whether ASR and diarizer model files have been downloaded to disk. + nonisolated static var modelsReady: Bool { + UserDefaults.standard.bool(forKey: "localTranscriptionModelsReady") + } + + // MARK: - Public API + + /// Transcribe a recording and return the document. Status updates are sent via + /// `onStatus` which is `@Sendable` - callers must dispatch to MainActor themselves. + static func transcribe( + recordingDirectory: URL, + onStatus: @escaping @Sendable (TranscriptionStatus) -> Void + ) async throws -> TranscriptDocument { + let processedURL = recordingDirectory.appendingPathComponent("audio-processed.m4a") + let originalURL = recordingDirectory.appendingPathComponent("audio.m4a") + let audioURL = + FileManager.default.fileExists(atPath: processedURL.path) ? processedURL : originalURL + + let doc = try await Task.detached { + try await Self.run(audioURL: audioURL, onStatus: onStatus) + }.value + + onStatus(.completed) + return doc + } + + /// Convenience: transcribe and save to disk as `transcript-local.json`. + static func transcribeAndSave( + recordingDirectory: URL, + onStatus: @escaping @Sendable (TranscriptionStatus) -> Void = { _ in } + ) async throws { + let doc = try await transcribe( + recordingDirectory: recordingDirectory, onStatus: onStatus) + try doc.save(for: recordingDirectory, provider: .local) + } + + /// Download and prepare all models (ASR + diarizer). Call from Settings UI. + static func prepareModels( + onStatus: @escaping @Sendable (TranscriptionStatus) -> Void + ) async throws { + onStatus(.preparing) + try await Task.detached { + try await Self.downloadAllModels() + }.value + onStatus(.completed) + } + + // MARK: - Core Pipeline (runs off main actor) + + nonisolated private static func run( + audioURL: URL, + onStatus: @Sendable (TranscriptionStatus) -> Void + ) async throws -> TranscriptDocument { + try Task.checkCancellation() + + // 1. 
Load models (fast from cache on subsequent calls) + onStatus(.preparing) + let asrModels = try await downloadASRModels() + let diarizerManager = OfflineDiarizerManager() + try await diarizerManager.prepareModels() + UserDefaults.standard.set(true, forKey: "localTranscriptionModelsReady") + + try Task.checkCancellation() + + // 2. Extract tracks + onStatus(.transcribing) + let (systemSamples, micSamples) = try await extractTracks(from: audioURL) + + Log.info( + Log.transcription, "local", + "extracted tracks: system=\(systemSamples.count) samples" + + (micSamples != nil ? ", mic=\(micSamples!.count) samples" : " (single-track)")) + + try Task.checkCancellation() + + // 3. ASR + let asrManager = AsrManager() + try await asrManager.initialize(models: asrModels) + + let systemASR = try await asrManager.transcribe(systemSamples, source: .system) + Log.info( + Log.transcription, "local", + "system ASR: \(systemASR.text.prefix(80))... (\(String(format: "%.0f", systemASR.rtfx))x realtime)" + ) + + try Task.checkCancellation() + + var micASR: ASRResult? + if let micSamples { + micASR = try await asrManager.transcribe(micSamples, source: .microphone) + Log.info( + Log.transcription, "local", + "mic ASR: \(micASR!.text.prefix(80))... (\(String(format: "%.0f", micASR!.rtfx))x realtime)" + ) + } + + try Task.checkCancellation() + + // 4. Diarize (graceful degradation - transcript still works without diarization) + var systemDiarization: DiarizationResult? + var micDiarization: DiarizationResult? 
+ + do { + systemDiarization = try await diarizerManager.process(audio: systemSamples) + Log.info( + Log.transcription, "local", + "system diarization: \(systemDiarization!.segments.count) segments") + + if let micSamples { + try Task.checkCancellation() + micDiarization = try await diarizerManager.process(audio: micSamples) + Log.info( + Log.transcription, "local", + "mic diarization: \(micDiarization!.segments.count) segments") + } + } catch { + Log.error(Log.transcription, "local", "diarization failed, proceeding without: \(error)") + } + + // 5. Assign speakers to ASR segments via temporal overlap + let systemSegments = labelSegments(asr: systemASR, diarization: systemDiarization) + let micSegments = micASR != nil ? labelSegments(asr: micASR!, diarization: micDiarization) : nil + + // 6. Merge into document + let language = systemASR.ctcDetectedTerms?.first + + return mergeIntoDocument( + systemSegments: systemSegments, + micSegments: micSegments, + language: language + ) + } + + // MARK: - Model Loading + + /// Downloads ASR models only. Diarizer models are prepared in `run()` where + /// the `OfflineDiarizerManager` instance is actually used. + @discardableResult + nonisolated private static func downloadASRModels() async throws -> AsrModels { + let asrModels = try await AsrModels.downloadAndLoad(version: .v3) + Log.info(Log.transcription, "local", "ASR models ready") + return asrModels + } + + /// Downloads both ASR and diarizer models (for Settings pre-download). 
+ nonisolated private static func downloadAllModels() async throws { + _ = try await AsrModels.downloadAndLoad(version: .v3) + let diarizerManager = OfflineDiarizerManager() + try await diarizerManager.prepareModels() + UserDefaults.standard.set(true, forKey: "localTranscriptionModelsReady") + Log.info(Log.transcription, "local", "all models ready") + } + + // MARK: - Track Extraction + + nonisolated private static func extractTracks( + from url: URL + ) async throws -> (system: [Float], mic: [Float]?) { + let asset = AVURLAsset(url: url) + let tracks = try await asset.loadTracks(withMediaType: .audio) + + guard !tracks.isEmpty else { + throw LocalTranscriptionError.noAudioTracks + } + + let pcmSettings: [String: Any] = [ + AVFormatIDKey: kAudioFormatLinearPCM, + AVSampleRateKey: 16000 as Double, + AVNumberOfChannelsKey: 1, + AVLinearPCMBitDepthKey: 32, + AVLinearPCMIsFloatKey: true, + AVLinearPCMIsBigEndianKey: false, + AVLinearPCMIsNonInterleaved: false, + ] + + let duration = try await asset.load(.duration).seconds + let expectedSamples = Int(duration * 16000) + + let systemSamples = try readTrack( + tracks[0], asset: asset, settings: pcmSettings, reserveCount: expectedSamples) + let micSamples = + tracks.count >= 2 + ? try readTrack( + tracks[1], asset: asset, settings: pcmSettings, reserveCount: expectedSamples) : nil + + return (system: systemSamples, mic: micSamples) + } + + nonisolated private static func readTrack( + _ track: AVAssetTrack, asset: AVURLAsset, settings: [String: Any], + reserveCount: Int + ) throws -> [Float] { + let reader = try AVAssetReader(asset: asset) + let output = AVAssetReaderTrackOutput(track: track, outputSettings: settings) + output.alwaysCopiesSampleData = false + reader.add(output) + + guard reader.startReading() else { + throw LocalTranscriptionError.trackReadFailed( + reader.error?.localizedDescription ?? 
"unknown") + } + + var samples: [Float] = [] + samples.reserveCapacity(reserveCount) + while let sampleBuffer = output.copyNextSampleBuffer() { + guard let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) else { continue } + let length = CMBlockBufferGetDataLength(blockBuffer) + let floatCount = length / MemoryLayout<Float>.size + var chunk = [Float](repeating: 0, count: floatCount) + chunk.withUnsafeMutableBufferPointer { ptr in + guard let base = ptr.baseAddress else { return } + _ = CMBlockBufferCopyDataBytes( + blockBuffer, atOffset: 0, dataLength: length, destination: base) + } + samples.append(contentsOf: chunk) + } + + return samples + } + + // MARK: - Speaker Assignment (Temporal Overlap Matching) + + private struct LabeledSegment { + let speakerId: String + let startTime: Double + let text: String + } + + /// Assign speaker labels to ASR tokens by finding the diarization segment + /// with maximum temporal overlap. Falls back to nearest segment by gap distance. + nonisolated private static func labelSegments( + asr: ASRResult, diarization: DiarizationResult? + ) -> [LabeledSegment] { + // No diarization or no token timings - entire text as one segment + guard let timings = asr.tokenTimings, !timings.isEmpty, + let diarization, !diarization.segments.isEmpty + else { + let speaker = + diarization?.segments + .max(by: { $0.durationSeconds < $1.durationSeconds })?.speakerId ?? "SPEAKER_0" + return [LabeledSegment(speakerId: speaker, startTime: 0, text: asr.text)] + } + + // For each token, find best matching diarization speaker + var labeledTokens: [(speakerId: String, timing: TokenTiming)] = [] + + for token in timings { + let tokenStart = Float(token.startTime) + let tokenEnd = Float(token.endTime) + + var bestSpeaker: String?
+ var bestOverlap: Float = 0 + + for seg in diarization.segments { + let overlap = max( + 0, min(tokenEnd, seg.endTimeSeconds) - max(tokenStart, seg.startTimeSeconds)) + if overlap > bestOverlap { + bestOverlap = overlap + bestSpeaker = seg.speakerId + } + } + + // Fallback: nearest diarization segment by gap distance + if bestSpeaker == nil { + var nearestGap: Float = .infinity + for seg in diarization.segments { + let gap: Float + if tokenEnd <= seg.startTimeSeconds { + gap = seg.startTimeSeconds - tokenEnd + } else if tokenStart >= seg.endTimeSeconds { + gap = tokenStart - seg.endTimeSeconds + } else { + gap = 0 + } + if gap < nearestGap { + nearestGap = gap + bestSpeaker = seg.speakerId + } + } + } + + labeledTokens.append((bestSpeaker ?? "SPEAKER_0", token)) + } + + // Group consecutive same-speaker tokens into segments + var segments: [LabeledSegment] = [] + var currentSpeaker: String? + var currentText = "" + var currentStart: Double = 0 + + for (speaker, token) in labeledTokens { + if speaker != currentSpeaker { + if let s = currentSpeaker, + !currentText.trimmingCharacters(in: .whitespaces).isEmpty + { + segments.append( + LabeledSegment( + speakerId: s, startTime: currentStart, + text: currentText.trimmingCharacters(in: .whitespaces))) + } + currentSpeaker = speaker + currentText = "" + currentStart = token.startTime + } + currentText += token.token + } + + if let s = currentSpeaker, !currentText.trimmingCharacters(in: .whitespaces).isEmpty { + segments.append( + LabeledSegment( + speakerId: s, startTime: currentStart, + text: currentText.trimmingCharacters(in: .whitespaces))) + } + + return segments + } + + // MARK: - Merge Dual-Track Results + + /// Merge labeled segments from system (remote) and mic (local) tracks into a + /// TranscriptDocument with sequential integer speaker IDs and default names. + nonisolated private static func mergeIntoDocument( + systemSegments: [LabeledSegment], + micSegments: [LabeledSegment]?, + language: String? 
+ ) -> TranscriptDocument { + // Collect unique speaker IDs per track + var micSpeakerIds: [String] = [] + if let micSegs = micSegments { + var seen = Set<String>() + for seg in micSegs where seen.insert(seg.speakerId).inserted { + micSpeakerIds.append(seg.speakerId) + } + } + + var remoteSpeakerIds: [String] = [] + var seenRemote = Set<String>() + for seg in systemSegments where seenRemote.insert(seg.speakerId).inserted { + remoteSpeakerIds.append(seg.speakerId) + } + + // Map to sequential integers: mic speakers first, then remote + var speakerMap: [String: Int] = [:] + var speakerNames: [String: String] = [:] + var nextId = 0 + + for id in micSpeakerIds { + speakerMap["M_\(id)"] = nextId + speakerNames[String(nextId)] = + micSpeakerIds.count == 1 ? "You" : "Local \(nextId + 1)" + nextId += 1 + } + + for id in remoteSpeakerIds { + speakerMap["R_\(id)"] = nextId + speakerNames[String(nextId)] = "Speaker \(nextId + 1)" + nextId += 1 + } + + // Convert to TranscriptSegments + var allSegments: [TranscriptSegment] = [] + + if let micSegs = micSegments { + for seg in micSegs { + allSegments.append( + TranscriptSegment( + speaker: speakerMap["M_\(seg.speakerId)"] ?? 0, + time: seg.startTime, + text: seg.text)) + } + } + + for seg in systemSegments { + allSegments.append( + TranscriptSegment( + speaker: speakerMap["R_\(seg.speakerId)"] ?? 0, + time: seg.startTime, + text: seg.text)) + } + + // Sort by time + allSegments.sort { $0.time < $1.time } + + // Single-track: no M_/R_ prefixes were used, simplify speaker names + if micSegments == nil { + speakerNames = [:] + for (i, id) in remoteSpeakerIds.enumerated() { + speakerMap["R_\(id)"] = i + speakerNames[String(i)] = "Speaker \(i + 1)" + } + // Re-map segments with corrected IDs + allSegments = systemSegments.map { seg in + TranscriptSegment( + speaker: speakerMap["R_\(seg.speakerId)"] ??
0, + time: seg.startTime, + text: seg.text) + } + } + + return TranscriptDocument( + segments: allSegments, + language: language, + createdAt: Date(), + speakers: speakerNames + ) + } +} + +// MARK: - Errors + +nonisolated enum LocalTranscriptionError: Error, LocalizedError, Sendable { + case noAudioTracks + case trackReadFailed(String) + + var errorDescription: String? { + switch self { + case .noAudioTracks: "No audio tracks found in recording" + case .trackReadFailed(let msg): "Failed to read audio track: \(msg)" + } + } +} diff --git a/Sources/MainWindowView.swift b/Sources/MainWindowView.swift index 1669093..ffced54 100644 --- a/Sources/MainWindowView.swift +++ b/Sources/MainWindowView.swift @@ -155,7 +155,21 @@ struct RecordingsView: View { let processedSize = hasProcessed ? ((try? processedURL.resourceValues(forKeys: [.fileSizeKey]).fileSize) ?? 0) : 0 - let sidecar = TranscriptDocument.sidecarURL(for: url) + TranscriptDocument.migrateLegacyTranscript(in: url) + var available = Set<TranscriptionProvider>() + for provider in TranscriptionProvider.allCases { + let sidecar = TranscriptDocument.sidecarURL(for: url, provider: provider) + if FileManager.default.fileExists(atPath: sidecar.path) { + available.insert(provider) + } + } + // Fallback: unmigrated legacy transcript counts as soniox + if !available.contains(.soniox), + FileManager.default.fileExists( + atPath: url.appendingPathComponent("transcript.json").path) + { + available.insert(.soniox) + } results.append( RecordingFile( url: url, @@ -167,7 +181,7 @@ ).contentModificationDate) ??
.distantPast, size: originalSize + processedSize, hasProcessed: hasProcessed, - hasTranscript: FileManager.default.fileExists(atPath: sidecar.path) + availableTranscripts: available )) } @@ -296,7 +310,7 @@ private struct RecordingRow: View { .foregroundStyle(.secondary) .help("Echo cancellation applied") } - if recording.hasTranscript { + if !recording.availableTranscripts.isEmpty { Image(systemName: "text.quote") .font(.caption2) .foregroundStyle(.secondary) @@ -346,8 +360,15 @@ struct RecordingDetailView: View { @State private var editedTitle = "" // Transcription - @State private var transcript: TranscriptDocument? - @State private var transcriptionStatus: TranscriptionStatus = .idle + @State private var activeProvider: TranscriptionProvider = { + TranscriptionProvider( + rawValue: UserDefaults.standard.string(forKey: TranscriptionProvider.defaultsKey) ?? "local" + ) ?? .local + }() + @State private var localTranscript: TranscriptDocument? + @State private var sonioxTranscript: TranscriptDocument? + @State private var localTranscriptionStatus: TranscriptionStatus = .idle + @State private var sonioxTranscriptionStatus: TranscriptionStatus = .idle + @State private var transcriptionTask: Task<Void, Never>? var body: some View { @@ -434,7 +455,7 @@ struct RecordingDetailView: View { } Spacer() HStack(spacing: 8) { - if transcript != nil { + if activeTranscript != nil { Button { exportTranscript() } label: { @@ -721,7 +742,24 @@ struct RecordingDetailView: View { onTitleChanged() } + private var activeTranscript: TranscriptDocument?
{ + switch activeProvider { + case .local: localTranscript + case .soniox: sonioxTranscript + } + } + + private var activeTranscriptionStatus: TranscriptionStatus { + switch activeProvider { + case .local: localTranscriptionStatus + case .soniox: sonioxTranscriptionStatus + } + } + private func speakerName(for speakerID: Int) -> String { + if let name = activeTranscript?.speakers?[String(speakerID)], !name.isEmpty { + return name + } if let name = metadata?.speakers[String(speakerID)], !name.isEmpty { return name } @@ -729,67 +767,94 @@ struct RecordingDetailView: View { } private func saveSpeakerName(_ name: String, for speakerID: Int) { - var meta = - metadata - ?? RecordingMetadata( - title: recording.title, - createdAt: recording.date, - appName: recording.title, - speakers: [:] - ) - meta.speakers[String(speakerID)] = name - try? meta.save(in: recording.url) - metadata = meta + switch activeProvider { + case .local: + if localTranscript?.speakers == nil { localTranscript?.speakers = [:] } + localTranscript?.speakers?[String(speakerID)] = name + if let doc = localTranscript { + try? doc.save(for: recording.url, provider: .local) + } + case .soniox: + if sonioxTranscript?.speakers == nil { sonioxTranscript?.speakers = [:] } + sonioxTranscript?.speakers?[String(speakerID)] = name + if let doc = sonioxTranscript { + try? 
doc.save(for: recording.url, provider: .soniox) + } + } } // MARK: - Transcript private var transcriptArea: some View { - Group { - if let transcript, !transcript.segments.isEmpty { - transcriptView(transcript) - } else if case .error(let msg) = transcriptionStatus { - VStack(spacing: 12) { - Image(systemName: "exclamationmark.triangle") - .font(.title) - .foregroundStyle(.secondary) - Text(msg) - .font(.caption) - .foregroundStyle(.secondary) - .multilineTextAlignment(.center) - Button("Retry") { startTranscription() } - .buttonStyle(.bordered) - } - .frame(maxWidth: .infinity, maxHeight: .infinity) - } else if transcriptionStatus != .idle { - VStack(spacing: 12) { - ProgressView() - Text(transcriptionStatusText) - .font(.caption) - .foregroundStyle(.secondary) - Button("Cancel") { cancelTranscription() } - .buttonStyle(.bordered) + VStack(spacing: 0) { + Picker("", selection: $activeProvider) { + ForEach(TranscriptionProvider.allCases) { p in + Text(p.label).tag(p) } - .frame(maxWidth: .infinity, maxHeight: .infinity) - } else { - VStack(spacing: 12) { - if sonioxAPIKey.isEmpty { - Text("Add your Soniox API key in Settings to enable transcription") + } + .pickerStyle(.segmented) + .frame(width: 180) + .padding(.vertical, 8) + + Divider() + + Group { + if let transcript = activeTranscript, !transcript.segments.isEmpty { + transcriptView(transcript) + } else if case .error(let msg) = activeTranscriptionStatus { + VStack(spacing: 12) { + Image(systemName: "exclamationmark.triangle") + .font(.title) + .foregroundStyle(.secondary) + Text(msg) .font(.caption) .foregroundStyle(.secondary) .multilineTextAlignment(.center) - } else { - Button("Transcribe") { startTranscription() } - .buttonStyle(.borderedProminent) + Button("Retry") { startActiveTranscription() } + .buttonStyle(.bordered) + } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } else if activeTranscriptionStatus != .idle { + VStack(spacing: 12) { + ProgressView() + Text(transcriptionStatusText) + 
.font(.caption) + .foregroundStyle(.secondary) + Button("Cancel") { cancelTranscription() } + .buttonStyle(.bordered) } + .frame(maxWidth: .infinity, maxHeight: .infinity) + } else { + providerIdleView + } + } + } + } + + private var providerIdleView: some View { + VStack(spacing: 12) { + switch activeProvider { + case .local: + Button("Transcribe") { startLocalTranscription() } + .buttonStyle(.borderedProminent) + case .soniox: + if sonioxAPIKey.isEmpty { + Text("Add your Soniox API key in Settings to enable cloud transcription") + .font(.caption) + .foregroundStyle(.secondary) + .multilineTextAlignment(.center) + } else { + Button("Transcribe") { startSonioxTranscription() } + .buttonStyle(.borderedProminent) } - .frame(maxWidth: .infinity, maxHeight: .infinity) } } + .frame(maxWidth: .infinity, maxHeight: .infinity) } private var transcriptionStatusText: String { - switch transcriptionStatus { + switch activeTranscriptionStatus { + case .preparing: "Loading models..." case .uploading: "Uploading audio..." case .queued: "Queued for transcription..." case .transcribing: "Transcribing..." @@ -840,49 +905,93 @@ struct RecordingDetailView: View { // MARK: - Transcription Lifecycle private func loadTranscript() { - transcript = TranscriptDocument.load(for: recording.url) - transcriptionStatus = .idle + localTranscript = TranscriptDocument.load(for: recording.url, provider: .local) + sonioxTranscript = + TranscriptDocument.load(for: recording.url, provider: .soniox) + ?? 
TranscriptDocument.loadLegacy(in: recording.url) + localTranscriptionStatus = .idle + sonioxTranscriptionStatus = .idle + } + + private func startActiveTranscription() { + switch activeProvider { + case .local: startLocalTranscription() + case .soniox: startSonioxTranscription() + } + } + + private func startLocalTranscription() { + transcriptionTask = Task { + localTranscriptionStatus = .preparing + do { + let doc = try await LocalTranscriptionService.transcribe( + recordingDirectory: recording.url + ) { status in + Task { @MainActor in + localTranscriptionStatus = status + } + } + localTranscript = doc + localTranscriptionStatus = .completed + onTranscriptChanged() + do { + try doc.save(for: recording.url, provider: .local) + } catch { + Log.error( + Log.transcription, "local", + "failed to save transcript: \(error.localizedDescription)") + } + } catch is CancellationError { + localTranscriptionStatus = .idle + } catch { + if Task.isCancelled { + localTranscriptionStatus = .idle + } else { + localTranscriptionStatus = .error(error.localizedDescription) + Log.error( + Log.transcription, "local", "failed: \(error.localizedDescription)") + } + } + } } - private func startTranscription() { + private func startSonioxTranscription() { guard !sonioxAPIKey.isEmpty else { return } transcriptionTask = Task { let service = TranscriptionService(apiKey: sonioxAPIKey) - transcriptionStatus = .uploading + sonioxTranscriptionStatus = .uploading do { let doc = try await service.transcribe( fileURL: recording.audioURL, onStatus: { status in - transcriptionStatus = status + sonioxTranscriptionStatus = status } ) - // Show transcript immediately, then persist - transcript = doc - transcriptionStatus = .completed + sonioxTranscript = doc + sonioxTranscriptionStatus = .completed onTranscriptChanged() do { - try doc.save(for: recording.url) + try doc.save(for: recording.url, provider: .soniox) } catch { Log.error( Log.transcription, "transcription", "failed to save transcript: 
\(error.localizedDescription)") + } + } catch is CancellationError { - transcriptionStatus = .idle + sonioxTranscriptionStatus = .idle } catch let error as URLError where error.code == .cancelled { - // URLSession throws URLError.cancelled when Task is cancelled if Task.isCancelled { - transcriptionStatus = .idle + sonioxTranscriptionStatus = .idle } else { - transcriptionStatus = .error(error.localizedDescription) + sonioxTranscriptionStatus = .error(error.localizedDescription) } } catch { if Task.isCancelled { - transcriptionStatus = .idle + sonioxTranscriptionStatus = .idle } else { - transcriptionStatus = .error(error.localizedDescription) + sonioxTranscriptionStatus = .error(error.localizedDescription) Log.error( Log.transcription, "transcription", "failed: \(error.localizedDescription)") @@ -894,14 +1003,20 @@ struct RecordingDetailView: View { private func cancelTranscription() { transcriptionTask?.cancel() transcriptionTask = nil - if case .error = transcriptionStatus { return } - transcriptionStatus = .idle + switch activeProvider { + case .local: + if case .error = localTranscriptionStatus { return } + localTranscriptionStatus = .idle + case .soniox: + if case .error = sonioxTranscriptionStatus { return } + sonioxTranscriptionStatus = .idle + } } // MARK: - Transcript Export private func exportTranscript() { - guard let transcript, !transcript.segments.isEmpty else { return } + guard let transcript = activeTranscript, !transcript.segments.isEmpty else { return } let panel = NSSavePanel() panel.nameFieldStringValue = "\(recording.title).json" @@ -1051,7 +1166,7 @@ struct RecordingFile: Identifiable { let date: Date // from metadata.createdAt let size: Int let hasProcessed: Bool - let hasTranscript: Bool + let availableTranscripts: Set<TranscriptionProvider> var sizeFormatted: String { ByteCountFormatter.string(fromByteCount: Int64(size), countStyle: .file) diff --git a/Sources/SettingsView.swift b/Sources/SettingsView.swift index 10950e4..b51609c 100644 ---
a/Sources/SettingsView.swift +++ b/Sources/SettingsView.swift @@ -174,8 +174,48 @@ struct SettingsView: View { // MARK: - Transcription + @AppStorage(TranscriptionProvider.defaultsKey) private var defaultProvider = "local" + @State private var isDownloadingModels = false + @State private var localModelsReady = LocalTranscriptionService.modelsReady + private var transcriptionSection: some View { Section("Transcription") { + Picker("Default provider", selection: $defaultProvider) { + Text("Local (on-device)").tag("local") + Text("Soniox (cloud)").tag("soniox") + } + + if localModelsReady { + Label("Models downloaded", systemImage: "checkmark.circle.fill") + .foregroundStyle(.green) + .font(.caption) + } else { + HStack { + Button("Download Models") { + isDownloadingModels = true + Task { + do { + try await LocalTranscriptionService.prepareModels { _ in } + localModelsReady = true + } catch { + Log.error( + Log.transcription, "settings", + "model download failed: \(error.localizedDescription)") + } + isDownloadingModels = false + } + } + .disabled(isDownloadingModels) + if isDownloadingModels { + ProgressView() + .controlSize(.small) + } + } + Text("Required for local transcription. Downloads ~300 MB of speech models.") + .font(.caption) + .foregroundStyle(.secondary) + } + SecureField("Soniox API Key", text: $sonioxAPIKey) Text( "Get your API key at soniox.com. Audio is sent to Soniox servers for transcription." diff --git a/Sources/TranscriptionService.swift b/Sources/TranscriptionService.swift index f6d2e4e..b933022 100644 --- a/Sources/TranscriptionService.swift +++ b/Sources/TranscriptionService.swift @@ -1,6 +1,31 @@ import AVFoundation import Foundation +// MARK: - Transcription Provider + +nonisolated enum TranscriptionProvider: String, CaseIterable, Identifiable, Sendable { + case local, soniox + + /// UserDefaults key for the user's default transcription provider. 
+ static let defaultsKey = "defaultTranscriptionProvider" + + var id: String { rawValue } + + var label: String { + switch self { + case .local: "Local" + case .soniox: "Soniox" + } + } + + var filename: String { + switch self { + case .local: "transcript-local.json" + case .soniox: "transcript-soniox.json" + } + } +} + // MARK: - Data Models nonisolated struct RecordingMetadata: Codable, Sendable { @@ -42,27 +67,52 @@ nonisolated struct TranscriptDocument: Codable, Sendable { var segments: [TranscriptSegment] var language: String? var createdAt: Date + var speakers: [String: String]? - nonisolated static func sidecarURL(for recordingURL: URL) -> URL { - recordingURL.appendingPathComponent("transcript.json") + nonisolated static func sidecarURL( + for recordingURL: URL, provider: TranscriptionProvider + ) -> URL { + recordingURL.appendingPathComponent(provider.filename) } - nonisolated static func load(for recordingURL: URL) -> TranscriptDocument? { - let url = sidecarURL(for: recordingURL) + nonisolated static func load( + for recordingURL: URL, provider: TranscriptionProvider + ) -> TranscriptDocument? { + let url = sidecarURL(for: recordingURL, provider: provider) guard let data = try? Data(contentsOf: url) else { return nil } let decoder = JSONDecoder() decoder.dateDecodingStrategy = .iso8601 return try? decoder.decode(TranscriptDocument.self, from: data) } - func save(for recordingURL: URL) throws { - let url = Self.sidecarURL(for: recordingURL) + func save(for recordingURL: URL, provider: TranscriptionProvider) throws { + let url = Self.sidecarURL(for: recordingURL, provider: provider) let encoder = JSONEncoder() encoder.dateEncodingStrategy = .iso8601 encoder.outputFormatting = [.prettyPrinted, .sortedKeys] let data = try encoder.encode(self) try data.write(to: url, options: .atomic) } + + /// Migrates legacy `transcript.json` to `transcript-soniox.json`. 
+ nonisolated static func migrateLegacyTranscript(in directory: URL) { + let legacy = directory.appendingPathComponent("transcript.json") + let target = directory.appendingPathComponent(TranscriptionProvider.soniox.filename) + if FileManager.default.fileExists(atPath: legacy.path), + !FileManager.default.fileExists(atPath: target.path) + { + try? FileManager.default.moveItem(at: legacy, to: target) + } + } + + /// Fallback: load pre-migration `transcript.json` if provider-specific file is missing. + nonisolated static func loadLegacy(in directory: URL) -> TranscriptDocument? { + let url = directory.appendingPathComponent("transcript.json") + guard let data = try? Data(contentsOf: url) else { return nil } + let decoder = JSONDecoder() + decoder.dateDecodingStrategy = .iso8601 + return try? decoder.decode(TranscriptDocument.self, from: data) + } } nonisolated struct TranscriptSegment: Codable, Identifiable, Sendable { @@ -89,6 +139,7 @@ nonisolated struct TranscriptSegment: Codable, Identifiable, Sendable { nonisolated enum TranscriptionStatus: Equatable, Sendable { case idle + case preparing case uploading case queued case transcribing @@ -448,7 +499,7 @@ final class TranscriptionService { })?["language"] as? String return TranscriptDocument( - segments: segments, language: language, createdAt: Date()) + segments: segments, language: language, createdAt: Date(), speakers: nil) } // MARK: - Cleanup