From 5576a366fb2ac52dc0673565a55da578295c74ac Mon Sep 17 00:00:00 2001
From: Rory Ford <me@roryford.com>
Date: Mon, 15 Jun 2026 19:55:21 +1000
Subject: [PATCH] feat(inference): relocate InferenceMetric to
 ManifoldInference + wire FoundationBackend metrics

Move InferenceMetric/InferenceMetricSink/InMemoryMetricSink and
GenerationMetricTracker/SSEGenerationMetrics from ManifoldCloudCore to
ManifoldInference so that ManifoldFoundation (which depends on ManifoldInference
but not ManifoldCloudCore) can emit metrics without a cross-family import.

ManifoldCloudCore re-exports ManifoldInference via @_exported import in
MetricTypeAliases.swift to preserve source compat for all existing
import ManifoldCloudCore consumers. SSEGenerationTaskRunner is updated to
compute cost using InferenceCostEstimator (which stays in CloudCore) and
pass the resolved values to the now-parameter-explicit SSEGenerationMetrics.record.

FoundationBackend gains a public metricSink property (defaulting to
InMemoryMetricSink.shared) and emits one InferenceMetric per generation
via GenerationMetricTracker in both the text-only and tool-aware streaming
paths. FoundationBackendMetricEmissionTests covers the sink wiring and
live-inference paths (guarded by XCTSkip when Apple Intelligence is absent).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/api-breakage-allowlist.txt            |   3 +
 .../ManifoldCloudCore/MetricTypeAliases.swift |  13 ++
 .../SSEGenerationTaskRunner.swift             |  17 +-
 .../FoundationBackend.swift                   |  47 ++++-
 .../Metrics}/GenerationMetricTracker.swift    |  47 +++--
 .../Metrics}/InferenceMetric.swift            |  20 +-
 ...FoundationBackendMetricEmissionTests.swift | 191 ++++++++++++++++++
 7 files changed, 302 insertions(+), 36 deletions(-)
 create mode 100644 Sources/ManifoldCloudCore/MetricTypeAliases.swift
 rename Sources/{ManifoldCloudCore => ManifoldInference/Metrics}/GenerationMetricTracker.swift (79%)
 rename Sources/{ManifoldCloudCore => ManifoldInference/Metrics}/InferenceMetric.swift (88%)
 create mode 100644 Tests/ManifoldBackendsTests/FoundationBackendMetricEmissionTests.swift

diff --git a/.github/api-breakage-allowlist.txt b/.github/api-breakage-allowlist.txt
index 3816cb7f0..ace26956a 100644
--- a/.github/api-breakage-allowlist.txt
+++ b/.github/api-breakage-allowlist.txt
@@ -100,3 +100,6 @@ API breakage: func ModelLoadCoordinator.dispatchLoad(_:) has been renamed to fun
 API breakage: enumelement GenerationEvent.toolCallParseFailed has been added as a new enum case
 API breakage: enumelement GenerationEvent.toolCallTruncated has been added as a new enum case
 API breakage: constructor ToolCallTransform.init(markers:) has been removed
+API breakage: struct InferenceMetric has been removed
+API breakage: protocol InferenceMetricSink has been removed
+API breakage: class InMemoryMetricSink has been removed
diff --git a/Sources/ManifoldCloudCore/MetricTypeAliases.swift b/Sources/ManifoldCloudCore/MetricTypeAliases.swift
new file mode 100644
index 000000000..78ace05f0
--- /dev/null
+++ b/Sources/ManifoldCloudCore/MetricTypeAliases.swift
@@ -0,0 +1,13 @@
+// Source compatibility shim — InferenceMetric, InferenceMetricSink, and
+// InMemoryMetricSink were relocated from ManifoldCloudCore to ManifoldInference
+// in the observability train so that ManifoldFoundation (which depends on
+// ManifoldInference but not ManifoldCloudCore) can reach them.
+//
+// @_exported re-surfaces the entire ManifoldInference surface through
+// ManifoldCloudCore so all existing `import ManifoldCloudCore` consumers
+// continue to resolve InferenceMetric / InferenceMetricSink / InMemoryMetricSink
+// at the same import depth — no source changes required downstream.
+//
+// ManifoldCloudCore already takes a direct dependency on ManifoldInference in
+// Package.swift, so this is a pure source-compat promotion, not a new dep.
+@_exported import ManifoldInference
diff --git a/Sources/ManifoldCloudCore/SSEGenerationTaskRunner.swift b/Sources/ManifoldCloudCore/SSEGenerationTaskRunner.swift
index b5b305b00..630e47c08 100644
--- a/Sources/ManifoldCloudCore/SSEGenerationTaskRunner.swift
+++ b/Sources/ManifoldCloudCore/SSEGenerationTaskRunner.swift
@@ -110,12 +110,27 @@ struct SSEGenerationTaskRunner {
         }
 
         if let sink = context.metricSink {
+            // Compute cost in ManifoldCloudCore where InferenceCostEstimator lives,
+            // then pass the pre-resolved values to the ManifoldInference record helper.
+            let usage = context.readUsage()
+            let promptTokens = usage?.promptTokens ?? 0
+            let completionTokens = usage?.completionTokens ?? 0
+            let (costUSD, isApprox) = InferenceCostEstimator.estimatedCost(
+                provider: context.backendName,
+                model: context.modelName,
+                promptTokens: promptTokens,
+                completionTokens: completionTokens
+            )
             SSEGenerationMetrics.record(
                 to: sink,
                 tracker: metricTracker,
                 provider: context.backendName,
                 model: context.modelName,
-                usage: context.readUsage(),
+                promptTokens: promptTokens,
+                completionTokens: completionTokens,
+                estimatedCostUSD: costUSD,
+                isCostApproximate: isApprox,
+                costTableDate: InferenceCostEstimator.costTableDate,
                 errorClass: streamError.map { SSECloudBackend.classifyError($0) }
             )
         }
diff --git a/Sources/ManifoldFoundation/FoundationBackend.swift b/Sources/ManifoldFoundation/FoundationBackend.swift
index da780f4cd..c3fef5e42 100644
--- a/Sources/ManifoldFoundation/FoundationBackend.swift
+++ b/Sources/ManifoldFoundation/FoundationBackend.swift
@@ -6,6 +6,10 @@ import os
 // surface only (InferenceBackend, GenerationConfig, GenerationEvent, …) — no
 // engine state. ManifoldContract re-exports the P1 leaf types it needs.
 import ManifoldContract
+// InferenceMetricSink and InMemoryMetricSink live in ManifoldInference since
+// the observability train relocated them from ManifoldCloudCore so that this
+// backend can reach them without a ManifoldCloudCore dependency.
+import ManifoldInference
 
 /// Apple FoundationModels inference backend for on-device Apple Intelligence models.
 ///
@@ -195,6 +199,12 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
     /// real Apple Intelligence entitlement. Production uses the system default.
     private let availabilityResolver: @Sendable () -> SystemLanguageModel.Availability
 
+    /// The sink that receives an ``InferenceMetric`` after every generation call.
+    ///
+    /// Defaults to ``InMemoryMetricSink/shared`` so callers can read recent
+    /// metrics without any configuration. Set to `nil` to disable metric emission.
+    public var metricSink: (any InferenceMetricSink)? = InMemoryMetricSink.shared
+
     /// Structured conversation history installed by ``GenerationHistoryInstaller``
     /// through the ``StructuredHistoryReceiver`` opt-in.
     ///
@@ -496,7 +506,10 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
         // returning and the Task being scheduled by the cooperative executor.
         // The retain cycle (backend → generationTask → backend) is broken in the
         // `defer` block when `generationTask` is nilled out on completion.
+        let metricTracker = GenerationMetricTracker()
+        let capturedMetricSink = withStateLock { metricSink }
         let task = Task { [self, generationStream] in
+            var streamError: Error?
             defer {
                 withStateLock {
                     if generationSequence == generationID {
@@ -504,6 +517,23 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
                         generationTask = nil
                     }
                 }
+                // Emit an InferenceMetric after every generation (success or
+                // failure). Cost is zero / approximate because the Foundation
+                // Models framework does not expose token-level billing.
+                if let sink = capturedMetricSink {
+                    SSEGenerationMetrics.record(
+                        to: sink,
+                        tracker: metricTracker,
+                        provider: "FoundationModels",
+                        model: "apple-foundation",
+                        promptTokens: 0,
+                        completionTokens: 0,
+                        estimatedCostUSD: 0,
+                        isCostApproximate: true,
+                        costTableDate: "",
+                        errorClass: streamError.map { String(describing: type(of: $0)) }
+                    )
+                }
                 Self.logger.debug("Foundation generate finished")
             }
 
@@ -527,6 +557,8 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
                 // iterator was dropped before returning nil.
                 withStateLock { _sessionIsClean = false }
 
+                metricTracker.start()
+
                 let result: StreamResult
                 if let toolEnvelope {
                     result = try await runToolAwareStream(
@@ -535,7 +567,8 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
                         schema: toolEnvelope,
                         options: options,
                         continuation: continuation,
-                        generationStream: generationStream
+                        generationStream: generationStream,
+                        metricTracker: capturedMetricSink != nil ? metricTracker : nil
                     )
                 } else {
                     result = try await runTextOnlyStream(
@@ -543,7 +576,8 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
                         prompt: prompt,
                         options: options,
                         continuation: continuation,
-                        generationStream: generationStream
+                        generationStream: generationStream,
+                        metricTracker: capturedMetricSink != nil ? metricTracker : nil
                     )
                 }
 
@@ -579,6 +613,7 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
 
                 await MainActor.run { generationStream.setPhase(.done) }
             } catch {
+                streamError = error
                 if !Task.isCancelled {
                     Self.logger.error("Foundation generation error: \(error)")
                     await MainActor.run { generationStream.setPhase(.failed(error.localizedDescription)) }
@@ -620,7 +655,8 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
         prompt: String,
         options: GenerationOptions,
         continuation: AsyncThrowingStream<GenerationEvent, Error>.Continuation,
-        generationStream: GenerationStream
+        generationStream: GenerationStream,
+        metricTracker: GenerationMetricTracker?
     ) async throws -> StreamResult {
         let responseStream = session.streamResponse(to: prompt, options: options)
 
@@ -641,6 +677,7 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
                     await MainActor.run { generationStream.setPhase(.streaming) }
                     isFirstToken = false
                 }
+                metricTracker?.recordToken()
                 continuation.yield(.token(newContent))
                 eventsEmitted += 1
                 previousCount = currentText.count
@@ -662,7 +699,8 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
         schema: GenerationSchema,
         options: GenerationOptions,
         continuation: AsyncThrowingStream<GenerationEvent, Error>.Continuation,
-        generationStream: GenerationStream
+        generationStream: GenerationStream,
+        metricTracker: GenerationMetricTracker?
     ) async throws -> StreamResult {
         let responseStream = session.streamResponse(
             to: prompt,
@@ -699,6 +737,7 @@ public final class FoundationBackend: InferenceBackend, @unchecked Sendable {
                         await MainActor.run { generationStream.setPhase(.streaming) }
                         isFirstToken = false
                     }
+                    metricTracker?.recordToken()
                     continuation.yield(.token(delta))
                     eventsEmitted += 1
                     lastTextLength = textSoFar.count
diff --git a/Sources/ManifoldCloudCore/GenerationMetricTracker.swift b/Sources/ManifoldInference/Metrics/GenerationMetricTracker.swift
similarity index 79%
rename from Sources/ManifoldCloudCore/GenerationMetricTracker.swift
rename to Sources/ManifoldInference/Metrics/GenerationMetricTracker.swift
index 052b60490..a4e1afeea 100644
--- a/Sources/ManifoldCloudCore/GenerationMetricTracker.swift
+++ b/Sources/ManifoldInference/Metrics/GenerationMetricTracker.swift
@@ -1,11 +1,10 @@
 import Foundation
-import ManifoldInference
 
 /// Accumulates per-token timing data for a single generation call.
 ///
 /// Thread-safety via `NSLock`. Updated from the generation task (arbitrary
 /// thread); read after the task completes to build the final ``InferenceMetric``.
-final class GenerationMetricTracker: @unchecked Sendable {
+package final class GenerationMetricTracker: @unchecked Sendable {
     private let lock = NSLock()
     private var wallStart: ContinuousClock.Instant = ContinuousClock.now
     private var dispatchDate: Date = Date()
@@ -13,7 +12,9 @@ final class GenerationMetricTracker: @unchecked Sendable {
     private var lastTokenInstant: ContinuousClock.Instant?
     private var interTokenGapsNs: [Int64] = []
 
-    func start() {
+    package init() {}
+
+    package func start() {
         lock.lock()
         defer { lock.unlock() }
         wallStart = ContinuousClock.now
@@ -22,7 +23,7 @@ final class GenerationMetricTracker: @unchecked Sendable {
         dispatchDate = Date()
     }
 
-    func recordToken() {
+    package func recordToken() {
         lock.lock()
         defer { lock.unlock() }
         let now = ContinuousClock.now
@@ -37,7 +38,7 @@ final class GenerationMetricTracker: @unchecked Sendable {
         lastTokenInstant = now
     }
 
-    func buildMetric(
+    package func buildMetric(
         provider: String,
         model: String,
         promptTokens: Int,
@@ -88,8 +89,8 @@ final class GenerationMetricTracker: @unchecked Sendable {
     }
 }
 
-enum SSEGenerationMetrics {
-    static func observing(
+package enum SSEGenerationMetrics {
+    package static func observing(
         _ stream: AsyncThrowingStream<GenerationEvent, Error>,
         tracker: GenerationMetricTracker,
         enabled: Bool
@@ -115,31 +116,35 @@ enum SSEGenerationMetrics {
         }
     }
 
-    static func record(
+    /// Records a metric to `sink` using pre-built tracker data.
+    ///
+    /// Cost fields are passed explicitly so this method remains in
+    /// `ManifoldInference` without a dependency on `InferenceCostEstimator`,
+    /// which lives in `ManifoldCloudCore`. Cloud backends compute cost before
+    /// calling this method; local backends (Foundation) pass zero cost with
+    /// `isCostApproximate: true`.
+    package static func record(
         to sink: any InferenceMetricSink,
         tracker: GenerationMetricTracker,
         provider: String,
         model: String,
-        usage: (promptTokens: Int, completionTokens: Int)?,
+        promptTokens: Int,
+        completionTokens: Int,
+        cachedPromptTokens: Int = 0,
+        estimatedCostUSD: Double,
+        isCostApproximate: Bool,
+        costTableDate: String,
         errorClass: String?
     ) {
-        let promptTokens = usage?.promptTokens ?? 0
-        let completionTokens = usage?.completionTokens ?? 0
-        let (costUSD, isApprox) = InferenceCostEstimator.estimatedCost(
-            provider: provider,
-            model: model,
-            promptTokens: promptTokens,
-            completionTokens: completionTokens
-        )
         let metric = tracker.buildMetric(
             provider: provider,
             model: model,
             promptTokens: promptTokens,
-            cachedPromptTokens: 0,
+            cachedPromptTokens: cachedPromptTokens,
             completionTokens: completionTokens,
-            estimatedCostUSD: costUSD,
-            isCostApproximate: isApprox,
-            costTableDate: InferenceCostEstimator.costTableDate,
+            estimatedCostUSD: estimatedCostUSD,
+            isCostApproximate: isCostApproximate,
+            costTableDate: costTableDate,
             errorClass: errorClass
         )
         Task { await sink.record(metric) }
diff --git a/Sources/ManifoldCloudCore/InferenceMetric.swift b/Sources/ManifoldInference/Metrics/InferenceMetric.swift
similarity index 88%
rename from Sources/ManifoldCloudCore/InferenceMetric.swift
rename to Sources/ManifoldInference/Metrics/InferenceMetric.swift
index 9103d72c6..0f6a2760d 100644
--- a/Sources/ManifoldCloudCore/InferenceMetric.swift
+++ b/Sources/ManifoldInference/Metrics/InferenceMetric.swift
@@ -1,14 +1,14 @@
 import Foundation
 
 /// A snapshot of latency, cost, and token-count data produced after a single
-/// cloud inference call.
+/// inference call.
 ///
-/// Emitted by ``SSECloudBackend`` after every generation (success or failure)
-/// and forwarded to the configured ``InferenceMetricSink``. Consumers use this
-/// to power dashboards, cost alerts, and latency regression detection without
-/// having to instrument individual backends.
+/// Emitted by backends after every generation (success or failure) and forwarded
+/// to the configured ``InferenceMetricSink``. Consumers use this to power
+/// dashboards, cost alerts, and latency regression detection without having to
+/// instrument individual backends.
 public struct InferenceMetric: Sendable {
-    /// Human-readable backend name (e.g. "Claude", "OpenAI").
+    /// Human-readable backend name (e.g. "Claude", "OpenAI", "FoundationModels").
     public let provider: String
     /// Model identifier used for the call (e.g. "claude-sonnet-4-6").
     public let model: String
@@ -75,7 +75,7 @@ public struct InferenceMetric: Sendable {
 
 // MARK: - Sink Protocol
 
-/// A type that receives ``InferenceMetric`` values produced by cloud backends.
+/// A type that receives ``InferenceMetric`` values produced by backends.
 ///
 /// Conform to this protocol to route metrics into observability systems (Datadog,
 /// OpenTelemetry, a local ring buffer, etc.) without coupling the backend layer
@@ -89,7 +89,7 @@ public protocol InferenceMetricSink: AnyObject, Sendable {
 
 /// A thread-safe, bounded ring buffer of ``InferenceMetric`` values.
 ///
-/// The shared singleton is the default sink wired into ``SSECloudBackend``.
+/// The shared singleton is the default sink wired into cloud and local backends.
 /// Tests and host apps can inject their own sink; this actor is useful as a
 /// lightweight diagnostic tool in debug builds.
 ///
@@ -97,8 +97,8 @@ public protocol InferenceMetricSink: AnyObject, Sendable {
 /// appended, so memory usage stays constant regardless of call volume.
 public actor InMemoryMetricSink: InferenceMetricSink {
 
-    /// Shared singleton. ``SSECloudBackend`` defaults to this sink so callers
-    /// can read recent metrics without configuring anything.
+    /// Shared singleton. Backends default to this sink so callers can read
+    /// recent metrics without configuring anything.
     public static let shared = InMemoryMetricSink()
 
     private var metrics: [InferenceMetric] = []
diff --git a/Tests/ManifoldBackendsTests/FoundationBackendMetricEmissionTests.swift b/Tests/ManifoldBackendsTests/FoundationBackendMetricEmissionTests.swift
new file mode 100644
index 000000000..c04a884be
--- /dev/null
+++ b/Tests/ManifoldBackendsTests/FoundationBackendMetricEmissionTests.swift
@@ -0,0 +1,191 @@
+#if canImport(FoundationModels)
+import XCTest
+import FoundationModels
+import ManifoldInference
+@testable import ManifoldFoundation
+
+/// Spy sink that captures every recorded metric for test assertions.
+@available(iOS 26, macOS 26, *)
+final class SpyMetricSink: InferenceMetricSink, @unchecked Sendable {
+    private let lock = NSLock()
+    private var _recorded: [InferenceMetric] = []
+
+    func record(_ metric: InferenceMetric) {
+        lock.lock()
+        defer { lock.unlock() }
+        _recorded.append(metric)
+    }
+
+    var recorded: [InferenceMetric] {
+        lock.lock()
+        defer { lock.unlock() }
+        return _recorded
+    }
+}
+
+/// Tests that ``FoundationBackend`` emits an ``InferenceMetric`` after every
+/// generation attempt and populates the key diagnostic fields.
+///
+/// These tests require iOS 26 / macOS 26 SDK symbols but do NOT require a live
+/// Apple Intelligence entitlement — `_forceLoaded()` bypasses the probe, and
+/// `MockInferenceBackend`-style forced responses are not needed because
+/// ``GenerationMetricTracker`` operates on wall-clock timing that the
+/// test harness can verify structurally rather than exactly.
+@available(iOS 26, macOS 26, *)
+final class FoundationBackendMetricEmissionTests: XCTestCase {
+
+    private var backend: FoundationBackend!
+    private var spy: SpyMetricSink!
+
+    override func setUp() async throws {
+        try await super.setUp()
+        guard ProcessInfo.processInfo.isOperatingSystemAtLeast(
+            OperatingSystemVersion(majorVersion: 26, minorVersion: 0, patchVersion: 0)
+        ) else {
+            throw XCTSkip("iOS 26 / macOS 26 required")
+        }
+        spy = SpyMetricSink()
+        backend = FoundationBackend(availabilityResolver: { .available })
+        backend.metricSink = spy
+    }
+
+    override func tearDown() async throws {
+        await backend?.unloadModelAndWait()
+        backend = nil
+        spy = nil
+        try await super.tearDown()
+    }
+
+    // MARK: - metricSink wiring
+
+    func test_metricSink_defaultsToInMemoryMetricSinkShared() {
+        let fresh = FoundationBackend()
+        // The default sink must be non-nil so metrics are captured without any
+        // host-app configuration — mirrors SSECloudBackend's contract.
+        XCTAssertNotNil(fresh.metricSink)
+        XCTAssertTrue(fresh.metricSink is InMemoryMetricSink)
+    }
+
+    func test_metricSink_canBeSetToNil() {
+        backend.metricSink = nil
+        XCTAssertNil(backend.metricSink)
+    }
+
+    // MARK: - Metric emission (requires live inference)
+
+    func test_generate_emitsOneMetricOnSuccess() async throws {
+        guard FoundationBackend.isAvailable else {
+            throw XCTSkip("Apple Intelligence not available on this device")
+        }
+        guard await FoundationBackend.probeIsReady() else {
+            throw XCTSkip("Apple Intelligence model not ready")
+        }
+
+        backend._forceLoaded()
+
+        let stream = try backend.generate(
+            prompt: "Reply with exactly one word: hello",
+            systemPrompt: nil,
+            config: .init()
+        )
+
+        // Drain the stream to let the generation run to completion.
+        var tokenCount = 0
+        do {
+            for try await event in stream.events {
+                if case .token = event { tokenCount += 1 }
+            }
+        } catch {
+            // Generation errors are still expected to emit a metric.
+        }
+
+        // Allow the Task's defer block (which fires the metric) to execute.
+        await Task.yield()
+        try await Task.sleep(for: .milliseconds(50))
+
+        let metrics = spy.recorded
+        XCTAssertEqual(metrics.count, 1, "Expected exactly one metric per generation call")
+
+        let m = try XCTUnwrap(metrics.first)
+        XCTAssertEqual(m.provider, "FoundationModels")
+        XCTAssertNil(m.errorClass, "errorClass must be nil on a successful generation")
+
+        // Foundation backend cannot report token counts via the SDK, so the
+        // field is always zero. Verify it's not accidentally negative.
+        XCTAssertGreaterThanOrEqual(m.completionTokens, 0)
+
+        // wallClockDuration must be strictly positive.
+        XCTAssertGreaterThan(m.wallClockDuration, .zero,
+                             "wallClockDuration must reflect real elapsed time")
+    }
+
+    func test_generate_emitsMetricWithNonNilErrorClassOnFailure() async throws {
+        // Use an unavailable-resolver so generate() will fail immediately
+        // once we force the load check open.
+        let failingBackend = FoundationBackend(availabilityResolver: { .available })
+        let failSpy = SpyMetricSink()
+        failingBackend.metricSink = failSpy
+
+        // _forceLoaded bypasses the probe — but the session is still nil.
+        // Trying to generate will fail when the SDK is unavailable or not ready.
+        failingBackend._forceLoaded()
+
+        do {
+            let stream = try failingBackend.generate(
+                prompt: "test",
+                systemPrompt: nil,
+                config: .init()
+            )
+            // If we get here the device has Apple Intelligence — drain and skip.
+            var saw = false
+            for try await event in stream.events {
+                if case .token = event { saw = true }
+            }
+            if saw {
+                throw XCTSkip("Device has Apple Intelligence; failure path not exercisable")
+            }
+        } catch is InferenceError {
+            // Synchronous failure (e.g. alreadyGenerating) — metric fires in defer.
+        } catch {
+            // Async failure propagated through the stream.
+        }
+
+        await Task.yield()
+        try await Task.sleep(for: .milliseconds(50))
+
+        // On a device without Apple Intelligence the defer block should have fired.
+        // If the device HAS Apple Intelligence and succeeded, we skip above.
+        guard !failSpy.recorded.isEmpty else {
+            throw XCTSkip("No metric recorded — device may have Apple Intelligence loaded")
+        }
+
+        // When a metric is recorded, wallClockDuration must be non-negative.
+        let m = try XCTUnwrap(failSpy.recorded.first)
+        XCTAssertGreaterThanOrEqual(m.wallClockDuration, .zero)
+    }
+
+    func test_generate_noMetricEmittedWhenSinkIsNil() async throws {
+        backend.metricSink = nil
+        backend._forceLoaded()
+
+        do {
+            let stream = try backend.generate(
+                prompt: "hello",
+                systemPrompt: nil,
+                config: .init()
+            )
+            for try await _ in stream.events {}
+        } catch {}
+
+        await Task.yield()
+        try await Task.sleep(for: .milliseconds(50))
+
+        // No crash and no metric — just verify the spy (which is not wired)
+        // received nothing. The real assertion is that no call was made to a
+        // nil sink (which would have crashed).
+        XCTAssertTrue(spy.recorded.isEmpty,
+                      "Spy was replaced by nil — it should receive nothing")
+    }
+}
+
+#endif