roryford · roryford · Jun 15, 2026 · Jun 15, 2026
@@ -72,6 +72,27 @@ public enum GenerationEvent: Sendable, Equatable {
     /// `tokensPerSecond` is the backend-reported prompt-eval throughput.
     case prefillProgress(tokensProcessed: Int, tokensTotal: Int, tokensPerSecond: Double)
 
+    /// The prompt text submitted to the backend for this generation turn. For
+    /// prompt-template backends this is the fully-assembled prompt: the system
+    /// prompt, conversation history, and any tool definitions that were injected.
+    ///
+    /// **Opt-in only.** Emitted by the orchestration layer immediately before
+    /// the first ``prefillProgress`` or ``token`` event, and only when
+    /// ``GenerationConfig/captureRenderedPrompt`` is `true`. Off by default to
+    /// avoid unintentional retention of sensitive prompt content.
+    ///
+    /// For backends that use a prompt-template (local GGUF, MLX), `text` is
+    /// the formatted string passed to
+    /// ``InferenceBackend/generate(prompt:systemPrompt:config:)``.
+    /// For cloud backends (which receive history as a message array on the wire),
+    /// `text` is the most-recent user message content — the value passed as
+    /// `prompt:`. The full conversation history is encoded on the wire and is
+    /// not available as a single rendered string.
+    ///
+    /// Consumers that do not opt in will never observe this case. This is
+    /// advisory metadata with no chat-message state mutation.
+    case promptRendered(text: String)
+
     /// A fragment of generated text (typically one token).
     case token(String)
 

@@ -316,6 +316,19 @@ public struct GenerationConfig: Sendable, Codable {
     /// per-request payloads; this is a per-request *contract*.
     public var requiredCapabilities: Set<GenerationCapabilityRequirement> = []
 
+    /// When `true`, the orchestration layer emits a
+    /// ``GenerationEvent/promptRendered(text:)`` event as the first event
+    /// in the generation stream, carrying the fully-assembled prompt string.
+    ///
+    /// Off by default (`false`) to avoid unintentional retention of
+    /// sensitive prompt content. Only set this when you need to inspect or
+    /// log the rendered prompt for debugging — do not leave it on in
+    /// production builds that handle private user data.
+    ///
+    /// Runtime-only flag: excluded from `Codable` persistence to match
+    /// other per-request hints like ``thinkingMarkers`` and ``jsonMode``.
+    public var captureRenderedPrompt: Bool = false
+
     public init(
         temperature: Float = 0.7,
         topP: Float = 0.9,

@@ -99,6 +99,10 @@ public struct EventRecorder: Sendable {
                         kind: "prefillProgress",
                         v: "\(tokensProcessed)/\(tokensTotal)@\(tokensPerSecond)"
                     ))
+                case .promptRendered(let text):
+                    // Opt-in diagnostic; record presence but not the potentially
+                    // large prompt body so fuzz trace files stay compact.
+                    events.append(.init(t: t, kind: "promptRendered", v: "\(text.count)chars"))
                 case .token(let text):
                     if firstTokenAt == nil { firstTokenAt = ContinuousClock.now }
                     raw += text

@@ -426,11 +426,15 @@ final class GenerationQueue {
                 config: config
             )
             GenerationHistoryInstaller.installHistory(on: backend, structuredMessages: result.trimmedMessages)
-            return try backend.generateEnforcingCapabilities(
+            let stream = try backend.generateEnforcingCapabilities(
                 prompt: result.prompt,
                 systemPrompt: nil,
                 config: config
             )
+            if config.captureRenderedPrompt {
+                return Self.prependingPromptRendered(text: result.prompt, to: stream)
+            }
+            return stream
         }
 
         // Non-TokenCountingBackend path: assemble prompt and forward.
@@ -466,11 +470,43 @@ final class GenerationQueue {
 
         GenerationHistoryInstaller.installHistory(on: backend, structuredMessages: messages)
 
-        return try backend.generateEnforcingCapabilities(
+        let stream = try backend.generateEnforcingCapabilities(
             prompt: assembledPrompt,
             systemPrompt: effectiveSystemPrompt,
             config: config
         )
+        if config.captureRenderedPrompt {
+            return Self.prependingPromptRendered(text: assembledPrompt, to: stream)
+        }
+        return stream
+    }
+
+    /// Returns a stream that yields `.promptRendered(text:)` exactly once and then
+    /// relays every event from `upstream` in order.
+    ///
+    /// Called only when `GenerationConfig.captureRenderedPrompt` is `true`. An
+    /// error thrown by `upstream` finishes the returned stream with that same
+    /// error; terminating the returned stream cancels the relay task.
+    private static func prependingPromptRendered(
+        text: String,
+        to upstream: GenerationStream
+    ) -> GenerationStream {
+        let relay = AsyncThrowingStream<GenerationEvent, Error> { sink in
+            let forwarder = Task {
+                sink.yield(.promptRendered(text: text))
+                do {
+                    for try await upstreamEvent in upstream.events {
+                        sink.yield(upstreamEvent)
+                    }
+                } catch {
+                    sink.finish(throwing: error)
+                    return
+                }
+                sink.finish()
+            }
+            sink.onTermination = { _ in forwarder.cancel() }
+        }
+        return GenerationStream(relay)
+    }
 
     /// Folds the canonical tool-preference preamble into `systemPrompt` when the

@@ -19,6 +19,14 @@ public struct GenerationStreamConsumer: Sendable {
         case .prefillProgress:
             return .ignore
 
+        case .promptRendered:
+            // Opt-in diagnostic event carrying the assembled prompt text.
+            // The consumer has no chat-message state to mutate — hosts that
+            // want to inspect or log the rendered prompt observe the raw event
+            // upstream, mirroring the `.throttleDiagnostic` and `.kvCacheReuse`
+            // precedents.
+            return .ignore
+
         case .token(let text):
             return .appendText(text)
 

@@ -86,7 +86,7 @@ public final class ScenarioRunner {
                 case .toolCall(let call):
                     turnToolCalls.append(call)
                     logger?.append(.toolCall(scenarioId: scenario.id, name: call.toolName, arguments: call.arguments))
-                case .prefillProgress, .usage, .thinkingToken, .thinkingCompleted, .thinkingSignature:
+                case .prefillProgress, .promptRendered, .usage, .thinkingToken, .thinkingCompleted, .thinkingSignature:
                     continue
                 case .toolResult, .toolIterationLimitExceeded:
                     // ScenarioRunner calls backend.generate() directly and owns

@@ -122,6 +122,8 @@ enum BackendSeamConsumer {
         switch event {
         case .prefillProgress(let tokensProcessed, let tokensTotal, let tokensPerSecond):
             _ = (tokensProcessed, tokensTotal, tokensPerSecond)
+        case .promptRendered(text: let text):
+            _ = text
         case .token(let text):
             _ = text
         case .usage(let usage):

@@ -316,6 +316,7 @@ final class ClaudeStreamEventExtractorParityTests: XCTestCase {
         case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
         case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
         case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
+        case .promptRendered: return "promptRendered"
         case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
         case .toolResult: return "toolResult"
         case .toolProgress: return "toolProgress"

@@ -52,6 +52,7 @@ private func categorise(_ event: GenerationEvent) -> EventCategory? {
     case .toolCallStart, .toolCallArgumentsDelta: return nil
     case .toolProgress, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: return nil
     case .prefillProgress: return nil
+    case .promptRendered: return nil
     case .handoffRequested: return nil
     case .generationCompleted: return nil
     }

@@ -250,6 +250,7 @@ final class OllamaStreamEventExtractorParityTests: XCTestCase {
         case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
         case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
         case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
+        case .promptRendered: return "promptRendered"
         case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
         case .toolResult: return "toolResult"
         case .toolProgress: return "toolProgress"

@@ -215,7 +215,7 @@ final class OllamaToolCallLiveReplayTests: XCTestCase {
                     // Orchestrator-level dispatch lifecycle events; raw
                     // backend replay never emits them.
                     break
-                case .prefillProgress:
+                case .prefillProgress, .promptRendered:
                     break
                 case .handoffRequested:
                     // Runtime-synthesised handoff event; live raw backend

@@ -76,7 +76,7 @@ final class OpenAIResponsesBackendTests: XCTestCase {
              .toolCallStart, .toolCallArgumentsDelta,
              .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved,
              .toolCallParseFailed, .toolCallTruncated,
-             .prefillProgress, .toolProgress,
+             .prefillProgress, .promptRendered, .toolProgress,
              .handoffRequested, .generationCompleted:
             return nil
         }

@@ -200,6 +200,7 @@ final class OpenAIResponsesStreamEventExtractorTests: XCTestCase {
         case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName))"
         case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
         case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
+        case .promptRendered: return "promptRendered"
         case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
         case .toolResult: return "toolResult"
         case .toolProgress: return "toolProgress"
@@ -327,6 +328,7 @@ final class OpenAIResponsesStreamEventExtractorParityTests: XCTestCase {
         case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
         case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
         case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
+        case .promptRendered: return "promptRendered"
         case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
         case .toolResult: return "toolResult"
         case .toolProgress: return "toolProgress"

@@ -305,6 +305,7 @@ final class OpenAIStreamEventExtractorParityTests: XCTestCase {
         case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
         case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
         case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
+        case .promptRendered: return "promptRendered"
         case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
         case .toolResult: return "toolResult"
         case .toolProgress: return "toolProgress"

@@ -54,7 +54,7 @@ final class ParallelToolCallOrderingTests: XCTestCase {
             switch event {
             case .toolCall(let c):
                 calls.append(c)
-            case .prefillProgress, .token, .usage,
+            case .prefillProgress, .promptRendered, .token, .usage,
                  .thinkingToken, .thinkingCompleted, .thinkingSignature,
                  .toolResult, .toolProgress, .toolIterationLimitExceeded,
                  .kvCacheReuse, .throttleDiagnostic,

@@ -0,0 +1,165 @@
+import XCTest
+import Foundation
+@testable import ManifoldInference
+import ManifoldTestSupport
+
+/// Verifies that `GenerationConfig.captureRenderedPrompt` gates the
+/// `.promptRendered(text:)` event on streams handed out by `GenerationQueue`.
+///
+/// Each test enqueues a single user turn against a `MockInferenceBackend`, so
+/// no real backend is involved. Uses `XCTestCase` per #681 (Swift Testing
+/// mixed with XCTest triggers libmalloc SIGABRT in the same process).
+@MainActor
+final class PromptRenderedEventTests: XCTestCase {
+
+    // MARK: - Fixture
+
+    private var backend: MockInferenceBackend!
+    private var provider: FakePromptRenderedTestProvider!
+    private var queue: GenerationQueue!
+
+    override func setUp() async throws {
+        try await super.setUp()
+        queue = GenerationQueue()
+        backend = MockInferenceBackend()
+        backend.isModelLoaded = true
+        backend.tokensToYield = ["A", "B"]
+        provider = FakePromptRenderedTestProvider(backend: backend)
+        provider.bind(to: queue)
+    }
+
+    override func tearDown() async throws {
+        // Let any in-flight generation wind down before dropping the fixture.
+        await queue?.stopGenerationAndWait()
+        queue = nil
+        provider = nil
+        backend = nil
+        try await super.tearDown()
+    }
+
+    // MARK: - Helpers
+
+    /// `true` when `event` is the `.promptRendered` case.
+    nonisolated private static func isPromptRendered(_ event: GenerationEvent) -> Bool {
+        if case .promptRendered = event { return true }
+        return false
+    }
+
+    /// `true` when `event` is the `.token` case.
+    nonisolated private static func isToken(_ event: GenerationEvent) -> Bool {
+        if case .token = event { return true }
+        return false
+    }
+
+    /// A default `GenerationConfig` with `captureRenderedPrompt` switched on.
+    private func optedInConfig() -> GenerationConfig {
+        var config = GenerationConfig()
+        config.captureRenderedPrompt = true
+        return config
+    }
+
+    /// Enqueues one "hello" user turn and drains the resulting stream.
+    private func generate(config: GenerationConfig) async throws -> [GenerationEvent] {
+        let (_, stream) = try queue.enqueue(
+            structuredMessages: [StructuredMessage(role: "user", content: "hello")],
+            systemPrompt: nil,
+            config: config
+        )
+        var drained: [GenerationEvent] = []
+        for try await event in stream.events {
+            drained.append(event)
+        }
+        return drained
+    }
+
+    // MARK: - Opt-in emits event
+
+    func test_captureRenderedPrompt_true_emitsPromptRenderedAsFirstEvent() async throws {
+        let events = try await generate(config: optedInConfig())
+
+        // Nothing may precede .promptRendered in the stream.
+        if case .some(.promptRendered) = events.first { return }
+        XCTFail("Expected .promptRendered as first event, got: \(events.first as Any)")
+    }
+
+    func test_captureRenderedPrompt_true_promptRenderedTextMatchesUserMessage() async throws {
+        let events = try await generate(config: optedInConfig())
+
+        guard case .some(.promptRendered(text: let rendered)) = events.first else {
+            XCTFail("Expected .promptRendered as first event")
+            return
+        }
+        // A non-template backend is handed the last user message as `prompt:`,
+        // so the rendered text should contain the user content.
+        XCTAssertFalse(rendered.isEmpty, "promptRendered text must not be empty")
+        XCTAssertTrue(rendered.contains("hello"), "promptRendered text must contain the user message")
+    }
+
+    func test_captureRenderedPrompt_true_tokenEventFollowsPromptRendered() async throws {
+        let events = try await generate(config: optedInConfig())
+
+        let renderedAt = events.firstIndex(where: Self.isPromptRendered)
+        let tokenAt = events.firstIndex(where: Self.isToken)
+        XCTAssertTrue(renderedAt != nil, "stream must include .promptRendered when opt-in is true")
+        XCTAssertTrue(tokenAt != nil, "stream must still include token events after .promptRendered")
+
+        // Ordering only makes sense once both events are known to be present.
+        guard let renderedIdx = renderedAt, let tokenIdx = tokenAt else { return }
+        XCTAssertLessThan(renderedIdx, tokenIdx, ".promptRendered must appear before the first .token")
+    }
+
+    // MARK: - Opt-out emits no event
+
+    func test_captureRenderedPrompt_false_noPromptRenderedEvent() async throws {
+        // An untouched config must leave the opt-in disabled.
+        let config = GenerationConfig()
+        XCTAssertFalse(config.captureRenderedPrompt, "captureRenderedPrompt must default to false")
+
+        let events = try await generate(config: config)
+
+        let sawPromptRendered = events.contains(where: Self.isPromptRendered)
+        XCTAssertFalse(sawPromptRendered, "stream must NOT include .promptRendered when opt-in is false (default)")
+    }
+
+    func test_captureRenderedPrompt_explicitFalse_noPromptRenderedEvent() async throws {
+        var config = GenerationConfig()
+        config.captureRenderedPrompt = false
+
+        let events = try await generate(config: config)
+
+        let sawPromptRendered = events.contains(where: Self.isPromptRendered)
+        XCTAssertFalse(sawPromptRendered, "stream must NOT include .promptRendered when explicitly set to false")
+    }
+
+    // MARK: - Exactly once
+
+    func test_captureRenderedPrompt_true_emitsExactlyOnePromptRenderedEvent() async throws {
+        let events = try await generate(config: optedInConfig())
+
+        let occurrences = events.filter(Self.isPromptRendered).count
+        XCTAssertEqual(occurrences, 1, ".promptRendered must be emitted exactly once per turn")
+    }
+}
+
+// MARK: - Test fixture
+
+/// Test-only stand-in for the host context a `GenerationQueue` is bound to:
+/// it supplies a `MockInferenceBackend`, reports that backend's `isModelLoaded`
+/// flag, and selects `.chatML`. The mock keeps `requiresPromptTemplate: false`
+/// (its default), so the assembled prompt is the last user-message content.
+@MainActor
+private final class FakePromptRenderedTestProvider {
+    let backend: MockInferenceBackend
+
+    init(backend: MockInferenceBackend) { self.backend = backend }
+
+    /// Installs this provider's closures on `queue`. `self` is captured weakly,
+    /// so a released provider reports no backend and "not loaded".
+    func bind(to queue: GenerationQueue) {
+        queue.bindContext(
+            currentBackend: { [weak self] in self?.backend },
+            isBackendLoaded: { [weak self] in self?.backend.isModelLoaded == true },
+            selectedPromptTemplate: { .chatML }
+        )
+    }
+}