Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Sources/ManifoldContract/GenerationEvent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,27 @@ public enum GenerationEvent: Sendable, Equatable {
/// `tokensPerSecond` is the backend-reported prompt-eval throughput.
case prefillProgress(tokensProcessed: Int, tokensTotal: Int, tokensPerSecond: Double)

/// The prompt text that was submitted to the backend for this generation
/// turn.
///
/// **Opt-in only.** Emitted by the orchestration layer immediately before
/// the first ``prefillProgress`` or ``token`` event, and only when
/// ``GenerationConfig/captureRenderedPrompt`` is `true`. Off by default to
/// avoid unintentional retention of sensitive prompt content.
///
/// What `text` contains depends on the backend:
///
/// - For backends that use a prompt-template (local GGUF, MLX), `text` is
///   the fully-assembled, formatted string passed to
///   ``InferenceBackend/generate(prompt:systemPrompt:config:)`` — including
///   the system prompt, conversation history, and any tool definitions
///   that were injected.
/// - For cloud backends (which receive history as a message array on the
///   wire), `text` is only the most-recent user message content — the
///   value passed as `prompt:`. The full conversation history is encoded
///   on the wire and is not available as a single rendered string.
///
/// Consumers that do not opt in will never observe this case. This is
/// advisory metadata with no chat-message state mutation.
case promptRendered(text: String)

/// A fragment of generated text (typically one token).
case token(String)

Expand Down
13 changes: 13 additions & 0 deletions Sources/ManifoldContract/InferenceBackend.swift
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,19 @@ public struct GenerationConfig: Sendable, Codable {
/// per-request payloads; this is a per-request *contract*.
public var requiredCapabilities: Set<GenerationCapabilityRequirement> = []

/// When `true`, the orchestration layer emits a
/// ``GenerationEvent/promptRendered(text:)`` event as the first event
/// in the generation stream, carrying the prompt text that was submitted
/// to the backend. For prompt-template backends this is the
/// fully-assembled prompt string; for cloud backends it is only the
/// most-recent user message (see the event's documentation).
///
/// Off by default (`false`) to avoid unintentional retention of
/// sensitive prompt content. Only set this when you need to inspect or
/// log the rendered prompt for debugging — do not leave it on in
/// production builds that handle private user data.
///
/// Runtime-only flag: excluded from `Codable` persistence to match
/// other per-request hints like ``thinkingMarkers`` and ``jsonMode``.
public var captureRenderedPrompt: Bool = false

public init(
temperature: Float = 0.7,
topP: Float = 0.9,
Expand Down
4 changes: 4 additions & 0 deletions Sources/ManifoldFuzz/EventRecorder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ public struct EventRecorder: Sendable {
kind: "prefillProgress",
v: "\(tokensProcessed)/\(tokensTotal)@\(tokensPerSecond)"
))
case .promptRendered(let text):
// Opt-in diagnostic; record presence but not the potentially
// large prompt body so fuzz trace files stay compact.
events.append(.init(t: t, kind: "promptRendered", v: "\(text.count)chars"))
case .token(let text):
if firstTokenAt == nil { firstTokenAt = ContinuousClock.now }
raw += text
Expand Down
40 changes: 38 additions & 2 deletions Sources/ManifoldInference/Services/GenerationQueue.swift
Original file line number Diff line number Diff line change
Expand Up @@ -426,11 +426,15 @@ final class GenerationQueue {
config: config
)
GenerationHistoryInstaller.installHistory(on: backend, structuredMessages: result.trimmedMessages)
return try backend.generateEnforcingCapabilities(
let stream = try backend.generateEnforcingCapabilities(
prompt: result.prompt,
systemPrompt: nil,
config: config
)
if config.captureRenderedPrompt {
return Self.prependingPromptRendered(text: result.prompt, to: stream)
}
return stream
}

// Non-TokenCountingBackend path: assemble prompt and forward.
Expand Down Expand Up @@ -466,11 +470,43 @@ final class GenerationQueue {

GenerationHistoryInstaller.installHistory(on: backend, structuredMessages: messages)

return try backend.generateEnforcingCapabilities(
let stream = try backend.generateEnforcingCapabilities(
prompt: assembledPrompt,
systemPrompt: effectiveSystemPrompt,
config: config
)
if config.captureRenderedPrompt {
return Self.prependingPromptRendered(text: assembledPrompt, to: stream)
}
return stream
}

/// Returns a new `GenerationStream` that yields one `.promptRendered(text:)`
/// event and then relays every event produced by `upstream`.
///
/// Only called when `GenerationConfig.captureRenderedPrompt` is `true`.
///
/// - Parameters:
///   - text: The prompt text to surface in the leading `.promptRendered` event.
///   - upstream: The backend stream whose events are relayed unchanged.
/// - Returns: A stream that finishes when `upstream` finishes and fails with
///   the same error if `upstream` throws.
private static func prependingPromptRendered(
    text: String,
    to upstream: GenerationStream
) -> GenerationStream {
    let relayed = AsyncThrowingStream<GenerationEvent, Error> { sink in
        let forwarder = Task {
            // The diagnostic event always goes out ahead of anything upstream.
            sink.yield(.promptRendered(text: text))
            do {
                for try await upstreamEvent in upstream.events {
                    sink.yield(upstreamEvent)
                }
            } catch {
                // Surface the upstream failure unchanged to the consumer.
                sink.finish(throwing: error)
                return
            }
            sink.finish()
        }
        // Stop relaying as soon as the wrapped stream is terminated.
        sink.onTermination = { _ in forwarder.cancel() }
    }
    return GenerationStream(relayed)
}

/// Folds the canonical tool-preference preamble into `systemPrompt` when the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ public struct GenerationStreamConsumer: Sendable {
case .prefillProgress:
return .ignore

case .promptRendered:
// Opt-in diagnostic event carrying the assembled prompt text.
// The consumer has no chat-message state to mutate — hosts that
// want to inspect or log the rendered prompt observe the raw event
// upstream, mirroring the `.throttleDiagnostic` and `.kvCacheReuse`
// precedents.
return .ignore

case .token(let text):
return .appendText(text)

Expand Down
2 changes: 1 addition & 1 deletion Sources/ManifoldTools/ScenarioRunner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public final class ScenarioRunner {
case .toolCall(let call):
turnToolCalls.append(call)
logger?.append(.toolCall(scenarioId: scenario.id, name: call.toolName, arguments: call.arguments))
case .prefillProgress, .usage, .thinkingToken, .thinkingCompleted, .thinkingSignature:
case .prefillProgress, .promptRendered, .usage, .thinkingToken, .thinkingCompleted, .thinkingSignature:
continue
case .toolResult, .toolIterationLimitExceeded:
// ScenarioRunner calls backend.generate() directly and owns
Expand Down
2 changes: 2 additions & 0 deletions Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ enum BackendSeamConsumer {
switch event {
case .prefillProgress(let tokensProcessed, let tokensTotal, let tokensPerSecond):
_ = (tokensProcessed, tokensTotal, tokensPerSecond)
case .promptRendered(text: let text):
_ = text
case .token(let text):
_ = text
case .usage(let usage):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ final class ClaudeStreamEventExtractorParityTests: XCTestCase {
case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
case .promptRendered: return "promptRendered"
case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
case .toolResult: return "toolResult"
case .toolProgress: return "toolProgress"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ private func categorise(_ event: GenerationEvent) -> EventCategory? {
case .toolCallStart, .toolCallArgumentsDelta: return nil
case .toolProgress, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: return nil
case .prefillProgress: return nil
case .promptRendered: return nil
case .handoffRequested: return nil
case .generationCompleted: return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ final class OllamaStreamEventExtractorParityTests: XCTestCase {
case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
case .promptRendered: return "promptRendered"
case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
case .toolResult: return "toolResult"
case .toolProgress: return "toolProgress"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ final class OllamaToolCallLiveReplayTests: XCTestCase {
// Orchestrator-level dispatch lifecycle events; raw
// backend replay never emits them.
break
case .prefillProgress:
case .prefillProgress, .promptRendered:
break
case .handoffRequested:
// Runtime-synthesised handoff event; live raw backend
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ final class OpenAIResponsesBackendTests: XCTestCase {
.toolCallStart, .toolCallArgumentsDelta,
.toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved,
.toolCallParseFailed, .toolCallTruncated,
.prefillProgress, .toolProgress,
.prefillProgress, .promptRendered, .toolProgress,
.handoffRequested, .generationCompleted:
return nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ final class OpenAIResponsesStreamEventExtractorTests: XCTestCase {
case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName))"
case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
case .promptRendered: return "promptRendered"
case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
case .toolResult: return "toolResult"
case .toolProgress: return "toolProgress"
Expand Down Expand Up @@ -327,6 +328,7 @@ final class OpenAIResponsesStreamEventExtractorParityTests: XCTestCase {
case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
case .promptRendered: return "promptRendered"
case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
case .toolResult: return "toolResult"
case .toolProgress: return "toolProgress"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ final class OpenAIStreamEventExtractorParityTests: XCTestCase {
case .toolCall(let c): return "toolCall(\(c.id),\(c.toolName),\(c.arguments))"
case .usage(let u): return "usage(\(u.promptTokens),\(u.completionTokens))"
case .prefillProgress(let n, let t, _): return "prefillProgress(\(n)/\(t))"
case .promptRendered: return "promptRendered"
case .toolIterationLimitExceeded(let n): return "toolIterationLimitExceeded(\(n))"
case .toolResult: return "toolResult"
case .toolProgress: return "toolProgress"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ final class ParallelToolCallOrderingTests: XCTestCase {
switch event {
case .toolCall(let c):
calls.append(c)
case .prefillProgress, .token, .usage,
case .prefillProgress, .promptRendered, .token, .usage,
.thinkingToken, .thinkingCompleted, .thinkingSignature,
.toolResult, .toolProgress, .toolIterationLimitExceeded,
.kvCacheReuse, .throttleDiagnostic,
Expand Down
165 changes: 165 additions & 0 deletions Tests/ManifoldInferenceTests/PromptRenderedEventTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import XCTest
import Foundation
@testable import ManifoldInference
import ManifoldTestSupport

/// Tests for the `captureRenderedPrompt` opt-in and the resulting
/// `.promptRendered(text:)` event.
///
/// Exercises the opt-in/opt-out gate in `GenerationQueue.dispatchToBackend`
/// without hitting a real backend. Uses `XCTestCase` per #681 (Swift Testing
/// mixed with XCTest triggers libmalloc SIGABRT in the same process).
@MainActor
final class PromptRenderedEventTests: XCTestCase {

    // MARK: - Fixture

    private var backend: MockInferenceBackend!
    private var provider: FakePromptRenderedTestProvider!
    private var queue: GenerationQueue!

    override func setUp() async throws {
        try await super.setUp()
        backend = MockInferenceBackend()
        backend.isModelLoaded = true
        backend.tokensToYield = ["A", "B"]
        provider = FakePromptRenderedTestProvider(backend: backend)
        queue = GenerationQueue()
        provider.bind(to: queue)
    }

    override func tearDown() async throws {
        await queue?.stopGenerationAndWait()
        queue = nil
        provider = nil
        backend = nil
        try await super.tearDown()
    }

    // MARK: - Helpers

    /// Returns `true` when `event` is a `.promptRendered` event.
    private nonisolated static func isPromptRendered(_ event: GenerationEvent) -> Bool {
        if case .promptRendered = event { return true }
        return false
    }

    /// Returns `true` when `event` is a `.token` event.
    private nonisolated static func isToken(_ event: GenerationEvent) -> Bool {
        if case .token = event { return true }
        return false
    }

    /// Builds a default `GenerationConfig` with the opt-in flag switched on.
    private func makeCapturingConfig() -> GenerationConfig {
        var config = GenerationConfig()
        config.captureRenderedPrompt = true
        return config
    }

    /// Drains `stream` to completion and returns every event in arrival order.
    private func collectEvents(_ stream: GenerationStream) async throws -> [GenerationEvent] {
        var events: [GenerationEvent] = []
        for try await event in stream.events {
            events.append(event)
        }
        return events
    }

    /// Enqueues a single-user-message turn ("hello") with `config` and
    /// returns all events the resulting stream produced.
    private func generate(config: GenerationConfig) async throws -> [GenerationEvent] {
        let (_, stream) = try queue.enqueue(
            structuredMessages: [StructuredMessage(role: "user", content: "hello")],
            systemPrompt: nil,
            config: config
        )
        return try await collectEvents(stream)
    }

    // MARK: - Opt-in emits event

    func test_captureRenderedPrompt_true_emitsPromptRenderedAsFirstEvent() async throws {
        let events = try await generate(config: makeCapturingConfig())

        // The very first event must be .promptRendered.
        guard case .promptRendered = events.first else {
            XCTFail("Expected .promptRendered as first event, got: \(events.first as Any)")
            return
        }
    }

    func test_captureRenderedPrompt_true_promptRenderedTextMatchesUserMessage() async throws {
        let events = try await generate(config: makeCapturingConfig())

        guard case .promptRendered(let text) = events.first else {
            XCTFail("Expected .promptRendered as first event")
            return
        }
        // Non-template backend passes the last user message as `prompt:`,
        // so the rendered text should contain the user content.
        XCTAssertFalse(text.isEmpty, "promptRendered text must not be empty")
        XCTAssertTrue(text.contains("hello"), "promptRendered text must contain the user message")
    }

    func test_captureRenderedPrompt_true_tokenEventFollowsPromptRendered() async throws {
        let events = try await generate(config: makeCapturingConfig())

        // Unwrapping (rather than `if let`) guarantees the ordering assertion
        // below is never silently skipped when one of the events is missing.
        let promptRenderedIndex = try XCTUnwrap(
            events.firstIndex(where: Self.isPromptRendered),
            "stream must include .promptRendered when opt-in is true"
        )
        let firstTokenIndex = try XCTUnwrap(
            events.firstIndex(where: Self.isToken),
            "stream must still include token events after .promptRendered"
        )

        XCTAssertLessThan(
            promptRenderedIndex,
            firstTokenIndex,
            ".promptRendered must appear before the first .token"
        )
    }

    // MARK: - Opt-out emits no event

    func test_captureRenderedPrompt_false_noPromptRenderedEvent() async throws {
        // Default config has captureRenderedPrompt == false.
        let config = GenerationConfig()
        XCTAssertFalse(config.captureRenderedPrompt, "captureRenderedPrompt must default to false")

        let events = try await generate(config: config)

        XCTAssertFalse(
            events.contains(where: Self.isPromptRendered),
            "stream must NOT include .promptRendered when opt-in is false (default)"
        )
    }

    func test_captureRenderedPrompt_explicitFalse_noPromptRenderedEvent() async throws {
        var config = GenerationConfig()
        config.captureRenderedPrompt = false

        let events = try await generate(config: config)

        XCTAssertFalse(
            events.contains(where: Self.isPromptRendered),
            "stream must NOT include .promptRendered when explicitly set to false"
        )
    }

    // MARK: - Exactly once

    func test_captureRenderedPrompt_true_emitsExactlyOnePromptRenderedEvent() async throws {
        let events = try await generate(config: makeCapturingConfig())

        let count = events.filter(Self.isPromptRendered).count
        XCTAssertEqual(count, 1, ".promptRendered must be emitted exactly once per turn")
    }
}

// MARK: - Test fixture

/// Test-only glue that exposes a `MockInferenceBackend` to a
/// `GenerationQueue` through `bindContext`, for `PromptRenderedEventTests`.
///
/// The backend is reported through weak captures of this provider, so the
/// queue never keeps the fixture alive. The prompt template is always
/// `.chatML`. The tests rely on the mock's default
/// `requiresPromptTemplate: false`, under which `assembledPrompt` is the
/// last user-message content — a simple, predictable string to assert on.
@MainActor
private final class FakePromptRenderedTestProvider {
    let backend: MockInferenceBackend

    init(backend mock: MockInferenceBackend) {
        backend = mock
    }

    /// Installs this provider's backend accessors on `queue`.
    func bind(to queue: GenerationQueue) {
        queue.bindContext(
            currentBackend: { [weak self] in self?.backend },
            // A deallocated provider reports "not loaded".
            isBackendLoaded: { [weak self] in self?.backend.isModelLoaded == true },
            selectedPromptTemplate: { .chatML }
        )
    }
}
Loading