Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/api-breakage-allowlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,6 @@ API breakage: enumelement MessagePart.generatedVideo has been removed
API breakage: var GBNFSchemaPreValidator.CVEAuditRecord.fixedAtBuild has been removed
API breakage: var GBNFSchemaPreValidator.CVEAuditRecord.vendoredBuild has been removed
API breakage: func ModelLoadCoordinator.dispatchLoad(_:) has been renamed to func dispatchLoad(_:drivesChatSeams:)
API breakage: enumelement GenerationEvent.toolCallParseFailed has been added as a new enum case
API breakage: enumelement GenerationEvent.toolCallTruncated has been added as a new enum case
API breakage: constructor ToolCallTransform.init(markers:) has been removed
35 changes: 35 additions & 0 deletions Sources/ManifoldContract/GenerationEvent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@
/// a major (`feat!:`) release. Public-facing / cross-module consumers should add
/// an `@unknown default:` arm to stay resilient to a future major.
///
/// The two non-fatal tool-call diagnostics —
/// ``toolCallParseFailed(rawBody:)`` and ``toolCallTruncated(rawBody:)`` — are
/// part of this frozen vocabulary. They were the last pre-1.0 additions
/// (`feat!:`, #1857 / #1858): a delimited tool-call body that fails to parse
/// is now always observable, and a tool block abandoned without a close marker
/// (at finalize, or when the body-size cap is hit) is observable when the
/// transform opts in, instead of both being silently dropped. Both follow the
/// ``throttleDiagnostic(reason:)`` precedent — advisory metadata with no
/// chat-message state mutation.
///
/// Payloads that are expected to grow are modelled as **associated structs**
/// rather than bare enum parameters so their fields can grow non-breakingly
/// after the freeze:
Expand Down Expand Up @@ -168,6 +177,32 @@ public enum GenerationEvent: Sendable, Equatable {
/// `reason` is a short, human-readable string the UI may display verbatim.
case throttleDiagnostic(reason: String)

/// Non-fatal diagnostic: a delimited tool-call block closed, but its body
/// failed to parse into a ``ToolCall`` (the dialect's `parseBody` returned
/// `nil`).
///
/// Emitted by ``ToolCallTransform`` in lieu of a ``toolCall(_:)`` event when
/// a well-formed open/close marker pair surrounds a body the dialect parser
/// rejects (malformed JSON, unknown shape, empty name). Without this event a
/// broken tool call vanishes silently and the host cannot distinguish
/// "model emitted a broken tool call" from "model emitted no tool call".
/// `rawBody` is the exact buffered body text between the open and close
/// markers so hosts can log, surface, or attempt their own recovery. This is
/// advisory metadata — like ``throttleDiagnostic(reason:)`` it carries no
/// chat-message state mutation and consumers that do not care may ignore it.
case toolCallParseFailed(rawBody: String)

/// Non-fatal diagnostic: a tool-call block was abandoned without a matching
/// close marker — either the stream ended while the block was still open, or
/// the buffered body exceeded ``ToolCallTransform``'s body-size cap.
///
/// Emitted by ``ToolCallTransform`` — from `finalize()` for an unterminated
/// block, and mid-stream when the body-size cap drops a runaway unclosed
/// body — **only when the transform was constructed with
/// `surfaceTruncatedToolBody: true`** (the default keeps the historical
/// silent-discard behavior). `rawBody` is the partial body buffered since the
/// open marker, so a mid-tool-call truncation is observable rather than lost.
/// Like ``toolCallParseFailed(rawBody:)`` this is advisory metadata with no
/// chat-message state mutation.
case toolCallTruncated(rawBody: String)

/// Emitted by the orchestrator immediately before it begins handling a
/// model-emitted ``ToolCall``.
///
Expand Down
52 changes: 45 additions & 7 deletions Sources/ManifoldContract/ToolCallTransform.swift
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,15 @@ public struct ToolCallMarker: Sendable {
/// preference), the parser switches into the block.
/// - Inside a block, body text is buffered and suppressed from `.token`.
/// - On the matching close, `marker.parseBody(body)` runs; a non-nil result
/// emits `.toolCall`, and `nil` silently drops the call (matching both
/// legacy parsers).
/// emits `.toolCall`. A `nil` result no longer vanishes silently — it emits
/// a non-fatal `.toolCallParseFailed(rawBody:)` diagnostic carrying the
/// buffered body so hosts can distinguish a broken tool call from no tool
/// call (#1857).
/// - On the body-size cap or an unterminated block at `finalize()`, the partial
/// body is discarded by default. Constructing the transform with
/// `surfaceTruncatedToolBody: true` instead emits a non-fatal
/// `.toolCallTruncated(rawBody:)` diagnostic so a mid-stream truncation is
/// observable (#1858, opt-in — default behavior is unchanged).
/// - Partial open/close markers straddling a chunk boundary are held back via
/// the shared `overlap` primitives — the open-tag holdback is the max
/// overlap across *all* candidate opens.
Expand All @@ -60,15 +67,23 @@ public struct ToolCallTransform: StreamTransform {
/// below a memory-pressure threat.
private static let maxBodyBytes = 256 * 1024

/// Opt-in: surface the buffered body of an unterminated tool-call block as a
/// non-fatal `.toolCallTruncated(rawBody:)` diagnostic instead of discarding
/// it. Defaults to `false` so the historical silent-discard behavior is
/// unchanged (#1858). Applies both to the `finalize()` flush of an open
/// block and to the body-size-cap drop of a runaway unclosed body.
public let surfaceTruncatedToolBody: Bool

private var buffer = ""
/// Index into `markers` of the dialect whose open tag is currently active,
/// or `nil` when not inside a tool-call block.
private var activeMarker: Int?
/// Body text buffered since the active open tag.
private var bodyBuffer = ""

public init(markers: [ToolCallMarker]) {
/// Creates a transform that scans for the given tool-call markers.
///
/// - Parameters:
///   - markers: The candidate tool-call dialects; the active dialect is
///     tracked as an index into this array while inside a block.
///   - surfaceTruncatedToolBody: When `true`, an unterminated tool-call body
///     (at `finalize()` or on the body-size-cap drop) is surfaced as a
///     non-fatal `.toolCallTruncated(rawBody:)` diagnostic instead of being
///     discarded. Defaults to `false`, which keeps the historical
///     silent-discard behavior.
public init(markers: [ToolCallMarker], surfaceTruncatedToolBody: Bool = false) {
self.markers = markers
self.surfaceTruncatedToolBody = surfaceTruncatedToolBody
}

public mutating func process(_ events: [GenerationEvent]) -> [GenerationEvent] {
Expand Down Expand Up @@ -98,6 +113,12 @@ public struct ToolCallTransform: StreamTransform {
buffer = String(buffer[range.upperBound...])
if let call = markers[active].parseBody(bodyBuffer) {
events.append(.toolCall(call))
} else {
// A well-formed open/close pair surrounded a body the
// dialect parser rejected. Surface it as a non-fatal
// diagnostic instead of dropping the call silently so
// hosts can recover or report (#1857).
events.append(.toolCallParseFailed(rawBody: bodyBuffer))
}
bodyBuffer = ""
activeMarker = nil
Expand All @@ -117,6 +138,9 @@ public struct ToolCallTransform: StreamTransform {
Log.inference.warning(
"ToolCallTransform: dropping tool-call body exceeding \(Self.maxBodyBytes)-byte cap without a close tag"
)
if surfaceTruncatedToolBody {
events.append(.toolCallTruncated(rawBody: bodyBuffer))
}
bodyBuffer = ""
activeMarker = nil
continue
Expand Down Expand Up @@ -173,12 +197,26 @@ public struct ToolCallTransform: StreamTransform {
/// Flush the held-back buffer at stream end.
///
/// Remaining visible text outside a block is emitted as `.token`. An
/// incomplete (unclosed) tool-call block is discarded — partial body text
/// cannot produce a valid `ToolCall` — matching both legacy parsers.
/// incomplete (unclosed) tool-call block is discarded by default — partial
/// body text cannot produce a valid `ToolCall` — matching both legacy
/// parsers. When the transform was constructed with
/// `surfaceTruncatedToolBody: true`, the partial body is instead surfaced as
/// a non-fatal `.toolCallTruncated(rawBody:)` diagnostic so a mid-tool-call
/// stream truncation is observable (#1858).
public mutating func finalize() -> [GenerationEvent] {
var events: [GenerationEvent] = []
if activeMarker == nil, !buffer.isEmpty {
events.append(.token(buffer))
if activeMarker == nil {
if !buffer.isEmpty {
events.append(.token(buffer))
}
} else if surfaceTruncatedToolBody {
// Inside an unterminated block: the held-back `buffer` is a partial
// close suffix that still belongs to the body, so fold it in before
// surfacing. Default behavior (flag off) discards silently.
let partial = bodyBuffer + buffer
if !partial.isEmpty {
events.append(.toolCallTruncated(rawBody: partial))
}
}
buffer = ""
bodyBuffer = ""
Expand Down
4 changes: 4 additions & 0 deletions Sources/ManifoldFuzz/EventRecorder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ public struct EventRecorder: Sendable {
// orchestrator. Record the reason in the trace so fuzz
// scenarios can pin exactly-once terminal emission.
events.append(.init(t: t, kind: "generationCompleted", v: "\(completion.reason)"))
case .toolCallParseFailed(let rawBody):
events.append(.init(t: t, kind: "toolCallParseFailed", v: rawBody))
case .toolCallTruncated(let rawBody):
events.append(.init(t: t, kind: "toolCallTruncated", v: rawBody))
}
memoryTick()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ public struct GenerationStreamConsumer: Sendable {
// upstream instead of going through the action mapping.
return .ignore

case .toolCallParseFailed, .toolCallTruncated:
// Non-fatal tool-call diagnostics (#1857 / #1858). Advisory
// metadata with no chat-message text/tool state to mutate; hosts
// that want to recover or surface a "broken/truncated tool call"
// hint observe the raw event upstream, mirroring throttleDiagnostic.
return .ignore

case .toolCallStart, .toolCallArgumentsDelta:
// Streaming tool-call deltas are observed by UI surfaces
// upstream (rendering an in-flight call card). The
Expand Down
5 changes: 5 additions & 0 deletions Sources/ManifoldTools/ScenarioRunner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ public final class ScenarioRunner {
// Dispatch lifecycle markers are observational; tool
// accounting flows through `.toolCall` / `.toolResult`.
continue
case .toolCallParseFailed, .toolCallTruncated:
// Non-fatal tool-call diagnostics (#1857 / #1858); the
// authoritative call still lands on `.toolCall(_:)` when it
// parses. Observational here.
continue
case .handoffRequested:
// Multi-agent handoffs are runtime-driven; deterministic
// single-agent replays never observe them.
Expand Down
4 changes: 4 additions & 0 deletions Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ enum BackendSeamConsumer {
_ = reused
case .throttleDiagnostic(reason: let reason):
_ = reason
case .toolCallParseFailed(rawBody: let rawBody):
_ = rawBody
case .toolCallTruncated(rawBody: let rawBody):
_ = rawBody
case .toolDispatchStarted(callId: let callId, name: let name, attempt: let attempt):
_ = (callId, name, attempt)
case .toolCallApproved(callId: let callId):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,8 @@ final class ClaudeStreamEventExtractorParityTests: XCTestCase {
case .toolCallApproved: return "toolCallApproved"
case .kvCacheReuse: return "kvCacheReuse"
case .throttleDiagnostic: return "throttleDiagnostic"
case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ private func categorise(_ event: GenerationEvent) -> EventCategory? {
case .toolIterationLimitExceeded: return nil
case .kvCacheReuse: return nil
case .throttleDiagnostic: return nil
case .toolCallParseFailed, .toolCallTruncated: return nil
case .thinkingSignature: return nil
case .toolCallStart, .toolCallArgumentsDelta: return nil
case .toolProgress, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ final class OllamaStreamEventExtractorParityTests: XCTestCase {
case .toolCallApproved: return "toolCallApproved"
case .kvCacheReuse: return "kvCacheReuse"
case .throttleDiagnostic: return "throttleDiagnostic"
case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,10 @@ final class OllamaToolCallLiveReplayTests: XCTestCase {
// Cooperative thermal pause — informational only;
// raw backend replay neither emits nor projects it.
break
case .toolCallParseFailed, .toolCallTruncated:
// Tool-call diagnostics surface in the OutputParser layer,
// not raw Ollama replay; ignore for forward-compat.
break
case .toolCallStart, .toolCallArgumentsDelta:
// Streaming tool-call deltas are projected only by
// backends that opt into `streamsToolCallArguments`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ final class OpenAIResponsesBackendTests: XCTestCase {
.throttleDiagnostic, .thinkingSignature,
.toolCallStart, .toolCallArgumentsDelta,
.toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved,
.toolCallParseFailed, .toolCallTruncated,
.prefillProgress, .toolProgress,
.handoffRequested, .generationCompleted:
return nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ final class OpenAIResponsesStreamEventExtractorTests: XCTestCase {
case .toolCallApproved: return "toolCallApproved"
case .kvCacheReuse: return "kvCacheReuse"
case .throttleDiagnostic: return "throttleDiagnostic"
case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
}
Expand Down Expand Up @@ -333,6 +335,8 @@ final class OpenAIResponsesStreamEventExtractorParityTests: XCTestCase {
case .toolCallApproved: return "toolCallApproved"
case .kvCacheReuse: return "kvCacheReuse"
case .throttleDiagnostic: return "throttleDiagnostic"
case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ final class OpenAIStreamEventExtractorParityTests: XCTestCase {
case .toolCallApproved: return "toolCallApproved"
case .kvCacheReuse: return "kvCacheReuse"
case .throttleDiagnostic: return "throttleDiagnostic"
case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
}
Expand Down
Loading