From fe3461db2a86f02529f43785ba77c1712881efbf Mon Sep 17 00:00:00 2001 From: Rory Ford Date: Sun, 14 Jun 2026 22:48:57 +1000 Subject: [PATCH] feat!(contract): tool-call parse-failure + truncation diagnostics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two non-fatal GenerationEvent diagnostics so hosts can observe tool calls that previously vanished silently in ToolCallTransform: - #1857 .toolCallParseFailed(rawBody:): when a delimited open/close marker pair surrounds a body the dialect parser rejects (parseBody returns nil), emit a diagnostic carrying the raw body instead of dropping the call with no event. Hosts can now distinguish "broken tool call" from "no tool call". - #1858 .toolCallTruncated(rawBody:): opt-in via ToolCallTransform(markers:surfaceTruncatedToolBody:) (default false, so default behavior is unchanged). When enabled, finalize() and the body-size cap surface the buffered partial body of an unterminated tool block so a mid-stream truncation is observable rather than silently discarded. Both follow the throttleDiagnostic(reason:) precedent — advisory metadata, Sendable/Equatable String payloads, no chat-message state mutation. Freeze hygiene: the GenerationEvent "Vocabulary freeze (1.0)" header is updated to list the new cases. Every exhaustive switch over GenerationEvent across Sources/ and Tests/ gains the new arms (12 sites: GenerationStreamConsumer, EventRecorder, ScenarioRunner, the APIFreeze BackendSeamConsumer freeze fixture, and 8 backend/contract test switches). api-breakage-allowlist gains the two new-enum-case lines plus the ToolCallTransform.init signature change (defaulted param; existing markers: callers still compile). Digester passes locally with exit 0. BREAKING CHANGE: GenerationEvent gains .toolCallParseFailed(rawBody:) and .toolCallTruncated(rawBody:); exhaustive switches over GenerationEvent without a default/@unknown default arm must add handling for the new cases. 
Resolves #1857 Resolves #1858 Co-Authored-By: Claude Opus 4.8 --- .github/api-breakage-allowlist.txt | 3 + .../ManifoldContract/GenerationEvent.swift | 35 +++++++ .../ManifoldContract/ToolCallTransform.swift | 52 ++++++++-- Sources/ManifoldFuzz/EventRecorder.swift | 4 + .../Services/GenerationStreamConsumer.swift | 7 ++ Sources/ManifoldTools/ScenarioRunner.swift | 5 + .../Fixtures/BackendSeamConsumer.swift | 4 + .../ClaudeStreamEventExtractorTests.swift | 2 + .../CloudThinkingTokenTests.swift | 1 + .../OllamaStreamEventExtractorTests.swift | 2 + .../OllamaToolCallLiveReplayTests.swift | 4 + .../OpenAIResponsesBackendTests.swift | 1 + ...AIResponsesStreamEventExtractorTests.swift | 4 + .../OpenAIStreamEventExtractorTests.swift | 2 + .../OutputParserSessionTests.swift | 96 +++++++++++++++++++ .../ParallelToolCallOrderingTests.swift | 1 + .../ToolCallContractTests.swift | 2 + 17 files changed, 218 insertions(+), 7 deletions(-) diff --git a/.github/api-breakage-allowlist.txt b/.github/api-breakage-allowlist.txt index 167bfd950..3816cb7f0 100644 --- a/.github/api-breakage-allowlist.txt +++ b/.github/api-breakage-allowlist.txt @@ -97,3 +97,6 @@ API breakage: enumelement MessagePart.generatedVideo has been removed API breakage: var GBNFSchemaPreValidator.CVEAuditRecord.fixedAtBuild has been removed API breakage: var GBNFSchemaPreValidator.CVEAuditRecord.vendoredBuild has been removed API breakage: func ModelLoadCoordinator.dispatchLoad(_:) has been renamed to func dispatchLoad(_:drivesChatSeams:) +API breakage: enumelement GenerationEvent.toolCallParseFailed has been added as a new enum case +API breakage: enumelement GenerationEvent.toolCallTruncated has been added as a new enum case +API breakage: constructor ToolCallTransform.init(markers:) has been removed diff --git a/Sources/ManifoldContract/GenerationEvent.swift b/Sources/ManifoldContract/GenerationEvent.swift index ab62a8f68..f81c9a723 100644 --- a/Sources/ManifoldContract/GenerationEvent.swift +++ 
b/Sources/ManifoldContract/GenerationEvent.swift @@ -14,6 +14,15 @@ /// a major (`feat!:`) release. Public-facing / cross-module consumers should add /// an `@unknown default:` arm to stay resilient to a future major. /// +/// The two non-fatal tool-call diagnostics — +/// ``toolCallParseFailed(rawBody:)`` and ``toolCallTruncated(rawBody:)`` — are +/// part of this frozen vocabulary. They were the last pre-1.0 additions +/// (`feat!:`, #1857 / #1858): a delimited tool-call body that fails to parse, +/// and an unterminated tool block surfaced at finalize, are now observable +/// instead of being silently dropped. Both follow the +/// ``throttleDiagnostic(reason:)`` precedent — advisory metadata with no +/// chat-message state mutation. +/// /// Payloads that are expected to grow are modelled as **associated structs** /// rather than bare enum parameters so their fields can grow non-breakingly /// after the freeze: @@ -168,6 +177,32 @@ public enum GenerationEvent: Sendable, Equatable { /// `reason` is a short, human-readable string the UI may display verbatim. case throttleDiagnostic(reason: String) + /// Non-fatal diagnostic: a delimited tool-call block closed, but its body + /// failed to parse into a ``ToolCall`` (the dialect's `parseBody` returned + /// `nil`). + /// + /// Emitted by ``ToolCallTransform`` in lieu of a ``toolCall(_:)`` event when + /// a well-formed open/close marker pair surrounds a body the dialect parser + /// rejects (malformed JSON, unknown shape, empty name). Without this event a + /// broken tool call vanishes silently and the host cannot distinguish + /// "model emitted a broken tool call" from "model emitted no tool call". + /// `rawBody` is the exact buffered body text between the open and close + /// markers so hosts can log, surface, or attempt their own recovery. This is + /// advisory metadata — like ``throttleDiagnostic(reason:)`` it carries no + /// chat-message state mutation and consumers that do not care may ignore it. 
+ case toolCallParseFailed(rawBody: String) + + /// Non-fatal diagnostic: a tool-call block was abandoned while still open — + /// the stream ended before its close marker, or its body hit the size cap. + /// + /// Emitted by ``ToolCallTransform`` — at `finalize()` or on the body-size-cap + /// drop — **only when constructed with `surfaceTruncatedToolBody: true`** + /// (the default keeps the historical silent discard). `rawBody` is the partial + /// body buffered since the open marker, so a mid-tool-call truncation is + /// observable rather than lost. Like ``toolCallParseFailed(rawBody:)`` this + /// is advisory metadata with no chat-message state mutation. + case toolCallTruncated(rawBody: String) + /// Emitted by the orchestrator immediately before it begins handling a /// model-emitted ``ToolCall``. /// diff --git a/Sources/ManifoldContract/ToolCallTransform.swift b/Sources/ManifoldContract/ToolCallTransform.swift index c736a61dc..13340efb8 100644 --- a/Sources/ManifoldContract/ToolCallTransform.swift +++ b/Sources/ManifoldContract/ToolCallTransform.swift @@ -40,8 +40,15 @@ public struct ToolCallMarker: Sendable { /// preference), the parser switches into the block. /// - Inside a block, body text is buffered and suppressed from `.token`. /// - On the matching close, `marker.parseBody(body)` runs; a non-nil result -/// emits `.toolCall`, and `nil` silently drops the call (matching both -/// legacy parsers). +/// emits `.toolCall`. A `nil` result no longer vanishes silently — it emits +/// a non-fatal `.toolCallParseFailed(rawBody:)` diagnostic carrying the +/// buffered body so hosts can distinguish a broken tool call from no tool +/// call (#1857). +/// - On the body-size cap or an unterminated block at `finalize()`, the partial +/// body is discarded by default. 
Constructing the transform with +/// `surfaceTruncatedToolBody: true` instead emits a non-fatal +/// `.toolCallTruncated(rawBody:)` diagnostic so a mid-stream truncation is +/// observable (#1858, opt-in — default behavior is unchanged). /// - Partial open/close markers straddling a chunk boundary are held back via /// the shared `overlap` primitives — the open-tag holdback is the max /// overlap across *all* candidate opens. @@ -60,6 +67,13 @@ public struct ToolCallTransform: StreamTransform { /// below a memory-pressure threat. private static let maxBodyBytes = 256 * 1024 + /// Opt-in: surface the buffered body of an unterminated tool-call block as a + /// non-fatal `.toolCallTruncated(rawBody:)` diagnostic instead of discarding + /// it. Defaults to `false` so the historical silent-discard behavior is + /// unchanged (#1858). Applies both to the `finalize()` flush of an open + /// block and to the body-size-cap drop of a runaway unclosed body. + public let surfaceTruncatedToolBody: Bool + private var buffer = "" /// Index into `markers` of the dialect whose open tag is currently active, /// or `nil` when not inside a tool-call block. @@ -67,8 +81,9 @@ public struct ToolCallTransform: StreamTransform { /// Body text buffered since the active open tag. private var bodyBuffer = "" - public init(markers: [ToolCallMarker]) { + public init(markers: [ToolCallMarker], surfaceTruncatedToolBody: Bool = false) { self.markers = markers + self.surfaceTruncatedToolBody = surfaceTruncatedToolBody } public mutating func process(_ events: [GenerationEvent]) -> [GenerationEvent] { @@ -98,6 +113,12 @@ public struct ToolCallTransform: StreamTransform { buffer = String(buffer[range.upperBound...]) if let call = markers[active].parseBody(bodyBuffer) { events.append(.toolCall(call)) + } else { + // A well-formed open/close pair surrounded a body the + // dialect parser rejected. 
Surface it as a non-fatal + // diagnostic instead of dropping the call silently so + // hosts can recover or report (#1857). + events.append(.toolCallParseFailed(rawBody: bodyBuffer)) } bodyBuffer = "" activeMarker = nil @@ -117,6 +138,9 @@ public struct ToolCallTransform: StreamTransform { Log.inference.warning( "ToolCallTransform: dropping tool-call body exceeding \(Self.maxBodyBytes)-byte cap without a close tag" ) + if surfaceTruncatedToolBody { + events.append(.toolCallTruncated(rawBody: bodyBuffer)) + } bodyBuffer = "" activeMarker = nil continue @@ -173,12 +197,26 @@ public struct ToolCallTransform: StreamTransform { /// Flush the held-back buffer at stream end. /// /// Remaining visible text outside a block is emitted as `.token`. An - /// incomplete (unclosed) tool-call block is discarded — partial body text - /// cannot produce a valid `ToolCall` — matching both legacy parsers. + /// incomplete (unclosed) tool-call block is discarded by default — partial + /// body text cannot produce a valid `ToolCall` — matching both legacy + /// parsers. When the transform was constructed with + /// `surfaceTruncatedToolBody: true`, the partial body is instead surfaced as + /// a non-fatal `.toolCallTruncated(rawBody:)` diagnostic so a mid-tool-call + /// stream truncation is observable (#1858). public mutating func finalize() -> [GenerationEvent] { var events: [GenerationEvent] = [] - if activeMarker == nil, !buffer.isEmpty { - events.append(.token(buffer)) + if activeMarker == nil { + if !buffer.isEmpty { + events.append(.token(buffer)) + } + } else if surfaceTruncatedToolBody { + // Inside an unterminated block: the held-back `buffer` is a partial + // close suffix that still belongs to the body, so fold it in before + // surfacing. Default behavior (flag off) discards silently. 
+ let partial = bodyBuffer + buffer + if !partial.isEmpty { + events.append(.toolCallTruncated(rawBody: partial)) + } } buffer = "" bodyBuffer = "" diff --git a/Sources/ManifoldFuzz/EventRecorder.swift b/Sources/ManifoldFuzz/EventRecorder.swift index 36b808ff0..2b6a11adb 100644 --- a/Sources/ManifoldFuzz/EventRecorder.swift +++ b/Sources/ManifoldFuzz/EventRecorder.swift @@ -160,6 +160,10 @@ public struct EventRecorder: Sendable { // orchestrator. Record the reason in the trace so fuzz // scenarios can pin exactly-once terminal emission. events.append(.init(t: t, kind: "generationCompleted", v: "\(completion.reason)")) + case .toolCallParseFailed(let rawBody): + events.append(.init(t: t, kind: "toolCallParseFailed", v: rawBody)) + case .toolCallTruncated(let rawBody): + events.append(.init(t: t, kind: "toolCallTruncated", v: rawBody)) } memoryTick() } diff --git a/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift b/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift index b5cfef590..71d75f291 100644 --- a/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift +++ b/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift @@ -56,6 +56,13 @@ public struct GenerationStreamConsumer: Sendable { // upstream instead of going through the action mapping. return .ignore + case .toolCallParseFailed, .toolCallTruncated: + // Non-fatal tool-call diagnostics (#1857 / #1858). Advisory + // metadata with no chat-message text/tool state to mutate; hosts + // that want to recover or surface a "broken/truncated tool call" + // hint observe the raw event upstream, mirroring throttleDiagnostic. + return .ignore + case .toolCallStart, .toolCallArgumentsDelta: // Streaming tool-call deltas are observed by UI surfaces // upstream (rendering an in-flight call card). 
The diff --git a/Sources/ManifoldTools/ScenarioRunner.swift b/Sources/ManifoldTools/ScenarioRunner.swift index 627210a61..3ba386efb 100644 --- a/Sources/ManifoldTools/ScenarioRunner.swift +++ b/Sources/ManifoldTools/ScenarioRunner.swift @@ -107,6 +107,11 @@ public final class ScenarioRunner { // Dispatch lifecycle markers are observational; tool // accounting flows through `.toolCall` / `.toolResult`. continue + case .toolCallParseFailed, .toolCallTruncated: + // Non-fatal tool-call diagnostics (#1857 / #1858); the + // authoritative call still lands on `.toolCall(_:)` when it + // parses. Observational here. + continue case .handoffRequested: // Multi-agent handoffs are runtime-driven; deterministic // single-agent replays never observe them. diff --git a/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift b/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift index 0b624f78a..8c2d44797 100644 --- a/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift +++ b/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift @@ -148,6 +148,10 @@ enum BackendSeamConsumer { _ = reused case .throttleDiagnostic(reason: let reason): _ = reason + case .toolCallParseFailed(rawBody: let rawBody): + _ = rawBody + case .toolCallTruncated(rawBody: let rawBody): + _ = rawBody case .toolDispatchStarted(callId: let callId, name: let name, attempt: let attempt): _ = (callId, name, attempt) case .toolCallApproved(callId: let callId): diff --git a/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift index 999be6bf7..a8f90da1d 100644 --- a/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift +++ b/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift @@ -324,6 +324,8 @@ final class ClaudeStreamEventExtractorParityTests: XCTestCase { case .toolCallApproved: return "toolCallApproved" case .kvCacheReuse: return "kvCacheReuse" case .throttleDiagnostic: return "throttleDiagnostic" + 
case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))" + case .toolCallTruncated(let body): return "toolCallTruncated(\(body))" case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))" case .generationCompleted(let c): return "generationCompleted(\(c.reason))" } diff --git a/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift b/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift index 9cc031a14..cb0c56762 100644 --- a/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift +++ b/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift @@ -47,6 +47,7 @@ private func categorise(_ event: GenerationEvent) -> EventCategory? { case .toolIterationLimitExceeded: return nil case .kvCacheReuse: return nil case .throttleDiagnostic: return nil + case .toolCallParseFailed, .toolCallTruncated: return nil case .thinkingSignature: return nil case .toolCallStart, .toolCallArgumentsDelta: return nil case .toolProgress, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: return nil diff --git a/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift index 7c845e738..5184afc00 100644 --- a/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift +++ b/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift @@ -258,6 +258,8 @@ final class OllamaStreamEventExtractorParityTests: XCTestCase { case .toolCallApproved: return "toolCallApproved" case .kvCacheReuse: return "kvCacheReuse" case .throttleDiagnostic: return "throttleDiagnostic" + case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))" + case .toolCallTruncated(let body): return "toolCallTruncated(\(body))" case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))" case .generationCompleted(let c): return "generationCompleted(\(c.reason))" } diff --git a/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift 
b/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift index 7fcece0d4..2843b0f07 100644 --- a/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift +++ b/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift @@ -202,6 +202,10 @@ final class OllamaToolCallLiveReplayTests: XCTestCase { // Cooperative thermal pause — informational only; // raw backend replay neither emits nor projects it. break + case .toolCallParseFailed, .toolCallTruncated: + // Tool-call diagnostics surface in the OutputParser layer, + // not raw Ollama replay; ignore for forward-compat. + break case .toolCallStart, .toolCallArgumentsDelta: // Streaming tool-call deltas are projected only by // backends that opt into `streamsToolCallArguments`; diff --git a/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift b/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift index e04944598..47352f54a 100644 --- a/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift +++ b/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift @@ -75,6 +75,7 @@ final class OpenAIResponsesBackendTests: XCTestCase { .throttleDiagnostic, .thinkingSignature, .toolCallStart, .toolCallArgumentsDelta, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved, + .toolCallParseFailed, .toolCallTruncated, .prefillProgress, .toolProgress, .handoffRequested, .generationCompleted: return nil diff --git a/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift index 63bbf3ba8..651fb76f2 100644 --- a/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift +++ b/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift @@ -208,6 +208,8 @@ final class OpenAIResponsesStreamEventExtractorTests: XCTestCase { case .toolCallApproved: return "toolCallApproved" case .kvCacheReuse: return "kvCacheReuse" case .throttleDiagnostic: return 
"throttleDiagnostic" + case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))" + case .toolCallTruncated(let body): return "toolCallTruncated(\(body))" case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))" case .generationCompleted(let c): return "generationCompleted(\(c.reason))" } @@ -333,6 +335,8 @@ final class OpenAIResponsesStreamEventExtractorParityTests: XCTestCase { case .toolCallApproved: return "toolCallApproved" case .kvCacheReuse: return "kvCacheReuse" case .throttleDiagnostic: return "throttleDiagnostic" + case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))" + case .toolCallTruncated(let body): return "toolCallTruncated(\(body))" case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))" case .generationCompleted(let c): return "generationCompleted(\(c.reason))" } diff --git a/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift index e60f8ff57..5f066bbc8 100644 --- a/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift +++ b/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift @@ -313,6 +313,8 @@ final class OpenAIStreamEventExtractorParityTests: XCTestCase { case .toolCallApproved: return "toolCallApproved" case .kvCacheReuse: return "kvCacheReuse" case .throttleDiagnostic: return "throttleDiagnostic" + case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))" + case .toolCallTruncated(let body): return "toolCallTruncated(\(body))" case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))" case .generationCompleted(let c): return "generationCompleted(\(c.reason))" } diff --git a/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift b/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift index 3b44bd264..b7307ee40 100644 --- a/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift +++ 
b/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift @@ -26,6 +26,14 @@ final class OutputParserSessionTests: XCTestCase { events.compactMap { if case .toolCall(let c) = $0 { return c } else { return nil } } } + private func parseFailures(_ events: [GenerationEvent]) -> [String] { + events.compactMap { if case .toolCallParseFailed(let body) = $0 { return body } else { return nil } } + } + + private func truncations(_ events: [GenerationEvent]) -> [String] { + events.compactMap { if case .toolCallTruncated(let body) = $0 { return body } else { return nil } } + } + // MARK: - Test marker fixtures /// A simple JSON `` dialect: `{"name":...}` → ToolCall. @@ -269,4 +277,92 @@ final class OutputParserSessionTests: XCTestCase { XCTAssertEqual(toolCalls(recovery).map(\.toolName), ["f"], "Parser must recover and parse a valid call after dropping an oversized body") } + + // MARK: - #1857: malformed-body parse-failure diagnostic + + func test_closedToolBlock_withMalformedBody_emitsParseFailedDiagnostic() { + // A well-formed open/close pair surrounds a body the dialect parser + // rejects (not valid JSON). Previously this vanished with NO event; + // now it surfaces a non-fatal `.toolCallParseFailed` carrying the body. + var transform = ToolCallTransform(markers: [jsonMarker()]) + var events = transform.process([.token("beforenot jsonafter")]) + events += transform.finalize() + + XCTAssertTrue(toolCalls(events).isEmpty, + "A malformed body produces no ToolCall") + XCTAssertEqual(parseFailures(events), ["not json"], + "A malformed closed tool body must surface the raw body as a parse-failure diagnostic (#1857)") + XCTAssertEqual(visible(events), "beforeafter", + "Visible text around the failed block is still emitted") + + // Sabotage: deleting the `.toolCallParseFailed` emission in + // ToolCallTransform makes parseFailures empty and this assertion fails — + // confirming the diagnostic is load-bearing, not incidental. 
+ } + + func test_parseFailure_doesNotBreakSubsequentValidCall() { + var transform = ToolCallTransform(markers: [jsonMarker()]) + var events = transform.process([.token("garbage{\"name\":\"ok\"}")]) + events += transform.finalize() + + XCTAssertEqual(parseFailures(events), ["garbage"]) + XCTAssertEqual(toolCalls(events).map(\.toolName), ["ok"], + "A parse failure must not poison a following well-formed call") + } + + // MARK: - #1858: opt-in truncated-body diagnostic at finalize + + func test_finalize_unterminatedToolBlock_default_dropsSilently() { + // Default behavior is unchanged: an unterminated block is discarded with + // NO new event. + var transform = ToolCallTransform(markers: [jsonMarker()]) + var events = transform.process([.token("text{\"name\":\"f\",\"arg")]) + events += transform.finalize() + + XCTAssertTrue(toolCalls(events).isEmpty) + XCTAssertTrue(truncations(events).isEmpty, + "With the opt-in OFF, a truncated tool block must NOT emit a diagnostic (default unchanged)") + XCTAssertEqual(visible(events), "text") + } + + func test_finalize_unterminatedToolBlock_optIn_surfacesPartialBody() { + // With the opt-in ON, the partial body is surfaced as a non-fatal + // truncation diagnostic so a mid-tool-call stream cut is observable. + var transform = ToolCallTransform(markers: [jsonMarker()], surfaceTruncatedToolBody: true) + var events = transform.process([.token("text{\"name\":\"f\",\"arg")]) + events += transform.finalize() + + XCTAssertTrue(toolCalls(events).isEmpty) + XCTAssertEqual(truncations(events), ["{\"name\":\"f\",\"arg"], + "With the opt-in ON, finalize must surface the buffered partial body (#1858)") + XCTAssertEqual(visible(events), "text") + + // Sabotage: flipping surfaceTruncatedToolBody back to false (or dropping + // the finalize branch) makes truncations empty and this fails. 
+ } + + func test_finalize_optIn_partialCloseSuffixFoldedIntoTruncatedBody() { + // The body ends mid-close-tag; the held-back partial close suffix still + // belongs to the body and must be included in the surfaced raw body. + var transform = ToolCallTransform(markers: [jsonMarker()], surfaceTruncatedToolBody: true) + // "{\"name\":\"f\"}{\"name\":\"f\"}tail")]) + events += transform.finalize() + + XCTAssertEqual(toolCalls(events).map(\.toolName), ["f"]) + XCTAssertTrue(truncations(events).isEmpty, + "A fully-closed block plus trailing text must not emit a truncation diagnostic") + XCTAssertEqual(visible(events), "tail") + } } diff --git a/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift b/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift index a54e2298b..3b220f8aa 100644 --- a/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift +++ b/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift @@ -60,6 +60,7 @@ final class ParallelToolCallOrderingTests: XCTestCase { .kvCacheReuse, .throttleDiagnostic, .toolCallStart, .toolCallArgumentsDelta, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved, + .toolCallParseFailed, .toolCallTruncated, .handoffRequested, .generationCompleted: break } diff --git a/Tests/ManifoldInferenceTests/ToolCallContractTests.swift b/Tests/ManifoldInferenceTests/ToolCallContractTests.swift index ee0295fa5..e923624d4 100644 --- a/Tests/ManifoldInferenceTests/ToolCallContractTests.swift +++ b/Tests/ManifoldInferenceTests/ToolCallContractTests.swift @@ -214,6 +214,7 @@ final class ToolCallContractTests: XCTestCase { case .throttleDiagnostic: break case .toolCallStart, .toolCallArgumentsDelta: break case .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: break + case .toolCallParseFailed, .toolCallTruncated: break case .handoffRequested: break case .generationCompleted: break } @@ -289,6 +290,7 @@ final class ToolCallContractTests: XCTestCase { case .throttleDiagnostic: 
break case .toolCallStart, .toolCallArgumentsDelta: break case .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: break + case .toolCallParseFailed, .toolCallTruncated: break case .handoffRequested: break case .generationCompleted: break }