From fe3461db2a86f02529f43785ba77c1712881efbf Mon Sep 17 00:00:00 2001
From: Rory Ford <me@roryford.com>
Date: Sun, 14 Jun 2026 22:48:57 +1000
Subject: [PATCH] feat!(contract): tool-call parse-failure + truncation
 diagnostics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add two non-fatal GenerationEvent diagnostics so hosts can observe tool
calls that previously vanished silently in ToolCallTransform:

- #1857 .toolCallParseFailed(rawBody:): when a delimited open/close marker
  pair surrounds a body the dialect parser rejects (parseBody returns nil),
  emit a diagnostic carrying the raw body instead of dropping the call with
  no event. Hosts can now distinguish "broken tool call" from "no tool call".
- #1858 .toolCallTruncated(rawBody:): opt-in via
  ToolCallTransform(markers:surfaceTruncatedToolBody:) (default false, so
  default behavior is unchanged). When enabled, finalize() and the body-size
  cap surface the buffered partial body of an unterminated tool block so a
  mid-stream truncation is observable rather than silently discarded.

Both follow the throttleDiagnostic(reason:) precedent — advisory metadata,
Sendable/Equatable String payloads, no chat-message state mutation.

Freeze hygiene: the GenerationEvent "Vocabulary freeze (1.0)" header is
updated to list the new cases. Every exhaustive switch over GenerationEvent
across Sources/ and Tests/ gains the new arms (15 sites:
GenerationStreamConsumer, EventRecorder, ScenarioRunner, the APIFreeze
BackendSeamConsumer freeze fixture, and 11 backend/contract test switches).
api-breakage-allowlist gains the two new-enum-case lines plus the
ToolCallTransform.init signature change (defaulted param; existing markers:
callers still compile). Digester passes locally with exit 0.

BREAKING CHANGE: GenerationEvent gains .toolCallParseFailed(rawBody:) and
.toolCallTruncated(rawBody:); exhaustive switches over GenerationEvent without
a default/@unknown default arm must add handling for the new cases.

Resolves #1857
Resolves #1858

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .github/api-breakage-allowlist.txt            |  3 +
 .../ManifoldContract/GenerationEvent.swift    | 35 +++++++
 .../ManifoldContract/ToolCallTransform.swift  | 52 ++++++++--
 Sources/ManifoldFuzz/EventRecorder.swift      |  4 +
 .../Services/GenerationStreamConsumer.swift   |  7 ++
 Sources/ManifoldTools/ScenarioRunner.swift    |  5 +
 .../Fixtures/BackendSeamConsumer.swift        |  4 +
 .../ClaudeStreamEventExtractorTests.swift     |  2 +
 .../CloudThinkingTokenTests.swift             |  1 +
 .../OllamaStreamEventExtractorTests.swift     |  2 +
 .../OllamaToolCallLiveReplayTests.swift       |  4 +
 .../OpenAIResponsesBackendTests.swift         |  1 +
 ...AIResponsesStreamEventExtractorTests.swift |  4 +
 .../OpenAIStreamEventExtractorTests.swift     |  2 +
 .../OutputParserSessionTests.swift            | 96 +++++++++++++++++++
 .../ParallelToolCallOrderingTests.swift       |  1 +
 .../ToolCallContractTests.swift               |  2 +
 17 files changed, 218 insertions(+), 7 deletions(-)

diff --git a/.github/api-breakage-allowlist.txt b/.github/api-breakage-allowlist.txt
index 167bfd950..3816cb7f0 100644
--- a/.github/api-breakage-allowlist.txt
+++ b/.github/api-breakage-allowlist.txt
@@ -97,3 +97,6 @@ API breakage: enumelement MessagePart.generatedVideo has been removed
 API breakage: var GBNFSchemaPreValidator.CVEAuditRecord.fixedAtBuild has been removed
 API breakage: var GBNFSchemaPreValidator.CVEAuditRecord.vendoredBuild has been removed
 API breakage: func ModelLoadCoordinator.dispatchLoad(_:) has been renamed to func dispatchLoad(_:drivesChatSeams:)
+API breakage: enumelement GenerationEvent.toolCallParseFailed has been added as a new enum case
+API breakage: enumelement GenerationEvent.toolCallTruncated has been added as a new enum case
+API breakage: constructor ToolCallTransform.init(markers:) has been removed
diff --git a/Sources/ManifoldContract/GenerationEvent.swift b/Sources/ManifoldContract/GenerationEvent.swift
index ab62a8f68..f81c9a723 100644
--- a/Sources/ManifoldContract/GenerationEvent.swift
+++ b/Sources/ManifoldContract/GenerationEvent.swift
@@ -14,6 +14,15 @@
 /// a major (`feat!:`) release. Public-facing / cross-module consumers should add
 /// an `@unknown default:` arm to stay resilient to a future major.
 ///
+/// The two non-fatal tool-call diagnostics —
+/// ``toolCallParseFailed(rawBody:)`` and ``toolCallTruncated(rawBody:)`` — are
+/// part of this frozen vocabulary. They were the last pre-1.0 additions
+/// (`feat!:`, #1857 / #1858): a delimited tool-call body that fails to parse is
+/// now observable, and an unterminated tool block can be surfaced (opt-in)
+/// instead of being silently dropped. Both follow the
+/// ``throttleDiagnostic(reason:)`` precedent — advisory metadata with no
+/// chat-message state mutation.
+///
 /// Payloads that are expected to grow are modelled as **associated structs**
 /// rather than bare enum parameters so their fields can grow non-breakingly
 /// after the freeze:
@@ -168,6 +177,32 @@ public enum GenerationEvent: Sendable, Equatable {
     /// `reason` is a short, human-readable string the UI may display verbatim.
     case throttleDiagnostic(reason: String)
 
+    /// Non-fatal diagnostic: a delimited tool-call block closed, but its body
+    /// failed to parse into a ``ToolCall`` (the dialect's `parseBody` returned
+    /// `nil`).
+    ///
+    /// Emitted by ``ToolCallTransform`` in lieu of a ``toolCall(_:)`` event when
+    /// a well-formed open/close marker pair surrounds a body the dialect parser
+    /// rejects (malformed JSON, unknown shape, empty name). Without this event a
+    /// broken tool call vanishes silently and the host cannot distinguish
+    /// "model emitted a broken tool call" from "model emitted no tool call".
+    /// `rawBody` is the exact buffered body text between the open and close
+    /// markers so hosts can log, surface, or attempt their own recovery. This is
+    /// advisory metadata — like ``throttleDiagnostic(reason:)`` it carries no
+    /// chat-message state mutation and consumers that do not care may ignore it.
+    case toolCallParseFailed(rawBody: String)
+
+    /// Non-fatal diagnostic: a tool-call block was abandoned without a matching
+    /// close marker — the stream ended first, or its body hit the size cap.
+    ///
+    /// Emitted by ``ToolCallTransform`` — from `finalize()` or the body-size-cap
+    /// drop — **only when constructed with `surfaceTruncatedToolBody: true`**
+    /// (the default keeps the historical silent-discard behavior). `rawBody` is
+    /// the partial body buffered since the open marker, so the truncation is
+    /// observable rather than lost. Like ``toolCallParseFailed(rawBody:)`` this
+    /// is advisory metadata with no chat-message state mutation.
+    case toolCallTruncated(rawBody: String)
+
     /// Emitted by the orchestrator immediately before it begins handling a
     /// model-emitted ``ToolCall``.
     ///
diff --git a/Sources/ManifoldContract/ToolCallTransform.swift b/Sources/ManifoldContract/ToolCallTransform.swift
index c736a61dc..13340efb8 100644
--- a/Sources/ManifoldContract/ToolCallTransform.swift
+++ b/Sources/ManifoldContract/ToolCallTransform.swift
@@ -40,8 +40,15 @@ public struct ToolCallMarker: Sendable {
 ///   preference), the parser switches into the block.
 /// - Inside a block, body text is buffered and suppressed from `.token`.
 /// - On the matching close, `marker.parseBody(body)` runs; a non-nil result
-///   emits `.toolCall`, and `nil` silently drops the call (matching both
-///   legacy parsers).
+///   emits `.toolCall`. A `nil` result no longer vanishes silently — it emits
+///   a non-fatal `.toolCallParseFailed(rawBody:)` diagnostic carrying the
+///   buffered body so hosts can distinguish a broken tool call from no tool
+///   call (#1857).
+/// - On the body-size cap or an unterminated block at `finalize()`, the partial
+///   body is discarded by default. Constructing the transform with
+///   `surfaceTruncatedToolBody: true` instead emits a non-fatal
+///   `.toolCallTruncated(rawBody:)` diagnostic so a mid-stream truncation is
+///   observable (#1858, opt-in — default behavior is unchanged).
 /// - Partial open/close markers straddling a chunk boundary are held back via
 ///   the shared `overlap` primitives — the open-tag holdback is the max
 ///   overlap across *all* candidate opens.
@@ -60,6 +67,13 @@ public struct ToolCallTransform: StreamTransform {
     /// below a memory-pressure threat.
     private static let maxBodyBytes = 256 * 1024
 
+    /// Opt-in: surface the buffered body of an unterminated tool-call block as a
+    /// non-fatal `.toolCallTruncated(rawBody:)` diagnostic instead of discarding
+    /// it. Defaults to `false` so the historical silent-discard behavior is
+    /// unchanged (#1858). Applies both to the `finalize()` flush of an open
+    /// block and to the body-size-cap drop of a runaway unclosed body.
+    public let surfaceTruncatedToolBody: Bool
+
     private var buffer = ""
     /// Index into `markers` of the dialect whose open tag is currently active,
     /// or `nil` when not inside a tool-call block.
@@ -67,8 +81,9 @@ public struct ToolCallTransform: StreamTransform {
     /// Body text buffered since the active open tag.
     private var bodyBuffer = ""
 
-    public init(markers: [ToolCallMarker]) {
+    public init(markers: [ToolCallMarker], surfaceTruncatedToolBody: Bool = false) {
         self.markers = markers
+        self.surfaceTruncatedToolBody = surfaceTruncatedToolBody
     }
 
     public mutating func process(_ events: [GenerationEvent]) -> [GenerationEvent] {
@@ -98,6 +113,12 @@ public struct ToolCallTransform: StreamTransform {
                     buffer = String(buffer[range.upperBound...])
                     if let call = markers[active].parseBody(bodyBuffer) {
                         events.append(.toolCall(call))
+                    } else {
+                        // A well-formed open/close pair surrounded a body the
+                        // dialect parser rejected. Surface it as a non-fatal
+                        // diagnostic instead of dropping the call silently so
+                        // hosts can recover or report (#1857).
+                        events.append(.toolCallParseFailed(rawBody: bodyBuffer))
                     }
                     bodyBuffer = ""
                     activeMarker = nil
@@ -117,6 +138,9 @@ public struct ToolCallTransform: StreamTransform {
                         Log.inference.warning(
                             "ToolCallTransform: dropping tool-call body exceeding \(Self.maxBodyBytes)-byte cap without a close tag"
                         )
+                        if surfaceTruncatedToolBody {
+                            events.append(.toolCallTruncated(rawBody: bodyBuffer))
+                        }
                         bodyBuffer = ""
                         activeMarker = nil
                         continue
@@ -173,12 +197,26 @@ public struct ToolCallTransform: StreamTransform {
     /// Flush the held-back buffer at stream end.
     ///
     /// Remaining visible text outside a block is emitted as `.token`. An
-    /// incomplete (unclosed) tool-call block is discarded — partial body text
-    /// cannot produce a valid `ToolCall` — matching both legacy parsers.
+    /// incomplete (unclosed) tool-call block is discarded by default — partial
+    /// body text cannot produce a valid `ToolCall` — matching both legacy
+    /// parsers. When the transform was constructed with
+    /// `surfaceTruncatedToolBody: true`, the partial body is instead surfaced as
+    /// a non-fatal `.toolCallTruncated(rawBody:)` diagnostic so a mid-tool-call
+    /// stream truncation is observable (#1858).
     public mutating func finalize() -> [GenerationEvent] {
         var events: [GenerationEvent] = []
-        if activeMarker == nil, !buffer.isEmpty {
-            events.append(.token(buffer))
+        if activeMarker == nil {
+            if !buffer.isEmpty {
+                events.append(.token(buffer))
+            }
+        } else if surfaceTruncatedToolBody {
+            // Inside an unterminated block: the held-back `buffer` is a partial
+            // close suffix that still belongs to the body, so fold it in before
+            // surfacing. Default behavior (flag off) discards silently.
+            let partial = bodyBuffer + buffer
+            if !partial.isEmpty {
+                events.append(.toolCallTruncated(rawBody: partial))
+            }
         }
         buffer = ""
         bodyBuffer = ""
diff --git a/Sources/ManifoldFuzz/EventRecorder.swift b/Sources/ManifoldFuzz/EventRecorder.swift
index 36b808ff0..2b6a11adb 100644
--- a/Sources/ManifoldFuzz/EventRecorder.swift
+++ b/Sources/ManifoldFuzz/EventRecorder.swift
@@ -160,6 +160,10 @@ public struct EventRecorder: Sendable {
                     // orchestrator. Record the reason in the trace so fuzz
                     // scenarios can pin exactly-once terminal emission.
                     events.append(.init(t: t, kind: "generationCompleted", v: "\(completion.reason)"))
+                case .toolCallParseFailed(let rawBody):
+                    events.append(.init(t: t, kind: "toolCallParseFailed", v: rawBody))
+                case .toolCallTruncated(let rawBody):
+                    events.append(.init(t: t, kind: "toolCallTruncated", v: rawBody))
                 }
                 memoryTick()
             }
diff --git a/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift b/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift
index b5cfef590..71d75f291 100644
--- a/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift
+++ b/Sources/ManifoldInference/Services/GenerationStreamConsumer.swift
@@ -56,6 +56,13 @@ public struct GenerationStreamConsumer: Sendable {
             // upstream instead of going through the action mapping.
             return .ignore
 
+        case .toolCallParseFailed, .toolCallTruncated:
+            // Non-fatal tool-call diagnostics (#1857 / #1858). Advisory
+            // metadata with no chat-message text/tool state to mutate; hosts
+            // that want to recover or surface a "broken/truncated tool call"
+            // hint observe the raw event upstream, mirroring throttleDiagnostic.
+            return .ignore
+
         case .toolCallStart, .toolCallArgumentsDelta:
             // Streaming tool-call deltas are observed by UI surfaces
             // upstream (rendering an in-flight call card). The
diff --git a/Sources/ManifoldTools/ScenarioRunner.swift b/Sources/ManifoldTools/ScenarioRunner.swift
index 627210a61..3ba386efb 100644
--- a/Sources/ManifoldTools/ScenarioRunner.swift
+++ b/Sources/ManifoldTools/ScenarioRunner.swift
@@ -107,6 +107,11 @@ public final class ScenarioRunner {
                     // Dispatch lifecycle markers are observational; tool
                     // accounting flows through `.toolCall` / `.toolResult`.
                     continue
+                case .toolCallParseFailed, .toolCallTruncated:
+                    // Non-fatal tool-call diagnostics (#1857 / #1858); the
+                    // authoritative call still lands on `.toolCall(_:)` when it
+                    // parses. Observational here.
+                    continue
                 case .handoffRequested:
                     // Multi-agent handoffs are runtime-driven; deterministic
                     // single-agent replays never observe them.
diff --git a/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift b/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift
index 0b624f78a..8c2d44797 100644
--- a/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift
+++ b/Tests/APIFreezeTests/Fixtures/BackendSeamConsumer.swift
@@ -148,6 +148,10 @@ enum BackendSeamConsumer {
             _ = reused
         case .throttleDiagnostic(reason: let reason):
             _ = reason
+        case .toolCallParseFailed(rawBody: let rawBody):
+            _ = rawBody
+        case .toolCallTruncated(rawBody: let rawBody):
+            _ = rawBody
         case .toolDispatchStarted(callId: let callId, name: let name, attempt: let attempt):
             _ = (callId, name, attempt)
         case .toolCallApproved(callId: let callId):
diff --git a/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift
index 999be6bf7..a8f90da1d 100644
--- a/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift
+++ b/Tests/ManifoldBackendsTests/ClaudeStreamEventExtractorTests.swift
@@ -324,6 +324,8 @@ final class ClaudeStreamEventExtractorParityTests: XCTestCase {
         case .toolCallApproved: return "toolCallApproved"
         case .kvCacheReuse: return "kvCacheReuse"
         case .throttleDiagnostic: return "throttleDiagnostic"
+        case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
+        case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
         case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
         case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
         }
diff --git a/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift b/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift
index 9cc031a14..cb0c56762 100644
--- a/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift
+++ b/Tests/ManifoldBackendsTests/CloudThinkingTokenTests.swift
@@ -47,6 +47,7 @@ private func categorise(_ event: GenerationEvent) -> EventCategory? {
     case .toolIterationLimitExceeded: return nil
     case .kvCacheReuse: return nil
     case .throttleDiagnostic: return nil
+    case .toolCallParseFailed, .toolCallTruncated: return nil
     case .thinkingSignature: return nil
     case .toolCallStart, .toolCallArgumentsDelta: return nil
     case .toolProgress, .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: return nil
diff --git a/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift
index 7c845e738..5184afc00 100644
--- a/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift
+++ b/Tests/ManifoldBackendsTests/OllamaStreamEventExtractorTests.swift
@@ -258,6 +258,8 @@ final class OllamaStreamEventExtractorParityTests: XCTestCase {
         case .toolCallApproved: return "toolCallApproved"
         case .kvCacheReuse: return "kvCacheReuse"
         case .throttleDiagnostic: return "throttleDiagnostic"
+        case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
+        case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
         case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
         case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
         }
diff --git a/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift b/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift
index 7fcece0d4..2843b0f07 100644
--- a/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift
+++ b/Tests/ManifoldBackendsTests/OllamaToolCallLiveReplayTests.swift
@@ -202,6 +202,10 @@ final class OllamaToolCallLiveReplayTests: XCTestCase {
                     // Cooperative thermal pause — informational only;
                     // raw backend replay neither emits nor projects it.
                     break
+                case .toolCallParseFailed, .toolCallTruncated:
+                    // Tool-call diagnostics surface in the OutputParser layer,
+                    // not raw Ollama replay; ignore for forward-compat.
+                    break
                 case .toolCallStart, .toolCallArgumentsDelta:
                     // Streaming tool-call deltas are projected only by
                     // backends that opt into `streamsToolCallArguments`;
diff --git a/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift b/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift
index e04944598..47352f54a 100644
--- a/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift
+++ b/Tests/ManifoldBackendsTests/OpenAIResponsesBackendTests.swift
@@ -75,6 +75,7 @@ final class OpenAIResponsesBackendTests: XCTestCase {
              .throttleDiagnostic, .thinkingSignature,
              .toolCallStart, .toolCallArgumentsDelta,
              .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved,
+             .toolCallParseFailed, .toolCallTruncated,
              .prefillProgress, .toolProgress,
              .handoffRequested, .generationCompleted:
             return nil
diff --git a/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift
index 63bbf3ba8..651fb76f2 100644
--- a/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift
+++ b/Tests/ManifoldBackendsTests/OpenAIResponsesStreamEventExtractorTests.swift
@@ -208,6 +208,8 @@ final class OpenAIResponsesStreamEventExtractorTests: XCTestCase {
         case .toolCallApproved: return "toolCallApproved"
         case .kvCacheReuse: return "kvCacheReuse"
         case .throttleDiagnostic: return "throttleDiagnostic"
+        case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
+        case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
         case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
         case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
         }
@@ -333,6 +335,8 @@ final class OpenAIResponsesStreamEventExtractorParityTests: XCTestCase {
         case .toolCallApproved: return "toolCallApproved"
         case .kvCacheReuse: return "kvCacheReuse"
         case .throttleDiagnostic: return "throttleDiagnostic"
+        case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
+        case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
         case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
         case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
         }
diff --git a/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift b/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift
index e60f8ff57..5f066bbc8 100644
--- a/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift
+++ b/Tests/ManifoldBackendsTests/OpenAIStreamEventExtractorTests.swift
@@ -313,6 +313,8 @@ final class OpenAIStreamEventExtractorParityTests: XCTestCase {
         case .toolCallApproved: return "toolCallApproved"
         case .kvCacheReuse: return "kvCacheReuse"
         case .throttleDiagnostic: return "throttleDiagnostic"
+        case .toolCallParseFailed(let body): return "toolCallParseFailed(\(body))"
+        case .toolCallTruncated(let body): return "toolCallTruncated(\(body))"
         case .handoffRequested(let h): return "handoffRequested(\(h.targetAgentID))"
         case .generationCompleted(let c): return "generationCompleted(\(c.reason))"
         }
diff --git a/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift b/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift
index 3b44bd264..b7307ee40 100644
--- a/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift
+++ b/Tests/ManifoldInferenceTests/OutputParserSessionTests.swift
@@ -26,6 +26,14 @@ final class OutputParserSessionTests: XCTestCase {
         events.compactMap { if case .toolCall(let c) = $0 { return c } else { return nil } }
     }
 
+    private func parseFailures(_ events: [GenerationEvent]) -> [String] {
+        events.compactMap { if case .toolCallParseFailed(let body) = $0 { return body } else { return nil } }
+    }
+
+    private func truncations(_ events: [GenerationEvent]) -> [String] {
+        events.compactMap { if case .toolCallTruncated(let body) = $0 { return body } else { return nil } }
+    }
+
     // MARK: - Test marker fixtures
 
     /// A simple JSON `<tool_call>` dialect: `{"name":...}` → ToolCall.
@@ -269,4 +277,92 @@ final class OutputParserSessionTests: XCTestCase {
         XCTAssertEqual(toolCalls(recovery).map(\.toolName), ["f"],
             "Parser must recover and parse a valid call after dropping an oversized body")
     }
+
+    // MARK: - #1857: malformed-body parse-failure diagnostic
+
+    func test_closedToolBlock_withMalformedBody_emitsParseFailedDiagnostic() {
+        // A well-formed open/close pair surrounds a body the dialect parser
+        // rejects (not valid JSON). Previously this vanished with NO event;
+        // now it surfaces a non-fatal `.toolCallParseFailed` carrying the body.
+        var transform = ToolCallTransform(markers: [jsonMarker()])
+        var events = transform.process([.token("before<tool_call>not json</tool_call>after")])
+        events += transform.finalize()
+
+        XCTAssertTrue(toolCalls(events).isEmpty,
+            "A malformed body produces no ToolCall")
+        XCTAssertEqual(parseFailures(events), ["not json"],
+            "A malformed closed tool body must surface the raw body as a parse-failure diagnostic (#1857)")
+        XCTAssertEqual(visible(events), "beforeafter",
+            "Visible text around the failed block is still emitted")
+
+        // Sabotage: deleting the `.toolCallParseFailed` emission in
+        // ToolCallTransform makes parseFailures empty and this assertion fails —
+        // confirming the diagnostic is load-bearing, not incidental.
+    }
+
+    func test_parseFailure_doesNotBreakSubsequentValidCall() {
+        var transform = ToolCallTransform(markers: [jsonMarker()])
+        var events = transform.process([.token("<tool_call>garbage</tool_call><tool_call>{\"name\":\"ok\"}</tool_call>")])
+        events += transform.finalize()
+
+        XCTAssertEqual(parseFailures(events), ["garbage"])
+        XCTAssertEqual(toolCalls(events).map(\.toolName), ["ok"],
+            "A parse failure must not poison a following well-formed call")
+    }
+
+    // MARK: - #1858: opt-in truncated-body diagnostic at finalize
+
+    func test_finalize_unterminatedToolBlock_default_dropsSilently() {
+        // Default behavior is unchanged: an unterminated block is discarded with
+        // NO new event.
+        var transform = ToolCallTransform(markers: [jsonMarker()])
+        var events = transform.process([.token("text<tool_call>{\"name\":\"f\",\"arg")])
+        events += transform.finalize()
+
+        XCTAssertTrue(toolCalls(events).isEmpty)
+        XCTAssertTrue(truncations(events).isEmpty,
+            "With the opt-in OFF, a truncated tool block must NOT emit a diagnostic (default unchanged)")
+        XCTAssertEqual(visible(events), "text")
+    }
+
+    func test_finalize_unterminatedToolBlock_optIn_surfacesPartialBody() {
+        // With the opt-in ON, the partial body is surfaced as a non-fatal
+        // truncation diagnostic so a mid-tool-call stream cut is observable.
+        var transform = ToolCallTransform(markers: [jsonMarker()], surfaceTruncatedToolBody: true)
+        var events = transform.process([.token("text<tool_call>{\"name\":\"f\",\"arg")])
+        events += transform.finalize()
+
+        XCTAssertTrue(toolCalls(events).isEmpty)
+        XCTAssertEqual(truncations(events), ["{\"name\":\"f\",\"arg"],
+            "With the opt-in ON, finalize must surface the buffered partial body (#1858)")
+        XCTAssertEqual(visible(events), "text")
+
+        // Sabotage: flipping surfaceTruncatedToolBody back to false (or dropping
+        // the finalize branch) makes truncations empty and this fails.
+    }
+
+    func test_finalize_optIn_partialCloseSuffixFoldedIntoTruncatedBody() {
+        // The body ends mid-close-tag; the held-back partial close suffix still
+        // belongs to the body and must be included in the surfaced raw body.
+        var transform = ToolCallTransform(markers: [jsonMarker()], surfaceTruncatedToolBody: true)
+        // "</tool_c" is a partial close suffix held back at the boundary.
+        var events = transform.process([.token("<tool_call>{\"name\":\"f\"}</tool_c")])
+        events += transform.finalize()
+
+        XCTAssertEqual(truncations(events), ["{\"name\":\"f\"}</tool_c"],
+            "Partial close suffix must be folded into the truncated body, not lost")
+    }
+
+    func test_finalize_optIn_noOpenBlock_emitsNoTruncation() {
+        // The opt-in must only fire for a genuinely open block — a clean stream
+        // end emits nothing extra.
+        var transform = ToolCallTransform(markers: [jsonMarker()], surfaceTruncatedToolBody: true)
+        var events = transform.process([.token("<tool_call>{\"name\":\"f\"}</tool_call>tail")])
+        events += transform.finalize()
+
+        XCTAssertEqual(toolCalls(events).map(\.toolName), ["f"])
+        XCTAssertTrue(truncations(events).isEmpty,
+            "A fully-closed block plus trailing text must not emit a truncation diagnostic")
+        XCTAssertEqual(visible(events), "tail")
+    }
 }
diff --git a/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift b/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift
index a54e2298b..3b220f8aa 100644
--- a/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift
+++ b/Tests/ManifoldInferenceTests/ParallelToolCallOrderingTests.swift
@@ -60,6 +60,7 @@ final class ParallelToolCallOrderingTests: XCTestCase {
                  .kvCacheReuse, .throttleDiagnostic,
                  .toolCallStart, .toolCallArgumentsDelta,
                  .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved,
+                 .toolCallParseFailed, .toolCallTruncated,
                  .handoffRequested, .generationCompleted:
                 break
             }
diff --git a/Tests/ManifoldInferenceTests/ToolCallContractTests.swift b/Tests/ManifoldInferenceTests/ToolCallContractTests.swift
index ee0295fa5..e923624d4 100644
--- a/Tests/ManifoldInferenceTests/ToolCallContractTests.swift
+++ b/Tests/ManifoldInferenceTests/ToolCallContractTests.swift
@@ -214,6 +214,7 @@ final class ToolCallContractTests: XCTestCase {
             case .throttleDiagnostic: break
             case .toolCallStart, .toolCallArgumentsDelta: break
             case .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: break
+            case .toolCallParseFailed, .toolCallTruncated: break
             case .handoffRequested: break
             case .generationCompleted: break
             }
@@ -289,6 +290,7 @@ final class ToolCallContractTests: XCTestCase {
             case .throttleDiagnostic: break
             case .toolCallStart, .toolCallArgumentsDelta: break
             case .toolDispatchStarted, .toolDispatchCompleted, .toolCallApproved: break
+            case .toolCallParseFailed, .toolCallTruncated: break
             case .handoffRequested: break
             case .generationCompleted: break
             }