From 77350a3cde275eb273531eb887f6bc86f67340ee Mon Sep 17 00:00:00 2001
From: Batur <52473505+baturyilmaz@users.noreply.github.com>
Date: Tue, 25 Nov 2025 23:33:49 +0300
Subject: [PATCH] fix(xai): support Responses API streaming events and
 custom_tool_call type (#10523)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing schema validation for xAI Responses API server-side tools:

- Add custom_tool_call type to outputItemSchema
- Make toolCallSchema fields optional for in-progress states
- Add input field for custom_tool_call (used instead of arguments)
- Add action field for in-progress tool execution states
- Add 12 streaming event types for the tool lifecycle:
  - web_search_call: in_progress, searching, completed
  - x_search_call: in_progress, searching, completed
  - code_execution_call: in_progress, executing, completed
  - code_interpreter_call: in_progress, executing, completed

Fixes validation errors ('Invalid JSON response', 'No matching discriminator')
when using xai.responses() with xai.tools.webSearch(), xai.tools.xSearch(), or
xai.tools.codeExecution().

## Background

The xAI Responses API with server-side tools (`web_search`, `x_search`,
`code_execution`) was failing with validation errors when used through the
Vercel AI SDK:

```
AI_TypeValidationError: Invalid JSON response
Error: No matching discriminator for output[].type
```

**Root cause**: The xAI API returns response formats that were not included in
the SDK's Zod validation schemas:

1. **`custom_tool_call` type** - Server-side tool calls use this type instead of
   the standard tool call types
2. **Streaming progress events** - Events like `response.web_search_call.in_progress`,
   `response.web_search_call.searching`, and `response.web_search_call.completed`
   were not recognized
3. **Optional fields during execution** - While a call is in the `in_progress`
   state, fields like `name`, `arguments`, and `call_id` are undefined
4. **Different field names** - `custom_tool_call` carries its arguments in an
   `input` field instead of `arguments`

## Summary

Updated `packages/xai/src/responses/xai-responses-api.ts` to support the complete
xAI Responses API format:

### 1. Added `custom_tool_call` Type Support

**Type definition** (`XaiResponsesToolCall`):

```typescript
export type XaiResponsesToolCall = {
  type:
    | 'function_call'
    | 'web_search_call'
    | 'x_search_call'
    | 'code_interpreter_call'
    | 'custom_tool_call'; // ✅ Added
  id: string;
  call_id?: string; // ✅ Made optional
  name?: string; // ✅ Made optional
  arguments?: string; // ✅ Made optional
  input?: string; // ✅ Added for custom_tool_call
  status: string;
  action?: any; // ✅ Added for in_progress state
};
```

**Schema** (`outputItemSchema`):

```typescript
z.object({
  type: z.literal('custom_tool_call'),
  ...toolCallSchema.shape,
}),
```

### 2. Made Tool Call Fields Optional

Updated `toolCallSchema` to handle in-progress states where fields are undefined:

```typescript
const toolCallSchema = z.object({
  name: z.string().optional(), // Was required
  arguments: z.string().optional(), // Was required
  input: z.string().optional(), // ✅ New (for custom_tool_call)
  call_id: z.string().optional(), // Was required
  id: z.string(), // Always present
  status: z.string(), // Always present
  action: z.any().optional(), // ✅ New (for in_progress state)
});
```
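To make the effect of the optional fields concrete, here is a standalone sketch (not part of the diff). It copies the updated `toolCallSchema` shape into a local schema and parses two illustrative payloads; the ids, the `action` shape, and the plain `zod` import are assumptions for the example, not values taken from the API:

```typescript
import { z } from 'zod';

// Local copy of the updated toolCallSchema shape (the real schema lives in
// xai-responses-api.ts and is not exported); used here only for illustration.
const toolCallSchema = z.object({
  name: z.string().optional(),
  arguments: z.string().optional(),
  input: z.string().optional(),
  call_id: z.string().optional(),
  id: z.string(),
  status: z.string(),
  action: z.any().optional(),
});

// While a server-side tool call is still running, the API omits
// name/arguments/call_id; an action object may appear instead (typed as z.any()).
toolCallSchema.parse({
  id: 'ws_1', // hypothetical id
  status: 'in_progress',
  action: { query: 'latest AI developments' }, // illustrative shape
});

// A completed custom_tool_call carries its payload in `input`, not `arguments`.
toolCallSchema.parse({
  id: 'ct_1', // hypothetical id
  call_id: 'call_1',
  name: 'code_execution',
  input: '{"code":"print(6 * 7)"}',
  status: 'completed',
});
```

With the previously required fields, both payloads would have been rejected before the response ever reached the SDK's tool-call handling.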
### 3. Added 12 Streaming Event Types

Added to `xaiResponsesChunkSchema` to cover the complete tool execution lifecycle:

**Web Search:**

- `response.web_search_call.in_progress`
- `response.web_search_call.searching`
- `response.web_search_call.completed`

**X Search:**

- `response.x_search_call.in_progress`
- `response.x_search_call.searching`
- `response.x_search_call.completed`

**Code Execution:**

- `response.code_execution_call.in_progress`
- `response.code_execution_call.executing`
- `response.code_execution_call.completed`

**Code Interpreter:**

- `response.code_interpreter_call.in_progress`
- `response.code_interpreter_call.executing`
- `response.code_interpreter_call.completed`

## Manual Verification

Tested all server-side tools with both `generateText()` and `streamText()` to
confirm end-to-end functionality:

### ✅ Web Search Tool

```typescript
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';

const { text, sources } = await generateText({
  model: xai.responses('grok-4-fast'),
  prompt: 'What are the latest developments in AI?',
  tools: {
    web_search: xai.tools.webSearch(),
  },
});

console.log(text); // Comprehensive response
console.log(sources); // Array of URL citations
```

**Result**: ✅ Returned a comprehensive response with 14 URL citations, no validation errors

### ✅ X Search Tool

```typescript
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';

const { text, sources } = await generateText({
  model: xai.responses('grok-4-fast'),
  prompt: 'What are people saying about AI on X this week?',
  tools: {
    x_search: xai.tools.xSearch({
      allowedXHandles: ['elonmusk', 'xai'],
      fromDate: '2025-11-18',
      toDate: '2025-11-24',
      enableImageUnderstanding: true,
      enableVideoUnderstanding: true,
    }),
  },
});

console.log(text); // Analysis of X discussions
console.log(sources); // Array of X post citations
```

**Result**: ✅ Returned an analysis with 16 X post citations, all streaming events properly handled

### ✅ Code Execution Tool

```typescript
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';

const { text } = await generateText({
  model: xai.responses('grok-4-fast'),
  prompt: 'Calculate the factorial of 20 using Python',
  tools: {
    code_execution: xai.tools.codeExecution(),
  },
});

console.log(text); // Result with code execution details
```

**Result**: ✅ Computed the result with execution details, no validation errors

### ✅ Multiple Tools with Streaming

```typescript
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';

const { fullStream, usage: usagePromise } = streamText({
  model: xai.responses('grok-4-fast'),
  system: 'You are an AI research assistant.',
  tools: {
    web_search: xai.tools.webSearch(),
    x_search: xai.tools.xSearch(),
    code_execution: xai.tools.codeExecution(),
  },
  prompt: 'Research prompt caching in LLMs and explain how it reduces costs',
});

const sources = new Set<string>();
let lastToolName = '';

for await (const event of fullStream) {
  switch (event.type) {
    case 'tool-call':
      lastToolName = event.toolName;
      if (event.providerExecuted) {
        console.log(`[Calling ${event.toolName} on server...]`);
      }
      break;
    case 'tool-result':
      console.log(`[${lastToolName} completed]`);
      break;
    case 'text-delta':
      process.stdout.write(event.text);
      break;
    case 'source':
      if (event.sourceType === 'url') {
        sources.add(event.url);
      }
      break;
  }
}

const usage = await usagePromise;
console.log(`\nSources used: ${sources.size}`);
console.log(
  `Token usage: ${usage.inputTokens} input, ${usage.outputTokens} output`,
);
```

**Result**: ✅ Full streaming response with web searches, real-time progress
updates, and source citations. All streaming events (`tool-call`, `tool-result`,
`text-delta`, `source`) work correctly.
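As an additional sanity check on the schemas added in section 3 above, here is a small sketch (not part of the diff) that validates one of the new progress events directly against the chunk schema. The `item_id` value is made up, and the relative import assumes the snippet sits next to `xai-responses-api.ts` inside the package source; it is not part of the package's public API:

```typescript
import { xaiResponsesChunkSchema } from './xai-responses-api';

// An in-flight x_search progress event as described above; values are illustrative.
const result = xaiResponsesChunkSchema.safeParse({
  type: 'response.x_search_call.searching',
  item_id: 'xs_123', // hypothetical id
  output_index: 0,
});

// Before this change the chunk matched nothing in the union and surfaced as an
// "Invalid JSON response" validation error during streaming; now it parses.
console.log(result.success); // true
```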
**Summary of manual testing:**

- ✅ All three tool types (web_search, x_search, code_execution) work without validation errors
- ✅ Both `generateText()` and `streamText()` work correctly
- ✅ Source citations are properly parsed and returned
- ✅ Streaming progress events are handled correctly
- ✅ No "Invalid JSON response" or "No matching discriminator" errors

## Related issues

closes https://github.com/vercel/ai/issues/10607
---
 .changeset/smooth-ravens-suffer.md            |  5 ++
 .../xai/src/responses/xai-responses-api.ts    | 83 +++++++++++++++++--
 .../responses/xai-responses-language-model.ts | 36 +++++---
 3 files changed, 106 insertions(+), 18 deletions(-)
 create mode 100644 .changeset/smooth-ravens-suffer.md

diff --git a/.changeset/smooth-ravens-suffer.md b/.changeset/smooth-ravens-suffer.md
new file mode 100644
index 000000000000..b6cc215c0924
--- /dev/null
+++ b/.changeset/smooth-ravens-suffer.md
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/xai': patch
+---
+
+Fix Responses API validation errors for server-side tools (web_search, x_search, code_execution). Add missing custom_tool_call type and streaming event schemas.
diff --git a/packages/xai/src/responses/xai-responses-api.ts b/packages/xai/src/responses/xai-responses-api.ts
index a3d5c03fb3c2..e87cc455d028 100644
--- a/packages/xai/src/responses/xai-responses-api.ts
+++ b/packages/xai/src/responses/xai-responses-api.ts
@@ -48,12 +48,15 @@ export type XaiResponsesToolCall = {
     | 'function_call'
     | 'web_search_call'
     | 'x_search_call'
-    | 'code_interpreter_call';
+    | 'code_interpreter_call'
+    | 'custom_tool_call';
   id: string;
-  call_id: string;
-  name: string;
-  arguments: string;
+  call_id?: string;
+  name?: string;
+  arguments?: string;
+  input?: string;
   status: string;
+  action?: any;
 };
 
 export type XaiResponsesTool =
@@ -110,11 +113,13 @@ const reasoningSummaryPartSchema = z.object({
 });
 
 const toolCallSchema = z.object({
-  name: z.string(),
-  arguments: z.string(),
-  call_id: z.string(),
+  name: z.string().optional(),
+  arguments: z.string().optional(),
+  input: z.string().optional(),
+  call_id: z.string().optional(),
   id: z.string(),
   status: z.string(),
+  action: z.any().optional(),
 });
 
 const outputItemSchema = z.discriminatedUnion('type', [
@@ -142,6 +147,10 @@ const outputItemSchema = z.discriminatedUnion('type', [
     type: z.literal('view_x_video_call'),
     ...toolCallSchema.shape,
   }),
+  z.object({
+    type: z.literal('custom_tool_call'),
+    ...toolCallSchema.shape,
+  }),
   z.object({
     type: z.literal('message'),
     role: z.string(),
@@ -278,6 +287,66 @@ export const xaiResponsesChunkSchema = z.union([
     summary_index: z.number(),
     text: z.string(),
   }),
+  z.object({
+    type: z.literal('response.web_search_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.web_search_call.searching'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.web_search_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.x_search_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.x_search_call.searching'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.x_search_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_execution_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_execution_call.executing'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_execution_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_interpreter_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_interpreter_call.executing'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_interpreter_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
   z.object({
     type: z.literal('response.done'),
     response: xaiResponsesResponseSchema,
diff --git a/packages/xai/src/responses/xai-responses-language-model.ts b/packages/xai/src/responses/xai-responses-language-model.ts
index e496647a34b3..7caec7d50dd5 100644
--- a/packages/xai/src/responses/xai-responses-language-model.ts
+++ b/packages/xai/src/responses/xai-responses-language-model.ts
@@ -198,22 +198,29 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
         part.type === 'code_interpreter_call' ||
         part.type === 'code_execution_call' ||
         part.type === 'view_image_call' ||
-        part.type === 'view_x_video_call'
+        part.type === 'view_x_video_call' ||
+        part.type === 'custom_tool_call'
       ) {
-        let toolName = part.name;
-        if (webSearchSubTools.includes(part.name)) {
+        let toolName = part.name ?? '';
+        if (webSearchSubTools.includes(part.name ?? '')) {
           toolName = webSearchToolName ?? 'web_search';
-        } else if (xSearchSubTools.includes(part.name)) {
+        } else if (xSearchSubTools.includes(part.name ?? '')) {
           toolName = xSearchToolName ?? 'x_search';
         } else if (part.name === 'code_execution') {
           toolName = codeExecutionToolName ?? 'code_execution';
         }
 
+        // custom_tool_call uses 'input' field, others use 'arguments'
+        const toolInput =
+          part.type === 'custom_tool_call'
+            ? (part.input ?? '')
+            : (part.arguments ?? '');
+
         content.push({
           type: 'tool-call',
           toolCallId: part.id,
           toolName,
-          input: part.arguments,
+          input: toolInput,
           providerExecuted: true,
         });
 
@@ -476,7 +483,8 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
             part.type === 'code_interpreter_call' ||
             part.type === 'code_execution_call' ||
             part.type === 'view_image_call' ||
-            part.type === 'view_x_video_call'
+            part.type === 'view_x_video_call' ||
+            part.type === 'custom_tool_call'
           ) {
             if (!seenToolCalls.has(part.id)) {
               seenToolCalls.add(part.id);
@@ -493,15 +501,21 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
                 'x_thread_fetch',
               ];
 
-              let toolName = part.name;
-              if (webSearchSubTools.includes(part.name)) {
+              let toolName = part.name ?? '';
+              if (webSearchSubTools.includes(part.name ?? '')) {
                 toolName = webSearchToolName ?? 'web_search';
-              } else if (xSearchSubTools.includes(part.name)) {
+              } else if (xSearchSubTools.includes(part.name ?? '')) {
                 toolName = xSearchToolName ?? 'x_search';
              } else if (part.name === 'code_execution') {
                 toolName = codeExecutionToolName ?? 'code_execution';
              }
 
+              // custom_tool_call uses 'input' field, others use 'arguments'
+              const toolInput =
+                part.type === 'custom_tool_call'
+                  ? (part.input ?? '')
+                  : (part.arguments ?? '');
+
               controller.enqueue({
                 type: 'tool-input-start',
                 id: part.id,
@@ -511,7 +525,7 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
               controller.enqueue({
                 type: 'tool-input-delta',
                 id: part.id,
-                delta: part.arguments,
+                delta: toolInput,
               });
 
               controller.enqueue({
@@ -523,7 +537,7 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
                 type: 'tool-call',
                 toolCallId: part.id,
                 toolName,
-                input: part.arguments,
+                input: toolInput,
                 providerExecuted: true,
               });
             }