From 77350a3cde275eb273531eb887f6bc86f67340ee Mon Sep 17 00:00:00 2001
From: Batur <52473505+baturyilmaz@users.noreply.github.com>
Date: Tue, 25 Nov 2025 23:33:49 +0300
Subject: [PATCH] fix(xai): support Responses API streaming events and
 custom_tool_call type (#10523)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing schema validation for xAI Responses API server-side tools:

- Add custom_tool_call type to outputItemSchema
- Make toolCallSchema fields optional for in-progress states
- Add input field for custom_tool_call (used instead of arguments)
- Add action field for in-progress tool execution states
- Add 12 streaming event types for the tool lifecycle:
  - web_search_call: in_progress, searching, completed
  - x_search_call: in_progress, searching, completed
  - code_execution_call: in_progress, executing, completed
  - code_interpreter_call: in_progress, executing, completed

Fixes validation errors ('Invalid JSON response', 'No matching discriminator')
when using xai.responses() with xai.tools.webSearch(), xai.tools.xSearch(), or
xai.tools.codeExecution().

## Background

The xAI Responses API with server-side tools (`web_search`, `x_search`,
`code_execution`) was failing with validation errors when used through the
Vercel AI SDK:

```
AI_TypeValidationError: Invalid JSON response
Error: No matching discriminator for output[].type
```

**Root cause**: The xAI API returns response formats that were not included in
the SDK's Zod validation schemas:

1. **`custom_tool_call` type** - Server-side tool calls use this type instead of
   the standard tool call types
2. **Streaming progress events** - Events like `response.web_search_call.in_progress`,
   `response.web_search_call.searching`, and `response.web_search_call.completed`
   were not recognized
3. **Optional fields during execution** - While a call is in the `in_progress`
   state, fields like `name`, `arguments`, and `call_id` are undefined
4. **Different field names** - `custom_tool_call` carries its arguments in an
   `input` field instead of `arguments`

## Summary

Updated `packages/xai/src/responses/xai-responses-api.ts` to support the complete
xAI Responses API format:

### 1. Added `custom_tool_call` Type Support

**Type definition** (`XaiResponsesToolCall`):

```typescript
export type XaiResponsesToolCall = {
  type:
    | 'function_call'
    | 'web_search_call'
    | 'x_search_call'
    | 'code_interpreter_call'
    | 'custom_tool_call'; // ✅ Added
  id: string;
  call_id?: string; // ✅ Made optional
  name?: string; // ✅ Made optional
  arguments?: string; // ✅ Made optional
  input?: string; // ✅ Added for custom_tool_call
  status: string;
  action?: any; // ✅ Added for in_progress state
};
```

**Schema** (`outputItemSchema`):

```typescript
z.object({
  type: z.literal('custom_tool_call'),
  ...toolCallSchema.shape,
}),
```

### 2. Made Tool Call Fields Optional

Updated `toolCallSchema` to handle in-progress states where fields are undefined:

```typescript
const toolCallSchema = z.object({
  name: z.string().optional(), // Was required
  arguments: z.string().optional(), // Was required
  input: z.string().optional(), // ✅ New (for custom_tool_call)
  call_id: z.string().optional(), // Was required
  id: z.string(), // Always present
  status: z.string(), // Always present
  action: z.any().optional(), // ✅ New (for in_progress state)
});
```
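To make the effect of the optional fields concrete, here is a standalone sketch (not part of the diff). It copies the updated `toolCallSchema` shape into a local schema and parses two illustrative payloads; the ids, the `action` shape, and the plain `zod` import are assumptions for the example, not values taken from the API:

```typescript
import { z } from 'zod';

// Local copy of the updated toolCallSchema shape (the real schema lives in
// xai-responses-api.ts and is not exported); used here only for illustration.
const toolCallSchema = z.object({
  name: z.string().optional(),
  arguments: z.string().optional(),
  input: z.string().optional(),
  call_id: z.string().optional(),
  id: z.string(),
  status: z.string(),
  action: z.any().optional(),
});

// While a server-side tool call is still running, the API omits
// name/arguments/call_id; an action object may appear instead (typed as z.any()).
toolCallSchema.parse({
  id: 'ws_1', // hypothetical id
  status: 'in_progress',
  action: { query: 'latest AI developments' }, // illustrative shape
});

// A completed custom_tool_call carries its payload in `input`, not `arguments`.
toolCallSchema.parse({
  id: 'ct_1', // hypothetical id
  call_id: 'call_1',
  name: 'code_execution',
  input: '{"code":"print(6 * 7)"}',
  status: 'completed',
});
```

With the previously required fields, both payloads would have been rejected before the response ever reached the SDK's tool-call handling.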
### 3. Added 12 Streaming Event Types

Added to `xaiResponsesChunkSchema` to cover the complete tool execution lifecycle:

**Web Search:**

- `response.web_search_call.in_progress`
- `response.web_search_call.searching`
- `response.web_search_call.completed`

**X Search:**

- `response.x_search_call.in_progress`
- `response.x_search_call.searching`
- `response.x_search_call.completed`

**Code Execution:**

- `response.code_execution_call.in_progress`
- `response.code_execution_call.executing`
- `response.code_execution_call.completed`

**Code Interpreter:**

- `response.code_interpreter_call.in_progress`
- `response.code_interpreter_call.executing`
- `response.code_interpreter_call.completed`

## Manual Verification

Tested all server-side tools with both `generateText()` and `streamText()` to
confirm end-to-end functionality:

### ✅ Web Search Tool

```typescript
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';

const { text, sources } = await generateText({
  model: xai.responses('grok-4-fast'),
  prompt: 'What are the latest developments in AI?',
  tools: {
    web_search: xai.tools.webSearch(),
  },
});

console.log(text); // Comprehensive response
console.log(sources); // Array of URL citations
```

**Result**: ✅ Returned a comprehensive response with 14 URL citations, no validation errors

### ✅ X Search Tool

```typescript
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';

const { text, sources } = await generateText({
  model: xai.responses('grok-4-fast'),
  prompt: 'What are people saying about AI on X this week?',
  tools: {
    x_search: xai.tools.xSearch({
      allowedXHandles: ['elonmusk', 'xai'],
      fromDate: '2025-11-18',
      toDate: '2025-11-24',
      enableImageUnderstanding: true,
      enableVideoUnderstanding: true,
    }),
  },
});

console.log(text); // Analysis of X discussions
console.log(sources); // Array of X post citations
```

**Result**: ✅ Returned an analysis with 16 X post citations, all streaming events properly handled

### ✅ Code Execution Tool

```typescript
import { xai } from '@ai-sdk/xai';
import { generateText } from 'ai';

const { text } = await generateText({
  model: xai.responses('grok-4-fast'),
  prompt: 'Calculate the factorial of 20 using Python',
  tools: {
    code_execution: xai.tools.codeExecution(),
  },
});

console.log(text); // Result with code execution details
```

**Result**: ✅ Computed the result with execution details, no validation errors

### ✅ Multiple Tools with Streaming

```typescript
import { xai } from '@ai-sdk/xai';
import { streamText } from 'ai';

const { fullStream, usage: usagePromise } = streamText({
  model: xai.responses('grok-4-fast'),
  system: 'You are an AI research assistant.',
  tools: {
    web_search: xai.tools.webSearch(),
    x_search: xai.tools.xSearch(),
    code_execution: xai.tools.codeExecution(),
  },
  prompt: 'Research prompt caching in LLMs and explain how it reduces costs',
});

const sources = new Set<string>();
let lastToolName = '';

for await (const event of fullStream) {
  switch (event.type) {
    case 'tool-call':
      lastToolName = event.toolName;
      if (event.providerExecuted) {
        console.log(`[Calling ${event.toolName} on server...]`);
      }
      break;
    case 'tool-result':
      console.log(`[${lastToolName} completed]`);
      break;
    case 'text-delta':
      process.stdout.write(event.text);
      break;
    case 'source':
      if (event.sourceType === 'url') {
        sources.add(event.url);
      }
      break;
  }
}

const usage = await usagePromise;
console.log(`\nSources used: ${sources.size}`);
console.log(
  `Token usage: ${usage.inputTokens} input, ${usage.outputTokens} output`,
);
```

**Result**: ✅ Full streaming response with web searches, real-time progress
updates, and source citations. All streaming events (`tool-call`, `tool-result`,
`text-delta`, `source`) work correctly.
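As an additional sanity check on the schemas added in section 3 above, here is a small sketch (not part of the diff) that validates one of the new progress events directly against the chunk schema. The `item_id` value is made up, and the relative import assumes the snippet sits next to `xai-responses-api.ts` inside the package source; it is not part of the package's public API:

```typescript
import { xaiResponsesChunkSchema } from './xai-responses-api';

// An in-flight x_search progress event as described above; values are illustrative.
const result = xaiResponsesChunkSchema.safeParse({
  type: 'response.x_search_call.searching',
  item_id: 'xs_123', // hypothetical id
  output_index: 0,
});

// Before this change the chunk matched nothing in the union and surfaced as an
// "Invalid JSON response" validation error during streaming; now it parses.
console.log(result.success); // true
```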
**Summary of manual testing:**

- ✅ All three tool types (web_search, x_search, code_execution) work without validation errors
- ✅ Both `generateText()` and `streamText()` work correctly
- ✅ Source citations are properly parsed and returned
- ✅ Streaming progress events are handled correctly
- ✅ No "Invalid JSON response" or "No matching discriminator" errors

## Related issues

closes https://github.com/vercel/ai/issues/10607
---
 .changeset/smooth-ravens-suffer.md            |  5 ++
 .../xai/src/responses/xai-responses-api.ts    | 83 +++++++++++++++++--
 .../responses/xai-responses-language-model.ts | 36 +++++---
 3 files changed, 106 insertions(+), 18 deletions(-)
 create mode 100644 .changeset/smooth-ravens-suffer.md

diff --git a/.changeset/smooth-ravens-suffer.md b/.changeset/smooth-ravens-suffer.md
new file mode 100644
index 000000000000..b6cc215c0924
--- /dev/null
+++ b/.changeset/smooth-ravens-suffer.md
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/xai': patch
+---
+
+Fix Responses API validation errors for server-side tools (web_search, x_search, code_execution). Add missing custom_tool_call type and streaming event schemas.
diff --git a/packages/xai/src/responses/xai-responses-api.ts b/packages/xai/src/responses/xai-responses-api.ts
index a3d5c03fb3c2..e87cc455d028 100644
--- a/packages/xai/src/responses/xai-responses-api.ts
+++ b/packages/xai/src/responses/xai-responses-api.ts
@@ -48,12 +48,15 @@ export type XaiResponsesToolCall = {
     | 'function_call'
     | 'web_search_call'
     | 'x_search_call'
-    | 'code_interpreter_call';
+    | 'code_interpreter_call'
+    | 'custom_tool_call';
   id: string;
-  call_id: string;
-  name: string;
-  arguments: string;
+  call_id?: string;
+  name?: string;
+  arguments?: string;
+  input?: string;
   status: string;
+  action?: any;
 };
 
 export type XaiResponsesTool =
@@ -110,11 +113,13 @@ const reasoningSummaryPartSchema = z.object({
 });
 
 const toolCallSchema = z.object({
-  name: z.string(),
-  arguments: z.string(),
-  call_id: z.string(),
+  name: z.string().optional(),
+  arguments: z.string().optional(),
+  input: z.string().optional(),
+  call_id: z.string().optional(),
   id: z.string(),
   status: z.string(),
+  action: z.any().optional(),
 });
 
 const outputItemSchema = z.discriminatedUnion('type', [
@@ -142,6 +147,10 @@ const outputItemSchema = z.discriminatedUnion('type', [
     type: z.literal('view_x_video_call'),
     ...toolCallSchema.shape,
   }),
+  z.object({
+    type: z.literal('custom_tool_call'),
+    ...toolCallSchema.shape,
+  }),
   z.object({
     type: z.literal('message'),
     role: z.string(),
@@ -278,6 +287,66 @@ export const xaiResponsesChunkSchema = z.union([
     summary_index: z.number(),
     text: z.string(),
   }),
+  z.object({
+    type: z.literal('response.web_search_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.web_search_call.searching'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.web_search_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.x_search_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.x_search_call.searching'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.x_search_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_execution_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_execution_call.executing'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_execution_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_interpreter_call.in_progress'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_interpreter_call.executing'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
+  z.object({
+    type: z.literal('response.code_interpreter_call.completed'),
+    item_id: z.string(),
+    output_index: z.number(),
+  }),
   z.object({
     type: z.literal('response.done'),
     response: xaiResponsesResponseSchema,
diff --git a/packages/xai/src/responses/xai-responses-language-model.ts b/packages/xai/src/responses/xai-responses-language-model.ts
index e496647a34b3..7caec7d50dd5 100644
--- a/packages/xai/src/responses/xai-responses-language-model.ts
+++ b/packages/xai/src/responses/xai-responses-language-model.ts
@@ -198,22 +198,29 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
         part.type === 'code_interpreter_call' ||
         part.type === 'code_execution_call' ||
         part.type === 'view_image_call' ||
-        part.type === 'view_x_video_call'
+        part.type === 'view_x_video_call' ||
+        part.type === 'custom_tool_call'
       ) {
-        let toolName = part.name;
-        if (webSearchSubTools.includes(part.name)) {
+        let toolName = part.name ?? '';
+        if (webSearchSubTools.includes(part.name ?? '')) {
           toolName = webSearchToolName ?? 'web_search';
-        } else if (xSearchSubTools.includes(part.name)) {
+        } else if (xSearchSubTools.includes(part.name ?? '')) {
           toolName = xSearchToolName ?? 'x_search';
         } else if (part.name === 'code_execution') {
           toolName = codeExecutionToolName ?? 'code_execution';
         }
 
+        // custom_tool_call uses 'input' field, others use 'arguments'
+        const toolInput =
+          part.type === 'custom_tool_call'
+            ? (part.input ?? '')
+            : (part.arguments ?? '');
+
         content.push({
           type: 'tool-call',
           toolCallId: part.id,
           toolName,
-          input: part.arguments,
+          input: toolInput,
           providerExecuted: true,
         });
 
@@ -476,7 +483,8 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
             part.type === 'code_interpreter_call' ||
             part.type === 'code_execution_call' ||
             part.type === 'view_image_call' ||
-            part.type === 'view_x_video_call'
+            part.type === 'view_x_video_call' ||
+            part.type === 'custom_tool_call'
           ) {
             if (!seenToolCalls.has(part.id)) {
               seenToolCalls.add(part.id);
@@ -493,15 +501,21 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
                 'x_thread_fetch',
               ];
 
-              let toolName = part.name;
-              if (webSearchSubTools.includes(part.name)) {
+              let toolName = part.name ?? '';
+              if (webSearchSubTools.includes(part.name ?? '')) {
                 toolName = webSearchToolName ?? 'web_search';
-              } else if (xSearchSubTools.includes(part.name)) {
+              } else if (xSearchSubTools.includes(part.name ?? '')) {
                 toolName = xSearchToolName ?? 'x_search';
              } else if (part.name === 'code_execution') {
                 toolName = codeExecutionToolName ?? 'code_execution';
              }
 
+              // custom_tool_call uses 'input' field, others use 'arguments'
+              const toolInput =
+                part.type === 'custom_tool_call'
+                  ? (part.input ?? '')
+                  : (part.arguments ?? '');
+
               controller.enqueue({
                 type: 'tool-input-start',
                 id: part.id,
@@ -511,7 +525,7 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
               controller.enqueue({
                 type: 'tool-input-delta',
                 id: part.id,
-                delta: part.arguments,
+                delta: toolInput,
               });
 
               controller.enqueue({
@@ -523,7 +537,7 @@ export class XaiResponsesLanguageModel implements LanguageModelV2 {
                 type: 'tool-call',
                 toolCallId: part.id,
                 toolName,
-                input: part.arguments,
+                input: toolInput,
                 providerExecuted: true,
               });
             }