-
-
Notifications
You must be signed in to change notification settings - Fork 824
feat(core): add prompt context telemetry breakdown #1169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| --- | ||
| "@voltagent/core": patch | ||
| --- | ||
|
|
||
| feat: add estimated prompt context telemetry for observability | ||
|
|
||
| - record estimated prompt-context breakdown for system instructions, conversation messages, and tool schemas on LLM spans | ||
| - expose cached and reasoning token usage on LLM spans for observability consumers | ||
| - add tests for prompt-context estimation helpers |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| import { describe, expect, it } from "vitest"; | ||
| import { z } from "zod"; | ||
| import { | ||
| estimatePromptContextUsage, | ||
| promptContextUsageEstimateToAttributes, | ||
| } from "./prompt-context-usage"; | ||
|
|
||
| describe("prompt context usage estimation", () => { | ||
| it("estimates system, message, and tool context separately", () => { | ||
| const estimate = estimatePromptContextUsage({ | ||
| messages: [ | ||
| { | ||
| role: "system", | ||
| content: "You are a careful assistant.", | ||
| }, | ||
| { | ||
| role: "user", | ||
| content: "Summarize the latest release notes.", | ||
| }, | ||
| { | ||
| role: "assistant", | ||
| content: [{ type: "text", text: "Let me inspect them." }], | ||
| }, | ||
| ], | ||
| tools: { | ||
| searchDocs: { | ||
| description: "Search the documentation", | ||
| inputSchema: z.object({ | ||
| query: z.string(), | ||
| topK: z.number().int().optional(), | ||
| }), | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| expect(estimate).toBeDefined(); | ||
| expect(estimate?.systemMessageCount).toBe(1); | ||
| expect(estimate?.toolCount).toBe(1); | ||
| expect(estimate?.systemTokensEstimated).toBeGreaterThan(0); | ||
| expect(estimate?.nonSystemMessageTokensEstimated).toBeGreaterThan(0); | ||
| expect(estimate?.toolTokensEstimated).toBeGreaterThan(0); | ||
| expect(estimate?.messageTokensEstimated).toBe( | ||
| (estimate?.systemTokensEstimated ?? 0) + (estimate?.nonSystemMessageTokensEstimated ?? 0), | ||
| ); | ||
| expect(estimate?.totalTokensEstimated).toBe( | ||
| (estimate?.messageTokensEstimated ?? 0) + (estimate?.toolTokensEstimated ?? 0), | ||
| ); | ||
| }); | ||
|
|
||
| it("returns prompt context usage span attributes", () => { | ||
| const attributes = promptContextUsageEstimateToAttributes({ | ||
| systemTokensEstimated: 12, | ||
| messageTokensEstimated: 34, | ||
| nonSystemMessageTokensEstimated: 22, | ||
| toolTokensEstimated: 18, | ||
| totalTokensEstimated: 52, | ||
| systemMessageCount: 1, | ||
| toolCount: 2, | ||
| }); | ||
|
|
||
| expect(attributes).toEqual({ | ||
| "usage.prompt_context.system_tokens_estimated": 12, | ||
| "usage.prompt_context.message_tokens_estimated": 34, | ||
| "usage.prompt_context.non_system_message_tokens_estimated": 22, | ||
| "usage.prompt_context.tool_tokens_estimated": 18, | ||
| "usage.prompt_context.total_tokens_estimated": 52, | ||
| "usage.prompt_context.system_message_count": 1, | ||
| "usage.prompt_context.tool_count": 2, | ||
| }); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,223 @@ | ||||||||||||||
| import { safeStringify } from "@voltagent/internal/utils"; | ||||||||||||||
| import type { ToolSet } from "ai"; | ||||||||||||||
| import { zodSchemaToJsonUI } from "../utils/toolParser"; | ||||||||||||||
|
|
||||||||||||||
| const ESTIMATED_CHARS_PER_TOKEN = 4; | ||||||||||||||
| const BINARY_PART_TYPES = new Set([ | ||||||||||||||
| "audio", | ||||||||||||||
| "file", | ||||||||||||||
| "image", | ||||||||||||||
| "input_audio", | ||||||||||||||
| "input_image", | ||||||||||||||
| "media", | ||||||||||||||
| ]); | ||||||||||||||
| const LARGE_BINARY_KEYS = new Set(["audio", "base64", "bytes", "data", "image"]); | ||||||||||||||
|
|
||||||||||||||
| type PromptMessage = { | ||||||||||||||
| role?: string; | ||||||||||||||
| content?: unknown; | ||||||||||||||
| }; | ||||||||||||||
|
|
||||||||||||||
| export interface PromptContextUsageEstimate { | ||||||||||||||
| systemTokensEstimated: number; | ||||||||||||||
| messageTokensEstimated: number; | ||||||||||||||
| nonSystemMessageTokensEstimated: number; | ||||||||||||||
| toolTokensEstimated: number; | ||||||||||||||
| totalTokensEstimated: number; | ||||||||||||||
| systemMessageCount: number; | ||||||||||||||
| toolCount: number; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| export function estimatePromptContextUsage(params: { | ||||||||||||||
| messages?: PromptMessage[]; | ||||||||||||||
| tools?: ToolSet; | ||||||||||||||
| }): PromptContextUsageEstimate | undefined { | ||||||||||||||
| let systemTokensEstimated = 0; | ||||||||||||||
| let messageTokensEstimated = 0; | ||||||||||||||
| let nonSystemMessageTokensEstimated = 0; | ||||||||||||||
| let systemMessageCount = 0; | ||||||||||||||
|
|
||||||||||||||
| for (const message of params.messages ?? []) { | ||||||||||||||
| const serializedMessage = serializePromptMessage(message); | ||||||||||||||
| if (!serializedMessage) { | ||||||||||||||
| continue; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| const estimatedTokens = estimateTokensFromText(serializedMessage); | ||||||||||||||
| messageTokensEstimated += estimatedTokens; | ||||||||||||||
|
|
||||||||||||||
| if (message.role === "system") { | ||||||||||||||
| systemTokensEstimated += estimatedTokens; | ||||||||||||||
| systemMessageCount += 1; | ||||||||||||||
| continue; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| nonSystemMessageTokensEstimated += estimatedTokens; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| const serializedTools = Object.entries(params.tools ?? {}).map(([name, tool]) => | ||||||||||||||
| serializeToolDefinition(name, tool), | ||||||||||||||
| ); | ||||||||||||||
| const toolTokensEstimated = | ||||||||||||||
| serializedTools.length > 0 ? estimateTokensFromText(safeStringify(serializedTools)) : 0; | ||||||||||||||
| const totalTokensEstimated = messageTokensEstimated + toolTokensEstimated; | ||||||||||||||
|
|
||||||||||||||
| if (totalTokensEstimated === 0) { | ||||||||||||||
| return undefined; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return { | ||||||||||||||
| systemTokensEstimated, | ||||||||||||||
| messageTokensEstimated, | ||||||||||||||
| nonSystemMessageTokensEstimated, | ||||||||||||||
| toolTokensEstimated, | ||||||||||||||
| totalTokensEstimated, | ||||||||||||||
| systemMessageCount, | ||||||||||||||
| toolCount: serializedTools.length, | ||||||||||||||
| }; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| export function promptContextUsageEstimateToAttributes( | ||||||||||||||
| estimate: PromptContextUsageEstimate, | ||||||||||||||
| ): Record<string, number> { | ||||||||||||||
| return { | ||||||||||||||
| "usage.prompt_context.system_tokens_estimated": estimate.systemTokensEstimated, | ||||||||||||||
| "usage.prompt_context.message_tokens_estimated": estimate.messageTokensEstimated, | ||||||||||||||
| "usage.prompt_context.non_system_message_tokens_estimated": | ||||||||||||||
| estimate.nonSystemMessageTokensEstimated, | ||||||||||||||
| "usage.prompt_context.tool_tokens_estimated": estimate.toolTokensEstimated, | ||||||||||||||
| "usage.prompt_context.total_tokens_estimated": estimate.totalTokensEstimated, | ||||||||||||||
| "usage.prompt_context.system_message_count": estimate.systemMessageCount, | ||||||||||||||
| "usage.prompt_context.tool_count": estimate.toolCount, | ||||||||||||||
| }; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function estimateTokensFromText(text: string): number { | ||||||||||||||
| if (!text) { | ||||||||||||||
| return 0; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return Math.ceil(text.length / ESTIMATED_CHARS_PER_TOKEN); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function serializePromptMessage(message: PromptMessage): string { | ||||||||||||||
| const content = serializePromptValue(message.content).trim(); | ||||||||||||||
| if (!content) { | ||||||||||||||
| return ""; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| const role = typeof message.role === "string" ? message.role.toUpperCase() : "MESSAGE"; | ||||||||||||||
| return `${role}:\n${content}`; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function serializePromptValue(value: unknown): string { | ||||||||||||||
| if (typeof value === "string") { | ||||||||||||||
| return value; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (typeof value === "number" || typeof value === "boolean") { | ||||||||||||||
| return String(value); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (Array.isArray(value)) { | ||||||||||||||
| return value | ||||||||||||||
| .map((entry) => serializePromptValue(entry)) | ||||||||||||||
| .filter((entry) => entry.trim().length > 0) | ||||||||||||||
| .join("\n"); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (!value || typeof value !== "object") { | ||||||||||||||
| return ""; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| const record = value as Record<string, unknown>; | ||||||||||||||
| const type = typeof record.type === "string" ? record.type : undefined; | ||||||||||||||
|
|
||||||||||||||
| if (typeof record.text === "string") { | ||||||||||||||
| return record.text; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (type && BINARY_PART_TYPES.has(type)) { | ||||||||||||||
| return `[${type}]`; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (type === "tool-call") { | ||||||||||||||
| const toolName = typeof record.toolName === "string" ? record.toolName : "tool"; | ||||||||||||||
| const input = serializePromptValue(record.input); | ||||||||||||||
| return input ? `tool-call ${toolName}: ${input}` : `tool-call ${toolName}`; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (type === "tool-result") { | ||||||||||||||
| const toolName = typeof record.toolName === "string" ? record.toolName : "tool"; | ||||||||||||||
| const output = serializePromptValue(record.output); | ||||||||||||||
| return output ? `tool-result ${toolName}: ${output}` : `tool-result ${toolName}`; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if ("content" in record) { | ||||||||||||||
| const nestedContent = serializePromptValue(record.content); | ||||||||||||||
| if (nestedContent) { | ||||||||||||||
| return nestedContent; | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return safeStringify(sanitizeRecord(record)); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function sanitizeRecord(record: Record<string, unknown>): Record<string, unknown> { | ||||||||||||||
| const sanitized: Record<string, unknown> = {}; | ||||||||||||||
|
|
||||||||||||||
| for (const [key, value] of Object.entries(record)) { | ||||||||||||||
| sanitized[key] = LARGE_BINARY_KEYS.has(key) ? "[omitted]" : value; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return sanitized; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function serializeToolDefinition(name: string, tool: unknown): Record<string, unknown> { | ||||||||||||||
| if (!tool || typeof tool !== "object") { | ||||||||||||||
| return { name }; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| const candidate = tool as Record<string, unknown>; | ||||||||||||||
|
|
||||||||||||||
| return { | ||||||||||||||
| name, | ||||||||||||||
| ...(typeof candidate.type === "string" ? { type: candidate.type } : {}), | ||||||||||||||
| ...(typeof candidate.id === "string" ? { id: candidate.id } : {}), | ||||||||||||||
| ...(typeof candidate.description === "string" ? { description: candidate.description } : {}), | ||||||||||||||
| ...(candidate.inputSchema || candidate.parameters || candidate.input_schema || candidate.schema | ||||||||||||||
| ? { | ||||||||||||||
| inputSchema: normalizeSchema( | ||||||||||||||
| candidate.inputSchema ?? | ||||||||||||||
| candidate.parameters ?? | ||||||||||||||
| candidate.input_schema ?? | ||||||||||||||
| candidate.schema, | ||||||||||||||
| ), | ||||||||||||||
| } | ||||||||||||||
| : {}), | ||||||||||||||
| ...(candidate.outputSchema || candidate.output_schema | ||||||||||||||
| ? { | ||||||||||||||
| outputSchema: normalizeSchema(candidate.outputSchema ?? candidate.output_schema), | ||||||||||||||
| } | ||||||||||||||
| : {}), | ||||||||||||||
| ...(candidate.providerOptions ? { providerOptions: candidate.providerOptions } : {}), | ||||||||||||||
| ...(candidate.args ? { args: sanitizeRecord(candidate.args as Record<string, unknown>) } : {}), | ||||||||||||||
| ...(candidate.needsApproval !== undefined ? { needsApproval: candidate.needsApproval } : {}), | ||||||||||||||
|
Comment on lines
+203
to
+205
Contributor
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
set -euo pipefail
echo "Check where tool payloads are transformed before provider/model calls:"
rg -nP -C3 '\b(tool|tools)\b.*\b(name|description|parameters|schema|inputSchema|outputSchema)\b' --type=ts
echo
echo "Check whether providerOptions / needsApproval / args are consumed in provider-facing payloads:"
rg -nP -C3 '\b(providerOptions|needsApproval|args)\b' --type=ts
echo
echo "Find function-valued needsApproval declarations (would skew stringify-based estimates):"
rg -nP -C2 'needsApproval\s*:\s*(async\s*)?(\([^)]*\)\s*=>|function\b)' --type=tsRepository: VoltAgent/voltagent Length of output: 50376 🏁 Script executed: # First, let's look at the exact content around lines 203-205 in prompt-context-usage.ts
cat -n packages/core/src/agent/prompt-context-usage.ts | sed -n '190,220p'Repository: VoltAgent/voltagent Length of output: 1260 🏁 Script executed: # Find the serializeToolDefinition function to understand what it does
rg -n "serializeToolDefinition" packages/core/src/agent/prompt-context-usage.ts -A 30Repository: VoltAgent/voltagent Length of output: 2684 🏁 Script executed: # Check the Tool type definition to understand field types
rg -n "interface Tool|type Tool" packages/core/src/tool/ --type=ts -A 20 | head -100Repository: VoltAgent/voltagent Length of output: 6483 🏁 Script executed: # Check if safeStringify is used in this file
rg -n "safeStringify|JSON.stringify" packages/core/src/agent/prompt-context-usage.tsRepository: VoltAgent/voltagent Length of output: 271 🏁 Script executed: # Find Tool interface definition to understand field types
rg -n "interface Tool|type Tool.*=" packages/core/src/tool/ --type=ts -B 2 -A 50 | grep -A 50 "^packages/core/src/tool/index.ts"Repository: VoltAgent/voltagent Length of output: 7184 🏁 Script executed: # Search for needsApproval type definition
rg -n "needsApproval" packages/core/src/tool/index.ts -B 3 -A 3Repository: VoltAgent/voltagent Length of output: 900 🏁 Script executed: # Check how providerOptions, needsApproval are used in provider adapters
rg -n "providerOptions|needsApproval" packages/core/src/tool/manager/ToolManager.ts -B 3 -A 3Repository: VoltAgent/voltagent Length of output: 874 🏁 Script executed: # Check what 'args' field is in ProviderTool definition
rg -n "type ProviderTool|interface ProviderTool" packages/core/src/tool/index.ts -A 10Repository: VoltAgent/voltagent Length of output: 324 🏁 Script executed: # Check the AI SDK's Tool type to understand which fields are model-visible
rg -n "from \"ai\"" packages/core/src/tool/index.tsRepository: VoltAgent/voltagent Length of output: 159 🏁 Script executed: # Look for how serializeToolDefinition output is actually used in token estimation
rg -n "serializeToolDefinition\|toolTokensEstimated" packages/core/src/agent/prompt-context-usage.ts -B 3 -A 3Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Check if ToolNeedsApprovalFunction is defined and what it looks like
rg -n "ToolNeedsApprovalFunction" packages/core/src/tool/index.ts -B 2 -A 3Repository: VoltAgent/voltagent Length of output: 767 🏁 Script executed: # Check what safeStringify does and how it handles functions
rg -n "function safeStringify" packages/core/src -A 15 --type=tsRepository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Look at how sanitizeRecord works
rg -n "function sanitizeRecord|export.*sanitizeRecord" packages/core/src/agent/prompt-context-usage.ts -B 2 -A 10Repository: VoltAgent/voltagent Length of output: 494 🏁 Script executed: # See where args comes from in the tool object and if it should be in schema
rg -n "\.args\s*=" packages/core/src/tool/ --type=ts | head -20Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Find safeStringify implementation in `@voltagent/internal`
fd -t f "*.ts" | xargs rg -l "safeStringify" | head -5Repository: VoltAgent/voltagent Length of output: 566 🏁 Script executed: # Check how AI SDK's Tool type is used and what fields it expects
rg -n "VercelTool\[" packages/core/src/tool/manager/ToolManager.ts -B 3 -A 5Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Verify what serializeToolDefinition output is for: check comments around it
rg -n "serializeToolDefinition\|toolTokensEstimated" packages/core/src/agent/prompt-context-usage.ts -B 5 -A 2Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Look at the AI SDK Tool type definition being used
rg -n "Tool\s*\{|Tool\s*<" packages/core/src/tool/manager/ToolManager.ts -B 2 -A 20 | head -80Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Check comments in prompt-context-usage.ts about what this serialization is for
cat -n packages/core/src/agent/prompt-context-usage.ts | sed -n '40,80p'Repository: VoltAgent/voltagent Length of output: 1507 🏁 Script executed: # See what fields the AI SDK Tool actually expects/sends to model
rg -n "description.*inputSchema\|parameters.*description" packages/core/src/tool/manager/ToolManager.ts -B 3 -A 3Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Check the actual AI SDK tool field documentation - look at how tools are used in providers
rg -n "tools\[.*\]\s*=" packages/core/src/agent/providers/ --type=ts -B 2 -A 5 | head -60Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # See how ToolManager constructs tools for the AI SDK
cat -n packages/core/src/tool/manager/ToolManager.ts | sed -n '63,92p'Repository: VoltAgent/voltagent Length of output: 1294 🏁 Script executed: # Verify what fields AI SDK actually sends to model by checking provider integrations
rg -n "generateText|doGenerate" packages/core/src/agent/providers/anthropic/ --type=ts -A 10 | head -40Repository: VoltAgent/voltagent Length of output: 147 🏁 Script executed: # Check if there are any tests or documentation about what serializeToolDefinition should include
rg -n "serializeToolDefinition" packages/core/src/agent/prompt-context-usage.spec.ts -B 5 -A 10Repository: VoltAgent/voltagent Length of output: 45 🏁 Script executed: # Look at test files for prompt-context-usage
fd -type f "prompt-context-usage" packages/core/src/Repository: VoltAgent/voltagent Length of output: 233 🏁 Script executed: # Check what the AI SDK actually sends to the model for tool definitions
rg -n "name.*description.*inputSchema\|description.*inputSchema" packages/core/src/tool/manager/ToolManager.ts -B 3 -A 3
Repository: VoltAgent/voltagent Length of output: 45

Remove runtime metadata fields from tool schema serialization to avoid inflating token estimates.

Lines 203–205 currently include `providerOptions`, `args`, and `needsApproval` in the serialized tool definition. For `args`, only pass the value to `sanitizeRecord` when it is a non-array object.

Suggested fix:
- ...(candidate.providerOptions ? { providerOptions: candidate.providerOptions } : {}),
- ...(candidate.args ? { args: sanitizeRecord(candidate.args as Record<string, unknown>) } : {}),
- ...(candidate.needsApproval !== undefined ? { needsApproval: candidate.needsApproval } : {}),
+ ...(candidate.args && typeof candidate.args === "object" && !Array.isArray(candidate.args)
+ ? { args: sanitizeRecord(candidate.args as Record<string, unknown>) }
+ : {}),

📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||
| }; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| function normalizeSchema(schema: unknown): unknown { | ||||||||||||||
| if (!schema || typeof schema !== "object") { | ||||||||||||||
| return schema; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| try { | ||||||||||||||
| if ("_def" in (schema as Record<string, unknown>)) { | ||||||||||||||
| return zodSchemaToJsonUI(schema); | ||||||||||||||
| } | ||||||||||||||
| } catch (_error) { | ||||||||||||||
| return schema; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return schema; | ||||||||||||||
| } | ||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Deep binary fields are not sanitized, which can still blow up serialization cost.
On line 170, only top-level keys are redacted. Nested payloads (e.g.,
`content.metadata.data`) still pass through and can create large `safeStringify` inputs in hot paths, skewing estimates and adding avoidable overhead.
Proposed fix (recursive sanitization)
📝 Committable suggestion
🤖 Prompt for AI Agents