diff --git a/.changeset/green-planes-jump.md b/.changeset/green-planes-jump.md
new file mode 100644
index 000000000..2e1e1208c
--- /dev/null
+++ b/.changeset/green-planes-jump.md
@@ -0,0 +1,9 @@
+---
+"@voltagent/core": patch
+---
+
+feat: add estimated prompt context telemetry for observability
+
+- record estimated prompt-context breakdown for system instructions, conversation messages, and tool schemas on LLM spans
+- expose cached and reasoning token usage on LLM spans for observability consumers
+- add tests for prompt-context estimation helpers
diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts
index 39cbdee22..abd1c1c61 100644
--- a/packages/core/src/agent/agent.ts
+++ b/packages/core/src/agent/agent.ts
@@ -109,6 +109,10 @@ import {
 import type { AgentHooks, OnToolEndHookResult, OnToolErrorHookResult } from "./hooks";
 import { stripDanglingOpenAIReasoningFromModelMessages } from "./model-message-normalizer";
 import { AgentTraceContext, addModelAttributesToSpan } from "./open-telemetry/trace-context";
+import {
+  estimatePromptContextUsage,
+  promptContextUsageEstimateToAttributes,
+} from "./prompt-context-usage";
 import type {
   BaseMessage,
   BaseTool,
@@ -4101,7 +4105,16 @@ export class Agent {
     },
   ): Span {
     const { label, ...spanParams } = params;
-    const attributes = this.buildLLMSpanAttributes(spanParams);
+    const promptContextUsageEstimate = estimatePromptContextUsage({
+      messages: params.messages,
+      tools: params.tools,
+    });
+    const attributes = {
+      ...this.buildLLMSpanAttributes(spanParams),
+      ...(promptContextUsageEstimate
+        ? promptContextUsageEstimateToAttributes(promptContextUsageEstimate)
+        : {}),
+    };
     const span = oc.traceContext.createChildSpan(`llm:${params.operation}`, "llm", {
       kind: SpanKind.CLIENT,
       label,
@@ -4240,7 +4253,8 @@ export class Agent {
       return;
     }
 
-    const { promptTokens, completionTokens, totalTokens } = normalizedUsage;
+    const { promptTokens, completionTokens, totalTokens, cachedInputTokens, reasoningTokens } =
+      normalizedUsage;
 
     if (promptTokens !== undefined) {
       span.setAttribute("llm.usage.prompt_tokens", promptTokens);
@@ -4251,6 +4265,12 @@ export class Agent {
     if (totalTokens !== undefined) {
       span.setAttribute("llm.usage.total_tokens", totalTokens);
     }
+    if (cachedInputTokens !== undefined) {
+      span.setAttribute("llm.usage.cached_tokens", cachedInputTokens);
+    }
+    if (reasoningTokens !== undefined) {
+      span.setAttribute("llm.usage.reasoning_tokens", reasoningTokens);
+    }
   }
 
   private recordProviderCost(span: Span, providerMetadata?: unknown): void {
diff --git a/packages/core/src/agent/prompt-context-usage.spec.ts b/packages/core/src/agent/prompt-context-usage.spec.ts
new file mode 100644
index 000000000..61111b3a0
--- /dev/null
+++ b/packages/core/src/agent/prompt-context-usage.spec.ts
@@ -0,0 +1,71 @@
+import { describe, expect, it } from "vitest";
+import { z } from "zod";
+import {
+  estimatePromptContextUsage,
+  promptContextUsageEstimateToAttributes,
+} from "./prompt-context-usage";
+
+describe("prompt context usage estimation", () => {
+  it("estimates system, message, and tool context separately", () => {
+    const estimate = estimatePromptContextUsage({
+      messages: [
+        {
+          role: "system",
+          content: "You are a careful assistant.",
+        },
+        {
+          role: "user",
+          content: "Summarize the latest release notes.",
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Let me inspect them." }],
+        },
+      ],
+      tools: {
+        searchDocs: {
+          description: "Search the documentation",
+          inputSchema: z.object({
+            query: z.string(),
+            topK: z.number().int().optional(),
+          }),
+        },
+      },
+    });
+
+    expect(estimate).toBeDefined();
+    expect(estimate?.systemMessageCount).toBe(1);
+    expect(estimate?.toolCount).toBe(1);
+    expect(estimate?.systemTokensEstimated).toBeGreaterThan(0);
+    expect(estimate?.nonSystemMessageTokensEstimated).toBeGreaterThan(0);
+    expect(estimate?.toolTokensEstimated).toBeGreaterThan(0);
+    expect(estimate?.messageTokensEstimated).toBe(
+      (estimate?.systemTokensEstimated ?? 0) + (estimate?.nonSystemMessageTokensEstimated ?? 0),
+    );
+    expect(estimate?.totalTokensEstimated).toBe(
+      (estimate?.messageTokensEstimated ?? 0) + (estimate?.toolTokensEstimated ?? 0),
+    );
+  });
+
+  it("returns prompt context usage span attributes", () => {
+    const attributes = promptContextUsageEstimateToAttributes({
+      systemTokensEstimated: 12,
+      messageTokensEstimated: 34,
+      nonSystemMessageTokensEstimated: 22,
+      toolTokensEstimated: 18,
+      totalTokensEstimated: 52,
+      systemMessageCount: 1,
+      toolCount: 2,
+    });
+
+    expect(attributes).toEqual({
+      "usage.prompt_context.system_tokens_estimated": 12,
+      "usage.prompt_context.message_tokens_estimated": 34,
+      "usage.prompt_context.non_system_message_tokens_estimated": 22,
+      "usage.prompt_context.tool_tokens_estimated": 18,
+      "usage.prompt_context.total_tokens_estimated": 52,
+      "usage.prompt_context.system_message_count": 1,
+      "usage.prompt_context.tool_count": 2,
+    });
+  });
+});
diff --git a/packages/core/src/agent/prompt-context-usage.ts b/packages/core/src/agent/prompt-context-usage.ts
new file mode 100644
index 000000000..b252704f3
--- /dev/null
+++ b/packages/core/src/agent/prompt-context-usage.ts
@@ -0,0 +1,264 @@
+import { safeStringify } from "@voltagent/internal/utils";
+import type { ToolSet } from "ai";
+import { zodSchemaToJsonUI } from "../utils/toolParser";
+
+/** Characters-per-token ratio used to turn serialized text length into a token estimate. */
+const ESTIMATED_CHARS_PER_TOKEN = 4;
+/** Content-part `type` values that are summarized as a `[type]` placeholder. */
+const BINARY_PART_TYPES = new Set([
+  "audio",
+  "file",
+  "image",
+  "input_audio",
+  "input_image",
+  "media",
+]);
+/** Record keys whose values are replaced with "[omitted]" before stringifying. */
+const LARGE_BINARY_KEYS = new Set(["audio", "base64", "bytes", "data", "image"]);
+
+/** Minimal message shape read by the estimator: an optional role plus arbitrary content. */
+type PromptMessage = {
+  role?: string;
+  content?: unknown;
+};
+
+/** Estimated prompt-context breakdown, in tokens and counts. */
+export interface PromptContextUsageEstimate {
+  /** Estimated tokens from messages whose role is "system". */
+  systemTokensEstimated: number;
+  /** Estimated tokens from all counted messages (system and non-system). */
+  messageTokensEstimated: number;
+  /** Estimated tokens from counted messages whose role is not "system". */
+  nonSystemMessageTokensEstimated: number;
+  /** Estimated tokens from the serialized tool definitions. */
+  toolTokensEstimated: number;
+  /** Sum of `messageTokensEstimated` and `toolTokensEstimated`. */
+  totalTokensEstimated: number;
+  /** Number of counted messages whose role is "system". */
+  systemMessageCount: number;
+  /** Number of entries in the tool set. */
+  toolCount: number;
+}
+
+/**
+ * Estimates how many prompt tokens the given messages and tool definitions occupy.
+ *
+ * Messages that serialize to an empty string are skipped. The estimate is a character-count
+ * heuristic, not a tokenizer result.
+ *
+ * @param params - Messages and tools about to be sent to the model; both are optional.
+ * @returns The breakdown, or `undefined` when the estimated total is zero.
+ */
+export function estimatePromptContextUsage(params: {
+  messages?: PromptMessage[];
+  tools?: ToolSet;
+}): PromptContextUsageEstimate | undefined {
+  let systemTokensEstimated = 0;
+  let messageTokensEstimated = 0;
+  let nonSystemMessageTokensEstimated = 0;
+  let systemMessageCount = 0;
+
+  for (const message of params.messages ?? []) {
+    const serializedMessage = serializePromptMessage(message);
+    if (!serializedMessage) {
+      continue;
+    }
+
+    const estimatedTokens = estimateTokensFromText(serializedMessage);
+    messageTokensEstimated += estimatedTokens;
+
+    if (message.role === "system") {
+      systemTokensEstimated += estimatedTokens;
+      systemMessageCount += 1;
+      continue;
+    }
+
+    nonSystemMessageTokensEstimated += estimatedTokens;
+  }
+
+  const serializedTools = Object.entries(params.tools ?? {}).map(([name, tool]) =>
+    serializeToolDefinition(name, tool),
+  );
+  // All tool definitions are stringified together as one array before estimating.
+  const toolTokensEstimated =
+    serializedTools.length > 0 ? estimateTokensFromText(safeStringify(serializedTools)) : 0;
+  const totalTokensEstimated = messageTokensEstimated + toolTokensEstimated;
+
+  if (totalTokensEstimated === 0) {
+    return undefined;
+  }
+
+  return {
+    systemTokensEstimated,
+    messageTokensEstimated,
+    nonSystemMessageTokensEstimated,
+    toolTokensEstimated,
+    totalTokensEstimated,
+    systemMessageCount,
+    toolCount: serializedTools.length,
+  };
+}
+
+/**
+ * Maps an estimate onto `usage.prompt_context.*` span attribute keys.
+ *
+ * @param estimate - Breakdown produced by {@link estimatePromptContextUsage}.
+ * @returns A flat record of numeric attributes.
+ */
+export function promptContextUsageEstimateToAttributes(
+  estimate: PromptContextUsageEstimate,
+): Record<string, number> {
+  return {
+    "usage.prompt_context.system_tokens_estimated": estimate.systemTokensEstimated,
+    "usage.prompt_context.message_tokens_estimated": estimate.messageTokensEstimated,
+    "usage.prompt_context.non_system_message_tokens_estimated":
+      estimate.nonSystemMessageTokensEstimated,
+    "usage.prompt_context.tool_tokens_estimated": estimate.toolTokensEstimated,
+    "usage.prompt_context.total_tokens_estimated": estimate.totalTokensEstimated,
+    "usage.prompt_context.system_message_count": estimate.systemMessageCount,
+    "usage.prompt_context.tool_count": estimate.toolCount,
+  };
+}
+
+/** Returns `ceil(length / ESTIMATED_CHARS_PER_TOKEN)`, or 0 for empty text. */
+function estimateTokensFromText(text: string): number {
+  if (!text) {
+    return 0;
+  }
+
+  return Math.ceil(text.length / ESTIMATED_CHARS_PER_TOKEN);
+}
+
+/**
+ * Renders a message as `ROLE:\n<content>`, using "MESSAGE" when the role is not a string.
+ * Returns an empty string when the content serializes to nothing but whitespace.
+ */
+function serializePromptMessage(message: PromptMessage): string {
+  const content = serializePromptValue(message.content).trim();
+  if (!content) {
+    return "";
+  }
+
+  const role = typeof message.role === "string" ? message.role.toUpperCase() : "MESSAGE";
+  return `${role}:\n${content}`;
+}
+
+/**
+ * Flattens message content (string, primitive, array, or content-part object) into text.
+ *
+ * Binary part types become a `[type]` placeholder, tool calls and tool results are summarized
+ * with their tool name, and unrecognized objects are stringified after `sanitizeRecord`.
+ */
+function serializePromptValue(value: unknown): string {
+  if (typeof value === "string") {
+    return value;
+  }
+
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+
+  if (Array.isArray(value)) {
+    return value
+      .map((entry) => serializePromptValue(entry))
+      .filter((entry) => entry.trim().length > 0)
+      .join("\n");
+  }
+
+  if (!value || typeof value !== "object") {
+    return "";
+  }
+
+  const record = value as Record<string, unknown>;
+  const type = typeof record.type === "string" ? record.type : undefined;
+
+  // A string `text` field wins over every other rule, including the binary-type check.
+  if (typeof record.text === "string") {
+    return record.text;
+  }
+
+  if (type && BINARY_PART_TYPES.has(type)) {
+    return `[${type}]`;
+  }
+
+  if (type === "tool-call") {
+    const toolName = typeof record.toolName === "string" ? record.toolName : "tool";
+    const input = serializePromptValue(record.input);
+    return input ? `tool-call ${toolName}: ${input}` : `tool-call ${toolName}`;
+  }
+
+  if (type === "tool-result") {
+    const toolName = typeof record.toolName === "string" ? record.toolName : "tool";
+    const output = serializePromptValue(record.output);
+    return output ? `tool-result ${toolName}: ${output}` : `tool-result ${toolName}`;
+  }
+
+  if ("content" in record) {
+    const nestedContent = serializePromptValue(record.content);
+    if (nestedContent) {
+      return nestedContent;
+    }
+  }
+
+  return safeStringify(sanitizeRecord(record));
+}
+
+/** Shallow-copies a record, replacing values under `LARGE_BINARY_KEYS` with "[omitted]". */
+function sanitizeRecord(record: Record<string, unknown>): Record<string, unknown> {
+  const sanitized: Record<string, unknown> = {};
+
+  for (const [key, value] of Object.entries(record)) {
+    sanitized[key] = LARGE_BINARY_KEYS.has(key) ? "[omitted]" : value;
+  }
+
+  return sanitized;
+}
+
+/**
+ * Builds the serializable summary of one tool that feeds the tool-token estimate.
+ *
+ * Only fields that are present on the tool are copied; non-object tools yield `{ name }`.
+ */
+function serializeToolDefinition(name: string, tool: unknown): Record<string, unknown> {
+  if (!tool || typeof tool !== "object") {
+    return { name };
+  }
+
+  const candidate = tool as Record<string, unknown>;
+  // Pick the first truthy schema alias so the presence check and the selected value agree.
+  const inputSchema =
+    candidate.inputSchema || candidate.parameters || candidate.input_schema || candidate.schema;
+  const outputSchema = candidate.outputSchema || candidate.output_schema;
+
+  return {
+    name,
+    ...(typeof candidate.type === "string" ? { type: candidate.type } : {}),
+    ...(typeof candidate.id === "string" ? { id: candidate.id } : {}),
+    ...(typeof candidate.description === "string" ? { description: candidate.description } : {}),
+    ...(inputSchema ? { inputSchema: normalizeSchema(inputSchema) } : {}),
+    ...(outputSchema ? { outputSchema: normalizeSchema(outputSchema) } : {}),
+    ...(candidate.providerOptions ? { providerOptions: candidate.providerOptions } : {}),
+    ...(candidate.args ? { args: sanitizeRecord(candidate.args as Record<string, unknown>) } : {}),
+    ...(candidate.needsApproval !== undefined ? { needsApproval: candidate.needsApproval } : {}),
+  };
+}
+
+/**
+ * Converts a schema carrying a `_def` property through `zodSchemaToJsonUI`; any other value,
+ * or a schema whose conversion throws, is returned unchanged.
+ */
+function normalizeSchema(schema: unknown): unknown {
+  if (!schema || typeof schema !== "object") {
+    return schema;
+  }
+
+  try {
+    if ("_def" in (schema as Record<string, unknown>)) {
+      return zodSchemaToJsonUI(schema);
+    }
+  } catch (_error) {
+    return schema;
+  }
+
+  return schema;
+}