diff --git a/.changeset/runtime-memory-envelope.md b/.changeset/runtime-memory-envelope.md new file mode 100644 index 000000000..a5ec8c717 --- /dev/null +++ b/.changeset/runtime-memory-envelope.md @@ -0,0 +1,67 @@ +--- +"@voltagent/core": patch +"@voltagent/server-core": patch +"@voltagent/resumable-streams": patch +--- + +feat: add runtime memory envelope (`options.memory`) and deprecate legacy top-level memory fields + +### What's New + +- Added a preferred per-call memory envelope: + - `options.memory.conversationId` for conversation-scoped memory + - `options.memory.userId` for user-scoped memory + - `options.memory.options` for memory behavior overrides (`contextLimit`, `semanticMemory`, `conversationPersistence`) +- Kept legacy top-level fields for backward compatibility: + - `options.conversationId`, `options.userId`, `options.contextLimit`, `options.semanticMemory`, `options.conversationPersistence` +- Legacy fields are now marked deprecated in type/docs, and envelope values are preferred when both are provided. + +### Usage Examples + +Legacy (still supported, deprecated): + +```ts +await agent.generateText("Hello", { + userId: "user-123", + conversationId: "conv-123", + contextLimit: 20, + semanticMemory: { + enabled: true, + semanticLimit: 5, + }, + conversationPersistence: { + mode: "step", + debounceMs: 150, + }, +}); +``` + +Preferred (new `memory` envelope): + +```ts +await agent.generateText("Hello", { + memory: { + userId: "user-123", + conversationId: "conv-123", + options: { + contextLimit: 20, + semanticMemory: { + enabled: true, + semanticLimit: 5, + }, + conversationPersistence: { + mode: "step", + debounceMs: 150, + }, + }, + }, +}); +``` + +### Server and Resumable Stream Alignment + +- `@voltagent/server-core` now accepts/documents the `options.memory` envelope in request schemas. +- Resumable stream identity resolution now reads `conversationId`/`userId` from `options.memory` first and falls back to legacy fields. +- Added tests for: + - parsing `options.memory` in server schemas + - resolving resumable stream keys from `options.memory` diff --git a/packages/core/src/agent/agent.spec-d.ts b/packages/core/src/agent/agent.spec-d.ts index 2596e5ca0..ec5cba3ec 100644 --- a/packages/core/src/agent/agent.spec-d.ts +++ b/packages/core/src/agent/agent.spec-d.ts @@ -412,14 +412,38 @@ describe("Agent Type System", () => { describe("Options Type Tests", () => { it("should distinguish PublicGenerateOptions from InternalGenerateOptions", () => { const publicOptions: PublicGenerateOptions = { - conversationId: "123", - userId: "user-123", - contextLimit: 1000, + memory: { + conversationId: "123", + userId: "user-123", + options: { + contextLimit: 1000, + semanticMemory: { + enabled: true, + semanticLimit: 3, + }, + conversationPersistence: { + mode: "step", + }, + }, + }, maxSteps: 5, signal: new AbortSignal(), context: new Map(), }; + const legacyOptions: PublicGenerateOptions = { + conversationId: "legacy-conversation", + userId: "legacy-user", + contextLimit: 250, + semanticMemory: { + enabled: true, + semanticThreshold: 0.75, + }, + conversationPersistence: { + mode: "finish", + }, + }; + const internalOptions: InternalGenerateOptions = { ...publicOptions, parentAgentId: "parent-123", @@ -432,6 +456,7 @@ describe("Agent Type System", () => { publicOptions.operationContext; expectTypeOf(publicOptions).toMatchTypeOf(); + expectTypeOf(legacyOptions).toMatchTypeOf(); expectTypeOf(internalOptions).toMatchTypeOf(); }); diff --git a/packages/core/src/agent/agent.spec.ts b/packages/core/src/agent/agent.spec.ts index 70561dea9..f369dbb1c 100644 --- a/packages/core/src/agent/agent.spec.ts +++ b/packages/core/src/agent/agent.spec.ts @@ -1918,6 +1918,211 @@ Use pandas and summarize findings.`.split("\n"), // Context limit should be respected expect(callArgs).toBeDefined(); }); + + it("should prefer memory envelope over legacy memory fields", async () => { + const memory = new Memory({ + storage: new InMemoryStorageAdapter(), + }); + const getMessagesSpy = vi.spyOn(memory, "getMessages"); + + const agent = new Agent({ + name: "TestAgent", + instructions: "Test", + model: mockModel as any, + memory, + }); + + vi.mocked(ai.generateText).mockResolvedValue({ + text: "Response", + content: [], + reasoning: [], + files: [], + sources: [], + toolCalls: [], + toolResults: [], + finishReason: "stop", + usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + warnings: [], + request: {}, + response: { + id: "test", + modelId: "test-model", + timestamp: new Date(), + messages: [], + }, + steps: [], + } as any); + + await agent.generateText("Test", { + userId: "legacy-user", + conversationId: "legacy-conv", + contextLimit: 100, + memory: { + userId: "memory-user", + conversationId: "memory-conv", + options: { + contextLimit: 2, + }, + }, + }); + + const matchingCall = getMessagesSpy.mock.calls.find( + ([userId, conversationId, options]) => + userId === "memory-user" && conversationId === "memory-conv" && options?.limit === 2, + ); + const usedLegacyIds = getMessagesSpy.mock.calls.some( + ([userId, conversationId]) => userId === "legacy-user" && conversationId === "legacy-conv", + ); + + expect(getMessagesSpy.mock.calls.length).toBe(1); + expect(matchingCall).toBeDefined(); + expect(usedLegacyIds).toBe(false); + }); + + it("should fallback to legacy ids when memory envelope ids are blank", async () => { + const memory = new Memory({ + storage: new InMemoryStorageAdapter(), + }); + const getMessagesSpy = vi.spyOn(memory, "getMessages"); + + const agent = new Agent({ + name: "TestAgent", + instructions: "Test", + model: mockModel as any, + memory, + }); + + vi.mocked(ai.generateText).mockResolvedValue({ + text: "Response", + content: [], + reasoning: [], + files: [], + sources: [], + toolCalls: [], + toolResults: [], + finishReason: "stop", + usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + warnings: [], + request: {}, + response: { + id: "test", + modelId: "test-model", + timestamp: new Date(), + messages: [], + }, + steps: [], + } as any); + + await agent.generateText("Test", { + userId: "legacy-user", + conversationId: "legacy-conv", + memory: { + userId: " ", + conversationId: "", + }, + }); + + const matchingCall = getMessagesSpy.mock.calls.find( + ([userId, conversationId]) => userId === "legacy-user" && conversationId === "legacy-conv", + ); + + expect(getMessagesSpy.mock.calls.length).toBe(1); + expect(matchingCall).toBeDefined(); + }); + + it("should store resolved memory envelope on operation context", () => { + const agent = new Agent({ + name: "TestAgent", + instructions: "Test", + model: mockModel as any, + }); + + const operationContext = (agent as any).createOperationContext("input", { + userId: "legacy-user", + conversationId: "legacy-conv", + contextLimit: 99, + semanticMemory: { + enabled: true, + semanticLimit: 9, + }, + conversationPersistence: { + mode: "finish", + }, + memory: { + userId: "memory-user", + conversationId: "memory-conv", + options: { + contextLimit: 5, + semanticMemory: { + enabled: false, + semanticThreshold: 0.8, + }, + conversationPersistence: { + mode: "step", + debounceMs: 120, + }, + }, + }, + }); + + expect(operationContext.userId).toBe("memory-user"); + expect(operationContext.conversationId).toBe("memory-conv"); + expect(operationContext.resolvedMemory).toMatchObject({ + userId: "memory-user", + conversationId: "memory-conv", + contextLimit: 5, + semanticMemory: { + enabled: false, + semanticThreshold: 0.8, + }, + conversationPersistence: { + mode: "step", + debounceMs: 120, + }, + }); + }); + + it("should fallback to parent operation context resolved memory when call overrides are missing", () => { + const agent = new Agent({ + name: "TestAgent", + instructions: "Test", + model: mockModel as any, + }); + + const parentOperationContext = (agent as any).createOperationContext("parent-input", { + memory: { + userId: "memory-user", + conversationId: "memory-conv", + options: { + contextLimit: 4, + semanticMemory: { + enabled: true, + semanticLimit: 2, + }, + conversationPersistence: { + mode: "finish", + }, + }, + }, + }); + + const resolvedFromParent = (agent as any).resolveMemoryRuntimeOptions({ + parentOperationContext, + }); + + expect(resolvedFromParent).toMatchObject({ + userId: "memory-user", + conversationId: "memory-conv", + contextLimit: 4, + semanticMemory: { + enabled: true, + semanticLimit: 2, + }, + conversationPersistence: { + mode: "finish", + }, + }); + }); }); describe("Global Memory Defaults", () => { @@ -3154,6 +3359,46 @@ Use pandas and summarize findings.`.split("\n"), factorySpy.mockRestore(); }); + it("should resolve delegate tool identity from memory envelope", async () => { + const agent = new Agent({ + name: "TestAgent", + instructions: "Test", + model: mockModel as any, + }); + + const delegateTool = new Tool({ + name: "delegate-tool", + description: "Delegate tool", + parameters: z.object({}), + execute: vi.fn(), + }); + + const mockHasSubAgents = vi.fn().mockReturnValue(true); + const mockCreateDelegateTool = vi.fn().mockReturnValue(delegateTool); + (agent as any).subAgentManager = { + hasSubAgents: mockHasSubAgents, + createDelegateTool: mockCreateDelegateTool, + }; + + const operationContext = (agent as any).createOperationContext("input message"); + const options = { + conversationId: "legacy-conv", + userId: "legacy-user", + memory: { + conversationId: "memory-conv", + userId: "memory-user", + }, + } as any; + await (agent as any).prepareTools([], operationContext, 7, options); + + expect(mockCreateDelegateTool).toHaveBeenCalledWith( + expect.objectContaining({ + conversationId: "memory-conv", + userId: "memory-user", + }), + ); + }); + it("should include working memory tools produced at runtime", async () => { const agent = new Agent({ name: "TestAgent", @@ -3176,7 +3421,7 @@ Use pandas and summarize findings.`.split("\n"), const options = { conversationId: "conv-2" } as any; const prepared = await (agent as any).prepareTools([], operationContext, 4, options); - expect(workingMemorySpy).toHaveBeenCalledWith(options); + expect(workingMemorySpy).toHaveBeenCalledWith(options, operationContext); expect(prepared.get_working_memory).toBeDefined(); expect(typeof prepared.get_working_memory.execute).toBe("function"); @@ -3880,5 +4125,45 @@ Use pandas and summarize findings.`.split("\n"), expect(systemMessage.content).toContain("Relevant Context:"); expect(systemMessage.content).toContain("Retrieved context for query"); }); + + it("should include user-scoped working memory when conversationId is not set", async () => { + const memory = new Memory({ + storage: new InMemoryStorageAdapter(), + workingMemory: { + enabled: true, + scope: "user", + }, + }); + + await memory.updateWorkingMemory({ + userId: "user-1", + content: "Preferred language: Turkish", + }); + + const agent = new Agent({ + name: "TestAgent", + instructions: "Base instructions", + model: mockModel as any, + memory, + }); + + const operationContext = (agent as any).createOperationContext("user input", { + memory: { + userId: "user-1", + }, + }); + + const systemMessage = await (agent as any).getSystemMessage("user input", operationContext, { + memory: { + userId: "user-1", + }, + }); + + expect(systemMessage).toMatchObject({ + role: "system", + }); + expect(systemMessage.content).toContain(""); + expect(systemMessage.content).toContain("Preferred language: Turkish"); + }); }); }); diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 251f645c4..33795a374 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -117,6 +117,11 @@ import type { } from "./providers/base/types"; import { coerceStringifiedJsonToolArgs } from "./tool-input-coercion"; export type { AgentHooks } from "./hooks"; +export type { + RuntimeMemoryBehaviorOptions, + RuntimeMemoryEnvelope, + SemanticMemoryOptions, +} from "./types"; import { P, match } from "ts-pattern"; import type { StopWhen } from "../ai-types"; import type { SamplingPolicy } from "../eval/runtime"; @@ -183,6 +188,7 @@ import type { AgentSummarizationOptions, AgentToolRoutingState, ApiToolInfo, + CommonResolvedRuntimeMemoryOptions, DynamicValue, DynamicValueOptions, InputGuardrail, @@ -191,6 +197,8 @@ import type { OperationContext, OutputGuardrail, OutputMiddleware, + RuntimeMemoryEnvelope, + SemanticMemoryOptions, SupervisorConfig, } from "./types"; @@ -233,6 +241,15 @@ const isRecord = (value: unknown): value is Record => const hasNonEmptyString = (value: unknown): value is string => typeof value === "string" && value.trim().length > 0; +const firstNonBlank = (...values: Array): string | undefined => { + for (const value of values) { + if (hasNonEmptyString(value)) { + return value; + } + } + return undefined; +}; + const isAssistantContentPart = (value: unknown): boolean => { if (!isRecord(value)) { return false; @@ -679,7 +696,17 @@ export interface BaseGenerationOptions { // === VoltAgent Specific === // Context + /** + * Runtime memory envelope for per-call memory identity and behavior overrides. + */ + memory?: RuntimeMemoryEnvelope; + /** + * @deprecated Use `memory.userId` instead. + */ userId?: string; + /** + * @deprecated Use `memory.conversationId` instead. + */ conversationId?: string; context?: ContextInput; elicitation?: (request: unknown) => Promise; @@ -691,15 +718,19 @@ export interface BaseGenerationOptions { const resolvedInput = await this.validateIncomingUIMessages(input, oc); const messages: UIMessage[] = []; - - // Get system message with retriever context and working memory - const systemMessage = await this.getSystemMessage(resolvedInput, oc, options, runtimeToolkits); - if (systemMessage) { - const systemMessagesAsUI: UIMessage[] = (() => { - if (typeof systemMessage === "string") { - return [ - { - id: randomUUID(), - role: "system", - parts: [ - { - type: "text", - text: systemMessage, - }, - ], - }, - ]; - } - - if (Array.isArray(systemMessage)) { - return convertModelMessagesToUIMessages(systemMessage); - } - - return convertModelMessagesToUIMessages([systemMessage]); - })(); - - for (const systemUIMessage of systemMessagesAsUI) { - messages.push(systemUIMessage); - } - - const instructionText = systemMessagesAsUI - .flatMap((msg) => - msg.parts.flatMap((part) => - part.type === "text" && typeof (part as any).text === "string" - ? [(part as any).text as string] - : [], - ), - ) - .join("\n\n"); - - if (instructionText) { - oc.traceContext.setInstructions(instructionText); - } - } - - const middlewareRetryFeedback = this.consumeMiddlewareRetryFeedback(oc); - if (middlewareRetryFeedback) { - messages.push({ - id: randomUUID(), - role: "system", - parts: [{ type: "text", text: middlewareRetryFeedback }], - }); - } - - const canIUseMemory = options?.userId && options.conversationId; + const resolvedMemory = this.resolveMemoryRuntimeOptions(options, oc); + const canIUseMemory = Boolean(resolvedMemory.userId); + const memoryContextMessages: UIMessage[] = []; // Load memory context if available (already returns UIMessages) if (canIUseMemory) { // Check if we should use semantic search // Default to true if vector support is available - const useSemanticSearch = options?.semanticMemory?.enabled ?? this.hasSemanticSearchSupport(); + const useSemanticSearch = + resolvedMemory.semanticMemory?.enabled ?? this.hasSemanticSearchSupport(); // Extract user query for semantic search if enabled const currentQuery = useSemanticSearch ? this.extractUserQuery(resolvedInput) : undefined; // Prepare memory read parameters - const semanticLimit = options?.semanticMemory?.semanticLimit ?? 5; - const semanticThreshold = options?.semanticMemory?.semanticThreshold ?? 0.7; - const mergeStrategy = options?.semanticMemory?.mergeStrategy ?? "append"; + const semanticLimit = resolvedMemory.semanticMemory?.semanticLimit ?? 5; + const semanticThreshold = resolvedMemory.semanticMemory?.semanticThreshold ?? 0.7; + const mergeStrategy = resolvedMemory.semanticMemory?.mergeStrategy ?? "append"; const isSemanticSearch = useSemanticSearch && currentQuery; const traceContext = oc.traceContext; @@ -4284,8 +4324,8 @@ export class Agent { const spanInput = { query: isSemanticSearch ? currentQuery : resolvedInput, - userId: options?.userId, - conversationId: options?.conversationId, + userId: resolvedMemory.userId, + conversationId: resolvedMemory.conversationId, }; const memoryReadSpan = traceContext.createChildSpan("memory.read", "memory", { label: isSemanticSearch ? "Semantic Memory Read" : "Memory Context Read", @@ -4309,7 +4349,7 @@ export class Agent { oc, oc.userId, oc.conversationId, - options?.contextLimit, + resolvedMemory.contextLimit, { useSemanticSearch: true, currentQuery, @@ -4337,11 +4377,14 @@ export class Agent { inputForMemory, oc.userId, oc.conversationId, - options?.contextLimit, + resolvedMemory.contextLimit, ); // Update conversation ID oc.conversationId = result.conversationId; + if (oc.resolvedMemory) { + oc.resolvedMemory.conversationId = result.conversationId; + } buffer.ingestUIMessages(result.messages, true); @@ -4360,10 +4403,12 @@ export class Agent { // Ensure conversation ID exists for semantic search if (isSemanticSearch && !oc.conversationId) { oc.conversationId = randomUUID(); + if (oc.resolvedMemory) { + oc.resolvedMemory.conversationId = oc.conversationId; + } } - // Add memory messages - messages.push(...memoryResult); + memoryContextMessages.push(...memoryResult); // When using semantic search, also persist the current input in background // so user messages are stored and embedded consistently. @@ -4389,6 +4434,64 @@ export class Agent { } } + // Get system message with retriever context and working memory + const systemMessage = await this.getSystemMessage(resolvedInput, oc, options, runtimeToolkits); + if (systemMessage) { + const systemMessagesAsUI: UIMessage[] = (() => { + if (typeof systemMessage === "string") { + return [ + { + id: randomUUID(), + role: "system", + parts: [ + { + type: "text", + text: systemMessage, + }, + ], + }, + ]; + } + + if (Array.isArray(systemMessage)) { + return convertModelMessagesToUIMessages(systemMessage); + } + + return convertModelMessagesToUIMessages([systemMessage]); + })(); + + for (const systemUIMessage of systemMessagesAsUI) { + messages.push(systemUIMessage); + } + + const instructionText = systemMessagesAsUI + .flatMap((msg) => + msg.parts.flatMap((part) => + part.type === "text" && typeof (part as any).text === "string" + ? [(part as any).text as string] + : [], + ), + ) + .join("\n\n"); + + if (instructionText) { + oc.traceContext.setInstructions(instructionText); + } + } + + const middlewareRetryFeedback = this.consumeMiddlewareRetryFeedback(oc); + if (middlewareRetryFeedback) { + messages.push({ + id: randomUUID(), + role: "system", + parts: [{ type: "text", text: middlewareRetryFeedback }], + }); + } + + if (memoryContextMessages.length > 0) { + messages.push(...memoryContextMessages); + } + // Add current input if (typeof resolvedInput === "string") { messages.push({ @@ -4478,6 +4581,10 @@ export class Agent { options?: BaseGenerationOptions, runtimeToolkits: Toolkit[] = [], ): Promise { + const resolvedMemory = this.resolveMemoryRuntimeOptions(options, oc); + const workingMemoryConversationId = oc.conversationId ?? resolvedMemory.conversationId; + const workingMemoryUserId = oc.userId ?? resolvedMemory.userId; + // Resolve dynamic instructions const promptHelper = VoltOpsClientClass.createPromptHelperWithFallback( this.id, @@ -4548,17 +4655,23 @@ export class Agent { retrieverContext = await this.getRetrieverContext(input, oc); } - // Get working memory instructions if available + // Get working memory instructions if available. + // Prefer conversation scope when conversationId exists; otherwise fall back to user scope. let workingMemoryContext: string | null = null; - if (this.hasWorkingMemorySupport() && options?.conversationId) { + const workingMemoryLookup = + workingMemoryConversationId || workingMemoryUserId + ? { + ...(workingMemoryConversationId ? { conversationId: workingMemoryConversationId } : {}), + ...(workingMemoryUserId ? { userId: workingMemoryUserId } : {}), + } + : undefined; + if (this.hasWorkingMemorySupport() && workingMemoryLookup) { const memory = this.memoryManager.getMemory(); if (memory) { // Get full working memory instructions with current data - const workingMemoryInstructions = await memory.getWorkingMemoryInstructions({ - conversationId: options.conversationId, - userId: options.userId, - }); + const workingMemoryInstructions = + await memory.getWorkingMemoryInstructions(workingMemoryLookup); if (workingMemoryInstructions) { workingMemoryContext = `\n\n${workingMemoryInstructions}`; @@ -4569,10 +4682,7 @@ export class Agent { const rootSpan = oc.traceContext.getRootSpan(); // Get the raw working memory content - const workingMemoryContent = await memory.getWorkingMemory({ - conversationId: options.conversationId, - userId: options.userId, - }); + const workingMemoryContent = await memory.getWorkingMemory(workingMemoryLookup); if (workingMemoryContent) { rootSpan.setAttribute("agent.workingMemory.content", workingMemoryContent); @@ -5464,6 +5574,7 @@ export class Agent { maxSteps: number, options?: BaseGenerationOptions, ): Promise> { + const resolvedMemory = this.resolveMemoryRuntimeOptions(options, oc); const hooks = this.getMergedHooks(options); const createToolExecuteFunction = this.createToolExecutionFactory(oc, hooks); @@ -5476,13 +5587,13 @@ export class Agent { currentHistoryEntryId: oc.operationId, operationContext: oc, maxSteps: maxSteps, - conversationId: options?.conversationId, - userId: options?.userId, + conversationId: resolvedMemory.conversationId, + userId: resolvedMemory.userId, }); runtimeTools.push(delegateTool); } // Add working memory tools if Memory V2 with working memory is configured - const workingMemoryTools = this.createWorkingMemoryTools(options); + const workingMemoryTools = this.createWorkingMemoryTools(options, oc); if (workingMemoryTools.length > 0) { runtimeTools.push(...workingMemoryTools); } @@ -7757,13 +7868,36 @@ export class Agent { // Extract OperationContext from options if available // Since ToolExecuteOptions extends Partial, we can extract the fields const oc = options as OperationContext | undefined; + const resolvedMemory = options?.resolvedMemory; + const memoryBehaviorOverrides = resolvedMemory + ? { + ...(resolvedMemory.contextLimit !== undefined + ? { contextLimit: resolvedMemory.contextLimit } + : {}), + ...(resolvedMemory.semanticMemory !== undefined + ? { semanticMemory: resolvedMemory.semanticMemory } + : {}), + ...(resolvedMemory.conversationPersistence !== undefined + ? { conversationPersistence: resolvedMemory.conversationPersistence } + : {}), + } + : undefined; + const memory = + resolvedMemory || options?.conversationId || options?.userId + ? { + conversationId: resolvedMemory?.conversationId ?? options?.conversationId, + userId: resolvedMemory?.userId ?? options?.userId, + ...(memoryBehaviorOverrides && Object.keys(memoryBehaviorOverrides).length > 0 + ? { options: memoryBehaviorOverrides } + : {}), + } + : undefined; // Generate response using this agent const result = await this.generateText(prompt, { // Pass through the operation context if available parentOperationContext: oc, - conversationId: options?.conversationId, - userId: options?.userId, + ...(memory ? { memory } : {}), }); // Return the text result @@ -7805,10 +7939,14 @@ export class Agent { /** * Create working memory tools if configured */ - private createWorkingMemoryTools(options?: BaseGenerationOptions): Tool[] { + private createWorkingMemoryTools( + options?: BaseGenerationOptions, + operationContext?: OperationContext, + ): Tool[] { if (!this.hasWorkingMemorySupport()) { return []; } + const resolvedMemory = this.resolveMemoryRuntimeOptions(options, operationContext); const memoryManager = this.memoryManager as unknown as MemoryManager; const memory = memoryManager.getMemory(); @@ -7827,8 +7965,8 @@ export class Agent { parameters: z.object({}), execute: async () => { const content = await memory.getWorkingMemory({ - conversationId: options?.conversationId, - userId: options?.userId, + conversationId: resolvedMemory.conversationId, + userId: resolvedMemory.userId, }); return content || "No working memory content found."; }, @@ -7862,8 +8000,8 @@ export class Agent { parameters: z.object({ ...baseParams, ...modeParam }), execute: async ({ content, mode }, oc) => { await memory.updateWorkingMemory({ - conversationId: options?.conversationId, - userId: options?.userId, + conversationId: resolvedMemory.conversationId, + userId: resolvedMemory.userId, content, options: { mode: mode as MemoryUpdateMode | undefined, @@ -7873,8 +8011,8 @@ export class Agent { // Update root span with final content if (oc?.traceContext) { const finalContent = await memory.getWorkingMemory({ - conversationId: options?.conversationId, - userId: options?.userId, + conversationId: resolvedMemory.conversationId, + userId: resolvedMemory.userId, }); const rootSpan = oc.traceContext.getRootSpan(); rootSpan.setAttribute("agent.workingMemory.finalContent", finalContent || ""); @@ -7894,8 +8032,8 @@ export class Agent { parameters: z.object({}), execute: async (_, oc) => { await memory.clearWorkingMemory({ - conversationId: options?.conversationId, - userId: options?.userId, + conversationId: resolvedMemory.conversationId, + userId: resolvedMemory.userId, }); // Update root span to indicate cleared state diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index 78b0434eb..e6b2f43a4 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -943,19 +943,74 @@ export type ProviderType = T extends { llm: LLMProvider } ? P : neve * Common generate options - internal version that includes historyEntryId * Not exposed directly to users */ +export interface CommonSemanticMemoryOptions { + enabled?: boolean; + semanticLimit?: number; + semanticThreshold?: number; + mergeStrategy?: "prepend" | "append" | "interleave"; +} + +export interface CommonRuntimeMemoryBehaviorOptions { + contextLimit?: number; + semanticMemory?: CommonSemanticMemoryOptions; + conversationPersistence?: AgentConversationPersistenceOptions; +} + +export interface CommonRuntimeMemoryEnvelope { + conversationId?: string; + userId?: string; + options?: CommonRuntimeMemoryBehaviorOptions; +} + +export type SemanticMemoryOptions = CommonSemanticMemoryOptions; +export type RuntimeMemoryBehaviorOptions = CommonRuntimeMemoryBehaviorOptions; +export type RuntimeMemoryEnvelope = CommonRuntimeMemoryEnvelope; + +export interface CommonResolvedRuntimeMemoryOptions { + userId?: string; + conversationId?: string; + contextLimit?: number; + semanticMemory?: CommonSemanticMemoryOptions; + conversationPersistence?: AgentConversationPersistenceOptions; +} + export interface CommonGenerateOptions { // Common LLM provider properties provider?: ProviderOptions; + // Preferred runtime memory envelope for per-call memory identity and behavior overrides. + memory?: CommonRuntimeMemoryEnvelope; + + /** + * @deprecated Use `memory.conversationId` instead. + */ // Conversation ID to maintain context conversationId?: string; + /** + * @deprecated Use `memory.userId` instead. + */ // User ID for authentication userId?: string; + /** + * @deprecated Use `memory.options.contextLimit` instead. + */ // Context limit for conversation contextLimit?: number; + /** + * @deprecated Use `memory.options.semanticMemory` instead. + */ + // Semantic memory runtime overrides + semanticMemory?: CommonSemanticMemoryOptions; + + /** + * @deprecated Use `memory.options.conversationPersistence` instead. + */ + // Conversation persistence runtime overrides + conversationPersistence?: AgentConversationPersistenceOptions; + // Specific tools to use for this generation (overrides agent's tools) tools?: BaseTool[]; @@ -1202,6 +1257,9 @@ export type OperationContext = { /** Optional conversation identifier associated with this operation */ conversationId?: string; + /** Resolved runtime memory options (memory envelope preferred, legacy as fallback) */ + resolvedMemory?: CommonResolvedRuntimeMemoryOptions; + /** Workspace configured on the executing agent (if any). */ workspace?: Workspace; diff --git a/packages/resumable-streams/src/chat-handlers.ts b/packages/resumable-streams/src/chat-handlers.ts index 81efb997e..5ef441082 100644 --- a/packages/resumable-streams/src/chat-handlers.ts +++ b/packages/resumable-streams/src/chat-handlers.ts @@ -221,6 +221,13 @@ function defaultResolveConversationId({ payload.options && typeof payload.options === "object" ? (payload.options as Record) : undefined; + const memory = + options?.memory && typeof options.memory === "object" + ? (options.memory as Record) + : undefined; + if (memory && typeof memory.conversationId === "string") { + return memory.conversationId; + } if (options && typeof options.conversationId === "string") { return options.conversationId; } @@ -243,6 +250,13 @@ function defaultResolveUserId({ payload.options && typeof payload.options === "object" ? (payload.options as Record) : undefined; + const memory = + options?.memory && typeof options.memory === "object" + ? (options.memory as Record) + : undefined; + if (memory && typeof memory.userId === "string") { + return memory.userId; + } if (options && typeof options.userId === "string") { return options.userId; } diff --git a/packages/server-core/src/handlers/agent.handlers.spec.ts b/packages/server-core/src/handlers/agent.handlers.spec.ts index d782ce200..25f98f44d 100644 --- a/packages/server-core/src/handlers/agent.handlers.spec.ts +++ b/packages/server-core/src/handlers/agent.handlers.spec.ts @@ -1,6 +1,6 @@ import { ToolDeniedError } from "@voltagent/core"; import { describe, expect, it, vi } from "vitest"; -import { handleGenerateText } from "./agent.handlers"; +import { handleChatStream, handleGenerateText } from "./agent.handlers"; describe("server-core: agent.handlers ClientHTTPError mapping", () => { it("handleGenerateText should map ClientHTTPError (ToolDeniedError) to ApiResponse error fields", async () => { @@ -57,3 +57,107 @@ describe("server-core: agent.handlers ClientHTTPError mapping", () => { }); }); }); + +describe("server-core: agent.handlers resumable memory envelope", () => { + it("handleChatStream should resolve conversationId/userId from options.memory for resumable streams", async () => { + const logger = { + error: vi.fn(), + warn: vi.fn(), + } as any; + + const streamText = vi.fn(async () => ({ + toUIMessageStreamResponse: vi.fn(() => new Response("ok", { status: 200 })), + })); + + const deps = { + agentRegistry: { + getAgent: vi.fn(() => ({ streamText })), + }, + resumableStreamDefault: false, + resumableStream: { + clearActiveStream: vi.fn(async () => undefined), + }, + } as any; + + const res = await handleChatStream( + "agent-1", + { + input: "hello", + options: { + resumableStream: true, + conversationId: "legacy-conv", + userId: "legacy-user", + memory: { + conversationId: "conv-1", + userId: "user-1", + }, + }, + }, + deps, + logger, + ); + + expect(res.status).toBe(200); + expect(streamText).toHaveBeenCalledWith( + "hello", + expect.objectContaining({ + resumableStream: true, + memory: { + conversationId: "conv-1", + userId: "user-1", + }, + }), + ); + expect(deps.resumableStream.clearActiveStream).toHaveBeenCalledWith({ + conversationId: "conv-1", + agentId: "agent-1", + userId: "user-1", + }); + }); + + it("handleChatStream should ignore blank memory.conversationId and fall back to legacy conversationId", async () => { + const logger = { + error: vi.fn(), + warn: vi.fn(), + } as any; + + const streamText = vi.fn(async () => ({ + toUIMessageStreamResponse: vi.fn(() => new Response("ok", { status: 200 })), + })); + + const deps = { + agentRegistry: { + getAgent: vi.fn(() => ({ streamText })), + }, + resumableStreamDefault: false, + resumableStream: { + clearActiveStream: vi.fn(async () => undefined), + }, + } as any; + + const res = await handleChatStream( + "agent-1", + { + input: "hello", + options: { + resumableStream: true, + conversationId: "legacy-conv", + userId: "legacy-user", + memory: { + conversationId: " ", + userId: "user-1", + }, + }, + }, + deps, + logger, + ); + + expect(res.status).toBe(200); + expect(deps.resumableStream.clearActiveStream).toHaveBeenCalledWith({ + conversationId: "legacy-conv", + agentId: "agent-1", + userId: "user-1", + }); + }); +}); diff --git a/packages/server-core/src/handlers/agent.handlers.ts b/packages/server-core/src/handlers/agent.handlers.ts index 247957730..f4ff0db21 100644 --- a/packages/server-core/src/handlers/agent.handlers.ts +++ b/packages/server-core/src/handlers/agent.handlers.ts @@ -246,12 +246,24 @@ export async function handleChatStream( ? body.options.resumableStream : (deps.resumableStreamDefault ?? false); const options = processAgentOptions(body, signal); + const memory = + options.memory && typeof options.memory === "object" ? options.memory : undefined; + const memoryConversationId = + memory && typeof memory.conversationId === "string" && memory.conversationId.trim().length > 0 + ? memory.conversationId + : undefined; + const memoryUserId = + memory && typeof memory.userId === "string" && memory.userId.trim().length > 0 + ? memory.userId + : undefined; const conversationId = - typeof options.conversationId === "string" ? options.conversationId : undefined; + memoryConversationId ?? + (typeof options.conversationId === "string" ? options.conversationId : undefined); const userId = - typeof options.userId === "string" && options.userId.trim().length > 0 + memoryUserId ?? + (typeof options.userId === "string" && options.userId.trim().length > 0 ? options.userId - : undefined; + : undefined); const resumableEnabled = Boolean(deps.resumableStream); const resumableStreamEnabled = resumableEnabled && diff --git a/packages/server-core/src/schemas/agent.schemas.spec.ts b/packages/server-core/src/schemas/agent.schemas.spec.ts index dcc511bcb..5e5e02de8 100644 --- a/packages/server-core/src/schemas/agent.schemas.spec.ts +++ b/packages/server-core/src/schemas/agent.schemas.spec.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { WorkflowExecutionRequestSchema } from "./agent.schemas"; +import { GenerateOptionsSchema, WorkflowExecutionRequestSchema } from "./agent.schemas"; describe("WorkflowExecutionRequestSchema", () => { it("accepts options.workflowState payload", () => { @@ -46,3 +46,74 @@ describe("WorkflowExecutionRequestSchema", () => { }); }); }); + +describe("GenerateOptionsSchema", () => { + it("accepts options.memory envelope with nested runtime overrides", () => { + const payload = { + memory: { + userId: "user-1", + conversationId: "conv-1", + options: { + contextLimit: 12, + semanticMemory: { + enabled: true, + semanticLimit: 4, + semanticThreshold: 0.8, + mergeStrategy: "append", + }, + conversationPersistence: { + mode: "step", + debounceMs: 150, + flushOnToolResult: true, + }, + }, + }, + }; + + const result = GenerateOptionsSchema.parse(payload); + + expect(result.memory?.userId).toBe("user-1"); + expect(result.memory?.conversationId).toBe("conv-1"); + expect(result.memory?.options?.contextLimit).toBe(12); + expect(result.memory?.options?.semanticMemory?.enabled).toBe(true); + expect(result.memory?.options?.semanticMemory?.semanticLimit).toBe(4); + expect(result.memory?.options?.semanticMemory?.semanticThreshold).toBe(0.8); + expect(result.memory?.options?.semanticMemory?.mergeStrategy).toBe("append"); + expect(result.memory?.options?.conversationPersistence?.mode).toBe("step"); + expect(result.memory?.options?.conversationPersistence?.debounceMs).toBe(150); + expect(result.memory?.options?.conversationPersistence?.flushOnToolResult).toBe(true); + }); + + it("keeps legacy top-level memory fields for backward compatibility", () => { + const payload = { + userId: "legacy-user", + conversationId: "legacy-conv", + contextLimit: 10, + semanticMemory: { + enabled: true, + }, + conversationPersistence: { + mode: "finish", + }, + }; + + expect(() => GenerateOptionsSchema.parse(payload)).not.toThrow(); + }); + + it("rejects invalid nested memory.options values", () => { + const payload = { + memory: { + userId: "user-1", + conversationId: "conv-1", + options: { + contextLimit: "12", + semanticMemory: { + mergeStrategy: "invalid-strategy", + }, + }, + }, + }; + + expect(() => GenerateOptionsSchema.parse(payload)).toThrow(); + }); +}); diff --git a/packages/server-core/src/schemas/agent.schemas.ts b/packages/server-core/src/schemas/agent.schemas.ts index 75dbf0f7c..bdd94c311 100644 --- a/packages/server-core/src/schemas/agent.schemas.ts +++ b/packages/server-core/src/schemas/agent.schemas.ts @@ -164,14 +164,87 @@ const FeedbackOptionsSchema = z .passthrough() .describe("Feedback options for the generated trace"); +const SemanticMemoryOptionsSchema = z + .object({ + enabled: z + .boolean() + .optional() + .describe( + "Enable semantic retrieval for this call (default: auto when vectors are available)", + ), + semanticLimit: z + .number() + .int() + .positive() + .optional() + .describe("Number of similar messages to retrieve"), + semanticThreshold: z + .number() + .min(0) + .max(1) + .optional() + .describe("Minimum similarity score (0-1)"), + mergeStrategy: z + .enum(["prepend", "append", "interleave"]) + .optional() + .describe("How semantic results merge with recent history"), + }) + .passthrough(); + +const ConversationPersistenceOptionsSchema = z + .object({ + mode: z + .enum(["step", "finish"]) + .optional() + .describe("Persistence strategy: checkpoint by step or persist on finish"), + debounceMs: z + .number() + .int() + .positive() + .optional() + .describe("Debounce window for step checkpoint persistence"), + flushOnToolResult: z + .boolean() + .optional() + .describe("Flush immediately on tool-result/tool-error in step mode"), + }) + .passthrough(); + +const RuntimeMemoryBehaviorOptionsSchema = z + .object({ + contextLimit: z + .number() + .int() + .positive() + .optional() + .describe("Number of previous messages to include from memory"), + semanticMemory: SemanticMemoryOptionsSchema.optional().describe( + "Semantic retrieval configuration for this call", + ), + conversationPersistence: ConversationPersistenceOptionsSchema.optional().describe( + "Per-call conversation persistence behavior", + ), + }) + .passthrough(); + +const RuntimeMemoryEnvelopeSchema = z + .object({ + conversationId: z.string().optional().describe("Conversation identifier for memory scoping"), + userId: z.string().optional().describe("User identifier for memory scoping"), + options: RuntimeMemoryBehaviorOptionsSchema.optional().describe( + "Per-call memory behavior overrides", + ), + }) + .passthrough(); + // Generation options schema export const GenerateOptionsSchema = z .object({ - userId: z - .string() - .optional() - .describe("Optional user ID for context tracking (required for resumable streams)"), - conversationId: z.string().optional().describe("Optional conversation ID for context tracking"), + memory: RuntimeMemoryEnvelopeSchema.optional().describe( + "Runtime memory envelope (preferred): memory.userId/memory.conversationId + memory.options.*", + ), + userId: z.string().optional().describe("Deprecated: use options.memory.userId"), + conversationId: z.string().optional().describe("Deprecated: use options.memory.conversationId"), context: z .record(z.string(), z.unknown()) .nullish() @@ -182,7 +255,13 @@ export const GenerateOptionsSchema = z .positive() .optional() .default(10) - .describe("Optional limit for conversation history context"), + .describe("Deprecated: use options.memory.options.contextLimit"), + semanticMemory: SemanticMemoryOptionsSchema.optional().describe( + "Deprecated: use options.memory.options.semanticMemory", + ), + conversationPersistence: ConversationPersistenceOptionsSchema.optional().describe( + "Deprecated: use options.memory.options.conversationPersistence", + ), maxSteps: z .number() .int() @@ -234,7 +313,7 @@ export const GenerateOptionsSchema = z .boolean() .optional() .describe( - "When true, avoids wiring the HTTP abort signal into streams so they can be resumed (requires resumable streams and options.conversationId + options.userId). If omitted, server defaults may apply.", + "When true, avoids wiring the HTTP abort signal into streams so they can be resumed (requires resumable streams and conversation/user IDs via options.memory or top-level legacy fields). If omitted, server defaults may apply.", ), output: z .object({ diff --git a/packages/server-core/src/utils/options.ts b/packages/server-core/src/utils/options.ts index f340d0dd9..c84885d10 100644 --- a/packages/server-core/src/utils/options.ts +++ b/packages/server-core/src/utils/options.ts @@ -7,6 +7,24 @@ import { convertJsonSchemaToZod as convertJsonSchemaToZodV3 } from "zod-from-jso * Process agent options from request body */ export interface ProcessedAgentOptions { + memory?: { + conversationId?: string; + userId?: string; + options?: { + contextLimit?: number; + semanticMemory?: { + enabled?: boolean; + semanticLimit?: number; + semanticThreshold?: number; + mergeStrategy?: "prepend" | "append" | "interleave"; + }; + conversationPersistence?: { + mode?: "step" | "finish"; + debounceMs?: number; + flushOnToolResult?: boolean; + }; + }; + }; conversationId?: string; userId?: string; context?: Map; @@ -14,6 +32,17 @@ export interface ProcessedAgentOptions { maxOutputTokens?: number; maxSteps?: number; contextLimit?: number; + semanticMemory?: { + enabled?: boolean; + semanticLimit?: number; + semanticThreshold?: number; + mergeStrategy?: "prepend" | "append" | "interleave"; + }; + conversationPersistence?: { + mode?: "step" | "finish"; + debounceMs?: number; + flushOnToolResult?: boolean; + }; topP?: number; topK?: number; frequencyPenalty?: number; diff --git a/website/docs/agents/memory.md b/website/docs/agents/memory.md index fbcfad929..ab9c9e56b 100644 --- a/website/docs/agents/memory.md +++ b/website/docs/agents/memory.md @@ -44,14 +44,18 @@ const agent = new Agent({ // First message await agent.generateText("My name is Sarah", { - userId: "user-123", - conversationId: "chat-001", + memory: { + userId: "user-123", + conversationId: "chat-001", + }, }); // Agent remembers context await agent.generateText("What's my name?", { - userId: "user-123", - conversationId: "chat-001", + memory: { + userId: "user-123", + conversationId: "chat-001", + }, }); ``` diff --git a/website/docs/agents/memory/in-memory.md b/website/docs/agents/memory/in-memory.md index fb9efc379..c2e64d1f0 100644 --- a/website/docs/agents/memory/in-memory.md +++ b/website/docs/agents/memory/in-memory.md @@ -101,8 +101,10 @@ const testAgent = new Agent({ // Test conversations without persistence await testAgent.generateText("Test message", { - userId: "test-user", - conversationId: "test-conversation", + memory: { + userId: "test-user", + conversationId: "test-conversation", + }, }); ``` @@ -120,8 +122,10 @@ export async function handler(event) { }); return await agent.generateText(event.message, { - userId: event.userId, - conversationId: event.sessionId, + memory: { + userId: event.userId, + conversationId: event.sessionId, + }, }); } ``` diff --git a/website/docs/agents/memory/overview.md b/website/docs/agents/memory/overview.md index 60048baa3..6c0032eb6 100644 --- a/website/docs/agents/memory/overview.md +++ b/website/docs/agents/memory/overview.md @@ -164,12 +164,14 @@ new VoltAgent({ ## Usage with User and Conversation IDs -Provide `userId` and `conversationId` in generation calls to scope memory: +Provide `memory.userId` and `memory.conversationId` in generation calls to scope memory: ```ts const response = await agent.generateText("What did we discuss yesterday?", { - userId: "user-123", - conversationId: "thread-abc", + memory: { + userId: "user-123", + conversationId: "thread-abc", + }, }); ``` @@ -236,12 +238,16 @@ const agent = new Agent({ // Enable semantic search per call const result = await agent.generateText("What preferences did I mention?", { - userId: "user-123", - conversationId: "thread-abc", - semanticMemory: { - enabled: true, - semanticLimit: 5, - semanticThreshold: 0.7, + memory: { + userId: "user-123", + conversationId: "thread-abc", + options: { + semanticMemory: { + enabled: true, + semanticLimit: 5, + semanticThreshold: 0.7, + }, + }, }, }); ``` @@ -343,7 +349,9 @@ const agent = new Agent({ // Pass tenant per request await agent.generateText("Query", { - userId: "user-123", + memory: { + userId: "user-123", + }, context: { tenantId: "company-abc" }, // Different tenant = different data }); ``` diff --git a/website/docs/agents/memory/semantic-search.md b/website/docs/agents/memory/semantic-search.md index 4a417559d..ce0d10d80 100644 --- a/website/docs/agents/memory/semantic-search.md +++ b/website/docs/agents/memory/semantic-search.md @@ -62,13 +62,17 @@ Enable semantic search per generation call: ```ts const result = await agent.generateText("What pricing model did we discuss?", { - userId: "user-123", - conversationId: "thread-abc", - semanticMemory: { - enabled: true, // default: auto-enabled when vector support is present - semanticLimit: 5, // number of similar messages to retrieve - semanticThreshold: 0.7, // minimum similarity score (0-1) - mergeStrategy: "append", // "prepend" | "append" | "interleave" + memory: { + userId: "user-123", + conversationId: "thread-abc", + options: { + semanticMemory: { + enabled: true, // default: auto-enabled when vector support is present + semanticLimit: 5, // number of similar messages to retrieve + semanticThreshold: 0.7, // minimum similarity score (0-1) + mergeStrategy: "append", // "prepend" | "append" | "interleave" + }, + }, }, }); ``` @@ -77,7 +81,7 @@ const result = await agent.generateText("What pricing model did we discuss?", { When `embedding` and `vector` adapters are configured: -- Semantic search auto-enables for calls with `userId` and `conversationId` +- Semantic search auto-enables for calls with `memory.userId` and `memory.conversationId` - Default `semanticLimit`: 5 messages - Default `semanticThreshold`: 0.7 - Default `mergeStrategy`: `"append"` (recent messages first, then similar messages) @@ -221,11 +225,15 @@ const agent = new Agent({ const result = await agent.generateText( "What did we decide about the API authentication approach?", { - userId: "user-123", - conversationId: "project-alpha", - semanticMemory: { - semanticLimit: 10, - semanticThreshold: 0.75, + memory: { + userId: "user-123", + conversationId: "project-alpha", + options: { + semanticMemory: { + semanticLimit: 10, + semanticThreshold: 0.75, + }, + }, }, } ); diff --git a/website/docs/agents/memory/working-memory.md b/website/docs/agents/memory/working-memory.md index de29a23f2..050057d4b 100644 --- a/website/docs/agents/memory/working-memory.md +++ b/website/docs/agents/memory/working-memory.md @@ -232,14 +232,18 @@ const agent = new Agent({ // First conversation await agent.generateText("I prefer casual communication and I'm into AI and music.", { - userId: "user-123", - conversationId: "conv-1", + memory: { + userId: "user-123", + conversationId: "conv-1", + }, }); // Different conversation - agent remembers user preferences await agent.generateText("What should I learn next?", { - userId: "user-123", - conversationId: "conv-2", // different thread + memory: { + userId: "user-123", + conversationId: "conv-2", // different thread + }, }); ``` @@ -273,13 +277,17 @@ const agent = new Agent({ // Each project gets its own working memory await agent.generateText("Let's plan the e-commerce project using Next.js.", { - userId: "user-123", - conversationId: "project-ecommerce", + memory: { + userId: "user-123", + conversationId: "project-ecommerce", + }, }); await agent.generateText("For the analytics dashboard, we'll use React and D3.", { - userId: "user-123", - conversationId: "project-analytics", + memory: { + userId: "user-123", + conversationId: "project-analytics", + }, }); ``` diff --git a/website/docs/agents/overview.md b/website/docs/agents/overview.md index 414b7cc2d..cffc6b8b8 100644 --- a/website/docs/agents/overview.md +++ b/website/docs/agents/overview.md @@ -242,8 +242,12 @@ You can also override this per call: ```ts await agent.generateText("Run the workflow", { - conversationPersistence: { - mode: "finish", + memory: { + options: { + conversationPersistence: { + mode: "finish", + }, + }, }, }); ``` @@ -357,8 +361,10 @@ export async function POST(req: Request) { const { messages, conversationId, userId } = await req.json(); const result = await agent.streamText(messages, { - conversationId, - userId, + memory: { + conversationId, + userId, + }, }); return result.toUIMessageStreamResponse(); diff --git a/website/docs/agents/voltagent-instance.md b/website/docs/agents/voltagent-instance.md index 163130fbf..1ee916d77 100644 --- a/website/docs/agents/voltagent-instance.md +++ b/website/docs/agents/voltagent-instance.md @@ -102,7 +102,8 @@ new VoltAgent({ **Precedence** -- Per-call `options.conversationPersistence` +- Per-call `options.memory.options.conversationPersistence` (preferred) +- Per-call `options.conversationPersistence` (deprecated) - Agent `conversationPersistence` - VoltAgent `agentConversationPersistence` - Built-in defaults (`mode: "step"`, `debounceMs: 200`, `flushOnToolResult: true`) diff --git a/website/docs/api/api-reference.md b/website/docs/api/api-reference.md index 5569bd743..dc9b9d7ee 100644 --- a/website/docs/api/api-reference.md +++ b/website/docs/api/api-reference.md @@ -48,9 +48,24 @@ Default port is 3141, but may vary based on configuration. { "input": "string or message array", "options": { - "userId": "string", - "conversationId": "string", - "contextLimit": 10, + "memory": { + "userId": "string", + "conversationId": "string", + "options": { + "contextLimit": 10, + "semanticMemory": { + "enabled": true, + "semanticLimit": 5, + "semanticThreshold": 0.7, + "mergeStrategy": "append" + }, + "conversationPersistence": { + "mode": "step", + "debounceMs": 200, + "flushOnToolResult": true + } + } + }, "maxSteps": 5, "temperature": 0.7, "maxOutputTokens": 4000, diff --git a/website/docs/api/endpoints/agents.md b/website/docs/api/endpoints/agents.md index a26a04759..8a2dc9346 100644 --- a/website/docs/api/endpoints/agents.md +++ b/website/docs/api/endpoints/agents.md @@ -86,9 +86,18 @@ Generate a text response from an agent synchronously. { "input": "What is the weather like today?", "options": { - "userId": "user-123", - "conversationId": "conv-456", - "contextLimit": 10, + "memory": { + "userId": "user-123", + "conversationId": "conv-456", + "options": { + "contextLimit": 10, + "conversationPersistence": { + "mode": "step", + "debounceMs": 200, + "flushOnToolResult": true + } + } + }, "maxSteps": 5, "temperature": 0.7, "maxOutputTokens": 1000, @@ -105,11 +114,6 @@ Generate a text response from an agent synchronously. "context": { "role": "admin", "tier": "premium" - }, - "conversationPersistence": { - "mode": "step", - "debounceMs": 200, - "flushOnToolResult": true } } } @@ -177,9 +181,27 @@ Generate a text response from an agent synchronously. **Options:** | Field | Type | Default | Description | |-------|------|---------|-------------| -| `userId` | string | - | User ID for tracking | -| `conversationId` | string | - | Conversation ID for context | -| `contextLimit` | number | 10 | Message history limit | +| `memory` | object | - | Runtime memory envelope (preferred) | +| `memory.userId` | string | - | User ID for memory scoping | +| `memory.conversationId` | string | - | Conversation ID for memory scoping | +| `memory.options.contextLimit` | number | 10 | Message history limit | +| `memory.options.semanticMemory` | object | - | Semantic retrieval config | +| `memory.options.semanticMemory.enabled` | boolean | - | Enable semantic retrieval for this call. Default: `undefined` (auto-enables if vectors are available). | +| `memory.options.semanticMemory.semanticLimit` | number | 5 | Number of similar messages to retrieve | +| `memory.options.semanticMemory.semanticThreshold` | number | 0.7 | Minimum similarity score (0-1) | +| `memory.options.semanticMemory.mergeStrategy` | string | `"append"` | `"prepend"` or `"append"` or `"interleave"` | +| `memory.options.conversationPersistence` | object | - | Groups conversation persistence settings (`mode`, `debounceMs`, `flushOnToolResult`) | +| `memory.options.conversationPersistence.mode` | string | `"step"` | Persistence strategy: `"step"` or `"finish"` | +| `memory.options.conversationPersistence.debounceMs` | number | `200` | Debounce interval for step checkpoint persistence | +| `memory.options.conversationPersistence.flushOnToolResult` | boolean | `true` | Flush immediately on `tool-result`/`tool-error` in step mode | +| `userId` | string | - | Deprecated: use `memory.userId` | +| `conversationId` | string | - | Deprecated: use `memory.conversationId` | +| `contextLimit` | number | 10 | Deprecated: use `memory.options.contextLimit` | +| `semanticMemory` | object | - | Deprecated: use `memory.options.semanticMemory` | +| `semanticMemory.enabled` | boolean | - | Deprecated: use `memory.options.semanticMemory.enabled`. Default: `undefined` (auto-enables if vectors are available). | +| `semanticMemory.semanticLimit` | number | 5 | Deprecated: use `memory.options.semanticMemory.semanticLimit` | +| `semanticMemory.semanticThreshold` | number | 0.7 | Deprecated: use `memory.options.semanticMemory.semanticThreshold` | +| `semanticMemory.mergeStrategy` | string | `"append"` | Deprecated: use `memory.options.semanticMemory.mergeStrategy` | | `maxSteps` | number | - | Max iteration steps (for tool use) | | `temperature` | number | 0.7 | Randomness (0-1) | | `maxOutputTokens` | number | 4000 | Max tokens to generate | @@ -190,9 +212,12 @@ Generate a text response from an agent synchronously. | `stopSequences` | string[] | - | Stop generation sequences | | `providerOptions` | object | - | Provider-specific options | | `context` | object | - | Dynamic agent context | -| `conversationPersistence.mode` | string | `"step"` | Persistence strategy: `"step"` or `"finish"` | -| `conversationPersistence.debounceMs` | number | `200` | Debounce interval for step checkpoint persistence | -| `conversationPersistence.flushOnToolResult` | boolean | `true` | Flush immediately on `tool-result`/`tool-error` in step mode | +| `conversationPersistence` | object | - | Deprecated: use `memory.options.conversationPersistence` (groups `mode`, `debounceMs`, `flushOnToolResult`) | +| `conversationPersistence.mode` | string | `"step"` | Deprecated: use `memory.options.conversationPersistence.mode` | +| `conversationPersistence.debounceMs` | number | `200` | Deprecated: use `memory.options.conversationPersistence.debounceMs` | +| `conversationPersistence.flushOnToolResult` | boolean | `true` | Deprecated: use `memory.options.conversationPersistence.flushOnToolResult` | + +When both top-level legacy memory fields and `memory` envelope fields are provided, runtime resolution follows `resolveMemoryRuntimeOptions()` and values under `memory` take precedence. **Response:** @@ -494,8 +519,10 @@ function ChatComponent({ agentId }) { body: { input: [lastMessage], // Send as array of UIMessage options: { - userId: "user-123", - conversationId: "conv-456", + memory: { + userId: "user-123", + conversationId: "conv-456", + }, temperature: 0.7, maxSteps: 10, }, @@ -670,7 +697,7 @@ The agent receives this context and can: 1. **Use streaming for long responses** - Better UX for lengthy generation 2. **Set appropriate temperature** - Lower (0.3) for factual, higher (0.9) for creative 3. **Limit tokens for cost control** - Use `maxOutputTokens` wisely -4. **Provide context** - Use `userId` and `conversationId` for conversation continuity +4. **Provide context** - Use `options.memory.userId` and `options.memory.conversationId` for conversation continuity 5. **Handle errors gracefully** - Check `success` field and handle errors 6. **Use abort signals** - Allow users to cancel long-running requests diff --git a/website/docs/ui/ai-sdk-integration.md b/website/docs/ui/ai-sdk-integration.md index 8a6865780..9608b4f80 100644 --- a/website/docs/ui/ai-sdk-integration.md +++ b/website/docs/ui/ai-sdk-integration.md @@ -92,8 +92,10 @@ function ChatWithMemory() { input: [lastMessage], options: { // Memory - userId, - conversationId, + memory: { + userId, + conversationId, + }, // Model parameters temperature: 0.7, @@ -137,7 +139,9 @@ function ChatWithFiles() { body: { input: [lastMessage], options: { - userId: 'user-123' + memory: { + userId: 'user-123' + } } } }; @@ -245,7 +249,7 @@ import { useCallback, useState, useRef } from 'react'; export function ChatInterface() { const [input, setInput] = useState(''); const [userId] = useState('user-123'); - const [conversationId] = useState(() => crypto.randomUUID()); + const [conversationId, setConversationId] = useState(() => crypto.randomUUID()); const createTransport = useCallback(() => { return new DefaultChatTransport({ @@ -257,8 +261,10 @@ export function ChatInterface() { body: { input: [lastMessage], options: { - userId, - conversationId, + memory: { + userId, + conversationId, + }, temperature: 0.7, maxSteps: 10 } @@ -293,7 +299,9 @@ export function ChatInterface() { }; const resetConversation = () => { + stop(); setMessages([]); + setConversationId(crypto.randomUUID()); }; return ( @@ -342,30 +350,44 @@ export function ChatInterface() { ### VoltAgent Specific -| Option | Type | Description | -| ------------------------------------------- | ------- | ------------------------------------------------------------------------------ | -| `userId` | string | User identifier for memory persistence | -| `conversationId` | string | Conversation thread ID | -| `context` | object | Dynamic context (converted to Map internally) | -| `contextLimit` | number | Number of previous messages to include from memory | -| `conversationPersistence.mode` | string | `"step"` (default) or `"finish"` | -| `conversationPersistence.debounceMs` | number | Debounce window in milliseconds (default: `200`) | -| `conversationPersistence.flushOnToolResult` | boolean | Flush immediately on `tool-result`/`tool-error` in step mode (default: `true`) | +| Option | Type | Description | +| ---------------------------------------------------------- | ------- | ------------------------------------------------------------------------------ | +| `memory` | object | Runtime memory envelope (preferred) | +| `memory.userId` | string | User identifier for memory persistence | +| `memory.conversationId` | string | Conversation thread ID | +| `context` | object | Dynamic context (converted to Map internally) | +| `memory.options.contextLimit` | number | Number of previous messages to include from memory | +| `memory.options.conversationPersistence.mode` | string | `"step"` (default) or `"finish"` | +| `memory.options.conversationPersistence.debounceMs` | number | Debounce window in milliseconds (default: `200`) | +| `memory.options.conversationPersistence.flushOnToolResult` | boolean | Flush immediately on `tool-result`/`tool-error` in step mode (default: `true`) | +| `userId` | string | Deprecated: use `memory.userId` | +| `conversationId` | string | Deprecated: use `memory.conversationId` | +| `contextLimit` | number | Deprecated: use `memory.options.contextLimit` | +| `semanticMemory` | object | Deprecated: use `memory.options.semanticMemory` | +| `conversationPersistence.mode` | string | Deprecated: use `memory.options.conversationPersistence.mode` | +| `conversationPersistence.debounceMs` | number | Deprecated: use `memory.options.conversationPersistence.debounceMs` | +| `conversationPersistence.flushOnToolResult` | boolean | Deprecated: use `memory.options.conversationPersistence.flushOnToolResult` | Example: ```ts options: { - userId, - conversationId, - conversationPersistence: { - mode: "step", - debounceMs: 200, - flushOnToolResult: true, + memory: { + userId, + conversationId, + options: { + conversationPersistence: { + mode: "step", + debounceMs: 200, + flushOnToolResult: true, + }, + }, }, } ``` +When both top-level legacy memory fields and `memory` envelope fields are provided, `memory` values are used. + ### AI SDK Core Options | Option | Type | Default | Description | @@ -397,11 +419,13 @@ options: { ### Semantic Memory Options -| Option | Type | Description | -| --------------------------------- | ------ | --------------------------------- | -| `semanticSearchOptions` | object | Configuration for semantic search | -| `semanticSearchOptions.maxChunks` | number | Maximum chunks to retrieve | -| `semanticSearchOptions.minScore` | number | Minimum similarity score | +| Option | Type | Description | +| ------------------------------------------------- | ------- | ------------------------------------------- | +| `memory.options.semanticMemory` | object | Configuration for semantic search | +| `memory.options.semanticMemory.enabled` | boolean | Enable semantic retrieval for this call | +| `memory.options.semanticMemory.semanticLimit` | number | Maximum similar messages to retrieve | +| `memory.options.semanticMemory.semanticThreshold` | number | Minimum similarity score | +| `memory.options.semanticMemory.mergeStrategy` | string | `"prepend"` or `"append"` or `"interleave"` | ## useChat Hook Options @@ -428,8 +452,8 @@ options: { ### Messages not persisting -- Include `userId` in options -- Use consistent `conversationId` +- Include `options.memory.userId` +- Use consistent `options.memory.conversationId` - Check agent memory configuration ### CORS errors diff --git a/website/docs/ui/assistant-ui.md b/website/docs/ui/assistant-ui.md index 65882682a..906846a10 100644 --- a/website/docs/ui/assistant-ui.md +++ b/website/docs/ui/assistant-ui.md @@ -173,8 +173,10 @@ export async function POST(req: Request) { setWaitUntil(after); const result = await agent.streamText([lastMessage], { - userId: userId ?? "anonymous-user", - conversationId: threadId, + memory: { + userId: userId ?? "anonymous-user", + conversationId: threadId, + }, }); return result.toUIMessageStreamResponse({ sendReasoning: true });