diff --git a/src/agent/agent.ts b/src/agent/agent.ts
index 3df6c425..ba12a8bc 100644
--- a/src/agent/agent.ts
+++ b/src/agent/agent.ts
@@ -1,4 +1,4 @@
-import { AIMessage } from '@langchain/core/messages';
+import { AIMessage, HumanMessage, BaseMessage } from '@langchain/core/messages';
 import { StructuredToolInterface } from '@langchain/core/tools';
 import { callLlm } from '../model/llm.js';
 import { Scratchpad, type ToolContext } from './scratchpad.js';
@@ -59,7 +59,7 @@ export class Agent {
   async *run(query: string, inMemoryHistory?: InMemoryChatHistory): AsyncGenerator {
     const startTime = Date.now();
     const tokenCounter = new TokenCounter();
-
+
     if (this.tools.length === 0) {
       yield { type: 'done', answer: 'No tools available. Please check your API key configuration.', toolCalls: [], iterations: 0, totalTime: Date.now() - startTime };
       return;
@@ -67,17 +67,20 @@ export class Agent {

     // Create scratchpad for this query - single source of truth for all work done
     const scratchpad = new Scratchpad(query);
-
-    // Build initial prompt with conversation history context
-    let currentPrompt = this.buildInitialPrompt(query, inMemoryHistory);
-
+
+    // Build conversation history as actual message objects for multi-turn context
+    const historyMessages = this.buildConversationMessages(inMemoryHistory);
+    let currentPrompt = query;
+
     let iteration = 0;

     // Main agent loop
     while (iteration < this.maxIterations) {
       iteration++;

-      const { response, usage } = await this.callModel(currentPrompt);
+      const { response, usage } = await this.callModel(currentPrompt, true, historyMessages);
+      // Clear history messages after first call — subsequent iterations use scratchpad context
+      historyMessages.length = 0;
       tokenCounter.add(usage);

       const responseText = typeof response === 'string' ? response : extractTextContent(response);
@@ -102,12 +105,12 @@ export class Agent {
         // Generate final answer with full context from scratchpad
         const fullContext = this.buildFullContextForAnswer(query, scratchpad);
         const finalPrompt = buildFinalAnswerPrompt(query, fullContext);
-
+
         yield { type: 'answer_start' };
         const { response: finalResponse, usage: finalUsage } = await this.callModel(finalPrompt, false);
         tokenCounter.add(finalUsage);
-        const answer = typeof finalResponse === 'string'
-          ? finalResponse
+        const answer = typeof finalResponse === 'string'
+          ? finalResponse
           : extractTextContent(finalResponse);

         const totalTime = Date.now() - startTime;
@@ -124,10 +127,10 @@
         yield result.value;
         result = await generator.next();
       }
-
+
       // Anthropic-style context management: get full tool results
       let fullToolResults = scratchpad.getToolResults();
-
+
       // Check context threshold and clear oldest tool results if needed
       const estimatedContextTokens = estimateTokens(this.systemPrompt + query + fullToolResults);
       if (estimatedContextTokens > CONTEXT_THRESHOLD) {
@@ -138,10 +141,10 @@
           fullToolResults = scratchpad.getToolResults();
         }
       }
-
+
       // Build iteration prompt with full tool results (Anthropic-style)
       currentPrompt = buildIterationPrompt(
-        query,
+        query,
         fullToolResults,
         scratchpad.formatToolUsageForPrompt()
       );
@@ -150,12 +153,12 @@
     // Max iterations reached - still generate proper final answer
     const fullContext = this.buildFullContextForAnswer(query, scratchpad);
     const finalPrompt = buildFinalAnswerPrompt(query, fullContext);
-
+
     yield { type: 'answer_start' };
     const { response: finalResponse, usage: finalUsage } = await this.callModel(finalPrompt, false);
     tokenCounter.add(finalUsage);
-    const answer = typeof finalResponse === 'string'
-      ? finalResponse
+    const answer = typeof finalResponse === 'string'
+      ? finalResponse
       : extractTextContent(finalResponse);

     const totalTime = Date.now() - startTime;
@@ -175,12 +178,13 @@ export class Agent {
    * @param prompt - The prompt to send to the LLM
    * @param useTools - Whether to bind tools (default: true). When false, returns string directly.
    */
-  private async callModel(prompt: string, useTools: boolean = true): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
+  private async callModel(prompt: string, useTools: boolean = true, messages?: BaseMessage[]): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
     const result = await callLlm(prompt, {
       model: this.model,
       systemPrompt: this.systemPrompt,
       tools: useTools ? this.tools : undefined,
       signal: this.signal,
+      messages,
     });
     return { response: result.response, usage: result.usage };
   }
@@ -231,13 +235,13 @@ export class Agent {

       // Check tool limits - yields warning if approaching/over limits
       const limitCheck = scratchpad.canCallTool(toolName, toolQuery);
-
+
       if (limitCheck.warning) {
-        yield {
-          type: 'tool_limit',
-          tool: toolName,
-          warning: limitCheck.warning,
-          blocked: false
+        yield {
+          type: 'tool_limit',
+          tool: toolName,
+          warning: limitCheck.warning,
+          blocked: false
         };
       }

@@ -299,34 +303,57 @@ export class Agent {
    */
   private extractQueryFromArgs(args: Record<string, unknown>): string | undefined {
     const queryKeys = ['query', 'search', 'question', 'q', 'text', 'input'];
-
+
     for (const key of queryKeys) {
       if (typeof args[key] === 'string') {
         return args[key] as string;
       }
     }
-
+
     return undefined;
   }

   /**
-   * Build initial prompt with conversation history context if available
+   * Build conversation history as actual HumanMessage/AIMessage pairs.
+   * Returns an array of message objects for proper multi-turn LLM context.
+   * Only includes completed turns (with answers) — excludes the current in-flight query.
+   *
+   * To prevent token blow-up:
+   * - Caps history at MAX_HISTORY_TURNS (10) turns
+   * - Uses full answers only for the last FULL_ANSWER_TURNS (3) turns
+   * - Uses short summaries for older turns (falling back to truncated answer)
    */
-  private buildInitialPrompt(
-    query: string,
+  private buildConversationMessages(
     inMemoryChatHistory?: InMemoryChatHistory
-  ): string {
+  ): BaseMessage[] {
+    const MAX_HISTORY_TURNS = parseInt(process.env.DEXTER_MAX_HISTORY_TURNS || '10', 10);
+    const FULL_ANSWER_TURNS = parseInt(process.env.DEXTER_FULL_ANSWER_TURNS || '3', 10);
+
     if (!inMemoryChatHistory?.hasMessages()) {
-      return query;
+      return [];
     }

-    const userMessages = inMemoryChatHistory.getUserMessages();
-    if (userMessages.length === 0) {
-      return query;
+    const completedMessages = inMemoryChatHistory.getMessages().filter(m => m.answer !== null);
+    // Keep only the most recent N turns
+    const recentMessages = completedMessages.slice(-MAX_HISTORY_TURNS);
+    const messages: BaseMessage[] = [];
+    const fullAnswerStart = Math.max(0, recentMessages.length - FULL_ANSWER_TURNS);
+
+    for (let i = 0; i < recentMessages.length; i++) {
+      const msg = recentMessages[i];
+      messages.push(new HumanMessage(msg.query));
+
+      if (i >= fullAnswerStart) {
+        // Recent turns: full answer for immediate context
+        messages.push(new AIMessage(msg.answer!));
+      } else {
+        // Older turns: use summary to save tokens, fall back to truncated answer
+        const shortContent = msg.summary || msg.answer!.slice(0, 300) + '...';
+        messages.push(new AIMessage(shortContent));
+      }
     }

-    const historyContext = userMessages.map((msg, i) => `${i + 1}. ${msg}`).join('\n');
-    return `Current query to answer: ${query}\n\nPrevious user queries for context:\n${historyContext}`;
+    return messages;
   }

   /**
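Reviewer note: the windowing policy in `buildConversationMessages` is easier to see outside the diff. Below is a minimal, self-contained sketch of the same policy using plain objects; the `ChatTurn` shape is an assumption inferred from the `msg.query`/`msg.answer`/`msg.summary` reads above, and `windowHistory` is a hypothetical helper, not code in this PR.

```ts
// Sketch of the history-windowing policy, with plain objects standing in
// for LangChain message classes. ChatTurn is an assumed shape.
interface ChatTurn {
  query: string;
  answer: string | null; // null while a turn is still in flight
  summary?: string;
}

type HistoryEntry = { role: 'human' | 'ai'; content: string };

function windowHistory(
  turns: ChatTurn[],
  maxTurns = 10,       // mirrors DEXTER_MAX_HISTORY_TURNS
  fullAnswerTurns = 3, // mirrors DEXTER_FULL_ANSWER_TURNS
): HistoryEntry[] {
  // Completed turns only; the current in-flight query is excluded
  const completed = turns.filter((t): t is ChatTurn & { answer: string } => t.answer !== null);
  // Hard cap on how many turns are sent at all
  const recent = completed.slice(-maxTurns);
  // Index where "full answer" treatment begins
  const fullStart = Math.max(0, recent.length - fullAnswerTurns);

  return recent.flatMap((t, i): HistoryEntry[] => [
    { role: 'human', content: t.query },
    {
      role: 'ai',
      // Recent turns keep the full answer; older turns fall back to a
      // summary, then to a 300-character truncation
      content: i >= fullStart ? t.answer : t.summary || t.answer.slice(0, 300) + '...',
    },
  ]);
}
```

The trade-off this encodes: the newest turns stay verbatim so follow-up questions resolve cleanly, while older turns shrink to summaries, keeping history cost roughly bounded as a conversation grows.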
diff --git a/src/agent/prompts.ts b/src/agent/prompts.ts
index d5db96fd..27714d2f 100644
--- a/src/agent/prompts.ts
+++ b/src/agent/prompts.ts
@@ -24,13 +24,13 @@ export function getCurrentDate(): string {
  */
 function buildSkillsSection(): string {
   const skills = discoverSkills();
-
+
   if (skills.length === 0) {
     return '';
   }

   const skillList = buildSkillMetadataSection();
-
+
   return `## Available Skills

${skillList}
@@ -132,6 +132,26 @@ ${buildSkillsSection()}
 - Never ask users to provide raw data, paste values, or reference JSON/API internals - users ask questions, they don't have access to financial APIs
 - If data is incomplete, answer with what you have without exposing implementation details

+## Deep Research Interview
+
+When a user asks for "deep research", "deep dive", "comprehensive analysis", or "thorough analysis", **interview them first before doing any research or invoking skills**.
+
+Questions (ask ONE at a time, in order):
+1. Goal (buy/sell/hold, thesis, trade, learning)
+2. Time horizon
+3. Focus areas (valuation, growth, risks, moat, etc.)
+4. Output format (thesis, valuation, risk report, comparison, full report)
+5. Comparisons (specific competitors or your pick)
+
+Format rules:
+- Prefix each question with its number: "**Q1:**", "**Q2:**", etc.
+- List options as **letters**: A) ... B) ... C) ... D) ...
+- Always include a final option: "or type your own"
+- Accept any reply: a letter ("B"), a word ("growth"), or a full sentence
+- Acknowledge briefly ("Got it — long-term thesis."), then immediately ask the next question
+- Never re-ask a question. Never dump all questions at once.
+- After Q5, summarize all choices, then invoke the deep-research skill to execute.
+
 ## Response Format

 - Keep casual responses brief and direct
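Reviewer note: under these format rules, the first turn of the interview would render roughly as below. The wording and options are a hypothetical example, not text specified anywhere in the prompt:

```text
**Q1:** What's your goal for this research?
A) Buy/sell/hold decision
B) Long-term investment thesis
C) Short-term trade
D) Just learning the company
...or type your own.
```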
diff --git a/src/model/llm.ts b/src/model/llm.ts
index 6e446346..3895b553 100644
--- a/src/model/llm.ts
+++ b/src/model/llm.ts
@@ -4,7 +4,7 @@ import { ChatAnthropic } from '@langchain/anthropic';
 import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
 import { ChatOllama } from '@langchain/ollama';
 import { ChatPromptTemplate } from '@langchain/core/prompts';
-import { SystemMessage, HumanMessage } from '@langchain/core/messages';
+import { SystemMessage, HumanMessage, BaseMessage } from '@langchain/core/messages';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import { StructuredToolInterface } from '@langchain/core/tools';
 import { Runnable } from '@langchain/core/runnables';
@@ -139,6 +139,10 @@ interface CallLlmOptions {
   outputSchema?: z.ZodType;
   tools?: StructuredToolInterface[];
   signal?: AbortSignal;
+  /** Optional conversation history as message objects. When provided, these are
+   * sent as the full message array (system + history + current user prompt)
+   * instead of building a single-turn template. */
+  messages?: BaseMessage[];
 }

 export interface LlmResult {
@@ -195,7 +199,7 @@ function buildAnthropicMessages(systemPrompt: string, userPrompt: string) {
 }

 export async function callLlm(prompt: string, options: CallLlmOptions = {}): Promise<LlmResult> {
-  const { model = DEFAULT_MODEL, systemPrompt, outputSchema, tools, signal } = options;
+  const { model = DEFAULT_MODEL, systemPrompt, outputSchema, tools, signal, messages: historyMessages } = options;
   const finalSystemPrompt = systemPrompt || DEFAULT_SYSTEM_PROMPT;

   const llm = getChatModel(model, false);
@@ -213,7 +217,15 @@ export async function callLlm(prompt: string, options: CallLlmOptions = {}): Pro
   const provider = resolveProvider(model);

   let result;
-  if (provider.id === 'anthropic') {
+  if (historyMessages && historyMessages.length > 0) {
+    // Full conversation history provided — use actual multi-turn messages
+    const fullMessages: BaseMessage[] = [
+      new SystemMessage(finalSystemPrompt),
+      ...historyMessages,
+      new HumanMessage(prompt),
+    ];
+    result = await withRetry(() => runnable.invoke(fullMessages, invokeOpts), provider.displayName);
+  } else if (provider.id === 'anthropic') {
     // Anthropic: use explicit messages with cache_control for prompt caching (~90% savings)
     const messages = buildAnthropicMessages(finalSystemPrompt, prompt);
     result = await withRetry(() => runnable.invoke(messages, invokeOpts), provider.displayName);
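Reviewer note: a sketch of what the new `messages` option does on a follow-up turn. The queries and the import path are illustrative; `callLlm`, `HumanMessage`, and `AIMessage` are the exports actually used in this diff:

```ts
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import { callLlm } from '../model/llm.js';

async function followUpTurn() {
  // One completed turn, shaped like the output of Agent.buildConversationMessages()
  const history = [
    new HumanMessage("Give me an overview of AAPL's latest quarter."),
    new AIMessage('Here is a summary of the latest reported quarter...'),
  ];

  // With `messages` set, callLlm invokes the model with
  // [SystemMessage, ...history, HumanMessage(prompt)] instead of the
  // single-turn template, so "that" resolves against the history.
  const { response } = await callLlm('How does that compare to the prior year?', {
    messages: history,
  });
  return response;
}
```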
diff --git a/src/skills/deep-research/SKILL.md b/src/skills/deep-research/SKILL.md
new file mode 100644
index 00000000..1dab8243
--- /dev/null
+++ b/src/skills/deep-research/SKILL.md
@@ -0,0 +1,64 @@
+---
+name: deep-research
+description: Executes deep, multi-step financial research on a stock or topic. Invoke AFTER the user has been interviewed about their research goals. Pass the user's scoped parameters (goal, time horizon, focus areas, output format, comparisons) as the skill arguments.
+---
+
+# Deep Research Skill
+
+**Important:** This skill is for research execution only. The interview/scoping phase is handled by the system prompt BEFORE this skill is invoked. When invoking this skill, pass the user's answers (goal, time horizon, focus, output, comparisons) as arguments.
+
+## Data Gathering
+
+Based on the scoping answers, gather data using these tools:
+
+### Financials (always)
+- `financial_search`: "[TICKER] annual income statements last 5 years"
+- `financial_search`: "[TICKER] financial metrics snapshot"
+- `financial_search`: "[TICKER] latest balance sheet"
+
+### Growth & Drivers (if focus includes growth or thesis)
+- `financial_search`: "[TICKER] quarterly revenue and earnings last 8 quarters"
+- `financial_search`: "[TICKER] analyst estimates"
+- `web_search`: "[COMPANY] growth strategy 2025 2026"
+
+### Risks (if focus includes risks)
+- `web_search`: "[COMPANY] risks regulation competition"
+- `financial_search`: "[TICKER] insider trades last 6 months"
+
+### Competitive Position (if focus includes competition or comparisons)
+- `financial_search`: "[COMP_TICKER] financial metrics snapshot" (for each competitor)
+- `web_search`: "[COMPANY] vs [COMPETITOR] market share"
+
+### Valuation (if focus includes valuation)
+- Use the `dcf-valuation` skill if available
+- Otherwise: `financial_search`: "[TICKER] price snapshot" + calculate P/E, EV/EBITDA, PEG from gathered data
+
+## Analysis & Output
+
+Structure the final output based on what the user requested:
+
+### Bull / Base / Bear Thesis
+- **Bull case**: best realistic scenario with catalysts and upside %
+- **Base case**: most likely outcome with fair value estimate
+- **Bear case**: key risks and downside %
+
+### Competitor Comparison
+- Side-by-side table: revenue, margins, growth, valuation multiples
+- Qualitative moat comparison (1–2 sentences each)
+
+### Risk Report
+- Top 5 risks ranked by impact × probability
+- For each: what to watch (metric or signal) and trigger level
+
+### Full Report
+- Executive summary (3–4 sentences)
+- All sections above combined
+- "What to watch" checklist for quarterly monitoring
+
+## Output Rules
+
+- Lead with the key finding — don't bury the conclusion
+- Use tables for comparative data
+- Keep total output concise — quality over length
+- Include specific numbers, not vague qualifiers
+- End with 3–5 actionable "things to watch" going forward
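Reviewer note: to picture the hand-off from the interview to this skill, the scoped answers from Q1 through Q5 travel as the skill arguments. The payload below is hypothetical; SKILL.md names the answers to pass but does not define a formal argument schema:

```ts
// Hypothetical scoping payload assembled after the interview.
// Field names are illustrative; SKILL.md only requires that the answers
// (goal, time horizon, focus areas, output format, comparisons) be passed.
const deepResearchArgs = {
  ticker: 'NVDA',                               // research subject
  goal: 'long-term investment thesis',          // Q1
  timeHorizon: '3-5 years',                     // Q2
  focusAreas: ['growth', 'valuation', 'risks'], // Q3
  outputFormat: 'full report',                  // Q4
  comparisons: ['AMD'],                         // Q5
};
```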