99 changes: 63 additions & 36 deletions src/agent/agent.ts
@@ -1,4 +1,4 @@
import { AIMessage } from '@langchain/core/messages';
import { AIMessage, HumanMessage, BaseMessage } from '@langchain/core/messages';
import { StructuredToolInterface } from '@langchain/core/tools';
import { callLlm } from '../model/llm.js';
import { Scratchpad, type ToolContext } from './scratchpad.js';
@@ -59,25 +59,28 @@ export class Agent {
async *run(query: string, inMemoryHistory?: InMemoryChatHistory): AsyncGenerator<AgentEvent> {
const startTime = Date.now();
const tokenCounter = new TokenCounter();

if (this.tools.length === 0) {
yield { type: 'done', answer: 'No tools available. Please check your API key configuration.', toolCalls: [], iterations: 0, totalTime: Date.now() - startTime };
return;
}

// Create scratchpad for this query - single source of truth for all work done
const scratchpad = new Scratchpad(query);

// Build initial prompt with conversation history context
let currentPrompt = this.buildInitialPrompt(query, inMemoryHistory);


// Build conversation history as actual message objects for multi-turn context
const historyMessages = this.buildConversationMessages(inMemoryHistory);
let currentPrompt = query;

let iteration = 0;

// Main agent loop
while (iteration < this.maxIterations) {
iteration++;

const { response, usage } = await this.callModel(currentPrompt);
const { response, usage } = await this.callModel(currentPrompt, true, historyMessages);
// Clear history messages after first call — subsequent iterations use scratchpad context
historyMessages.length = 0;
tokenCounter.add(usage);
const responseText = typeof response === 'string' ? response : extractTextContent(response);

@@ -102,12 +105,12 @@
// Generate final answer with full context from scratchpad
const fullContext = this.buildFullContextForAnswer(query, scratchpad);
const finalPrompt = buildFinalAnswerPrompt(query, fullContext);

yield { type: 'answer_start' };
const { response: finalResponse, usage: finalUsage } = await this.callModel(finalPrompt, false);
tokenCounter.add(finalUsage);
const answer = typeof finalResponse === 'string'
? finalResponse
const answer = typeof finalResponse === 'string'
? finalResponse
: extractTextContent(finalResponse);

const totalTime = Date.now() - startTime;
@@ -124,10 +127,10 @@
yield result.value;
result = await generator.next();
}

// Anthropic-style context management: get full tool results
let fullToolResults = scratchpad.getToolResults();

// Check context threshold and clear oldest tool results if needed
const estimatedContextTokens = estimateTokens(this.systemPrompt + query + fullToolResults);
if (estimatedContextTokens > CONTEXT_THRESHOLD) {
@@ -138,10 +141,10 @@
fullToolResults = scratchpad.getToolResults();
}
}

// Build iteration prompt with full tool results (Anthropic-style)
currentPrompt = buildIterationPrompt(
query,
query,
fullToolResults,
scratchpad.formatToolUsageForPrompt()
);
@@ -150,12 +153,12 @@
// Max iterations reached - still generate proper final answer
const fullContext = this.buildFullContextForAnswer(query, scratchpad);
const finalPrompt = buildFinalAnswerPrompt(query, fullContext);

yield { type: 'answer_start' };
const { response: finalResponse, usage: finalUsage } = await this.callModel(finalPrompt, false);
tokenCounter.add(finalUsage);
const answer = typeof finalResponse === 'string'
? finalResponse
const answer = typeof finalResponse === 'string'
? finalResponse
: extractTextContent(finalResponse);

const totalTime = Date.now() - startTime;
@@ -175,12 +178,13 @@
* @param prompt - The prompt to send to the LLM
* @param useTools - Whether to bind tools (default: true). When false, returns string directly.
*/
private async callModel(prompt: string, useTools: boolean = true): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
private async callModel(prompt: string, useTools: boolean = true, messages?: BaseMessage[]): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
const result = await callLlm(prompt, {
model: this.model,
systemPrompt: this.systemPrompt,
tools: useTools ? this.tools : undefined,
signal: this.signal,
messages,
});
return { response: result.response, usage: result.usage };
}
@@ -231,13 +235,13 @@ export class Agent {

// Check tool limits - yields warning if approaching/over limits
const limitCheck = scratchpad.canCallTool(toolName, toolQuery);

if (limitCheck.warning) {
yield {
type: 'tool_limit',
tool: toolName,
warning: limitCheck.warning,
blocked: false
yield {
type: 'tool_limit',
tool: toolName,
warning: limitCheck.warning,
blocked: false
};
}

@@ -299,34 +303,57 @@
*/
private extractQueryFromArgs(args: Record<string, unknown>): string | undefined {
const queryKeys = ['query', 'search', 'question', 'q', 'text', 'input'];

for (const key of queryKeys) {
if (typeof args[key] === 'string') {
return args[key] as string;
}
}

return undefined;
}

/**
* Build initial prompt with conversation history context if available
* Build conversation history as actual HumanMessage/AIMessage pairs.
* Returns an array of message objects for proper multi-turn LLM context.
* Only includes completed turns (with answers) — excludes the current in-flight query.
*
* To prevent token blow-up:
* - Caps history at MAX_HISTORY_TURNS (10) turns
* - Uses full answers only for the last FULL_ANSWER_TURNS (3) turns
* - Uses short summaries for older turns (falling back to truncated answer)
*/
private buildInitialPrompt(
query: string,
private buildConversationMessages(
inMemoryChatHistory?: InMemoryChatHistory
): string {
): BaseMessage[] {
const MAX_HISTORY_TURNS = parseInt(process.env.DEXTER_MAX_HISTORY_TURNS || '10', 10);
const FULL_ANSWER_TURNS = parseInt(process.env.DEXTER_FULL_ANSWER_TURNS || '3', 10);

if (!inMemoryChatHistory?.hasMessages()) {
return query;
return [];
}

const userMessages = inMemoryChatHistory.getUserMessages();
if (userMessages.length === 0) {
return query;
const completedMessages = inMemoryChatHistory.getMessages().filter(m => m.answer !== null);
// Keep only the most recent N turns
const recentMessages = completedMessages.slice(-MAX_HISTORY_TURNS);
const messages: BaseMessage[] = [];
const fullAnswerStart = Math.max(0, recentMessages.length - FULL_ANSWER_TURNS);

for (let i = 0; i < recentMessages.length; i++) {
const msg = recentMessages[i];
messages.push(new HumanMessage(msg.query));

if (i >= fullAnswerStart) {
// Recent turns: full answer for immediate context
messages.push(new AIMessage(msg.answer!));
} else {
// Older turns: use summary to save tokens, fall back to truncated answer
const shortContent = msg.summary || msg.answer!.slice(0, 300) + '...';
messages.push(new AIMessage(shortContent));
}
}

const historyContext = userMessages.map((msg, i) => `${i + 1}. ${msg}`).join('\n');
return `Current query to answer: ${query}\n\nPrevious user queries for context:\n${historyContext}`;
return messages;
}

/**
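
A condensed sketch of the history-windowing policy introduced above, for reference. It is illustrative only: the turn shape is assumed from the fields `buildConversationMessages` reads (`query`, `answer`, `summary`), and the defaults mirror `DEXTER_MAX_HISTORY_TURNS` and `DEXTER_FULL_ANSWER_TURNS`.

```ts
import { AIMessage, HumanMessage, BaseMessage } from '@langchain/core/messages';

// Assumed turn shape, inferred from the fields the diff reads.
interface Turn {
  query: string;
  answer: string | null;
  summary?: string;
}

function windowHistory(
  turns: Turn[],
  maxTurns = 10,       // DEXTER_MAX_HISTORY_TURNS default
  fullAnswerTurns = 3, // DEXTER_FULL_ANSWER_TURNS default
): BaseMessage[] {
  // Completed turns only, capped at the most recent maxTurns.
  const recent = turns.filter(t => t.answer !== null).slice(-maxTurns);
  const fullStart = Math.max(0, recent.length - fullAnswerTurns);

  return recent.flatMap((t, i) => [
    new HumanMessage(t.query),
    i >= fullStart
      ? new AIMessage(t.answer!) // recent turns keep the full answer
      : new AIMessage(t.summary || t.answer!.slice(0, 300) + '...'), // older turns shrink
  ]);
}
```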
24 changes: 22 additions & 2 deletions src/agent/prompts.ts
@@ -24,13 +24,13 @@ export function getCurrentDate(): string {
*/
function buildSkillsSection(): string {
const skills = discoverSkills();

if (skills.length === 0) {
return '';
}

const skillList = buildSkillMetadataSection();

return `## Available Skills

${skillList}
@@ -132,6 +132,26 @@ ${buildSkillsSection()}
- Never ask users to provide raw data, paste values, or reference JSON/API internals; users ask questions and don't have access to financial APIs
- If data is incomplete, answer with what you have without exposing implementation details

## Deep Research Interview

When a user asks for "deep research", "deep dive", "comprehensive analysis", or "thorough analysis", **interview them before doing any research or invoking skills**.

Questions (ask ONE at a time, in order):
1. Goal (buy/sell/hold, thesis, trade, learning)
2. Time horizon
3. Focus areas (valuation, growth, risks, moat, etc.)
4. Output format (thesis, valuation, risk report, comparison, full report)
5. Comparisons (specific competitors or your pick)

Format rules:
- Prefix each question with its number: "**Q1:**", "**Q2:**", etc.
- List options as **letters**: A) ... B) ... C) ... D) ...
- Always include a final option: "or type your own"
- Accept any reply: a letter ("B"), a word ("growth"), or a full sentence
- Acknowledge briefly ("Got it — long-term thesis."), then immediately ask the next question
- Never re-ask a question. Never dump all questions at once.
- After Q5, summarize all choices, then invoke the deep-research skill to execute.
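
Example opening question, following the rules above:

**Q1:** What's your goal for this research? A) Buy/sell/hold decision B) Building a thesis C) A trade D) Learning, or type your own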

## Response Format

- Keep casual responses brief and direct
18 changes: 15 additions & 3 deletions src/model/llm.ts
@@ -4,7 +4,7 @@ import { ChatAnthropic } from '@langchain/anthropic';
import { ChatGoogleGenerativeAI } from '@langchain/google-genai';
import { ChatOllama } from '@langchain/ollama';
import { ChatPromptTemplate } from '@langchain/core/prompts';
import { SystemMessage, HumanMessage } from '@langchain/core/messages';
import { SystemMessage, HumanMessage, BaseMessage } from '@langchain/core/messages';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { StructuredToolInterface } from '@langchain/core/tools';
import { Runnable } from '@langchain/core/runnables';
@@ -139,6 +139,10 @@ interface CallLlmOptions {
outputSchema?: z.ZodType<unknown>;
tools?: StructuredToolInterface[];
signal?: AbortSignal;
/** Optional conversation history as message objects. When provided, these are
* sent as the full message array (system + history + current user prompt)
* instead of building a single-turn template. */
messages?: BaseMessage[];
}

export interface LlmResult {
@@ -195,7 +199,7 @@ function buildAnthropicMessages(systemPrompt: string, userPrompt: string) {
}

export async function callLlm(prompt: string, options: CallLlmOptions = {}): Promise<LlmResult> {
const { model = DEFAULT_MODEL, systemPrompt, outputSchema, tools, signal } = options;
const { model = DEFAULT_MODEL, systemPrompt, outputSchema, tools, signal, messages: historyMessages } = options;
const finalSystemPrompt = systemPrompt || DEFAULT_SYSTEM_PROMPT;

const llm = getChatModel(model, false);
@@ -213,7 +217,15 @@ export async function callLlm(prompt: string, options: CallLlmOptions = {}): Pro
const provider = resolveProvider(model);
let result;

if (provider.id === 'anthropic') {
if (historyMessages && historyMessages.length > 0) {
// Full conversation history provided — use actual multi-turn messages
const fullMessages: BaseMessage[] = [
new SystemMessage(finalSystemPrompt),
...historyMessages,
new HumanMessage(prompt),
];
result = await withRetry(() => runnable.invoke(fullMessages, invokeOpts), provider.displayName);
} else if (provider.id === 'anthropic') {
// Anthropic: use explicit messages with cache_control for prompt caching (~90% savings)
const messages = buildAnthropicMessages(finalSystemPrompt, prompt);
result = await withRetry(() => runnable.invoke(messages, invokeOpts), provider.displayName);
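
A minimal usage sketch of the new `messages` option. Illustrative only: the import path matches how `src/agent/agent.ts` imports this module, and the conversation content is placeholder text.

```ts
import { HumanMessage, AIMessage } from '@langchain/core/messages';
import { callLlm } from '../model/llm.js';

// Single-turn call: unchanged path, system prompt + one user prompt.
const single = await callLlm('Summarize the latest balance sheet.', {
  systemPrompt: 'You are a financial research agent.',
});

// Multi-turn call: prior turns are passed as real message objects and
// callLlm sends [system, ...history, current prompt] as one array.
const multi = await callLlm('How does that compare to last year?', {
  systemPrompt: 'You are a financial research agent.',
  messages: [
    new HumanMessage('Summarize the latest balance sheet.'),
    new AIMessage('Here is a summary of the latest balance sheet...'),
  ],
});
```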
64 changes: 64 additions & 0 deletions src/skills/deep-research/SKILL.md
@@ -0,0 +1,64 @@
---
name: deep-research
description: Executes deep, multi-step financial research on a stock or topic. Invoke AFTER the user has been interviewed about their research goals. Pass the user's scoped parameters (goal, time horizon, focus areas, output format, comparisons) as the skill arguments.
---

# Deep Research Skill

**Important:** This skill is for research execution only. The interview/scoping phase is handled by the system prompt BEFORE this skill is invoked. When invoking this skill, pass the user's answers (goal, time horizon, focus, output, comparisons) as arguments.

## Data Gathering

Based on the scoping answers, gather data using these tools:

### Financials (always)
- `financial_search`: "[TICKER] annual income statements last 5 years"
- `financial_search`: "[TICKER] financial metrics snapshot"
- `financial_search`: "[TICKER] latest balance sheet"

### Growth & Drivers (if focus includes growth or thesis)
- `financial_search`: "[TICKER] quarterly revenue and earnings last 8 quarters"
- `financial_search`: "[TICKER] analyst estimates"
- `web_search`: "[COMPANY] growth strategy 2025 2026"

### Risks (if focus includes risks)
- `web_search`: "[COMPANY] risks regulation competition"
- `financial_search`: "[TICKER] insider trades last 6 months"

### Competitive Position (if focus includes competition or comparisons)
- `financial_search`: "[COMP_TICKER] financial metrics snapshot" (for each competitor)
- `web_search`: "[COMPANY] vs [COMPETITOR] market share"

### Valuation (if focus includes valuation)
- Use the `dcf-valuation` skill if available
- Otherwise: `financial_search`: "[TICKER] price snapshot" + calculate P/E, EV/EBITDA, PEG from gathered data
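- For reference, the standard multiple definitions: P/E = price / EPS; EV/EBITDA = (market cap + total debt - cash) / EBITDA; PEG = (P/E) / expected annual EPS growth rate in percent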

## Analysis & Output

Structure the final output based on what the user requested:

### Bull / Base / Bear Thesis
- **Bull case**: best realistic scenario with catalysts and upside %
- **Base case**: most likely outcome with fair value estimate
- **Bear case**: key risks and downside %

### Competitor Comparison
- Side-by-side table: revenue, margins, growth, valuation multiples
- Qualitative moat comparison (1–2 sentences each)

### Risk Report
- Top 5 risks ranked by impact × probability
- For each: what to watch (metric or signal) and trigger level

### Full Report
- Executive summary (3–4 sentences)
- All sections above combined
- "What to watch" checklist for quarterly monitoring

## Output Rules

- Lead with the key finding — don't bury the conclusion
- Use tables for comparative data
- Keep total output concise — quality over length
- Include specific numbers, not vague qualifiers
- End with 3–5 actionable "things to watch" going forward
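
## Example Invocation

An illustrative set of scoped arguments gathered by the interview (the shape is hypothetical; adapt it to however the agent serializes skill arguments):

```
goal: long-term thesis
time horizon: 3-5 years
focus: valuation, moat, risks
output: full report
comparisons: MSFT, GOOGL
```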