diff --git a/src/agent/__tests__/cost-tracker.test.ts b/src/agent/__tests__/cost-tracker.test.ts index 7b7c0105..7d9ee899 100644 --- a/src/agent/__tests__/cost-tracker.test.ts +++ b/src/agent/__tests__/cost-tracker.test.ts @@ -18,13 +18,21 @@ beforeEach(() => { tracker = new CostTracker(db); }); -function makeCost(usd: number, input: number, output: number): AgentCost { +function makeCost(usd: number, input: number, output: number, cacheRead = 0, cacheCreation = 0): AgentCost { return { totalUsd: usd, inputTokens: input, outputTokens: output, + cacheReadTokens: cacheRead, + cacheCreationTokens: cacheCreation, modelUsage: { - "claude-opus-4-6": { inputTokens: input, outputTokens: output, costUsd: usd }, + "claude-opus-4-6": { + inputTokens: input, + outputTokens: output, + cacheReadTokens: cacheRead, + cacheCreationTokens: cacheCreation, + costUsd: usd, + }, }, }; } @@ -77,4 +85,21 @@ describe("CostTracker", () => { expect(events.length).toBe(2); expect(events[0].model).toBe("claude-opus-4-6"); }); + + test("records and accumulates cache token counts", () => { + store.create("cli", "conv-cache"); + tracker.record("cli:conv-cache", makeCost(0.04, 1000, 500, 800, 200), "claude-opus-4-6"); + tracker.record("cli:conv-cache", makeCost(0.06, 1500, 700, 1200, 0), "claude-opus-4-6"); + + const events = tracker.getCostEvents("cli:conv-cache"); + expect(events.length).toBe(2); + const allCacheReads = events.map((e) => e.cache_read_tokens).sort((a, b) => a - b); + expect(allCacheReads).toEqual([800, 1200]); + const allCacheCreations = events.map((e) => e.cache_creation_tokens).sort((a, b) => a - b); + expect(allCacheCreations).toEqual([0, 200]); + + const session = store.getByKey("cli:conv-cache") as Record; + expect(session.cache_read_tokens).toBe(2000); + expect(session.cache_creation_tokens).toBe(200); + }); }); diff --git a/src/agent/cost-tracker.ts b/src/agent/cost-tracker.ts index 1fededbf..410c0c97 100644 --- a/src/agent/cost-tracker.ts +++ b/src/agent/cost-tracker.ts @@ -10,9 +10,17 @@ export class CostTracker { record(sessionKey: string, cost: AgentCost, model: string): void { this.db.run( - `INSERT INTO cost_events (session_key, cost_usd, input_tokens, output_tokens, model) - VALUES (?, ?, ?, ?, ?)`, - [sessionKey, cost.totalUsd, cost.inputTokens, cost.outputTokens, model], + `INSERT INTO cost_events (session_key, cost_usd, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model) + VALUES (?, ?, ?, ?, ?, ?, ?)`, + [ + sessionKey, + cost.totalUsd, + cost.inputTokens, + cost.outputTokens, + cost.cacheReadTokens, + cost.cacheCreationTokens, + model, + ], ); this.db.run( @@ -20,10 +28,12 @@ export class CostTracker { total_cost_usd = total_cost_usd + ?, input_tokens = input_tokens + ?, output_tokens = output_tokens + ?, + cache_read_tokens = cache_read_tokens + ?, + cache_creation_tokens = cache_creation_tokens + ?, turn_count = turn_count + 1, last_active_at = datetime('now') WHERE session_key = ?`, - [cost.totalUsd, cost.inputTokens, cost.outputTokens, sessionKey], + [cost.totalUsd, cost.inputTokens, cost.outputTokens, cost.cacheReadTokens, cost.cacheCreationTokens, sessionKey], ); } @@ -47,6 +57,8 @@ export type CostEvent = { cost_usd: number; input_tokens: number; output_tokens: number; + cache_read_tokens: number; + cache_creation_tokens: number; model: string; created_at: string; }; diff --git a/src/agent/events.ts b/src/agent/events.ts index 475093e3..575e5ff1 100644 --- a/src/agent/events.ts +++ b/src/agent/events.ts @@ -2,7 +2,12 @@ export type AgentCost = { totalUsd: number; inputTokens: number; outputTokens: number; - modelUsage: Record; + cacheReadTokens: number; + cacheCreationTokens: number; + modelUsage: Record< + string, + { inputTokens: number; outputTokens: number; cacheReadTokens: number; cacheCreationTokens: number; costUsd: number } + >; }; export type AgentStopReason = @@ -41,6 +46,8 @@ export function emptyCost(): AgentCost { totalUsd: 0, inputTokens: 0, outputTokens: 0, + cacheReadTokens: 0, + cacheCreationTokens: 0, modelUsage: {}, }; } diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index e59e6335..1a93c851 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -327,26 +327,34 @@ function extractCost(message: { const modelUsage: AgentCost["modelUsage"] = {}; for (const [model, usage] of Object.entries(message.modelUsage)) { - const totalModelInput = - usage.inputTokens + (usage.cacheReadInputTokens ?? 0) + (usage.cacheCreationInputTokens ?? 0); + const cacheRead = usage.cacheReadInputTokens ?? 0; + const cacheCreation = usage.cacheCreationInputTokens ?? 0; modelUsage[model] = { - inputTokens: totalModelInput, + inputTokens: usage.inputTokens + cacheRead + cacheCreation, outputTokens: usage.outputTokens, + cacheReadTokens: cacheRead, + cacheCreationTokens: cacheCreation, costUsd: usage.costUSD, }; } let totalInput = 0; let totalOutput = 0; + let totalCacheRead = 0; + let totalCacheCreation = 0; for (const usage of Object.values(modelUsage)) { totalInput += usage.inputTokens; totalOutput += usage.outputTokens; + totalCacheRead += usage.cacheReadTokens; + totalCacheCreation += usage.cacheCreationTokens; } return { totalUsd: message.total_cost_usd, inputTokens: totalInput, outputTokens: totalOutput, + cacheReadTokens: totalCacheRead, + cacheCreationTokens: totalCacheCreation, modelUsage, }; } diff --git a/src/agent/session-store.ts b/src/agent/session-store.ts index 88df65d1..2f66cbb4 100644 --- a/src/agent/session-store.ts +++ b/src/agent/session-store.ts @@ -10,6 +10,8 @@ export type Session = { total_cost_usd: number; input_tokens: number; output_tokens: number; + cache_read_tokens: number; + cache_creation_tokens: number; turn_count: number; created_at: string; last_active_at: string; diff --git a/src/db/__tests__/migrate.test.ts b/src/db/__tests__/migrate.test.ts index cb668ffc..a3f4ebdf 100644 --- a/src/db/__tests__/migrate.test.ts +++ b/src/db/__tests__/migrate.test.ts @@ -36,7 +36,7 @@ describe("runMigrations", () => { runMigrations(db); const migrationCount = db.query("SELECT COUNT(*) as count FROM _migrations").get() as { count: number }; - expect(migrationCount.count).toBe(14); + expect(migrationCount.count).toBe(18); }); test("tracks applied migration indices", () => { @@ -48,6 +48,6 @@ describe("runMigrations", () => { .all() .map((r) => (r as { index_num: number }).index_num); - expect(indices).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]); + expect(indices).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]); }); }); diff --git a/src/db/schema.ts b/src/db/schema.ts index b900e4aa..f19b8571 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -133,4 +133,9 @@ export const MIGRATIONS: string[] = [ // LOOP_DEFAULT_MAX_TICK_DURATION_MS in src/loop/types.ts. Backfilled by // the DEFAULT clause for existing rows so the runner never sees NULL. "ALTER TABLE loops ADD COLUMN max_tick_duration_ms INTEGER NOT NULL DEFAULT 1800000", + + "ALTER TABLE cost_events ADD COLUMN cache_read_tokens INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE cost_events ADD COLUMN cache_creation_tokens INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE sessions ADD COLUMN cache_read_tokens INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE sessions ADD COLUMN cache_creation_tokens INTEGER NOT NULL DEFAULT 0", ]; diff --git a/src/loop/__tests__/evolution-integration.test.ts b/src/loop/__tests__/evolution-integration.test.ts index 0e7227ad..42a8b9b0 100644 --- a/src/loop/__tests__/evolution-integration.test.ts +++ b/src/loop/__tests__/evolution-integration.test.ts @@ -14,7 +14,14 @@ type HandleMessageImpl = ( ) => Promise<{ text: string; sessionId: string; - cost: { totalUsd: number; inputTokens: number; outputTokens: number; modelUsage: Record }; + cost: { + totalUsd: number; + inputTokens: number; + outputTokens: number; + cacheReadTokens: number; + cacheCreationTokens: number; + modelUsage: Record; + }; durationMs: number; }>; @@ -22,7 +29,14 @@ function createMockRuntime(impl?: HandleMessageImpl) { const defaultImpl: HandleMessageImpl = async () => ({ text: "ok", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 10, outputTokens: 10, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 10, + outputTokens: 10, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 10, }); return { @@ -37,7 +51,14 @@ function agentFinishes(stateFile: string, loopId: string): HandleMessageImpl { return { text: "done", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 1, outputTokens: 1, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 1, + outputTokens: 1, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 1, }; }; diff --git a/src/loop/__tests__/runner.test.ts b/src/loop/__tests__/runner.test.ts index aee29b89..acd95254 100644 --- a/src/loop/__tests__/runner.test.ts +++ b/src/loop/__tests__/runner.test.ts @@ -10,7 +10,14 @@ import { LoopRunner } from "../runner.ts"; type MockResponse = { text: string; sessionId: string; - cost: { totalUsd: number; inputTokens: number; outputTokens: number; modelUsage: Record }; + cost: { + totalUsd: number; + inputTokens: number; + outputTokens: number; + cacheReadTokens: number; + cacheCreationTokens: number; + modelUsage: Record; + }; durationMs: number; }; @@ -26,7 +33,14 @@ function createMockRuntime(impl?: HandleMessageImpl) { const defaultImpl: HandleMessageImpl = async () => ({ text: "ok", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 10, outputTokens: 10, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 10, + outputTokens: 10, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 10, }); // Tracks activeSessions in the same way AgentRuntime does, so tests can @@ -48,7 +62,14 @@ function agentFinishes(stateFile: string, loopId: string): HandleMessageImpl { return { text: "done", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 1, outputTokens: 1, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 1, + outputTokens: 1, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 1, }; }; @@ -190,7 +211,14 @@ describe("LoopRunner", () => { const runtime = createMockRuntime(async () => ({ text: "expensive", sessionId: "s", - cost: { totalUsd: 0.6, inputTokens: 1, outputTokens: 1, modelUsage: {} }, + cost: { + totalUsd: 0.6, + inputTokens: 1, + outputTokens: 1, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 1, })); const runner = new LoopRunner({ db, runtime: runtime, dataDir, autoSchedule: false }); @@ -294,7 +322,14 @@ describe("LoopRunner", () => { return { text: "progress", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 1, outputTokens: 1, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 1, + outputTokens: 1, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 1, }; }); @@ -383,7 +418,14 @@ describe("LoopRunner", () => { return { text: "Error: aborted", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 1, outputTokens: 1, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 1, + outputTokens: 1, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 1, }; }; diff --git a/src/loop/__tests__/tool.test.ts b/src/loop/__tests__/tool.test.ts index c1288ae1..01dfbb1e 100644 --- a/src/loop/__tests__/tool.test.ts +++ b/src/loop/__tests__/tool.test.ts @@ -13,7 +13,14 @@ function mockRuntime() { handleMessage: mock(async () => ({ text: "ok", sessionId: "s", - cost: { totalUsd: 0.01, inputTokens: 1, outputTokens: 1, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 1, + outputTokens: 1, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 1, })), releaseSession: mock(() => {}), diff --git a/src/mcp/__tests__/dynamic-tools.test.ts b/src/mcp/__tests__/dynamic-tools.test.ts index 8fb2497e..5288e2b8 100644 --- a/src/mcp/__tests__/dynamic-tools.test.ts +++ b/src/mcp/__tests__/dynamic-tools.test.ts @@ -13,7 +13,14 @@ function createMockRuntime() { handleMessage: async (_ch: string, _conv: string, text: string) => ({ text: `Mock: ${text}`, sessionId: "mock-session", - cost: { totalUsd: 0.001, inputTokens: 100, outputTokens: 50, modelUsage: {} }, + cost: { + totalUsd: 0.001, + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 100, }), getActiveSessionCount: () => 0, diff --git a/src/mcp/__tests__/scope-enforcement.test.ts b/src/mcp/__tests__/scope-enforcement.test.ts index f40505f1..52edee62 100644 --- a/src/mcp/__tests__/scope-enforcement.test.ts +++ b/src/mcp/__tests__/scope-enforcement.test.ts @@ -14,7 +14,14 @@ function createMockRuntime() { handleMessage: async (_ch: string, _conv: string, text: string) => ({ text: `Mock response to: ${text}`, sessionId: "mock-session", - cost: { totalUsd: 0.001, inputTokens: 100, outputTokens: 50, modelUsage: {} }, + cost: { + totalUsd: 0.001, + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 100, }), getActiveSessionCount: () => 0, diff --git a/src/mcp/__tests__/server.test.ts b/src/mcp/__tests__/server.test.ts index 489c712c..e5c50e63 100644 --- a/src/mcp/__tests__/server.test.ts +++ b/src/mcp/__tests__/server.test.ts @@ -9,7 +9,14 @@ function createMockRuntime() { handleMessage: async (_ch: string, _conv: string, text: string) => ({ text: `Mock response to: ${text}`, sessionId: "mock-session", - cost: { totalUsd: 0.001, inputTokens: 100, outputTokens: 50, modelUsage: {} }, + cost: { + totalUsd: 0.001, + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 100, }), getActiveSessionCount: () => 0, diff --git a/src/mcp/__tests__/tools-swe.test.ts b/src/mcp/__tests__/tools-swe.test.ts index 96d292f1..b1564d5b 100644 --- a/src/mcp/__tests__/tools-swe.test.ts +++ b/src/mcp/__tests__/tools-swe.test.ts @@ -12,7 +12,14 @@ function createMockRuntime() { handleMessage: async (_ch: string, _conv: string, text: string) => ({ text: `Mock review: ${text.slice(0, 50)}`, sessionId: "mock-session", - cost: { totalUsd: 0.05, inputTokens: 1000, outputTokens: 500, modelUsage: {} }, + cost: { + totalUsd: 0.05, + inputTokens: 1000, + outputTokens: 500, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 5000, }), getActiveSessionCount: () => 0, diff --git a/src/scheduler/__tests__/service.test.ts b/src/scheduler/__tests__/service.test.ts index 2e80957c..4781ac3a 100644 --- a/src/scheduler/__tests__/service.test.ts +++ b/src/scheduler/__tests__/service.test.ts @@ -8,7 +8,14 @@ function createMockRuntime() { handleMessage: mock(async (_channel: string, _conversationId: string, _text: string) => ({ text: "Mock response from agent", sessionId: "mock-session", - cost: { totalUsd: 0.01, inputTokens: 100, outputTokens: 50, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 500, })), setMemoryContextBuilder: mock(() => {}), @@ -260,7 +267,14 @@ describe("Scheduler", () => { errorRuntime.handleMessage.mockImplementation(async () => ({ text: "Error: Something went wrong", sessionId: "err-session", - cost: { totalUsd: 0, inputTokens: 0, outputTokens: 0, modelUsage: {} }, + cost: { + totalUsd: 0, + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 100, })); diff --git a/src/scheduler/__tests__/tool.test.ts b/src/scheduler/__tests__/tool.test.ts index ee3f4d4c..51cf31f8 100644 --- a/src/scheduler/__tests__/tool.test.ts +++ b/src/scheduler/__tests__/tool.test.ts @@ -9,7 +9,14 @@ function createMockRuntime() { handleMessage: mock(async (_channel: string, _conversationId: string, _text: string) => ({ text: "Mock agent response", sessionId: "mock-session", - cost: { totalUsd: 0.01, inputTokens: 100, outputTokens: 50, modelUsage: {} }, + cost: { + totalUsd: 0.01, + inputTokens: 100, + outputTokens: 50, + cacheReadTokens: 0, + cacheCreationTokens: 0, + modelUsage: {}, + }, durationMs: 500, })), setMemoryContextBuilder: mock(() => {}),