Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
e7af8e0
feat(bridge): surface upstream API token usage in frontend and /usage…
GoldenFish123321 May 29, 2026
733c7fa
fix(bridge): add missing actualCostUsd to BridgeUsageState interface
GoldenFish123321 May 29, 2026
e292f09
fix(bridge): use API input_tokens as contextTokens for progress bar
GoldenFish123321 May 29, 2026
092a39b
fix(bridge): use apiUsage.inputTokens for progress bar display
GoldenFish123321 May 29, 2026
182188c
fix(bridge): use API input_tokens for Current context in /usage
GoldenFish123321 May 29, 2026
8de6b93
fix(bridge): persist API token values to survive page reload
GoldenFish123321 May 29, 2026
87c4427
fix(bridge): add type annotation and missing fields in DB fallback
GoldenFish123321 May 29, 2026
10d344c
refactor(bridge): deduplicate /usage message building
GoldenFish123321 May 29, 2026
d118bd0
fix(bridge): use \x0a for newline to avoid double-escape
GoldenFish123321 May 29, 2026
b5f5107
fix(bridge): skip undefined values in updateSession to avoid SQLite b…
GoldenFish123321 May 29, 2026
acd91ef
fix(bridge): use promptTokens for context display when input is cached
GoldenFish123321 May 29, 2026
9d52af4
fix(bridge): handle cache-hit edge cases in context display
GoldenFish123321 May 29, 2026
dabf944
fix(bridge): remove !input_tokens guard, use ?? for cost, cache getSe…
GoldenFish123321 May 29, 2026
7bc57e7
fix(bridge): reset bridgeUsage on each run to avoid stale data leak
GoldenFish123321 May 29, 2026
a3774ad
fix(bridge): handle apiUsage in run.failed handlers, deduplicate, fix…
GoldenFish123321 May 29, 2026
810dc28
fix(bridge): change applyApiUsage evt param type from Record to any
GoldenFish123321 May 29, 2026
2f40d77
fix(bridge): preserve apiUsage across loadSessions refresh
GoldenFish123321 May 30, 2026
9713b73
fix(bridge): include bridgeUsage in resume payload, rebuild apiUsage …
GoldenFish123321 May 30, 2026
e66049a
Merge branch 'EKKOLearnAI:main' into feature/bridge-real-token-usage
GoldenFish123321 May 30, 2026
ce1df17
fix(bridge): use last_prompt_tokens for progress bar to avoid double-…
GoldenFish123321 May 30, 2026
e9c2271
Merge branch 'feature/bridge-real-token-usage' of https://github.com/…
GoldenFish123321 May 30, 2026
95f18ed
fix(bridge): use lastPromptTokens for Current context in /usage
GoldenFish123321 May 30, 2026
16e7130
fix(bridge): use != null check instead of || for lastPromptTokens fal…
GoldenFish123321 May 30, 2026
7fc1a9b
fix(bridge): align /usage output format with CLI
GoldenFish123321 May 30, 2026
ddaaee9
fix(bridge): align /usage output exactly with CLI format
GoldenFish123321 May 30, 2026
2027780
fix(bridge): guard /usage session.command handler against DB fallback…
GoldenFish123321 May 30, 2026
1ee7845
fix(bridge): match /usage output to TUI CLI format (cli.py:10206-10234)
GoldenFish123321 May 30, 2026
cb23c49
fix(bridge): persist lastPromptTokens to DB for refresh survival
GoldenFish123321 May 30, 2026
145525c
fix(bridge): add missing last_prompt_tokens to mapSessionRow and crea…
GoldenFish123321 May 30, 2026
dfaf7dd
fix(bridge): guard resume apiUsage rebuild with lastPromptTokens check
GoldenFish123321 May 31, 2026
418ce81
fix(bridge): rebuild apiUsage from bridgeUsage on WebSocket reconnect…
GoldenFish123321 May 31, 2026
8e8caab
fix(bridge): add bridgeUsage to ResumeSessionPayload interface
GoldenFish123321 May 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/client/src/api/hermes/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ export interface ResumeSessionPayload {
inputTokens?: number
outputTokens?: number
contextTokens?: number
bridgeUsage?: any
queueLength?: number
queueMessages?: RunEvent['queued_messages']
}
Expand Down
9 changes: 9 additions & 0 deletions packages/client/src/components/hermes/chat/ChatInput.vue
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,15 @@ watch(
)

const totalTokens = computed(() => {
// When upstream API data is available, prefer last_prompt_tokens
// (the single-call prompt cost) over the cumulative session accumulator,
// which double-counts the shared context across multiple tool calls.
const api = chatStore.activeSession?.apiUsage
if (api) {
if (api.lastPromptTokens && api.lastPromptTokens > 0) return api.lastPromptTokens
const promptTokens = (api.inputTokens || 0) + (api.cacheReadTokens || 0) + (api.cacheWriteTokens || 0)
if (promptTokens > 0) return promptTokens
}
const context = chatStore.activeSession?.contextTokens
if (typeof context === 'number' && Number.isFinite(context) && context > 0) return context
const input = chatStore.activeSession?.inputTokens ?? 0
Expand Down
114 changes: 99 additions & 15 deletions packages/client/src/stores/hermes/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,32 @@ export interface Session {
endedAt?: number | null
lastActiveAt?: number
workspace?: string | null
apiUsage?: {
inputTokens: number
outputTokens: number
cacheReadTokens?: number
cacheWriteTokens?: number
reasoningTokens?: number
totalTokens?: number
lastPromptTokens?: number
model?: string
costStatus?: string
actualCostUsd?: number
estimatedCostUsd?: number
}
}

/**
 * Copy token counters from a run.* event onto a session.
 *
 * If the event carries an `apiUsage` object, its input/output counts win and
 * the object itself is stored on `target.apiUsage`. Otherwise the event's own
 * `inputTokens` / `outputTokens` are used, but only when `inputTokens` is
 * neither null nor undefined. With neither source present the session is left
 * untouched.
 *
 * @param target Session whose counters are updated in place.
 * @param evt    Event payload; only `apiUsage`, `inputTokens` and
 *               `outputTokens` are read.
 */
function applyApiUsage(target: Session, evt: any): void {
  const { apiUsage: upstream } = evt

  if (!upstream) {
    // No upstream data: fall back to the counters carried directly on the event.
    if (evt.inputTokens == null) return
    target.inputTokens = evt.inputTokens
    target.outputTokens = evt.outputTokens
    return
  }

  target.inputTokens = upstream.inputTokens
  target.outputTokens = upstream.outputTokens
  // Keep the same object reference so the full upstream breakdown stays available.
  target.apiUsage = upstream
}

interface CompressionState {
Expand Down Expand Up @@ -479,11 +505,13 @@ export const useChatStore = defineStore('chat', () => {
const runtimeByIdBefore = new Map(sessions.value.map(s => [s.id, {
messages: s.messages,
contextTokens: s.contextTokens,
apiUsage: s.apiUsage,
}]))
for (const s of fresh) {
const prev = runtimeByIdBefore.get(s.id)
if (prev?.messages?.length) s.messages = prev.messages
if (prev?.contextTokens != null) s.contextTokens = prev.contextTokens
if (prev?.apiUsage) s.apiUsage = prev.apiUsage
}
sessions.value = fresh

Expand Down Expand Up @@ -630,6 +658,25 @@ export const useChatStore = defineStore('chat', () => {
if (data.inputTokens != null) target.inputTokens = data.inputTokens
if (data.outputTokens != null) target.outputTokens = data.outputTokens
if ((data as any).contextTokens != null) target.contextTokens = (data as any).contextTokens
// Rebuild apiUsage from bridge data sent by server on resume.
// Only when lastPromptTokens is available — stale bridgeUsage
// without it carries cumulative session_* values.
const bu = (data as any).bridgeUsage
if (bu && bu.lastPromptTokens != null) {
target.apiUsage = {
inputTokens: bu.inputTokens,
outputTokens: bu.outputTokens,
cacheReadTokens: bu.cacheReadTokens,
cacheWriteTokens: bu.cacheWriteTokens,
reasoningTokens: bu.reasoningTokens,
totalTokens: bu.totalTokens,
lastPromptTokens: bu.lastPromptTokens,
model: bu.model,
costStatus: bu.costStatus,
actualCostUsd: bu.actualCostUsd,
estimatedCostUsd: bu.estimatedCostUsd,
}
}
if (data.messages?.length) {
target.messages = mapHermesMessages(data.messages as any[])
target.loadedMessageCount = data.messageLoadedCount ?? data.messages.length
Expand Down Expand Up @@ -980,9 +1027,33 @@ export const useChatStore = defineStore('chat', () => {
}

if (action === 'usage' && target) {
target.inputTokens = (evt as any).inputTokens
target.outputTokens = (evt as any).outputTokens
if ((evt as any).contextTokens != null) target.contextTokens = (evt as any).contextTokens
// Only apply values from live bridge data (signaled by lastPromptTokens).
// DB fallback carries cumulative/inflated session_* counters — not safe
// for the progress bar or context display.
const isLive = (evt as any).lastPromptTokens != null
if (isLive) {
target.inputTokens = (evt as any).inputTokens
target.outputTokens = (evt as any).outputTokens
if ((evt as any).contextTokens != null) target.contextTokens = (evt as any).contextTokens
}
// Only build apiUsage from live bridge data (signaled by lastPromptTokens).
// DB fallback lacks lastPromptTokens and carries cumulative/inflated values
// from the previous turn's session_* counters — not safe for progress bar.
if (isLive) {
target.apiUsage = {
inputTokens: (evt as any).inputTokens,
outputTokens: (evt as any).outputTokens,
cacheReadTokens: (evt as any).cacheReadTokens,
cacheWriteTokens: (evt as any).cacheWriteTokens,
reasoningTokens: (evt as any).reasoningTokens,
totalTokens: (evt as any).totalTokens,
lastPromptTokens: (evt as any).lastPromptTokens,
model: (evt as any).model,
costStatus: (evt as any).costStatus,
actualCostUsd: (evt as any).actualCostUsd,
estimatedCostUsd: (evt as any).estimatedCostUsd,
}
}
}

if (action === 'destroy') {
Expand Down Expand Up @@ -1481,6 +1552,23 @@ export const useChatStore = defineStore('chat', () => {
if (data.inputTokens != null) target.inputTokens = data.inputTokens
if (data.outputTokens != null) target.outputTokens = data.outputTokens
if (data.contextTokens != null) target.contextTokens = data.contextTokens
// Rebuild apiUsage from bridge data on reconnect resume
const bu = (data as any).bridgeUsage
if (bu && bu.lastPromptTokens != null) {
target.apiUsage = {
inputTokens: bu.inputTokens,
outputTokens: bu.outputTokens,
cacheReadTokens: bu.cacheReadTokens,
cacheWriteTokens: bu.cacheWriteTokens,
reasoningTokens: bu.reasoningTokens,
totalTokens: bu.totalTokens,
lastPromptTokens: bu.lastPromptTokens,
model: bu.model,
costStatus: bu.costStatus,
actualCostUsd: bu.actualCostUsd,
estimatedCostUsd: bu.estimatedCostUsd,
}
}

if (Array.isArray(data.messages)) {
target.messages = mapHermesMessages(data.messages as any[])
Expand Down Expand Up @@ -1834,11 +1922,10 @@ export const useChatStore = defineStore('chat', () => {
updateMessage(sid, lastMsg.id, { isStreaming: false })
}
// Usage carried on the event: upstream API usage when present, otherwise server-computed (local countTokens, snapshot-aware)
if ((evt as any).inputTokens != null) {
if ((evt as any).inputTokens != null || (evt as any).apiUsage) {
const target = sessions.value.find(s => s.id === sid)
if (target) {
target.inputTokens = (evt as any).inputTokens
target.outputTokens = (evt as any).outputTokens
applyApiUsage(target, evt)
if ((evt as any).contextTokens != null) target.contextTokens = (evt as any).contextTokens
}
}
Expand Down Expand Up @@ -1930,11 +2017,10 @@ export const useChatStore = defineStore('chat', () => {

case 'run.failed': {
clearAgentEventMessages(sid)
if ((evt as any).inputTokens != null) {
if ((evt as any).inputTokens != null || (evt as any).apiUsage) {
const target = sessions.value.find(s => s.id === sid)
if (target) {
target.inputTokens = (evt as any).inputTokens
target.outputTokens = (evt as any).outputTokens
applyApiUsage(target, evt)
if ((evt as any).contextTokens != null) target.contextTokens = (evt as any).contextTokens
}
}
Expand Down Expand Up @@ -2313,11 +2399,10 @@ export const useChatStore = defineStore('chat', () => {
updateMessage(sid, lastMsg.id, { isStreaming: false })
}
// Usage carried on the event: upstream API usage when present, otherwise server-computed (local countTokens, snapshot-aware)
if ((evt as any).inputTokens != null) {
if ((evt as any).inputTokens != null || (evt as any).apiUsage) {
const target = sessions.value.find(s => s.id === sid)
if (target) {
target.inputTokens = (evt as any).inputTokens
target.outputTokens = (evt as any).outputTokens
applyApiUsage(target, evt)
if ((evt as any).contextTokens != null) target.contextTokens = (evt as any).contextTokens
}
}
Expand Down Expand Up @@ -2389,11 +2474,10 @@ export const useChatStore = defineStore('chat', () => {

case 'run.failed': {
clearAgentEventMessages(sid)
if ((evt as any).inputTokens != null) {
if ((evt as any).inputTokens != null || (evt as any).apiUsage) {
const target = sessions.value.find(s => s.id === sid)
if (target) {
target.inputTokens = (evt as any).inputTokens
target.outputTokens = (evt as any).outputTokens
applyApiUsage(target, evt)
if ((evt as any).contextTokens != null) target.contextTokens = (evt as any).contextTokens
}
}
Expand Down
1 change: 1 addition & 0 deletions packages/server/src/db/hermes/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export const SESSIONS_SCHEMA: Record<string, string> = {
estimated_cost_usd: 'REAL NOT NULL DEFAULT 0',
actual_cost_usd: 'REAL',
cost_status: 'TEXT NOT NULL DEFAULT \'\'',
last_prompt_tokens: 'INTEGER NOT NULL DEFAULT 0',
preview: 'TEXT NOT NULL DEFAULT \'\'',
last_active: 'INTEGER NOT NULL',
workspace: 'TEXT',
Expand Down
5 changes: 4 additions & 1 deletion packages/server/src/db/hermes/session-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export interface HermesSessionRow {
estimated_cost_usd: number
actual_cost_usd: number | null
cost_status: string
last_prompt_tokens: number
preview: string
last_active: number
workspace: string | null
Expand Down Expand Up @@ -103,6 +104,7 @@ function mapSessionRow(row: Record<string, unknown>): HermesSessionRow {
estimated_cost_usd: Number(row.estimated_cost_usd || 0),
actual_cost_usd: row.actual_cost_usd != null ? Number(row.actual_cost_usd) : null,
cost_status: String(row.cost_status || ''),
last_prompt_tokens: Number(row.last_prompt_tokens || 0),
preview: String(row.preview || ''),
last_active: Number(row.last_active || 0),
workspace: row.workspace != null ? String(row.workspace) : null,
Expand Down Expand Up @@ -148,7 +150,7 @@ export function createSession(data: {
message_count: 0, tool_call_count: 0,
input_tokens: 0, output_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0, reasoning_tokens: 0,
billing_provider: null, estimated_cost_usd: 0, actual_cost_usd: null,
cost_status: '', preview: '', last_active: now, workspace: data.workspace || null,
cost_status: '', last_prompt_tokens: 0, preview: '', last_active: now, workspace: data.workspace || null,
}
}
const db = getDb()!
Expand Down Expand Up @@ -177,6 +179,7 @@ export function updateSession(id: string, data: Partial<Omit<HermesSessionRow, '
if (key === 'id' || key === 'profile') continue
// Skip last_active and ended_at - handle them separately below
if (key === 'last_active' || key === 'ended_at') continue
if (val === undefined) continue
fields.push(`"${key}" = ?`)
values.push(val)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import {
recordBridgeToolCompleted,
} from './bridge-message'
import { summarizeToolArguments } from './response-utils'
import type { ContentBlock, QueuedRun, SessionState } from './types'
import type { ContentBlock, QueuedRun, SessionState, BridgeUsageState } from './types'
import type { ChatMessage } from '../../../lib/context-compressor'
import { resolveBridgeRunModelConfig, type RunModelGroup } from './model-config'
import { filterBridgeToolCallMarkupDelta, flushPendingToolCallMarkup } from './bridge-delta'
Expand Down Expand Up @@ -106,6 +106,30 @@ function finiteToken(value: unknown): number | undefined {
: undefined
}

/**
 * Build a BridgeUsageState from the snake_case usage fields of a bridge result.
 *
 * Returns `undefined` when `result` is not a plain (non-array) object, or when
 * `finiteToken` yields no value for `input_tokens`. Other token counters
 * default to 0 when `finiteToken` yields nothing; `lastPromptTokens`, the cost
 * figures and the string fields stay `undefined` when absent or of the wrong
 * type.
 *
 * @param result Raw bridge result of unknown shape.
 * @returns The extracted usage, or `undefined` if no usable input token count exists.
 */
function extractBridgeUsage(result: unknown): BridgeUsageState | undefined {
  if (typeof result !== 'object' || result === null || Array.isArray(result)) return undefined
  const raw = result as Record<string, unknown>

  const inputTokens = finiteToken(raw.input_tokens)
  if (inputTokens == null) return undefined

  // Small typed readers over the raw record.
  const countOf = (key: string): number => finiteToken(raw[key]) ?? 0
  const numberOf = (key: string): number | undefined => {
    const value = raw[key]
    return typeof value === 'number' ? value : undefined
  }
  const textOf = (key: string): string | undefined => {
    const value = raw[key]
    return typeof value === 'string' ? value : undefined
  }

  return {
    inputTokens,
    outputTokens: countOf('output_tokens'),
    cacheReadTokens: countOf('cache_read_tokens'),
    cacheWriteTokens: countOf('cache_write_tokens'),
    reasoningTokens: countOf('reasoning_tokens'),
    promptTokens: countOf('prompt_tokens'),
    completionTokens: countOf('completion_tokens'),
    totalTokens: countOf('total_tokens'),
    apiCalls: countOf('api_calls'),
    // Deliberately no 0 default: callers elsewhere in this change use
    // `lastPromptTokens != null` to tell live bridge data from fallbacks.
    lastPromptTokens: finiteToken(raw.last_prompt_tokens),
    model: textOf('model'),
    estimatedCostUsd: numberOf('estimated_cost_usd'),
    actualCostUsd: numberOf('actual_cost_usd'),
    costStatus: textOf('cost_status'),
    costSource: textOf('cost_source'),
  }
}

function cacheBridgeContext(state: SessionState, data: Record<string, unknown> | AgentBridgeContextEstimate) {
const fixedContextTokens = finiteToken(data.fixed_context_tokens)
if (fixedContextTokens == null) return
Expand Down Expand Up @@ -248,6 +272,7 @@ export async function handleBridgeRun(
state.bridgePendingAssistantContent = ''
state.bridgePendingReasoningContent = ''
state.bridgePendingToolCallMarkup = ''
state.bridgeUsage = undefined
state.bridgeToolCounter = 0
state.bridgePendingTools = []
state.responseRun = undefined
Expand Down Expand Up @@ -453,6 +478,7 @@ export async function handleBridgeRun(
inputTokens: errUsage.inputTokens,
outputTokens: errUsage.outputTokens,
contextTokens: errContextTokens,
apiUsage: state.bridgeUsage,
queue_remaining: queueLen,
})
if (queueLen > 0) dequeueNextQueuedRun(socket, session_id)
Expand Down Expand Up @@ -904,6 +930,25 @@ async function applyBridgeChunkAsync(
outputTokens: usage.outputTokens,
profile: state.profile,
})
// Extract upstream API token usage from bridge result (hermes agent's
// conversation_loop.py already includes input_tokens, output_tokens,
// cache_*, reasoning_*, total_tokens, api_calls, and cost fields).
state.bridgeUsage = extractBridgeUsage(chunk.result)
// Persist upstream API token values in the Web UI session store so
// they survive page reloads and the frontend shows correct numbers.
if (state.bridgeUsage) {
updateSession(sessionId, {
input_tokens: state.bridgeUsage.inputTokens,
output_tokens: state.bridgeUsage.outputTokens,
cache_read_tokens: state.bridgeUsage.cacheReadTokens,
cache_write_tokens: state.bridgeUsage.cacheWriteTokens,
reasoning_tokens: state.bridgeUsage.reasoningTokens,
estimated_cost_usd: state.bridgeUsage.estimatedCostUsd,
actual_cost_usd: state.bridgeUsage.actualCostUsd,
cost_status: state.bridgeUsage.costStatus,
last_prompt_tokens: state.bridgeUsage.lastPromptTokens,
})
}
const terminalError = bridgeTerminalError(chunk)
const hadQueuedRunBeforeGoalEvaluation = state.queue.length > 0
state.isWorking = hadQueuedRunBeforeGoalEvaluation
Expand All @@ -923,6 +968,7 @@ async function applyBridgeChunkAsync(
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
contextTokens,
apiUsage: state.bridgeUsage,
queue_remaining: state.queue.length,
}
emit(eventName, payload)
Expand Down
1 change: 1 addition & 0 deletions packages/server/src/services/hermes/run-chat/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ export class ChatRunSocket {
inputTokens: state.inputTokens,
outputTokens: state.outputTokens,
contextTokens: state.contextTokens,
bridgeUsage: state.bridgeUsage,
queueLength: state.queue?.length || 0,
queueMessages: this.serializeQueuedMessages(state.queue || []),
})
Expand Down
Loading
Loading