Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 43 additions & 15 deletions internal/llm/usage_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,37 @@ var completionTokensPaths = []string{
}

var cacheReadTokensPaths = []string{
"usage.cache_read_input_tokens", // Anthropic
"cache_read_input_tokens", // flat at root
"usage.prompt_tokens_details.cache_tokens_hit", // some providers
"usage.prompt_tokens_details.cache_tokens", // some providers
"usage.cache_read_input_tokens", // Anthropic
"cache_read_input_tokens", // flat at root
"data.usage.cache_read_input_tokens", // wrapped Anthropic-compatible proxy
"usage.prompt_tokens_details.cached_tokens", // OpenAI-compatible providers
"data.usage.prompt_tokens_details.cached_tokens", // wrapped OpenAI-compatible providers
}

var cacheWriteTokensPaths = []string{
"usage.cache_creation_input_tokens", // Anthropic / proxy
"cache_creation_input_tokens", // flat at root
"usage.cache_creation_input_tokens", // Anthropic / proxy
"cache_creation_input_tokens", // flat at root
"data.usage.cache_creation_input_tokens", // wrapped Anthropic-compatible proxy
"usage.prompt_tokens_details.cache_creation_tokens", // ApexRoute / LLM Gateway — proxy normalization of Anthropic cache_creation_input_tokens
"data.usage.prompt_tokens_details.cache_creation_tokens", // wrapped proxy normalization
}

// anthropicCacheReadPathCount is the number of Anthropic-style cache read paths
// at the start of cacheReadTokensPaths. OpenAI-style paths follow; under OpenAI
// semantics cached tokens are already included in prompt_tokens.
//
// resolveUsage compares the index of the matched cache read path against this
// count: only a match at an index below it is added to the fallback total.
// The value therefore depends on the ordering of cacheReadTokensPaths and must
// be updated whenever an Anthropic-style path is added to or removed from the
// front of that list.
const anthropicCacheReadPathCount = 3

// anthropicCacheWritePathCount is the number of Anthropic-style cache write paths
// at the start of cacheWriteTokensPaths.
//
// resolveUsage compares the index of the matched cache write path against this
// count: only a match at an index below it is added to the fallback total.
// The value therefore depends on the ordering of cacheWriteTokensPaths and must
// be updated whenever an Anthropic-style path is added to or removed from the
// front of that list.
const anthropicCacheWritePathCount = 3

// totalTokensPaths is an ordered list of JSON paths to try when extracting
// total token count from a response body. Paths are dot-separated keys that
// navigate through nested map[string]any objects. The first match wins.
var totalTokensPaths = []string{
"usage.total_tokens", // OpenAI standard
"total_tokens", // flat at root
"data.usage.total_tokens", // wrapped in data layer (some proxy APIs)
"data.usage.total_tokens", // wrapped in data layer
}

// resolveUsage parses raw JSON bytes into a map and extracts token usage
Expand All @@ -58,8 +71,8 @@ func resolveUsage(raw []byte) *UsageInfo {
total, hasAny := probePath(rawBody, totalTokensPaths)
prompt, _ := probePath(rawBody, promptTokensPaths)
completion, _ := probePath(rawBody, completionTokensPaths)
cacheRead, _ := probePath(rawBody, cacheReadTokensPaths)
cacheWrite, _ := probePath(rawBody, cacheWriteTokensPaths)
cacheRead, cacheReadIdx, _ := probePathIndex(rawBody, cacheReadTokensPaths)
cacheWrite, cacheWriteIdx, _ := probePathIndex(rawBody, cacheWriteTokensPaths)

if !hasAny && prompt == 0 && completion == 0 {
return nil
Expand All @@ -74,8 +87,17 @@ func resolveUsage(raw []byte) *UsageInfo {
}

// If TotalTokens wasn't explicitly available but we have prompt+completion, compute it.
// Anthropic reports cache tokens separately from input_tokens, so include them in the
// fallback total. OpenAI prompt_tokens already includes cached_tokens, so only add cache
// counts when they came from Anthropic-style top-level fields.
if total == 0 && (prompt > 0 || completion > 0) {
ui.TotalTokens = prompt + completion + cacheRead + cacheWrite
ui.TotalTokens = prompt + completion
if cacheReadIdx >= 0 && cacheReadIdx < anthropicCacheReadPathCount {
ui.TotalTokens += cacheRead
}
if cacheWriteIdx >= 0 && cacheWriteIdx < anthropicCacheWritePathCount {
ui.TotalTokens += cacheWrite
}
}

return ui
Expand All @@ -84,7 +106,13 @@ func resolveUsage(raw []byte) *UsageInfo {
// probePath walks through each candidate path in order, returning the first
// int64 value found along with true. Returns (0, false) if none match.
func probePath(root map[string]any, paths []string) (int64, bool) {
for _, p := range paths {
v, _, ok := probePathIndex(root, paths)
return v, ok
}

// probePathIndex is like probePath but also returns the index of the matched path.
func probePathIndex(root map[string]any, paths []string) (int64, int, bool) {
for i, p := range paths {
parts := strings.Split(p, ".")

var current any = root
Expand All @@ -101,13 +129,13 @@ func probePath(root map[string]any, paths []string) (int64, bool) {

switch v := current.(type) {
case float64:
return int64(v), true
return int64(v), i, true
case int64:
return v, true
return v, i, true
case int:
return int64(v), true
return int64(v), i, true
}
next:
}
return 0, false
return 0, -1, false
}
123 changes: 123 additions & 0 deletions internal/llm/usage_resolver_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package llm

import "testing"

// TestResolveUsageOpenAICompatibleCachedTokens feeds resolveUsage an
// OpenAI-style usage object and checks that
// usage.prompt_tokens_details.cached_tokens is surfaced as CacheReadTokens,
// with the prompt and completion counts reported as given.
func TestResolveUsageOpenAICompatibleCachedTokens(t *testing.T) {
	const body = `{
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"total_tokens": 120,
"prompt_tokens_details": {
"cached_tokens": 75
}
}
}`

	info := resolveUsage([]byte(body))
	if info == nil {
		t.Fatal("resolveUsage returned nil")
	}

	if got := info.CacheReadTokens; got != 75 {
		t.Errorf("CacheReadTokens = %d, want 75", got)
	}
	if got := info.PromptTokens; got != 100 {
		t.Errorf("PromptTokens = %d, want 100", got)
	}
	if got := info.CompletionTokens; got != 20 {
		t.Errorf("CompletionTokens = %d, want 20", got)
	}
}

// TestResolveUsageWrappedCachedTokens covers a response whose usage object is
// nested under a top-level "data" key and carries OpenAI-style
// prompt_tokens_details. No total_tokens field is present, so the expected
// total is prompt + completion only (100 + 20): the cached counts must not be
// added on top.
func TestResolveUsageWrappedCachedTokens(t *testing.T) {
	const body = `{
"data": {
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"prompt_tokens_details": {
"cached_tokens": 75,
"cache_creation_tokens": 10
}
}
}
}`

	info := resolveUsage([]byte(body))
	if info == nil {
		t.Fatal("resolveUsage returned nil")
	}

	if got := info.CacheReadTokens; got != 75 {
		t.Errorf("CacheReadTokens = %d, want 75", got)
	}
	if got := info.CacheWriteTokens; got != 10 {
		t.Errorf("CacheWriteTokens = %d, want 10", got)
	}
	if got := info.TotalTokens; got != 120 {
		t.Errorf("TotalTokens = %d, want 120 (OpenAI cached tokens are included in prompt_tokens)", got)
	}
}

// TestResolveUsageWrappedAnthropicCompatibleCacheTokens covers a response whose
// usage object is nested under a top-level "data" key and uses the
// Anthropic-style cache_read_input_tokens / cache_creation_input_tokens fields.
// No total_tokens field is present, and the expected total includes both cache
// counts: 100 + 20 + 40 + 15 = 175.
func TestResolveUsageWrappedAnthropicCompatibleCacheTokens(t *testing.T) {
	const body = `{
"data": {
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"cache_read_input_tokens": 40,
"cache_creation_input_tokens": 15
}
}
}`

	info := resolveUsage([]byte(body))
	if info == nil {
		t.Fatal("resolveUsage returned nil")
	}

	if got := info.CacheReadTokens; got != 40 {
		t.Errorf("CacheReadTokens = %d, want 40", got)
	}
	if got := info.CacheWriteTokens; got != 15 {
		t.Errorf("CacheWriteTokens = %d, want 15", got)
	}
	if got := info.TotalTokens; got != 175 {
		t.Errorf("TotalTokens = %d, want 175", got)
	}
}

// TestResolveUsageCacheReadPathPriority supplies both an Anthropic-style
// usage.cache_read_input_tokens (40) and an OpenAI-style
// usage.prompt_tokens_details.cached_tokens (75) in the same payload and
// expects the Anthropic value to be the one reported as CacheReadTokens.
func TestResolveUsageCacheReadPathPriority(t *testing.T) {
	const body = `{
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"cache_read_input_tokens": 40,
"prompt_tokens_details": {
"cached_tokens": 75
}
}
}`

	info := resolveUsage([]byte(body))
	if info == nil {
		t.Fatal("resolveUsage returned nil")
	}

	if got := info.CacheReadTokens; got != 40 {
		t.Errorf("CacheReadTokens = %d, want 40 (Anthropic path should win)", got)
	}
}

// TestResolveUsageCacheCreationTokensPriority supplies both a top-level
// usage.cache_creation_input_tokens (30) and a nested
// usage.prompt_tokens_details.cache_creation_tokens (15) in the same payload
// and expects the top-level value to be the one reported as CacheWriteTokens.
func TestResolveUsageCacheCreationTokensPriority(t *testing.T) {
	const body = `{
"usage": {
"prompt_tokens": 100,
"completion_tokens": 20,
"cache_creation_input_tokens": 30,
"prompt_tokens_details": {
"cache_creation_tokens": 15
}
}
}`

	info := resolveUsage([]byte(body))
	if info == nil {
		t.Fatal("resolveUsage returned nil")
	}

	if got := info.CacheWriteTokens; got != 30 {
		t.Errorf("CacheWriteTokens = %d, want 30 (Anthropic top-level path should win over prompt_tokens_details)", got)
	}
}
Loading