diff --git a/internal/llm/usage_resolver.go b/internal/llm/usage_resolver.go index 13454207..f6411e10 100644 --- a/internal/llm/usage_resolver.go +++ b/internal/llm/usage_resolver.go @@ -27,24 +27,37 @@ var completionTokensPaths = []string{ } var cacheReadTokensPaths = []string{ - "usage.cache_read_input_tokens", // Anthropic - "cache_read_input_tokens", // flat at root - "usage.prompt_tokens_details.cache_tokens_hit", // some providers - "usage.prompt_tokens_details.cache_tokens", // some providers + "usage.cache_read_input_tokens", // Anthropic + "cache_read_input_tokens", // flat at root + "data.usage.cache_read_input_tokens", // wrapped Anthropic-compatible proxy + "usage.prompt_tokens_details.cached_tokens", // OpenAI-compatible providers + "data.usage.prompt_tokens_details.cached_tokens", // wrapped OpenAI-compatible providers } var cacheWriteTokensPaths = []string{ - "usage.cache_creation_input_tokens", // Anthropic / proxy - "cache_creation_input_tokens", // flat at root + "usage.cache_creation_input_tokens", // Anthropic / proxy + "cache_creation_input_tokens", // flat at root + "data.usage.cache_creation_input_tokens", // wrapped Anthropic-compatible proxy + "usage.prompt_tokens_details.cache_creation_tokens", // ApexRoute / LLM Gateway — proxy normalization of Anthropic cache_creation_input_tokens + "data.usage.prompt_tokens_details.cache_creation_tokens", // wrapped proxy normalization } +// anthropicCacheReadPathCount is the number of Anthropic-style cache read paths +// at the start of cacheReadTokensPaths. OpenAI-style paths follow; under OpenAI +// semantics cached tokens are already included in prompt_tokens. +const anthropicCacheReadPathCount = 3 + +// anthropicCacheWritePathCount is the number of Anthropic-style cache write paths +// at the start of cacheWriteTokensPaths. +const anthropicCacheWritePathCount = 3 + // totalTokensPaths is an ordered list of JSON paths to try when extracting // total token count from a response body. Paths are dot-separated keys that // navigate through nested map[string]any objects. The first match wins. var totalTokensPaths = []string{ "usage.total_tokens", // OpenAI standard "total_tokens", // flat at root - "data.usage.total_tokens", // wrapped in data layer (some proxy APIs) + "data.usage.total_tokens", // wrapped in data layer } // resolveUsage parses raw JSON bytes into a map and extracts token usage @@ -58,8 +71,8 @@ func resolveUsage(raw []byte) *UsageInfo { total, hasAny := probePath(rawBody, totalTokensPaths) prompt, _ := probePath(rawBody, promptTokensPaths) completion, _ := probePath(rawBody, completionTokensPaths) - cacheRead, _ := probePath(rawBody, cacheReadTokensPaths) - cacheWrite, _ := probePath(rawBody, cacheWriteTokensPaths) + cacheRead, cacheReadIdx, _ := probePathIndex(rawBody, cacheReadTokensPaths) + cacheWrite, cacheWriteIdx, _ := probePathIndex(rawBody, cacheWriteTokensPaths) if !hasAny && prompt == 0 && completion == 0 { return nil @@ -74,8 +87,17 @@ func resolveUsage(raw []byte) *UsageInfo { } // If TotalTokens wasn't explicitly available but we have prompt+completion, compute it. + // Anthropic reports cache tokens separately from input_tokens, so include them in the + // fallback total. OpenAI prompt_tokens already includes cached_tokens, so only add cache + // counts when they came from Anthropic-style top-level fields. if total == 0 && (prompt > 0 || completion > 0) { - ui.TotalTokens = prompt + completion + cacheRead + cacheWrite + ui.TotalTokens = prompt + completion + if cacheReadIdx >= 0 && cacheReadIdx < anthropicCacheReadPathCount { + ui.TotalTokens += cacheRead + } + if cacheWriteIdx >= 0 && cacheWriteIdx < anthropicCacheWritePathCount { + ui.TotalTokens += cacheWrite + } } return ui @@ -84,7 +106,13 @@ func resolveUsage(raw []byte) *UsageInfo { // probePath walks through each candidate path in order, returning the first // int64 value found along with true. Returns (0, false) if none match. func probePath(root map[string]any, paths []string) (int64, bool) { - for _, p := range paths { + v, _, ok := probePathIndex(root, paths) + return v, ok +} + +// probePathIndex is like probePath but also returns the index of the matched path. +func probePathIndex(root map[string]any, paths []string) (int64, int, bool) { + for i, p := range paths { parts := strings.Split(p, ".") var current any = root @@ -101,13 +129,13 @@ func probePath(root map[string]any, paths []string) (int64, bool) { switch v := current.(type) { case float64: - return int64(v), true + return int64(v), i, true case int64: - return v, true + return v, i, true case int: - return int64(v), true + return int64(v), i, true } next: } - return 0, false + return 0, -1, false } diff --git a/internal/llm/usage_resolver_test.go b/internal/llm/usage_resolver_test.go new file mode 100644 index 00000000..0c9397a0 --- /dev/null +++ b/internal/llm/usage_resolver_test.go @@ -0,0 +1,123 @@ +package llm + +import "testing" + +func TestResolveUsageOpenAICompatibleCachedTokens(t *testing.T) { + usage := resolveUsage([]byte(`{ + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "total_tokens": 120, + "prompt_tokens_details": { + "cached_tokens": 75 + } + } + }`)) + + if usage == nil { + t.Fatal("resolveUsage returned nil") + } + if usage.CacheReadTokens != 75 { + t.Errorf("CacheReadTokens = %d, want 75", usage.CacheReadTokens) + } + if usage.PromptTokens != 100 { + t.Errorf("PromptTokens = %d, want 100", usage.PromptTokens) + } + if usage.CompletionTokens != 20 { + t.Errorf("CompletionTokens = %d, want 20", usage.CompletionTokens) + } +} + +func TestResolveUsageWrappedCachedTokens(t *testing.T) { + usage := resolveUsage([]byte(`{ + "data": { + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "prompt_tokens_details": { + "cached_tokens": 75, + "cache_creation_tokens": 10 + } + } + } + }`)) + + if usage == nil { + t.Fatal("resolveUsage returned nil") + } + if usage.CacheReadTokens != 75 { + t.Errorf("CacheReadTokens = %d, want 75", usage.CacheReadTokens) + } + if usage.CacheWriteTokens != 10 { + t.Errorf("CacheWriteTokens = %d, want 10", usage.CacheWriteTokens) + } + if usage.TotalTokens != 120 { + t.Errorf("TotalTokens = %d, want 120 (OpenAI cached tokens are included in prompt_tokens)", usage.TotalTokens) + } +} + +func TestResolveUsageWrappedAnthropicCompatibleCacheTokens(t *testing.T) { + usage := resolveUsage([]byte(`{ + "data": { + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "cache_read_input_tokens": 40, + "cache_creation_input_tokens": 15 + } + } + }`)) + + if usage == nil { + t.Fatal("resolveUsage returned nil") + } + if usage.CacheReadTokens != 40 { + t.Errorf("CacheReadTokens = %d, want 40", usage.CacheReadTokens) + } + if usage.CacheWriteTokens != 15 { + t.Errorf("CacheWriteTokens = %d, want 15", usage.CacheWriteTokens) + } + if usage.TotalTokens != 175 { + t.Errorf("TotalTokens = %d, want 175", usage.TotalTokens) + } +} + +func TestResolveUsageCacheReadPathPriority(t *testing.T) { + usage := resolveUsage([]byte(`{ + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "cache_read_input_tokens": 40, + "prompt_tokens_details": { + "cached_tokens": 75 + } + } + }`)) + + if usage == nil { + t.Fatal("resolveUsage returned nil") + } + if usage.CacheReadTokens != 40 { + t.Errorf("CacheReadTokens = %d, want 40 (Anthropic path should win)", usage.CacheReadTokens) + } +} + +func TestResolveUsageCacheCreationTokensPriority(t *testing.T) { + usage := resolveUsage([]byte(`{ + "usage": { + "prompt_tokens": 100, + "completion_tokens": 20, + "cache_creation_input_tokens": 30, + "prompt_tokens_details": { + "cache_creation_tokens": 15 + } + } + }`)) + + if usage == nil { + t.Fatal("resolveUsage returned nil") + } + if usage.CacheWriteTokens != 30 { + t.Errorf("CacheWriteTokens = %d, want 30 (Anthropic top-level path should win over prompt_tokens_details)", usage.CacheWriteTokens) + } +}