alibaba · AlapinEnjoyer · Jun 28, 2026 · Jun 28, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/internal/llm/usage_resolver.go b/internal/llm/usage_resolver.go
@@ -27,24 +27,37 @@ var completionTokensPaths = []string{
 }
 
 var cacheReadTokensPaths = []string{
-	"usage.cache_read_input_tokens",                // Anthropic
-	"cache_read_input_tokens",                      // flat at root
-	"usage.prompt_tokens_details.cache_tokens_hit", // some providers
-	"usage.prompt_tokens_details.cache_tokens",     // some providers
+	"usage.cache_read_input_tokens",                  // Anthropic
+	"cache_read_input_tokens",                        // flat at root
+	"data.usage.cache_read_input_tokens",             // wrapped Anthropic-compatible proxy
+	"usage.prompt_tokens_details.cached_tokens",      // OpenAI-compatible providers
+	"data.usage.prompt_tokens_details.cached_tokens", // wrapped OpenAI-compatible providers
 }
 
 var cacheWriteTokensPaths = []string{
-	"usage.cache_creation_input_tokens", // Anthropic / proxy
-	"cache_creation_input_tokens",       // flat at root
+	"usage.cache_creation_input_tokens",                      // Anthropic / proxy
+	"cache_creation_input_tokens",                            // flat at root
+	"data.usage.cache_creation_input_tokens",                 // wrapped Anthropic-compatible proxy
+	"usage.prompt_tokens_details.cache_creation_tokens",      // ApexRoute / LLM Gateway — proxy normalization of Anthropic cache_creation_input_tokens
+	"data.usage.prompt_tokens_details.cache_creation_tokens", // wrapped proxy normalization
 }
 
+// anthropicCacheReadPathCount is the number of Anthropic-style cache read paths
+// at the start of cacheReadTokensPaths and must be kept in sync with that list's order.
+// OpenAI-style paths follow; their cached tokens are already included in prompt_tokens.
+const anthropicCacheReadPathCount = 3
+
+// anthropicCacheWritePathCount is the number of Anthropic-style cache write paths
+// at the start of cacheWriteTokensPaths and must be kept in sync with that list's order.
+const anthropicCacheWritePathCount = 3
+
 // totalTokensPaths is an ordered list of JSON paths to try when extracting
 // total token count from a response body. Paths are dot-separated keys that
 // navigate through nested map[string]any objects. The first match wins.
 var totalTokensPaths = []string{
 	"usage.total_tokens",      // OpenAI standard
 	"total_tokens",            // flat at root
-	"data.usage.total_tokens", // wrapped in data layer (some proxy APIs)
+	"data.usage.total_tokens", // wrapped in data layer
 }
 
 // resolveUsage parses raw JSON bytes into a map and extracts token usage
@@ -58,8 +71,8 @@ func resolveUsage(raw []byte) *UsageInfo {
 	total, hasAny := probePath(rawBody, totalTokensPaths)
 	prompt, _ := probePath(rawBody, promptTokensPaths)
 	completion, _ := probePath(rawBody, completionTokensPaths)
-	cacheRead, _ := probePath(rawBody, cacheReadTokensPaths)
-	cacheWrite, _ := probePath(rawBody, cacheWriteTokensPaths)
+	cacheRead, cacheReadIdx, _ := probePathIndex(rawBody, cacheReadTokensPaths)
+	cacheWrite, cacheWriteIdx, _ := probePathIndex(rawBody, cacheWriteTokensPaths)
 
 	if !hasAny && prompt == 0 && completion == 0 {
 		return nil
@@ -74,8 +87,17 @@ func resolveUsage(raw []byte) *UsageInfo {
 	}
 
 	// If TotalTokens wasn't explicitly available but we have prompt+completion, compute it.
+	// Anthropic reports cache tokens separately from input_tokens, so include them in the
+	// fallback total. OpenAI prompt_tokens already includes cached_tokens, so only add cache
+	// counts matched by an Anthropic-style path (not one under prompt_tokens_details).
 	if total == 0 && (prompt > 0 || completion > 0) {
-		ui.TotalTokens = prompt + completion + cacheRead + cacheWrite
+		ui.TotalTokens = prompt + completion
+		if cacheReadIdx >= 0 && cacheReadIdx < anthropicCacheReadPathCount {
+			ui.TotalTokens += cacheRead
+		}
+		if cacheWriteIdx >= 0 && cacheWriteIdx < anthropicCacheWritePathCount {
+			ui.TotalTokens += cacheWrite
+		}
 	}
 
 	return ui
@@ -84,7 +106,13 @@ func resolveUsage(raw []byte) *UsageInfo {
 // probePath walks through each candidate path in order, returning the first
 // int64 value found along with true. Returns (0, false) if none match.
 func probePath(root map[string]any, paths []string) (int64, bool) {
-	for _, p := range paths {
+	v, _, ok := probePathIndex(root, paths)
+	return v, ok
+}
+
+// probePathIndex is like probePath but also returns the matched path's index, or -1 if none match.
+func probePathIndex(root map[string]any, paths []string) (int64, int, bool) {
+	for i, p := range paths {
 		parts := strings.Split(p, ".")
 
 		var current any = root
@@ -101,13 +129,13 @@ func probePath(root map[string]any, paths []string) (int64, bool) {
 
 		switch v := current.(type) {
 		case float64:
-			return int64(v), true
+			return int64(v), i, true
 		case int64:
-			return v, true
+			return v, i, true
 		case int:
-			return int64(v), true
+			return int64(v), i, true
 		}
 	next:
 	}
-	return 0, false
+	return 0, -1, false
 }
diff --git a/internal/llm/usage_resolver_test.go b/internal/llm/usage_resolver_test.go
@@ -0,0 +1,123 @@
+package llm
+
+import "testing"
+
+func TestResolveUsageOpenAICompatibleCachedTokens(t *testing.T) {
+	usage := resolveUsage([]byte(`{
+		"usage": {
+			"prompt_tokens": 100,
+			"completion_tokens": 20,
+			"total_tokens": 120,
+			"prompt_tokens_details": {
+				"cached_tokens": 75
+			}
+		}
+	}`))
+
+	if usage == nil {
+		t.Fatal("resolveUsage returned nil")
+	}
+	if usage.CacheReadTokens != 75 {
+		t.Errorf("CacheReadTokens = %d, want 75", usage.CacheReadTokens)
+	}
+	if usage.PromptTokens != 100 {
+		t.Errorf("PromptTokens = %d, want 100", usage.PromptTokens)
+	}
+	if usage.CompletionTokens != 20 {
+		t.Errorf("CompletionTokens = %d, want 20", usage.CompletionTokens)
+	}
+}
+
+func TestResolveUsageWrappedCachedTokens(t *testing.T) {
+	usage := resolveUsage([]byte(`{
+		"data": {
+			"usage": {
+				"prompt_tokens": 100,
+				"completion_tokens": 20,
+				"prompt_tokens_details": {
+					"cached_tokens": 75,
+					"cache_creation_tokens": 10
+				}
+			}
+		}
+	}`))
+
+	if usage == nil {
+		t.Fatal("resolveUsage returned nil")
+	}
+	if usage.CacheReadTokens != 75 {
+		t.Errorf("CacheReadTokens = %d, want 75", usage.CacheReadTokens)
+	}
+	if usage.CacheWriteTokens != 10 {
+		t.Errorf("CacheWriteTokens = %d, want 10", usage.CacheWriteTokens)
+	}
+	if usage.TotalTokens != 120 {
+		t.Errorf("TotalTokens = %d, want 120 (OpenAI cached tokens are included in prompt_tokens)", usage.TotalTokens)
+	}
+}
+
+func TestResolveUsageWrappedAnthropicCompatibleCacheTokens(t *testing.T) {
+	usage := resolveUsage([]byte(`{
+		"data": {
+			"usage": {
+				"prompt_tokens": 100,
+				"completion_tokens": 20,
+				"cache_read_input_tokens": 40,
+				"cache_creation_input_tokens": 15
+			}
+		}
+	}`))
+
+	if usage == nil {
+		t.Fatal("resolveUsage returned nil")
+	}
+	if usage.CacheReadTokens != 40 {
+		t.Errorf("CacheReadTokens = %d, want 40", usage.CacheReadTokens)
+	}
+	if usage.CacheWriteTokens != 15 {
+		t.Errorf("CacheWriteTokens = %d, want 15", usage.CacheWriteTokens)
+	}
+	if usage.TotalTokens != 175 {
+		t.Errorf("TotalTokens = %d, want 175", usage.TotalTokens)
+	}
+}
+
+func TestResolveUsageCacheReadPathPriority(t *testing.T) {
+	usage := resolveUsage([]byte(`{
+		"usage": {
+			"prompt_tokens": 100,
+			"completion_tokens": 20,
+			"cache_read_input_tokens": 40,
+			"prompt_tokens_details": {
+				"cached_tokens": 75
+			}
+		}
+	}`))
+
+	if usage == nil {
+		t.Fatal("resolveUsage returned nil")
+	}
+	if usage.CacheReadTokens != 40 {
+		t.Errorf("CacheReadTokens = %d, want 40 (Anthropic path should win)", usage.CacheReadTokens)
+	}
+}
+
+func TestResolveUsageCacheCreationTokensPriority(t *testing.T) {
+	usage := resolveUsage([]byte(`{
+		"usage": {
+			"prompt_tokens": 100,
+			"completion_tokens": 20,
+			"cache_creation_input_tokens": 30,
+			"prompt_tokens_details": {
+				"cache_creation_tokens": 15
+			}
+		}
+	}`))
+
+	if usage == nil {
+		t.Fatal("resolveUsage returned nil")
+	}
+	if usage.CacheWriteTokens != 30 {
+		t.Errorf("CacheWriteTokens = %d, want 30 (Anthropic top-level path should win over prompt_tokens_details)", usage.CacheWriteTokens)
+	}
+}