diff --git a/config.example.yaml b/config.example.yaml
index 5dd872eae8..92a347c9b9 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -179,6 +179,7 @@ nonstream-keepalive-interval: 0
 # cache-user-id: true # optional: default is false; set true to reuse cached user_id per API key instead of generating a random one each request
 # experimental-cch-signing: false # optional: default is false; when true, sign the final /v1/messages body using the current Claude Code cch algorithm
 # # keep this disabled unless you explicitly need the behavior, so upstream seed changes fall back to legacy proxy behavior
+# remove-tools-cache-control: false # optional: default is false; when true, remove cache_control from every tools entry for upstreams that don't support it (e.g., AWS Bedrock proxies)
 
 # Default headers for Claude API requests. Update when Claude Code releases new versions.
 # In legacy mode, user-agent/package-version/runtime-version/timeout are used as fallbacks
diff --git a/internal/config/config.go b/internal/config/config.go
index 15847f57e0..37f37311e8 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -372,6 +372,10 @@ type ClaudeKey struct {
 	// Claude /v1/messages requests. It is disabled by default so upstream seed
 	// changes do not alter the proxy's legacy behavior.
 	ExperimentalCCHSigning bool `yaml:"experimental-cch-signing,omitempty" json:"experimental-cch-signing,omitempty"`
+
+	// RemoveToolsCacheControl removes cache_control from tools for this provider.
+	// Use this for upstreams that don't support Anthropic prompt caching on tools (e.g., AWS Bedrock proxies).
+	RemoveToolsCacheControl bool `yaml:"remove-tools-cache-control,omitempty" json:"remove-tools-cache-control,omitempty"`
 }
 
 func (k ClaudeKey) GetAPIKey() string { return k.APIKey }
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index fced14d817..a47195078c 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -139,7 +139,13 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	body = normalizeClaudeTemperatureForThinking(body)
 
 	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
-	if countCacheControls(body) == 0 {
+	if removeToolsCacheControl(auth) {
+		body = stripToolsCacheControl(body)
+		if countCacheControls(body) == 0 {
+			body = injectSystemCacheControl(body)
+			body = injectMessagesCacheControl(body)
+		}
+	} else if countCacheControls(body) == 0 {
 		body = ensureCacheControl(body)
 	}
 
@@ -310,7 +316,13 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body = normalizeClaudeTemperatureForThinking(body)
 
 	// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
-	if countCacheControls(body) == 0 {
+	if removeToolsCacheControl(auth) {
+		body = stripToolsCacheControl(body)
+		if countCacheControls(body) == 0 {
+			body = injectSystemCacheControl(body)
+			body = injectMessagesCacheControl(body)
+		}
+	} else if countCacheControls(body) == 0 {
 		body = ensureCacheControl(body)
 	}
 
@@ -488,6 +500,12 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 		body = checkSystemInstructions(body)
 	}
 
+	// Apply remove-tools-cache-control stripping for count_tokens too.
+	// Only strip here: nothing is injected, so keys without the option send the same body as before.
+	if removeToolsCacheControl(auth) {
+		body = stripToolsCacheControl(body)
+	}
+
 	// Keep count_tokens requests compatible with Anthropic cache-control constraints too.
 	body = enforceCacheControlLimit(body, 4)
 	body = normalizeCacheControlTTL(body)
@@ -1849,6 +1867,42 @@ func injectToolsCacheControl(payload []byte) []byte {
 	return result
 }
 
+// removeToolsCacheControl reports whether this auth has remove-tools-cache-control enabled.
+// It reads auth.Attributes["remove_tools_cache_control"], ignoring surrounding whitespace
+// and letter case; a nil auth or an absent attribute yields false.
+func removeToolsCacheControl(auth *cliproxyauth.Auth) bool {
+	return auth != nil && auth.Attributes != nil &&
+		strings.EqualFold(strings.TrimSpace(auth.Attributes["remove_tools_cache_control"]), "true")
+}
+
+// stripToolsCacheControl removes cache_control from all tools in the tools array.
+// A payload without a tools array is returned unchanged. A failed delete is logged
+// and skipped so the remaining tools are still processed.
+func stripToolsCacheControl(payload []byte) []byte {
+	tools := gjson.GetBytes(payload, "tools")
+	if !tools.Exists() || !tools.IsArray() {
+		return payload
+	}
+
+	// tools is read once up front; deleting a key inside one element does not move
+	// any element, so idx keeps addressing the same tool in the updated payload.
+	idx := 0
+	tools.ForEach(func(_, tool gjson.Result) bool {
+		if tool.Get("cache_control").Exists() {
+			path := fmt.Sprintf("tools.%d.cache_control", idx)
+			if updated, err := sjson.DeleteBytes(payload, path); err != nil {
+				log.Warnf("failed to strip cache_control from tools array at %s: %v", path, err)
+			} else {
+				payload = updated
+			}
+		}
+		idx++
+		return true
+	})
+
+	return payload
+}
+
 // injectSystemCacheControl adds cache_control to the last element in the system prompt.
 // Converts string system prompts to array format if needed.
 // This only adds cache_control if NO system element already has it.
diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go
index 2cf969bb5f..de39c0f6cf 100644
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -1094,6 +1094,92 @@ func TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit(t *testing.T
 	}
 }
 
+// TestRemoveToolsCacheControl checks how the remove_tools_cache_control attribute is interpreted.
+func TestRemoveToolsCacheControl(t *testing.T) {
+	tests := []struct {
+		name string
+		auth *cliproxyauth.Auth
+		want bool
+	}{
+		{
+			name: "nil auth",
+			auth: nil,
+			want: false,
+		},
+		{
+			name: "nil attributes",
+			auth: &cliproxyauth.Auth{},
+			want: false,
+		},
+		{
+			name: "missing attribute",
+			auth: &cliproxyauth.Auth{Attributes: map[string]string{
+				"api_key": "key-123",
+			}},
+			want: false,
+		},
+		{
+			name: "false attribute",
+			auth: &cliproxyauth.Auth{Attributes: map[string]string{
+				"remove_tools_cache_control": "false",
+			}},
+			want: false,
+		},
+		{
+			name: "true attribute",
+			auth: &cliproxyauth.Auth{Attributes: map[string]string{
+				"remove_tools_cache_control": " true ",
+			}},
+			want: true,
+		},
+		{
+			name: "mixed-case true attribute",
+			auth: &cliproxyauth.Auth{Attributes: map[string]string{
+				"remove_tools_cache_control": "TRUE",
+			}},
+			want: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := removeToolsCacheControl(tt.auth); got != tt.want {
+				t.Fatalf("removeToolsCacheControl() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+// TestStripToolsCacheControl checks that only cache_control inside tools entries is removed.
+func TestStripToolsCacheControl(t *testing.T) {
+	payload := []byte(`{
+		"tools": [
+			{"name":"t1","cache_control":{"type":"ephemeral"}},
+			{"name":"t2","cache_control":{"type":"ephemeral","ttl":"1h"}},
+			{"name":"t3"}
+		],
+		"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}]
+	}`)
+
+	out := stripToolsCacheControl(payload)
+
+	if gjson.GetBytes(out, "tools.0.cache_control").Exists() {
+		t.Fatalf("tools.0.cache_control should be removed")
+	}
+	if gjson.GetBytes(out, "tools.1.cache_control").Exists() {
+		t.Fatalf("tools.1.cache_control should be removed")
+	}
+	if got := gjson.GetBytes(out, "tools.#").Int(); got != 3 {
+		t.Fatalf("expected 3 tools after stripping, got %d", got)
+	}
+	if got := gjson.GetBytes(out, "tools.2.name").String(); got != "t3" {
+		t.Fatalf("tools.2.name should stay untouched, got %q", got)
+	}
+	if got := gjson.GetBytes(out, "system.0.cache_control.type").String(); got != "ephemeral" {
+		t.Fatalf("system cache_control should stay untouched, got %q", got)
+	}
+}
+
 func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) {
 	var seenBody []byte
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -1144,6 +1230,55 @@ func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) {
 	}
 }
 
+// TestClaudeExecutor_CountTokens_RemoveToolsCacheControl checks that count_tokens requests have tools cache_control stripped when the option is on.
+func TestClaudeExecutor_CountTokens_RemoveToolsCacheControl(t *testing.T) {
+	var seenBody []byte
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		seenBody = bytes.Clone(body)
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"input_tokens":42}`))
+	}))
+	defer server.Close()
+
+	executor := NewClaudeExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{Attributes: map[string]string{
+		"api_key":                    "key-123",
+		"base_url":                   server.URL,
+		"remove_tools_cache_control": "true",
+	}}
+
+	payload := []byte(`{
+		"tools": [
+			{"name":"t1","cache_control":{"type":"ephemeral"}},
+			{"name":"t2","cache_control":{"type":"ephemeral","ttl":"1h"}}
+		],
+		"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}],
+		"messages": [{"role":"user","content":[{"type":"text","text":"hello"}]}]
+	}`)
+
+	_, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "claude-3-5-haiku-20241022",
+		Payload: payload,
+	}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")})
+	if err != nil {
+		t.Fatalf("CountTokens error: %v", err)
+	}
+
+	if len(seenBody) == 0 {
+		t.Fatal("expected count_tokens request body to be captured")
+	}
+	if gjson.GetBytes(seenBody, "tools.0.cache_control").Exists() {
+		t.Fatalf("tools.0.cache_control should be stripped in count_tokens")
+	}
+	if gjson.GetBytes(seenBody, "tools.1.cache_control").Exists() {
+		t.Fatalf("tools.1.cache_control should be stripped in count_tokens")
+	}
+	if got := gjson.GetBytes(seenBody, "system.0.cache_control.type").String(); got != "ephemeral" {
+		t.Fatalf("system cache_control should stay untouched, got %q", got)
+	}
+}
+
 func hasTTLOrderingViolation(payload []byte) bool {
 	seen5m := false
 	violates := false
diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go
index 52ae9a4808..69fa1b15a4 100644
--- a/internal/watcher/synthesizer/config.go
+++ b/internal/watcher/synthesizer/config.go
@@ -117,6 +117,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea
 			attrs["models_hash"] = hash
 		}
 		addConfigHeadersToAttrs(ck.Headers, attrs)
+		if ck.RemoveToolsCacheControl {
+			attrs["remove_tools_cache_control"] = "true"
+		}
 		proxyURL := strings.TrimSpace(ck.ProxyURL)
 		a := &coreauth.Auth{
 			ID: id,
diff --git a/internal/watcher/synthesizer/config_test.go b/internal/watcher/synthesizer/config_test.go
index 437f18d11e..9e8039510b 100644
--- a/internal/watcher/synthesizer/config_test.go
+++ b/internal/watcher/synthesizer/config_test.go
@@ -160,9 +160,10 @@ func TestConfigSynthesizer_ClaudeKeys(t *testing.T) {
 		Config: &config.Config{
 			ClaudeKey: []config.ClaudeKey{
 				{
-					APIKey:  "sk-ant-api-xxx",
-					Prefix:  "main",
-					BaseURL: "https://api.anthropic.com",
+					APIKey:                  "sk-ant-api-xxx",
+					Prefix:                  "main",
+					BaseURL:                 "https://api.anthropic.com",
+					RemoveToolsCacheControl: true,
 					Models: []config.ClaudeModel{
 						{Name: "claude-3-opus"},
 						{Name: "claude-3-sonnet"},
@@ -194,6 +195,9 @@ func TestConfigSynthesizer_ClaudeKeys(t *testing.T) {
 	if auths[0].Attributes["api_key"] != "sk-ant-api-xxx" {
 		t.Errorf("expected api_key sk-ant-api-xxx, got %s", auths[0].Attributes["api_key"])
 	}
+	if auths[0].Attributes["remove_tools_cache_control"] != "true" {
+		t.Errorf("expected remove_tools_cache_control=true, got %s", auths[0].Attributes["remove_tools_cache_control"])
+	}
 	if _, ok := auths[0].Attributes["models_hash"]; !ok {
 		t.Error("expected models_hash in attributes")
 	}