Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ nonstream-keepalive-interval: 0
# cache-user-id: true # optional: default is false; set true to reuse cached user_id per API key instead of generating a random one each request
# experimental-cch-signing: false # optional: default is false; when true, sign the final /v1/messages body using the current Claude Code cch algorithm
# # keep this disabled unless you explicitly need the behavior, so upstream seed changes fall back to legacy proxy behavior
# remove-tools-cache-control: false # optional: default is false; set true to strip cache_control from every tools entry for upstreams that don't support it (e.g., AWS Bedrock proxies)

# Default headers for Claude API requests. Update when Claude Code releases new versions.
# In legacy mode, user-agent/package-version/runtime-version/timeout are used as fallbacks
Expand Down
4 changes: 4 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,10 @@ type ClaudeKey struct {
// Claude /v1/messages requests. It is disabled by default so upstream seed
// changes do not alter the proxy's legacy behavior.
ExperimentalCCHSigning bool `yaml:"experimental-cch-signing,omitempty" json:"experimental-cch-signing,omitempty"`

// RemoveToolsCacheControl removes cache_control from tools for this provider.
// Use this for upstreams that don't support Anthropic prompt caching on tools (e.g., AWS Bedrock proxies).
RemoveToolsCacheControl bool `yaml:"remove-tools-cache-control,omitempty" json:"remove-tools-cache-control,omitempty"`
}

// GetAPIKey returns the APIKey field of this Claude key entry.
func (k ClaudeKey) GetAPIKey() string { return k.APIKey }
Expand Down
57 changes: 55 additions & 2 deletions internal/runtime/executor/claude_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,13 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
body = normalizeClaudeTemperatureForThinking(body)

// Strip cache_control from tools when the auth requests it, then auto-inject cache_control if none remains (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 {
if removeToolsCacheControl(auth) {
body = stripToolsCacheControl(body)
if countCacheControls(body) == 0 {
body = injectSystemCacheControl(body)
body = injectMessagesCacheControl(body)
}
} else if countCacheControls(body) == 0 {
body = ensureCacheControl(body)
}
Comment on lines +142 to 150
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This block of logic for handling cache_control is duplicated in ExecuteStream at lines 310-318. To improve maintainability and avoid potential inconsistencies, consider extracting this logic into a separate helper function. This function could then be called from both Execute and ExecuteStream.


Expand Down Expand Up @@ -310,7 +316,13 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
body = normalizeClaudeTemperatureForThinking(body)

// Strip cache_control from tools when the auth requests it, then auto-inject cache_control if none remains (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 {
if removeToolsCacheControl(auth) {
body = stripToolsCacheControl(body)
if countCacheControls(body) == 0 {
body = injectSystemCacheControl(body)
body = injectMessagesCacheControl(body)
}
} else if countCacheControls(body) == 0 {
body = ensureCacheControl(body)
}

Expand Down Expand Up @@ -488,6 +500,17 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
body = checkSystemInstructions(body)
}

// Apply remove-tools-cache-control stripping for count_tokens too.
if removeToolsCacheControl(auth) {
body = stripToolsCacheControl(body)
if countCacheControls(body) == 0 {
body = injectSystemCacheControl(body)
body = injectMessagesCacheControl(body)
}
} else if countCacheControls(body) == 0 {
body = ensureCacheControl(body)
}

// Keep count_tokens requests compatible with Anthropic cache-control constraints too.
body = enforceCacheControlLimit(body, 4)
body = normalizeCacheControlTTL(body)
Expand Down Expand Up @@ -1849,6 +1872,36 @@ func injectToolsCacheControl(payload []byte) []byte {
return result
}

// removeToolsCacheControl reports whether the given auth carries the
// "remove_tools_cache_control" attribute set to "true". The comparison ignores
// surrounding whitespace and letter case; a nil auth or a nil attribute map
// yields false.
func removeToolsCacheControl(auth *cliproxyauth.Auth) bool {
	if auth == nil || auth.Attributes == nil {
		return false
	}
	flag := strings.TrimSpace(auth.Attributes["remove_tools_cache_control"])
	return strings.EqualFold(flag, "true")
}

// stripToolsCacheControl returns payload with the cache_control field deleted
// from every entry of the top-level "tools" array. A payload whose "tools"
// value is missing or is not an array is returned unchanged. A failed deletion
// is logged as a warning and the remaining entries are still processed.
func stripToolsCacheControl(payload []byte) []byte {
	toolList := gjson.GetBytes(payload, "tools")
	if !(toolList.Exists() && toolList.IsArray()) {
		return payload
	}

	for i, entry := range toolList.Array() {
		if !entry.Get("cache_control").Exists() {
			continue
		}

		// Deleting a key inside one element never shifts array positions, so
		// indexes taken from the original payload stay valid after each delete.
		target := fmt.Sprintf("tools.%d.cache_control", i)
		stripped, err := sjson.DeleteBytes(payload, target)
		if err != nil {
			log.Warnf("failed to strip cache_control from tools array at %s: %v", target, err)
			continue
		}
		payload = stripped
	}

	return payload
}

// injectSystemCacheControl adds cache_control to the last element in the system prompt.
// Converts string system prompts to array format if needed.
// This only adds cache_control if NO system element already has it.
Expand Down
107 changes: 107 additions & 0 deletions internal/runtime/executor/claude_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,65 @@ func TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit(t *testing.T
}
}

// TestRemoveToolsCacheControl verifies how removeToolsCacheControl interprets
// the "remove_tools_cache_control" auth attribute: nil inputs, a missing key,
// whitespace padding, letter case, and explicit non-true values.
func TestRemoveToolsCacheControl(t *testing.T) {
	tests := []struct {
		name string
		auth *cliproxyauth.Auth
		want bool
	}{
		{
			name: "nil auth",
			auth: nil,
			want: false,
		},
		{
			// Zero-value Auth leaves Attributes nil; this exercises the nil-map guard.
			name: "nil attributes",
			auth: &cliproxyauth.Auth{},
			want: false,
		},
		{
			name: "missing attribute",
			auth: &cliproxyauth.Auth{Attributes: map[string]string{
				"api_key": "key-123",
			}},
			want: false,
		},
		{
			name: "true attribute",
			auth: &cliproxyauth.Auth{Attributes: map[string]string{
				"remove_tools_cache_control": " true ",
			}},
			want: true,
		},
		{
			name: "uppercase true attribute",
			auth: &cliproxyauth.Auth{Attributes: map[string]string{
				"remove_tools_cache_control": "TRUE",
			}},
			want: true,
		},
		{
			name: "false attribute",
			auth: &cliproxyauth.Auth{Attributes: map[string]string{
				"remove_tools_cache_control": "false",
			}},
			want: false,
		},
		{
			name: "empty attribute",
			auth: &cliproxyauth.Auth{Attributes: map[string]string{
				"remove_tools_cache_control": "",
			}},
			want: false,
		},
		{
			// Only the literal "true" enables the flag; other truthy spellings do not.
			name: "numeric attribute",
			auth: &cliproxyauth.Auth{Attributes: map[string]string{
				"remove_tools_cache_control": "1",
			}},
			want: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := removeToolsCacheControl(tt.auth); got != tt.want {
				t.Fatalf("removeToolsCacheControl() = %v, want %v", got, tt.want)
			}
		})
	}
}

// TestStripToolsCacheControl verifies that stripToolsCacheControl removes
// cache_control from every tools entry while keeping the rest of the payload
// intact, and that payloads without a tools array are returned unchanged.
func TestStripToolsCacheControl(t *testing.T) {
	payload := []byte(`{
		"tools": [
			{"name":"t1","cache_control":{"type":"ephemeral"}},
			{"name":"t2","cache_control":{"type":"ephemeral","ttl":"1h"}},
			{"name":"t3"}
		],
		"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}]
	}`)

	out := stripToolsCacheControl(payload)

	// Key deletion must not leave dangling commas or otherwise corrupt the body.
	if !gjson.ValidBytes(out) {
		t.Fatalf("stripped payload is not valid JSON: %s", out)
	}

	if gjson.GetBytes(out, "tools.0.cache_control").Exists() {
		t.Fatalf("tools.0.cache_control should be removed")
	}
	if gjson.GetBytes(out, "tools.1.cache_control").Exists() {
		t.Fatalf("tools.1.cache_control should be removed")
	}

	// Only the cache_control key may disappear; entries and their other fields stay.
	if got := gjson.GetBytes(out, "tools.#").Int(); got != 3 {
		t.Fatalf("tools length = %d, want 3", got)
	}
	preserved := []struct {
		path string
		want string
	}{
		{path: "tools.0.name", want: "t1"},
		{path: "tools.1.name", want: "t2"},
		{path: "tools.2.name", want: "t3"},
	}
	for _, p := range preserved {
		if got := gjson.GetBytes(out, p.path).String(); got != p.want {
			t.Fatalf("%s = %q, want %q", p.path, got, p.want)
		}
	}

	if got := gjson.GetBytes(out, "system.0.cache_control.type").String(); got != "ephemeral" {
		t.Fatalf("system cache_control should stay untouched, got %q", got)
	}

	// A missing or non-array "tools" value takes the early-return path.
	untouched := []string{
		`{"system":"s1"}`,
		`{"tools":"none","system":"s1"}`,
	}
	for _, in := range untouched {
		if got := stripToolsCacheControl([]byte(in)); string(got) != in {
			t.Fatalf("payload without tools array should be unchanged, got %s, want %s", got, in)
		}
	}
}

func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) {
var seenBody []byte
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -1144,6 +1203,54 @@ func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) {
}
}

func TestClaudeExecutor_CountTokens_RemoveToolsCacheControl(t *testing.T) {
var seenBody []byte
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
body, _ := io.ReadAll(r.Body)
seenBody = bytes.Clone(body)
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"input_tokens":42}`))
}))
defer server.Close()

executor := NewClaudeExecutor(&config.Config{})
auth := &cliproxyauth.Auth{Attributes: map[string]string{
"api_key": "key-123",
"base_url": server.URL,
"remove_tools_cache_control": "true",
}}

payload := []byte(`{
"tools": [
{"name":"t1","cache_control":{"type":"ephemeral"}},
{"name":"t2","cache_control":{"type":"ephemeral","ttl":"1h"}}
],
"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}],
"messages": [{"role":"user","content":[{"type":"text","text":"hello"}]}]
}`)

_, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{
Model: "claude-3-5-haiku-20241022",
Payload: payload,
}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")})
if err != nil {
t.Fatalf("CountTokens error: %v", err)
}

if len(seenBody) == 0 {
t.Fatal("expected count_tokens request body to be captured")
}
if gjson.GetBytes(seenBody, "tools.0.cache_control").Exists() {
t.Fatalf("tools.0.cache_control should be stripped in count_tokens")
}
if gjson.GetBytes(seenBody, "tools.1.cache_control").Exists() {
t.Fatalf("tools.1.cache_control should be stripped in count_tokens")
}
if got := gjson.GetBytes(seenBody, "system.0.cache_control.type").String(); got != "ephemeral" {
t.Fatalf("system cache_control should stay untouched, got %q", got)
}
}

func hasTTLOrderingViolation(payload []byte) bool {
seen5m := false
violates := false
Expand Down
3 changes: 3 additions & 0 deletions internal/watcher/synthesizer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ func (s *ConfigSynthesizer) synthesizeClaudeKeys(ctx *SynthesisContext) []*corea
attrs["models_hash"] = hash
}
addConfigHeadersToAttrs(ck.Headers, attrs)
if ck.RemoveToolsCacheControl {
attrs["remove_tools_cache_control"] = "true"
}
proxyURL := strings.TrimSpace(ck.ProxyURL)
a := &coreauth.Auth{
ID: id,
Expand Down
10 changes: 7 additions & 3 deletions internal/watcher/synthesizer/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,10 @@ func TestConfigSynthesizer_ClaudeKeys(t *testing.T) {
Config: &config.Config{
ClaudeKey: []config.ClaudeKey{
{
APIKey: "sk-ant-api-xxx",
Prefix: "main",
BaseURL: "https://api.anthropic.com",
APIKey: "sk-ant-api-xxx",
Prefix: "main",
BaseURL: "https://api.anthropic.com",
RemoveToolsCacheControl: true,
Models: []config.ClaudeModel{
{Name: "claude-3-opus"},
{Name: "claude-3-sonnet"},
Expand Down Expand Up @@ -194,6 +195,9 @@ func TestConfigSynthesizer_ClaudeKeys(t *testing.T) {
if auths[0].Attributes["api_key"] != "sk-ant-api-xxx" {
t.Errorf("expected api_key sk-ant-api-xxx, got %s", auths[0].Attributes["api_key"])
}
if auths[0].Attributes["remove_tools_cache_control"] != "true" {
t.Errorf("expected remove_tools_cache_control=true, got %s", auths[0].Attributes["remove_tools_cache_control"])
}
if _, ok := auths[0].Attributes["models_hash"]; !ok {
t.Error("expected models_hash in attributes")
}
Expand Down
Loading