From 5aa33a9729cdc54250eb02bc9d6b2326d2887910 Mon Sep 17 00:00:00 2001 From: Soby Chacko Date: Wed, 20 Aug 2025 20:47:43 -0400 Subject: [PATCH 1/3] GH-1403: Add Anthropic prompt caching via AnthropicChatOptions - Add cacheControl field to AnthropicChatOptions with builder method - Create AnthropicCacheType enum with EPHEMERAL type for type-safe cache creation - Update AnthropicChatModel.createRequest() to apply cache control from options to user message ContentBlocks - Extend ContentBlock record with cacheControl parameter and constructor for API compatibility - Update Usage record to include cacheCreationInputTokens and cacheReadInputTokens fields - Update StreamHelper to handle new Usage constructor with cache token parameters - Add AnthropicApiIT.chatWithPromptCache() test for low-level API validation - Add AnthropicChatModelIT.chatWithPromptCacheViaOptions() integration test - Add comprehensive unit tests for AnthropicChatOptions cache control functionality - Update documentation with cacheControl() method examples and usage patterns Cache control is configured through AnthropicChatOptions rather than message classes to maintain provider portability. The cache control gets applied during request creation in AnthropicChatModel when building ContentBlocks for user messages. 
Original implementation provided by @Claudio-code (Claudio Silva Junior) See https://github.com/spring-projects/spring-ai/pull/4139/commits/15e50263e515312c159d4176a2914f760bcce465 Fixes https://github.com/spring-projects/spring-ai/issues/1403 Signed-off-by: Soby Chacko --- .../ai/anthropic/AnthropicChatModel.java | 16 +- .../ai/anthropic/AnthropicChatOptions.java | 29 ++- .../ai/anthropic/api/AnthropicApi.java | 37 +++- .../ai/anthropic/api/AnthropicCacheType.java | 57 ++++++ .../ai/anthropic/api/StreamHelper.java | 12 +- .../ai/anthropic/AnthropicChatModelIT.java | 54 ++++++ .../anthropic/AnthropicChatOptionsTests.java | 108 +++++++++++ .../ai/anthropic/api/AnthropicApiIT.java | 35 ++++ .../ROOT/pages/api/chat/anthropic-chat.adoc | 175 ++++++++++++++++++ 9 files changed, 506 insertions(+), 17 deletions(-) create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java index 5ea1195c3a7..0485e552584 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java @@ -483,12 +483,25 @@ private Map mergeHttpHeaders(Map runtimeHttpHead ChatCompletionRequest createRequest(Prompt prompt, boolean stream) { + // Get cache control from options + AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions(); + AnthropicApi.ChatCompletionRequest.CacheControl cacheControl = (requestOptions != null) + ? 
requestOptions.getCacheControl() : null; + List userMessages = prompt.getInstructions() .stream() .filter(message -> message.getMessageType() != MessageType.SYSTEM) .map(message -> { if (message.getMessageType() == MessageType.USER) { - List contents = new ArrayList<>(List.of(new ContentBlock(message.getText()))); + List contents = new ArrayList<>(); + + // Apply cache control if enabled for user messages + if (cacheControl != null) { + contents.add(new ContentBlock(message.getText(), cacheControl)); + } + else { + contents.add(new ContentBlock(message.getText())); + } if (message instanceof UserMessage userMessage) { if (!CollectionUtils.isEmpty(userMessage.getMedia())) { List mediaContent = userMessage.getMedia().stream().map(media -> { @@ -538,7 +551,6 @@ else if (message.getMessageType() == MessageType.TOOL) { ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages, systemPrompt, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream); - AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions(); request = ModelOptionsUtils.merge(requestOptions, request, ChatCompletionRequest.class); // Add the tool definitions to the request's tools parameter. 
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java index dbfbee561c8..16421eb04d0 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java @@ -44,6 +44,7 @@ * @author Thomas Vitale * @author Alexandros Pappas * @author Ilayaperumal Gopinathan + * @author Soby Chacko * @since 1.0.0 */ @JsonInclude(Include.NON_NULL) @@ -59,6 +60,20 @@ public class AnthropicChatOptions implements ToolCallingChatOptions { private @JsonProperty("top_k") Integer topK; private @JsonProperty("thinking") ChatCompletionRequest.ThinkingConfig thinking; + /** + * Cache control for user messages. When set, enables caching for user messages. + * Uses the existing CacheControl record from AnthropicApi.ChatCompletionRequest. + */ + private @JsonProperty("cache_control") ChatCompletionRequest.CacheControl cacheControl; + + public ChatCompletionRequest.CacheControl getCacheControl() { + return this.cacheControl; + } + + public void setCacheControl(ChatCompletionRequest.CacheControl cacheControl) { + this.cacheControl = cacheControl; + } + /** * Collection of {@link ToolCallback}s to be used for tool calling in the chat * completion requests. @@ -111,6 +126,7 @@ public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions) .internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled()) .toolContext(fromOptions.getToolContext() != null ? new HashMap<>(fromOptions.getToolContext()) : null) .httpHeaders(fromOptions.getHttpHeaders() != null ? 
new HashMap<>(fromOptions.getHttpHeaders()) : null) + .cacheControl(fromOptions.getCacheControl()) .build(); } @@ -282,14 +298,15 @@ public boolean equals(Object o) { && Objects.equals(this.toolNames, that.toolNames) && Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled) && Objects.equals(this.toolContext, that.toolContext) - && Objects.equals(this.httpHeaders, that.httpHeaders); + && Objects.equals(this.httpHeaders, that.httpHeaders) + && Objects.equals(this.cacheControl, that.cacheControl); } @Override public int hashCode() { return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP, this.topK, this.thinking, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, - this.toolContext, this.httpHeaders); + this.toolContext, this.httpHeaders, this.cacheControl); } public static class Builder { @@ -389,6 +406,14 @@ public Builder httpHeaders(Map httpHeaders) { return this; } + /** + * Set cache control for user messages + */ + public Builder cacheControl(ChatCompletionRequest.CacheControl cacheControl) { + this.options.cacheControl = cacheControl; + return this; + } + public AnthropicChatOptions build() { return this.options; } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java index b573ff8a139..e7bb4d0406f 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java @@ -35,6 +35,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder; import 
org.springframework.ai.model.ApiKey; import org.springframework.ai.model.ChatModelDescription; @@ -65,6 +66,7 @@ * @author Jonghoon Park * @author Claudio Silva Junior * @author Filip Hrisafov + * @author Soby Chacko * @since 1.0.0 */ public final class AnthropicApi { @@ -557,6 +559,14 @@ public record Metadata(@JsonProperty("user_id") String userId) { } + /** + * @param type is the cache type supported by anthropic. Doc + */ + @JsonInclude(Include.NON_NULL) + public record CacheControl(String type) { + } + /** * Configuration for the model's thinking mode. * @@ -763,8 +773,11 @@ public record ContentBlock( @JsonProperty("thinking") String thinking, // Redacted Thinking only - @JsonProperty("data") String data - ) { + @JsonProperty("data") String data, + + // cache object + @JsonProperty("cache_control") CacheControl cacheControl + ) { // @formatter:on /** @@ -782,7 +795,7 @@ public ContentBlock(String mediaType, String data) { * @param source The source of the content. */ public ContentBlock(Type type, Source source) { - this(type, source, null, null, null, null, null, null, null, null, null, null); + this(type, source, null, null, null, null, null, null, null, null, null, null, null); } /** @@ -790,7 +803,7 @@ public ContentBlock(Type type, Source source) { * @param source The source of the content. */ public ContentBlock(Source source) { - this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null); + this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null, null); } /** @@ -798,7 +811,11 @@ public ContentBlock(Source source) { * @param text The text of the content. 
*/ public ContentBlock(String text) { - this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null); + this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, null); + } + + public ContentBlock(String text, CacheControl cache) { + this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, cache); } // Tool result @@ -809,7 +826,7 @@ public ContentBlock(String text) { * @param content The content of the tool result. */ public ContentBlock(Type type, String toolUseId, String content) { - this(type, null, null, null, null, null, null, toolUseId, content, null, null, null); + this(type, null, null, null, null, null, null, toolUseId, content, null, null, null, null); } /** @@ -820,7 +837,7 @@ public ContentBlock(Type type, String toolUseId, String content) { * @param index The index of the content block. */ public ContentBlock(Type type, Source source, String text, Integer index) { - this(type, source, text, index, null, null, null, null, null, null, null, null); + this(type, source, text, index, null, null, null, null, null, null, null, null, null); } // Tool use input JSON delta streaming @@ -832,7 +849,7 @@ public ContentBlock(Type type, Source source, String text, Integer index) { * @param input The input of the tool use. 
*/ public ContentBlock(Type type, String id, String name, Map input) { - this(type, null, null, null, id, name, input, null, null, null, null, null); + this(type, null, null, null, id, name, input, null, null, null, null, null, null); } /** @@ -1026,7 +1043,9 @@ public record ChatCompletionResponse( public record Usage( // @formatter:off @JsonProperty("input_tokens") Integer inputTokens, - @JsonProperty("output_tokens") Integer outputTokens) { + @JsonProperty("output_tokens") Integer outputTokens, + @JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens, + @JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) { // @formatter:off } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java new file mode 100644 index 00000000000..0348670573a --- /dev/null +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java @@ -0,0 +1,57 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.anthropic.api; + +import java.util.function.Supplier; + +import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; + +/** + * Cache types supported by Anthropic's prompt caching feature. + * + *

+ * Prompt caching allows reusing frequently used prompts to reduce costs and improve + * response times for repeated interactions. + * + * @see Anthropic Prompt + * Caching + * @author Claudio Silva Junior + * @author Soby Chacko + */ +public enum AnthropicCacheType { + + /** + * Ephemeral cache with 5-minute lifetime, refreshed on each use. + */ + EPHEMERAL(() -> new CacheControl("ephemeral")); + + private final Supplier value; + + AnthropicCacheType(Supplier value) { + this.value = value; + } + + /** + * Returns a new CacheControl instance for this cache type. + * @return a CacheControl instance configured for this cache type + */ + public CacheControl cacheControl() { + return this.value.get(); + } + +} diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java index 673685e6d13..ca519a11d0e 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java @@ -55,6 +55,8 @@ * @author Christian Tzolov * @author Jihoon Kim * @author Alexandros Pappas + * @author Claudio Silva Junior + * @author Soby Chacko * @since 1.0.0 */ public class StreamHelper { @@ -159,7 +161,7 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_START)) { } else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) { ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null, - null, null, null, null, null, thinkingBlock.thinking(), null); + null, null, null, null, null, thinkingBlock.thinking(), null, null); contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb)); } else { @@ -176,12 +178,12 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) { } else if (contentBlockDeltaEvent.delta() 
instanceof ContentBlockDeltaThinking thinking) { ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(), - null, null, null, null, null, null, thinking.thinking(), null); + null, null, null, null, null, null, thinking.thinking(), null, null); contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb)); } else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) { ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(), - null, null, null, null, null, sig.signature(), null, null); + null, null, null, null, null, sig.signature(), null, null, null); contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb)); } else { @@ -205,7 +207,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) { if (messageDeltaEvent.usage() != null) { Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(), - messageDeltaEvent.usage().outputTokens()); + messageDeltaEvent.usage().outputTokens(), + contentBlockReference.get().usage.cacheCreationInputTokens(), + contentBlockReference.get().usage.cacheReadInputTokens()); contentBlockReference.get().withUsage(totalUsage); } } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java index 6570d5ee6a6..c522f75cf4b 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java @@ -32,6 +32,7 @@ import reactor.core.publisher.Flux; import org.springframework.ai.anthropic.api.AnthropicApi; +import org.springframework.ai.anthropic.api.AnthropicCacheType; import org.springframework.ai.anthropic.api.tool.MockWeatherService; import 
org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.messages.AssistantMessage; @@ -491,6 +492,59 @@ void testToolUseContentBlock() { } } + @Test + void chatWithPromptCacheViaOptions() { + String userMessageText = "It could be eitherr a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which " + + "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" " + + "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English " + + "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". " + + "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\""; + + // Repeat content to meet minimum token requirements for caching (1024+ tokens) + String largeContent = userMessageText.repeat(20); + + // First request - should create cache + ChatResponse firstResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue()) + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .temperature(0.8) + .build())); + + // Access native Anthropic usage data + AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata().getUsage().getNativeUsage(); + + // Verify first request created cache + assertThat(firstUsage.cacheCreationInputTokens()).isGreaterThan(0); + assertThat(firstUsage.cacheReadInputTokens()).isEqualTo(0); + + // Second request with identical content - should read from cache + ChatResponse secondResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue()) + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .temperature(0.8) + .build())); + 
+ // Access native Anthropic usage data + AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata().getUsage().getNativeUsage(); + + // Verify second request used cache + assertThat(secondUsage.cacheCreationInputTokens()).isEqualTo(0); + assertThat(secondUsage.cacheReadInputTokens()).isGreaterThan(0); + + // Both responses should be valid + assertThat(firstResponse.getResult().getOutput().getText()).isNotBlank(); + assertThat(secondResponse.getResult().getOutput().getText()).isNotBlank(); + + logger.info("First request - Cache creation: {}, Cache read: {}", firstUsage.cacheCreationInputTokens(), + firstUsage.cacheReadInputTokens()); + logger.info("Second request - Cache creation: {}, Cache read: {}", secondUsage.cacheCreationInputTokens(), + secondUsage.cacheReadInputTokens()); + } + record ActorsFilmsRecord(String actor, List movies) { } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java index d9470070e95..6cc4c689022 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java @@ -22,7 +22,9 @@ import org.junit.jupiter.api.Test; +import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.Metadata; +import org.springframework.ai.anthropic.api.AnthropicCacheType; import static org.assertj.core.api.Assertions.assertThat; @@ -30,6 +32,7 @@ * Tests for {@link AnthropicChatOptions}. 
* * @author Alexandros Pappas + * @author Soby Chacko */ class AnthropicChatOptionsTests { @@ -471,4 +474,109 @@ void testSetterOverwriteBehavior() { assertThat(options.getMaxTokens()).isEqualTo(10); } + @Test + void testCacheControlBuilder() { + CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(cacheControl) + .build(); + + assertThat(options.getCacheControl()).isEqualTo(cacheControl); + assertThat(options.getCacheControl().type()).isEqualTo("ephemeral"); + } + + @Test + void testCacheControlDefaultValue() { + AnthropicChatOptions options = new AnthropicChatOptions(); + assertThat(options.getCacheControl()).isNull(); + } + + @Test + void testCacheControlEqualsAndHashCode() { + CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions options1 = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(cacheControl) + .build(); + + AnthropicChatOptions options2 = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .build(); + + AnthropicChatOptions options3 = AnthropicChatOptions.builder().model("test-model").build(); + + assertThat(options1).isEqualTo(options2); + assertThat(options1.hashCode()).isEqualTo(options2.hashCode()); + + assertThat(options1).isNotEqualTo(options3); + assertThat(options1.hashCode()).isNotEqualTo(options3.hashCode()); + } + + @Test + void testCacheControlCopy() { + CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions original = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(originalCacheControl) + .build(); + + AnthropicChatOptions copied = original.copy(); + + assertThat(copied).isNotSameAs(original).isEqualTo(original); + assertThat(copied.getCacheControl()).isEqualTo(original.getCacheControl()); + 
assertThat(copied.getCacheControl()).isEqualTo(originalCacheControl); + } + + @Test + void testCacheControlWithNullValue() { + AnthropicChatOptions options = AnthropicChatOptions.builder().model("test-model").cacheControl(null).build(); + + assertThat(options.getCacheControl()).isNull(); + } + + @Test + void testBuilderWithAllFieldsIncludingCacheControl() { + CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model("test-model") + .maxTokens(100) + .stopSequences(List.of("stop1", "stop2")) + .temperature(0.7) + .topP(0.8) + .topK(50) + .metadata(new Metadata("userId_123")) + .cacheControl(cacheControl) + .build(); + + assertThat(options) + .extracting("model", "maxTokens", "stopSequences", "temperature", "topP", "topK", "metadata", + "cacheControl") + .containsExactly("test-model", 100, List.of("stop1", "stop2"), 0.7, 0.8, 50, new Metadata("userId_123"), + cacheControl); + } + + @Test + void testCacheControlMutationDoesNotAffectOriginal() { + CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions original = AnthropicChatOptions.builder() + .model("original-model") + .cacheControl(originalCacheControl) + .build(); + + AnthropicChatOptions copy = original.copy(); + copy.setCacheControl(null); + + // Original should remain unchanged + assertThat(original.getCacheControl()).isEqualTo(originalCacheControl); + // Copy should have null cache control + assertThat(copy.getCacheControl()).isNull(); + } + } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java index 62e05711a6f..6fab9fbd2d9 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java +++ 
b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java @@ -44,6 +44,8 @@ * @author Christian Tzolov * @author Jihoon Kim * @author Alexandros Pappas + * @author Claudio Silva Junior + * @author Soby Chacko */ @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+") public class AnthropicApiIT { @@ -70,6 +72,39 @@ public class AnthropicApiIT { } """))); + @Test + void chatWithPromptCache() { + String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which " + + "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" " + + "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English " + + "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". " + + "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\""; + + AnthropicMessage chatCompletionMessage = new AnthropicMessage( + List.of(new ContentBlock(userMessageText.repeat(20), AnthropicCacheType.EPHEMERAL.cacheControl())), + Role.USER); + + ChatCompletionRequest chatCompletionRequest = new ChatCompletionRequest( + AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, + false); + + // First request - creates cache + AnthropicApi.Usage createdCacheToken = this.anthropicApi.chatCompletionEntity(chatCompletionRequest) + .getBody() + .usage(); + + assertThat(createdCacheToken.cacheCreationInputTokens()).isGreaterThan(0); + assertThat(createdCacheToken.cacheReadInputTokens()).isEqualTo(0); + + // Second request - reads from cache (same request) + AnthropicApi.Usage readCacheToken = this.anthropicApi.chatCompletionEntity(chatCompletionRequest) + .getBody() + .usage(); + + assertThat(readCacheToken.cacheCreationInputTokens()).isEqualTo(0); + 
assertThat(readCacheToken.cacheReadInputTokens()).isGreaterThan(0); + } + @Test void chatCompletionEntity() { diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc index 2094ab4ee17..f8d08b31e8a 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc @@ -191,6 +191,181 @@ ChatResponse response = chatModel.call( TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions] you can use a portable link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with the link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/DefaultChatOptionsBuilder.java[ChatOptions#builder()]. +== Prompt Caching + +Anthropic's prompt caching feature allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions. +When you cache a prompt, subsequent identical requests can reuse the cached content, significantly reducing the number of input tokens processed. + +[NOTE] +==== +*Supported Models* + +Prompt caching is currently supported on Claude Opus 4, Claude Sonnet 4, Claude Sonnet 3.7, Claude Sonnet 3.5, Claude Haiku 3.5, Claude Haiku 3, and Claude Opus 3. 
+==== + +=== Cache Types + +Spring AI supports Anthropic's cache types through the `AnthropicCacheType` enum: + +* `EPHEMERAL`: Temporary caching suitable for short-term reuse within a session + +=== Enabling Prompt Caching + +To enable prompt caching, use the `cacheControl()` method in `AnthropicChatOptions`: + +==== Basic Usage + +[source,java] +---- +// Enable caching with ephemeral type +ChatResponse response = chatModel.call( + new Prompt( + List.of(new UserMessage("Large content to be cached...")), + AnthropicChatOptions.builder() + .model("claude-3-5-sonnet-latest") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .build() + ) +); +---- + +==== Using ChatClient Fluent API + +[source,java] +---- +String response = ChatClient.create(chatModel) + .prompt() + .user("Analyze this large document: " + document) + .options(AnthropicChatOptions.builder() + .model("claude-3-5-sonnet-latest") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .build()) + .call() + .content(); +---- + +=== Usage Example + +Here's a complete example demonstrating prompt caching with cost tracking: + +[source,java] +---- +// Create content that will be reused multiple times +String largeContent = "Large document content that meets minimum token requirements..."; + +// First request - creates cache +ChatResponse firstResponse = chatModel.call( + new Prompt( + List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model("claude-3-haiku-20240307") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .build() + ) +); + +// Access cache-related token usage +AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata() + .getUsage().getNativeUsage(); + +System.out.println("Cache creation tokens: " + firstUsage.cacheCreationInputTokens()); +System.out.println("Cache read tokens: " + firstUsage.cacheReadInputTokens()); + +// Second request with identical content - reads from cache +ChatResponse 
secondResponse = chatModel.call( + new Prompt( + List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model("claude-3-haiku-20240307") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .build() + ) +); + +AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata() + .getUsage().getNativeUsage(); + +System.out.println("Cache creation tokens: " + secondUsage.cacheCreationInputTokens()); +System.out.println("Cache read tokens: " + secondUsage.cacheReadInputTokens()); +---- + +=== Token Usage Tracking + +The `Usage` record provides detailed information about cache-related token consumption. +To access Anthropic-specific cache metrics, use the `getNativeUsage()` method: + +[source,java] +---- +AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata() + .getUsage().getNativeUsage(); +---- + +Cache-specific metrics include: + +* `cacheCreationInputTokens()`: Returns the number of tokens used when creating a cache entry +* `cacheReadInputTokens()`: Returns the number of tokens read from an existing cache entry + +When you first send a cached prompt: +- `cacheCreationInputTokens()` will be greater than 0 +- `cacheReadInputTokens()` will be 0 + +When you send the same cached prompt again: +- `cacheCreationInputTokens()` will be 0 +- `cacheReadInputTokens()` will be greater than 0 + +=== Best Practices + +1. **Cache Long Prompts**: Focus on caching prompts that meet the minimum token requirements (1024+ tokens for most models, 2048+ for Haiku models). + +2. **Reuse Identical Content**: Caching works best with exact matches of prompt content. +Even small changes will require a new cache entry. + +3. **Monitor Token Usage**: Use the enhanced usage statistics to track cache effectiveness and optimize your caching strategy. + +4. **Place Static Content First**: Position cached content (system instructions, context, examples) at the beginning of your prompt for optimal performance. + +5. 
**5-Minute Cache Lifetime**: Ephemeral caches expire after 5 minutes of inactivity. +Each time cached content is accessed, the 5-minute timer resets. + +=== Low-level API Usage + +When using the low-level `AnthropicApi` directly, you can specify cache control through the `ContentBlock` constructor: + +[source,java] +---- +// Create content block with cache control +ContentBlock cachedContent = new ContentBlock( + "Large document content that meets minimum token requirements...", + AnthropicCacheType.EPHEMERAL.cacheControl() +); + +AnthropicMessage message = new AnthropicMessage( + List.of(cachedContent), + Role.USER +); + +ChatCompletionRequest request = new ChatCompletionRequest( + AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), + List.of(message), + null, 100, 0.8, false +); + +ResponseEntity response = anthropicApi.chatCompletionEntity(request); + +// Access cache-related token usage +Usage usage = response.getBody().usage(); +System.out.println("Cache creation tokens: " + usage.cacheCreationInputTokens()); +System.out.println("Cache read tokens: " + usage.cacheReadInputTokens()); +---- + +=== Implementation Details + +Cache control is configured through `AnthropicChatOptions` rather than individual messages. +This preserves compatibility when switching between different AI providers. +The cache control gets applied during request creation in `AnthropicChatModel`. + == Thinking Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This feature enables more transparent and detailed problem-solving, particularly for complex questions that require step-by-step reasoning. 
From 2ee62daec660c423b9532f66008cd07d90927941 Mon Sep 17 00:00:00 2001 From: Mark Pollack Date: Tue, 2 Sep 2025 15:17:42 -0400 Subject: [PATCH 2/3] fix tests --- .../org/springframework/ai/anthropic/api/AnthropicApiIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java index 6fab9fbd2d9..0029fdf0fa7 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java @@ -358,8 +358,9 @@ void chatCompletionStreamError() { assertThatThrownBy(() -> response.collectList().block()).isInstanceOf(RuntimeException.class) .hasMessageStartingWith("Response exception, Status: [") - .hasMessageContaining( - "{\"type\":\"error\",\"error\":{\"type\":\"authentication_error\",\"message\":\"invalid x-api-key\"}"); + .hasMessageContaining("\"type\":\"error\"") + .hasMessageContaining("\"type\":\"authentication_error\"") + .hasMessageContaining("\"message\":\"invalid x-api-key\""); } } From 8e4bf9b67b57b8376716e64e6a511d739c72e3dd Mon Sep 17 00:00:00 2001 From: Mark Pollack Date: Wed, 3 Sep 2025 13:53:43 -0400 Subject: [PATCH 3/3] Add Anthropic prompt caching via AnthropicChatOptions This commit implements comprehensive prompt caching support for Anthropic Claude models in Spring AI: Core Implementation: - Add AnthropicCacheStrategy enum with 4 strategic options: NONE, SYSTEM_ONLY, SYSTEM_AND_TOOLS, CONVERSATION_HISTORY - Implement strategic cache placement with automatic 4-breakpoint limit enforcement via CacheBreakpointTracker - Support configurable TTL durations: "5m" (default) and "1h" (requires beta header) - Add cache_control support to system messages, tools, and conversation history based on strategy API Changes: - 
Extend AnthropicChatOptions with cacheStrategy() and cacheTtl() builder methods - Update AnthropicApi.Tool record to support cache_control field - Add cache usage tracking via cacheCreationInputTokens() and cacheReadInputTokens() Testing & Quality: - Add comprehensive integration tests with real-world scenarios - Add extensive mock test coverage with complex multi-breakpoint scenarios - Fix all checkstyle violations and test failures - Add cache breakpoint limit warning for production debugging Documentation: - Complete API documentation with practical examples and best practices - Add real-world use cases: legal document analysis, batch code review, customer support - Include cost optimization guidance demonstrating up to 90% savings - Document future enhancement roadmap for advanced scenarios Signed-off-by: Mark Pollack --- .gitignore | 4 + .../ai/anthropic/AnthropicChatModel.java | 310 ++++++-- .../ai/anthropic/AnthropicChatOptions.java | 52 +- .../ai/anthropic/api/AnthropicApi.java | 36 +- .../anthropic/api/AnthropicCacheStrategy.java | 53 ++ .../ai/anthropic/AnthropicChatModelIT.java | 54 -- .../anthropic/AnthropicChatOptionsTests.java | 80 +- .../anthropic/AnthropicPromptCachingIT.java | 346 +++++++++ .../AnthropicPromptCachingMockTest.java | 707 ++++++++++++++++++ .../conversation-history-cache-prompt.txt | 74 ++ .../prompts/extended-ttl-cache-prompt.txt | 109 +++ .../prompts/system-and-tools-cache-prompt.txt | 73 ++ .../prompts/system-only-cache-prompt.txt | 75 ++ .../ROOT/pages/api/chat/anthropic-chat.adoc | 336 +++++++-- 14 files changed, 2074 insertions(+), 235 deletions(-) create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java create mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java create mode 100644 models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java create 
mode 100644 models/spring-ai-anthropic/src/test/resources/prompts/conversation-history-cache-prompt.txt create mode 100644 models/spring-ai-anthropic/src/test/resources/prompts/extended-ttl-cache-prompt.txt create mode 100644 models/spring-ai-anthropic/src/test/resources/prompts/system-and-tools-cache-prompt.txt create mode 100644 models/spring-ai-anthropic/src/test/resources/prompts/system-only-cache-prompt.txt diff --git a/.gitignore b/.gitignore index 93d781c4433..6ea376cd976 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,7 @@ qodana.yaml __pycache__/ *.pyc tmp + + +plans + diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java index 0485e552584..c8dfcb71d82 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java @@ -42,7 +42,9 @@ import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Source; import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Type; import org.springframework.ai.anthropic.api.AnthropicApi.Role; +import org.springframework.ai.anthropic.api.AnthropicCacheStrategy; import org.springframework.ai.chat.messages.AssistantMessage; +import org.springframework.ai.chat.messages.Message; import org.springframework.ai.chat.messages.MessageType; import org.springframework.ai.chat.messages.ToolResponseMessage; import org.springframework.ai.chat.messages.UserMessage; @@ -460,6 +462,12 @@ Prompt buildRequestPrompt(Prompt prompt) { this.defaultOptions.getToolCallbacks())); requestOptions.setToolContext(ToolCallingChatOptions.mergeToolContext(runtimeOptions.getToolContext(), this.defaultOptions.getToolContext())); + + // Merge cache strategy and TTL (also @JsonIgnore fields) + 
requestOptions.setCacheStrategy(runtimeOptions.getCacheStrategy() != null + ? runtimeOptions.getCacheStrategy() : this.defaultOptions.getCacheStrategy()); + requestOptions.setCacheTtl(runtimeOptions.getCacheTtl() != null ? runtimeOptions.getCacheTtl() + : this.defaultOptions.getCacheTtl()); } else { requestOptions.setHttpHeaders(this.defaultOptions.getHttpHeaders()); @@ -483,81 +491,75 @@ private Map mergeHttpHeaders(Map runtimeHttpHead ChatCompletionRequest createRequest(Prompt prompt, boolean stream) { - // Get cache control from options - AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions(); - AnthropicApi.ChatCompletionRequest.CacheControl cacheControl = (requestOptions != null) - ? requestOptions.getCacheControl() : null; + // Get caching strategy and options from the request + logger.info("DEBUGINFO: prompt.getOptions() type: {}, value: {}", + prompt.getOptions() != null ? prompt.getOptions().getClass().getName() : "null", prompt.getOptions()); - List userMessages = prompt.getInstructions() - .stream() - .filter(message -> message.getMessageType() != MessageType.SYSTEM) - .map(message -> { - if (message.getMessageType() == MessageType.USER) { - List contents = new ArrayList<>(); + AnthropicChatOptions requestOptions = null; + if (prompt.getOptions() instanceof AnthropicChatOptions) { + requestOptions = (AnthropicChatOptions) prompt.getOptions(); + logger.info("DEBUGINFO: Found AnthropicChatOptions - cacheStrategy: {}, cacheTtl: {}", + requestOptions.getCacheStrategy(), requestOptions.getCacheTtl()); + } + else { + logger.info("DEBUGINFO: Options is NOT AnthropicChatOptions, it's: {}", + prompt.getOptions() != null ? 
prompt.getOptions().getClass().getName() : "null"); + } - // Apply cache control if enabled for user messages - if (cacheControl != null) { - contents.add(new ContentBlock(message.getText(), cacheControl)); - } - else { - contents.add(new ContentBlock(message.getText())); - } - if (message instanceof UserMessage userMessage) { - if (!CollectionUtils.isEmpty(userMessage.getMedia())) { - List mediaContent = userMessage.getMedia().stream().map(media -> { - Type contentBlockType = getContentBlockTypeByMedia(media); - var source = getSourceByMedia(media); - return new ContentBlock(contentBlockType, source); - }).toList(); - contents.addAll(mediaContent); - } - } - return new AnthropicMessage(contents, Role.valueOf(message.getMessageType().name())); - } - else if (message.getMessageType() == MessageType.ASSISTANT) { - AssistantMessage assistantMessage = (AssistantMessage) message; - List contentBlocks = new ArrayList<>(); - if (StringUtils.hasText(message.getText())) { - contentBlocks.add(new ContentBlock(message.getText())); - } - if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) { - for (AssistantMessage.ToolCall toolCall : assistantMessage.getToolCalls()) { - contentBlocks.add(new ContentBlock(Type.TOOL_USE, toolCall.id(), toolCall.name(), - ModelOptionsUtils.jsonToMap(toolCall.arguments()))); - } - } - return new AnthropicMessage(contentBlocks, Role.ASSISTANT); - } - else if (message.getMessageType() == MessageType.TOOL) { - List toolResponses = ((ToolResponseMessage) message).getResponses() - .stream() - .map(toolResponse -> new ContentBlock(Type.TOOL_RESULT, toolResponse.id(), - toolResponse.responseData())) - .toList(); - return new AnthropicMessage(toolResponses, Role.USER); - } - else { - throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType()); - } - }) - .toList(); + AnthropicCacheStrategy strategy = requestOptions != null ? 
requestOptions.getCacheStrategy() + : AnthropicCacheStrategy.NONE; + String cacheTtl = requestOptions != null ? requestOptions.getCacheTtl() : "5m"; - String systemPrompt = prompt.getInstructions() - .stream() - .filter(m -> m.getMessageType() == MessageType.SYSTEM) - .map(m -> m.getText()) - .collect(Collectors.joining(System.lineSeparator())); + logger.info("Cache strategy: {}, TTL: {}", strategy, cacheTtl); + + // Track how many breakpoints we've used (max 4) + CacheBreakpointTracker breakpointsUsed = new CacheBreakpointTracker(); + ChatCompletionRequest.CacheControl cacheControl = null; + + if (strategy != AnthropicCacheStrategy.NONE) { + // Create cache control with TTL if specified, otherwise use default 5m + if (cacheTtl != null && !cacheTtl.equals("5m")) { + cacheControl = new ChatCompletionRequest.CacheControl("ephemeral", cacheTtl); + logger.info("Created cache control with TTL: type={}, ttl={}", "ephemeral", cacheTtl); + } + else { + cacheControl = new ChatCompletionRequest.CacheControl("ephemeral"); + logger.info("Created cache control with default TTL: type={}, ttl={}", "ephemeral", "5m"); + } + } + + // Build messages WITHOUT blanket cache control - strategic placement only + List userMessages = buildMessages(prompt, strategy, cacheControl, breakpointsUsed); + // Process system - as array if caching, string otherwise + Object systemContent = buildSystemContent(prompt, strategy, cacheControl, breakpointsUsed); + + // Build base request ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages, - systemPrompt, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream); + systemContent, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream); request = ModelOptionsUtils.merge(requestOptions, request, ChatCompletionRequest.class); - // Add the tool definitions to the request's tools parameter. 
+ // Add the tool definitions with potential caching List toolDefinitions = this.toolCallingManager.resolveToolDefinitions(requestOptions); if (!CollectionUtils.isEmpty(toolDefinitions)) { request = ModelOptionsUtils.merge(request, this.defaultOptions, ChatCompletionRequest.class); - request = ChatCompletionRequest.from(request).tools(getFunctionTools(toolDefinitions)).build(); + List tools = getFunctionTools(toolDefinitions); + + // Apply caching to tools if strategy includes them + if ((strategy == AnthropicCacheStrategy.SYSTEM_AND_TOOLS + || strategy == AnthropicCacheStrategy.CONVERSATION_HISTORY) && breakpointsUsed.canUse()) { + tools = addCacheToLastTool(tools, cacheControl, breakpointsUsed); + } + + request = ChatCompletionRequest.from(request).tools(tools).build(); + } + + // Add beta header for 1-hour TTL if needed + if ("1h".equals(cacheTtl) && requestOptions != null) { + Map headers = new HashMap<>(requestOptions.getHttpHeaders()); + headers.put("anthropic-beta", AnthropicApi.BETA_EXTENDED_CACHE_TTL); + requestOptions.setHttpHeaders(headers); } return request; @@ -573,6 +575,154 @@ private List getFunctionTools(List toolDefini }).toList(); } + /** + * Build messages strategically, applying cache control only where specified by the + * strategy. 
+ */ + private List buildMessages(Prompt prompt, AnthropicCacheStrategy strategy, + ChatCompletionRequest.CacheControl cacheControl, CacheBreakpointTracker breakpointsUsed) { + + List allMessages = prompt.getInstructions() + .stream() + .filter(message -> message.getMessageType() != MessageType.SYSTEM) + .toList(); + + // Find the last user message (current question) for CONVERSATION_HISTORY strategy + int lastUserIndex = -1; + if (strategy == AnthropicCacheStrategy.CONVERSATION_HISTORY) { + for (int i = allMessages.size() - 1; i >= 0; i--) { + if (allMessages.get(i).getMessageType() == MessageType.USER) { + lastUserIndex = i; + break; + } + } + } + + List result = new ArrayList<>(); + for (int i = 0; i < allMessages.size(); i++) { + Message message = allMessages.get(i); + boolean shouldApplyCache = false; + + // Apply cache to history tail (message before current question) for + // CONVERSATION_HISTORY + if (strategy == AnthropicCacheStrategy.CONVERSATION_HISTORY && breakpointsUsed.canUse()) { + if (lastUserIndex > 0) { + // Cache the message immediately before the last user message + // (multi-turn conversation) + shouldApplyCache = (i == lastUserIndex - 1); + } + if (shouldApplyCache) { + breakpointsUsed.use(); + } + } + + if (message.getMessageType() == MessageType.USER) { + List contents = new ArrayList<>(); + + // Apply cache control strategically, not to all user messages + if (shouldApplyCache && cacheControl != null) { + contents.add(new ContentBlock(message.getText(), cacheControl)); + } + else { + contents.add(new ContentBlock(message.getText())); + } + + if (message instanceof UserMessage userMessage) { + if (!CollectionUtils.isEmpty(userMessage.getMedia())) { + List mediaContent = userMessage.getMedia().stream().map(media -> { + Type contentBlockType = getContentBlockTypeByMedia(media); + var source = getSourceByMedia(media); + return new ContentBlock(contentBlockType, source); + }).toList(); + contents.addAll(mediaContent); + } + } + result.add(new 
AnthropicMessage(contents, Role.valueOf(message.getMessageType().name()))); + } + else if (message.getMessageType() == MessageType.ASSISTANT) { + AssistantMessage assistantMessage = (AssistantMessage) message; + List contentBlocks = new ArrayList<>(); + if (StringUtils.hasText(message.getText())) { + contentBlocks.add(new ContentBlock(message.getText())); + } + if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) { + for (AssistantMessage.ToolCall toolCall : assistantMessage.getToolCalls()) { + contentBlocks.add(new ContentBlock(Type.TOOL_USE, toolCall.id(), toolCall.name(), + ModelOptionsUtils.jsonToMap(toolCall.arguments()))); + } + } + result.add(new AnthropicMessage(contentBlocks, Role.ASSISTANT)); + } + else if (message.getMessageType() == MessageType.TOOL) { + List toolResponses = ((ToolResponseMessage) message).getResponses() + .stream() + .map(toolResponse -> new ContentBlock(Type.TOOL_RESULT, toolResponse.id(), + toolResponse.responseData())) + .toList(); + result.add(new AnthropicMessage(toolResponses, Role.USER)); + } + else { + throw new IllegalArgumentException("Unsupported message type: " + message.getMessageType()); + } + } + return result; + } + + /** + * Build system content - as array if caching, string otherwise. 
+ */ + private Object buildSystemContent(Prompt prompt, AnthropicCacheStrategy strategy, + ChatCompletionRequest.CacheControl cacheControl, CacheBreakpointTracker breakpointsUsed) { + + String systemText = prompt.getInstructions() + .stream() + .filter(m -> m.getMessageType() == MessageType.SYSTEM) + .map(Message::getText) + .collect(Collectors.joining(System.lineSeparator())); + + if (!StringUtils.hasText(systemText)) { + return null; + } + + // Use array format when caching system + if ((strategy == AnthropicCacheStrategy.SYSTEM_ONLY || strategy == AnthropicCacheStrategy.SYSTEM_AND_TOOLS + || strategy == AnthropicCacheStrategy.CONVERSATION_HISTORY) && breakpointsUsed.canUse() + && cacheControl != null) { + + logger.info("Applying cache control to system message - strategy: {}, cacheControl: {}", strategy, + cacheControl); + List systemBlocks = List.of(new ContentBlock(systemText, cacheControl)); + breakpointsUsed.use(); + return systemBlocks; + } + + // Use string format when not caching (backward compatible) + return systemText; + } + + /** + * Add cache control to the last tool for deterministic caching. 
+ */ + private List addCacheToLastTool(List tools, + ChatCompletionRequest.CacheControl cacheControl, CacheBreakpointTracker breakpointsUsed) { + + if (tools == null || tools.isEmpty() || !breakpointsUsed.canUse() || cacheControl == null) { + return tools; + } + + List modifiedTools = new ArrayList<>(); + for (int i = 0; i < tools.size(); i++) { + AnthropicApi.Tool tool = tools.get(i); + if (i == tools.size() - 1) { + // Add cache control to last tool + tool = new AnthropicApi.Tool(tool.name(), tool.description(), tool.inputSchema(), cacheControl); + breakpointsUsed.use(); + } + modifiedTools.add(tool); + } + return modifiedTools; + } + @Override public ChatOptions getDefaultOptions() { return AnthropicChatOptions.fromOptions(this.defaultOptions); @@ -654,4 +804,36 @@ public AnthropicChatModel build() { } + /** + * Tracks cache breakpoints used (max 4 allowed by Anthropic). Non-static to ensure + * each request has its own instance. + */ + private class CacheBreakpointTracker { + + private int count = 0; + + private boolean hasWarned = false; + + public boolean canUse() { + return this.count < 4; + } + + public void use() { + if (this.count < 4) { + this.count++; + } + else if (!this.hasWarned) { + logger.warn( + "Anthropic cache breakpoint limit (4) reached. Additional cache_control directives will be ignored. 
" + + "Consider using fewer cache strategies or simpler content structure."); + this.hasWarned = true; + } + } + + public int getCount() { + return this.count; + } + + } + } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java index 16421eb04d0..d7cdfba8712 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java @@ -32,6 +32,7 @@ import org.springframework.ai.anthropic.api.AnthropicApi; import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest; +import org.springframework.ai.anthropic.api.AnthropicCacheStrategy; import org.springframework.ai.model.tool.ToolCallingChatOptions; import org.springframework.ai.tool.ToolCallback; import org.springframework.lang.Nullable; @@ -61,17 +62,32 @@ public class AnthropicChatOptions implements ToolCallingChatOptions { private @JsonProperty("thinking") ChatCompletionRequest.ThinkingConfig thinking; /** - * Cache control for user messages. When set, enables caching for user messages. - * Uses the existing CacheControl record from AnthropicApi.ChatCompletionRequest. + * The caching strategy to use. Defines which parts of the prompt should be cached. */ - private @JsonProperty("cache_control") ChatCompletionRequest.CacheControl cacheControl; + @JsonIgnore + private AnthropicCacheStrategy cacheStrategy = AnthropicCacheStrategy.NONE; + + /** + * Cache time-to-live. Either "5m" (5 minutes, default) or "1h" (1 hour). + * The 1-hour cache requires a beta header. 
+ */ + @JsonIgnore + private String cacheTtl = "5m"; + + public AnthropicCacheStrategy getCacheStrategy() { + return this.cacheStrategy; + } - public ChatCompletionRequest.CacheControl getCacheControl() { - return this.cacheControl; + public void setCacheStrategy(AnthropicCacheStrategy cacheStrategy) { + this.cacheStrategy = cacheStrategy; } - public void setCacheControl(ChatCompletionRequest.CacheControl cacheControl) { - this.cacheControl = cacheControl; + public String getCacheTtl() { + return this.cacheTtl; + } + + public void setCacheTtl(String cacheTtl) { + this.cacheTtl = cacheTtl; } /** @@ -126,7 +142,8 @@ public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions) .internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled()) .toolContext(fromOptions.getToolContext() != null ? new HashMap<>(fromOptions.getToolContext()) : null) .httpHeaders(fromOptions.getHttpHeaders() != null ? new HashMap<>(fromOptions.getHttpHeaders()) : null) - .cacheControl(fromOptions.getCacheControl()) + .cacheStrategy(fromOptions.getCacheStrategy()) + .cacheTtl(fromOptions.getCacheTtl()) .build(); } @@ -299,14 +316,15 @@ public boolean equals(Object o) { && Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled) && Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.httpHeaders, that.httpHeaders) - && Objects.equals(this.cacheControl, that.cacheControl); + && Objects.equals(this.cacheStrategy, that.cacheStrategy) + && Objects.equals(this.cacheTtl, that.cacheTtl); } @Override public int hashCode() { return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP, this.topK, this.thinking, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, - this.toolContext, this.httpHeaders, this.cacheControl); + this.toolContext, this.httpHeaders, this.cacheStrategy, this.cacheTtl); } public static class Builder { @@ -407,10 +425,18 @@ public 
Builder httpHeaders(Map httpHeaders) { } /** - * Set cache control for user messages + * Set the caching strategy to use. + */ + public Builder cacheStrategy(AnthropicCacheStrategy cacheStrategy) { + this.options.cacheStrategy = cacheStrategy; + return this; + } + + /** + * Set the cache time-to-live. Either "5m" (5 minutes, default) or "1h" (1 hour). */ - public Builder cacheControl(ChatCompletionRequest.CacheControl cacheControl) { - this.options.cacheControl = cacheControl; + public Builder cacheTtl(String cacheTtl) { + this.options.cacheTtl = cacheTtl; return this; } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java index e7bb4d0406f..7e23e143ca7 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java @@ -89,6 +89,8 @@ public static Builder builder() { public static final String BETA_MAX_TOKENS = "max-tokens-3-5-sonnet-2024-07-15"; + public static final String BETA_EXTENDED_CACHE_TTL = "extended-cache-ttl-2025-04-11"; + private static final String HEADER_X_API_KEY = "x-api-key"; private static final String HEADER_ANTHROPIC_VERSION = "anthropic-version"; @@ -474,8 +476,10 @@ public interface StreamEvent { * models for * additional details and options. * @param messages Input messages. - * @param system System prompt. A system prompt is a way of providing context and - * instructions to Claude, such as specifying a particular goal or role. See our + * @param system System prompt. Can be a String (for compatibility) or a + * List<ContentBlock> (for caching support). A system prompt is a way of + * providing context and instructions to Claude, such as specifying a particular goal + * or role. See our * guide to system * prompts. 
* @param maxTokens The maximum number of tokens to generate before stopping. Note @@ -516,7 +520,7 @@ public record ChatCompletionRequest( // @formatter:off @JsonProperty("model") String model, @JsonProperty("messages") List messages, - @JsonProperty("system") String system, + @JsonProperty("system") Object system, @JsonProperty("max_tokens") Integer maxTokens, @JsonProperty("metadata") Metadata metadata, @JsonProperty("stop_sequences") List stopSequences, @@ -528,12 +532,12 @@ public record ChatCompletionRequest( @JsonProperty("thinking") ThinkingConfig thinking) { // @formatter:on - public ChatCompletionRequest(String model, List messages, String system, Integer maxTokens, + public ChatCompletionRequest(String model, List messages, Object system, Integer maxTokens, Double temperature, Boolean stream) { this(model, messages, system, maxTokens, null, null, stream, temperature, null, null, null, null); } - public ChatCompletionRequest(String model, List messages, String system, Integer maxTokens, + public ChatCompletionRequest(String model, List messages, Object system, Integer maxTokens, List stopSequences, Double temperature, Boolean stream) { this(model, messages, system, maxTokens, null, stopSequences, stream, temperature, null, null, null, null); } @@ -564,7 +568,11 @@ public record Metadata(@JsonProperty("user_id") String userId) { * "https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#cache-limitations">Doc */ @JsonInclude(Include.NON_NULL) - public record CacheControl(String type) { + public record CacheControl(@JsonProperty("type") String type, @JsonProperty("ttl") String ttl) { + + public CacheControl(String type) { + this(type, "5m"); + } } /** @@ -587,7 +595,7 @@ public static final class ChatCompletionRequestBuilder { private List messages; - private String system; + private Object system; private Integer maxTokens; @@ -640,7 +648,7 @@ public ChatCompletionRequestBuilder messages(List messages) { return this; } - public 
ChatCompletionRequestBuilder system(String system) { + public ChatCompletionRequestBuilder system(Object system) { this.system = system; return this; } @@ -988,14 +996,24 @@ public Source(String url) { * @param name The name of the tool. * @param description A description of the tool. * @param inputSchema The input schema of the tool. + * @param cacheControl Optional cache control for this tool. */ @JsonInclude(Include.NON_NULL) public record Tool( // @formatter:off @JsonProperty("name") String name, @JsonProperty("description") String description, - @JsonProperty("input_schema") Map inputSchema) { + @JsonProperty("input_schema") Map inputSchema, + @JsonProperty("cache_control") CacheControl cacheControl) { // @formatter:on + + /** + * Constructor for backward compatibility without cache control. + */ + public Tool(String name, String description, Map inputSchema) { + this(name, description, inputSchema, null); + } + } // CB START EVENT diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java new file mode 100644 index 00000000000..e94a1a220c5 --- /dev/null +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java @@ -0,0 +1,53 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.anthropic.api; + +/** + * Defines the caching strategy for Anthropic prompt caching. Anthropic allows up to 4 + * cache breakpoints per request, and the cache hierarchy follows the order: tools → + * system → messages. + * + * @author Mark Pollack + * @since 1.1.0 + */ +public enum AnthropicCacheStrategy { + + /** + * No caching (default behavior). + */ + NONE, + + /** + * Cache system instructions only. Places a cache breakpoint on the system message + * content. + */ + SYSTEM_ONLY, + + /** + * Cache system instructions and tool definitions. Places cache breakpoints on the + * last tool and system message content. + */ + SYSTEM_AND_TOOLS, + + /** + * Cache the entire conversation history up to (but not including) the current user + * question. This is ideal for multi-turn conversations where you want to reuse the + * conversation context while asking new questions. + */ + CONVERSATION_HISTORY + +} diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java index c522f75cf4b..6570d5ee6a6 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java @@ -32,7 +32,6 @@ import reactor.core.publisher.Flux; import org.springframework.ai.anthropic.api.AnthropicApi; -import org.springframework.ai.anthropic.api.AnthropicCacheType; import org.springframework.ai.anthropic.api.tool.MockWeatherService; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.messages.AssistantMessage; @@ -492,59 +491,6 @@ void testToolUseContentBlock() { } } - @Test - void chatWithPromptCacheViaOptions() { - String userMessageText = "It could be eitherr a contraction of the full title Quenta Silmarillion (\"Tale of the 
Silmarils\") or also a plain Genitive which " - + "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" " - + "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English " - + "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". " - + "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\""; - - // Repeat content to meet minimum token requirements for caching (1024+ tokens) - String largeContent = userMessageText.repeat(20); - - // First request - should create cache - ChatResponse firstResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)), - AnthropicChatOptions.builder() - .model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue()) - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) - .maxTokens(100) - .temperature(0.8) - .build())); - - // Access native Anthropic usage data - AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata().getUsage().getNativeUsage(); - - // Verify first request created cache - assertThat(firstUsage.cacheCreationInputTokens()).isGreaterThan(0); - assertThat(firstUsage.cacheReadInputTokens()).isEqualTo(0); - - // Second request with identical content - should read from cache - ChatResponse secondResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)), - AnthropicChatOptions.builder() - .model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue()) - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) - .maxTokens(100) - .temperature(0.8) - .build())); - - // Access native Anthropic usage data - AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata().getUsage().getNativeUsage(); - - // Verify second request used cache - assertThat(secondUsage.cacheCreationInputTokens()).isEqualTo(0); - assertThat(secondUsage.cacheReadInputTokens()).isGreaterThan(0); - 
- // Both responses should be valid - assertThat(firstResponse.getResult().getOutput().getText()).isNotBlank(); - assertThat(secondResponse.getResult().getOutput().getText()).isNotBlank(); - - logger.info("First request - Cache creation: {}, Cache read: {}", firstUsage.cacheCreationInputTokens(), - firstUsage.cacheReadInputTokens()); - logger.info("Second request - Cache creation: {}, Cache read: {}", secondUsage.cacheCreationInputTokens(), - secondUsage.cacheReadInputTokens()); - } - record ActorsFilmsRecord(String actor, List movies) { } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java index 6cc4c689022..c5959be9fa4 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java @@ -22,9 +22,8 @@ import org.junit.jupiter.api.Test; -import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.Metadata; -import org.springframework.ai.anthropic.api.AnthropicCacheType; +import org.springframework.ai.anthropic.api.AnthropicCacheStrategy; import static org.assertj.core.api.Assertions.assertThat; @@ -475,39 +474,40 @@ void testSetterOverwriteBehavior() { } @Test - void testCacheControlBuilder() { - CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); - + void testCacheStrategyBuilder() { AnthropicChatOptions options = AnthropicChatOptions.builder() .model("test-model") - .cacheControl(cacheControl) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) .build(); - assertThat(options.getCacheControl()).isEqualTo(cacheControl); - assertThat(options.getCacheControl().type()).isEqualTo("ephemeral"); + 
assertThat(options.getCacheStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_AND_TOOLS); } @Test - void testCacheControlDefaultValue() { + void testCacheStrategyDefaultValue() { AnthropicChatOptions options = new AnthropicChatOptions(); - assertThat(options.getCacheControl()).isNull(); + assertThat(options.getCacheStrategy()).isEqualTo(AnthropicCacheStrategy.NONE); + assertThat(options.getCacheTtl()).isEqualTo("5m"); } @Test - void testCacheControlEqualsAndHashCode() { - CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); - + void testCacheStrategyEqualsAndHashCode() { AnthropicChatOptions options1 = AnthropicChatOptions.builder() .model("test-model") - .cacheControl(cacheControl) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .cacheTtl("1h") .build(); AnthropicChatOptions options2 = AnthropicChatOptions.builder() .model("test-model") - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .cacheTtl("1h") .build(); - AnthropicChatOptions options3 = AnthropicChatOptions.builder().model("test-model").build(); + AnthropicChatOptions options3 = AnthropicChatOptions.builder() + .model("test-model") + .cacheStrategy(AnthropicCacheStrategy.NONE) + .build(); assertThat(options1).isEqualTo(options2); assertThat(options1.hashCode()).isEqualTo(options2.hashCode()); @@ -517,32 +517,30 @@ void testCacheControlEqualsAndHashCode() { } @Test - void testCacheControlCopy() { - CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); - + void testCacheStrategyCopy() { AnthropicChatOptions original = AnthropicChatOptions.builder() .model("test-model") - .cacheControl(originalCacheControl) + .cacheStrategy(AnthropicCacheStrategy.CONVERSATION_HISTORY) + .cacheTtl("1h") .build(); AnthropicChatOptions copied = original.copy(); assertThat(copied).isNotSameAs(original).isEqualTo(original); - assertThat(copied.getCacheControl()).isEqualTo(original.getCacheControl()); - 
assertThat(copied.getCacheControl()).isEqualTo(originalCacheControl); + assertThat(copied.getCacheStrategy()).isEqualTo(original.getCacheStrategy()); + assertThat(copied.getCacheTtl()).isEqualTo(original.getCacheTtl()); } @Test - void testCacheControlWithNullValue() { - AnthropicChatOptions options = AnthropicChatOptions.builder().model("test-model").cacheControl(null).build(); + void testCacheStrategyWithDefaultValues() { + AnthropicChatOptions options = AnthropicChatOptions.builder().model("test-model").build(); - assertThat(options.getCacheControl()).isNull(); + assertThat(options.getCacheStrategy()).isEqualTo(AnthropicCacheStrategy.NONE); + assertThat(options.getCacheTtl()).isEqualTo("5m"); } @Test - void testBuilderWithAllFieldsIncludingCacheControl() { - CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); - + void testBuilderWithAllFieldsIncludingCacheStrategy() { AnthropicChatOptions options = AnthropicChatOptions.builder() .model("test-model") .maxTokens(100) @@ -551,32 +549,36 @@ void testBuilderWithAllFieldsIncludingCacheControl() { .topP(0.8) .topK(50) .metadata(new Metadata("userId_123")) - .cacheControl(cacheControl) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .cacheTtl("1h") .build(); assertThat(options) .extracting("model", "maxTokens", "stopSequences", "temperature", "topP", "topK", "metadata", - "cacheControl") + "cacheStrategy", "cacheTtl") .containsExactly("test-model", 100, List.of("stop1", "stop2"), 0.7, 0.8, 50, new Metadata("userId_123"), - cacheControl); + AnthropicCacheStrategy.SYSTEM_ONLY, "1h"); } @Test - void testCacheControlMutationDoesNotAffectOriginal() { - CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); - + void testCacheStrategyMutationDoesNotAffectOriginal() { AnthropicChatOptions original = AnthropicChatOptions.builder() .model("original-model") - .cacheControl(originalCacheControl) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .cacheTtl("1h") .build(); 
AnthropicChatOptions copy = original.copy(); - copy.setCacheControl(null); + copy.setCacheStrategy(AnthropicCacheStrategy.NONE); + copy.setCacheTtl("5m"); // Original should remain unchanged - assertThat(original.getCacheControl()).isEqualTo(originalCacheControl); - // Copy should have null cache control - assertThat(copy.getCacheControl()).isNull(); + assertThat(original.getCacheStrategy()).isEqualTo(AnthropicCacheStrategy.SYSTEM_AND_TOOLS); + assertThat(original.getCacheTtl()).isEqualTo("1h"); + + // Copy should have modified values + assertThat(copy.getCacheStrategy()).isEqualTo(AnthropicCacheStrategy.NONE); + assertThat(copy.getCacheTtl()).isEqualTo("5m"); } } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java new file mode 100644 index 00000000000..dfe032ec7ac --- /dev/null +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingIT.java @@ -0,0 +1,346 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.anthropic; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.springframework.ai.anthropic.api.AnthropicApi; +import org.springframework.ai.anthropic.api.AnthropicCacheStrategy; +import org.springframework.ai.anthropic.api.tool.MockWeatherService; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor; +import org.springframework.ai.chat.memory.ChatMemory; +import org.springframework.ai.chat.memory.InMemoryChatMemoryRepository; +import org.springframework.ai.chat.memory.MessageWindowChatMemory; +import org.springframework.ai.chat.messages.Message; +import org.springframework.ai.chat.messages.SystemMessage; +import org.springframework.ai.chat.messages.UserMessage; +import org.springframework.ai.chat.model.ChatResponse; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.tool.function.FunctionToolCallback; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.core.io.Resource; +import org.springframework.core.io.ResourceLoader; +import org.springframework.util.StreamUtils; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration tests for Anthropic prompt caching functionality. + * + * Tests various caching strategies to ensure proper cache breakpoint placement and + * optimal cache utilization according to Anthropic's best practices. 
+ */ +@SpringBootTest(classes = AnthropicTestConfiguration.class) +@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+") +public class AnthropicPromptCachingIT { + + private static final Logger logger = LoggerFactory.getLogger(AnthropicPromptCachingIT.class); + + @Autowired + private AnthropicChatModel chatModel; + + @Autowired + private ResourceLoader resourceLoader; + + private String loadPrompt(String filename) { + try { + Resource resource = this.resourceLoader.getResource("classpath:prompts/" + filename); + String basePrompt = StreamUtils.copyToString(resource.getInputStream(), StandardCharsets.UTF_8); + // Add unique timestamp to prevent cache collisions across test runs + return basePrompt + "\n\nTest execution timestamp: " + System.currentTimeMillis(); + } + catch (IOException e) { + throw new RuntimeException("Failed to load prompt: " + filename, e); + } + } + + /** + * Helper method to safely get AnthropicApi.Usage, returning null if not available. + * This handles the case where getNativeUsage() returns null for tool-based + * interactions. + */ + private AnthropicApi.Usage getAnthropicUsage(ChatResponse response) { + if (response == null || response.getMetadata() == null || response.getMetadata().getUsage() == null) { + return null; + } + Object nativeUsage = response.getMetadata().getUsage().getNativeUsage(); + return (nativeUsage instanceof AnthropicApi.Usage usage) ? 
usage : null; + } + + @Test + void shouldCacheSystemMessageOnly() { + String systemPrompt = loadPrompt("system-only-cache-prompt.txt"); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(150) + .temperature(0.3) + .build(); + + ChatResponse response = this.chatModel.call(new Prompt( + List.of(new SystemMessage(systemPrompt), new UserMessage("What is microservices architecture?")), + options)); + + assertThat(response).isNotNull(); + assertThat(response.getResult().getOutput().getText()).isNotEmpty(); + logger.info("System-only cache response: {}", response.getResult().getOutput().getText()); + + // For system-only caching, we should have native usage available + AnthropicApi.Usage usage = getAnthropicUsage(response); + assertThat(usage).isNotNull(); + + // Check cache behavior - either cache creation OR cache read should occur + boolean cacheCreated = usage.cacheCreationInputTokens() > 0; + boolean cacheRead = usage.cacheReadInputTokens() > 0; + assertThat(cacheCreated || cacheRead) + .withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d", + usage.cacheCreationInputTokens(), usage.cacheReadInputTokens()) + .isTrue(); + assertThat(cacheCreated && cacheRead) + .withFailMessage("Cache creation and read should not happen simultaneously") + .isFalse(); + + logger.info("Cache creation tokens: {}, Cache read tokens: {}", usage.cacheCreationInputTokens(), + usage.cacheReadInputTokens()); + } + + @Test + void shouldCacheSystemAndTools() { + String systemPrompt = loadPrompt("system-and-tools-cache-prompt.txt"); + + // Mock weather service + MockWeatherService weatherService = new MockWeatherService(); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + 
.maxTokens(200) + .temperature(0.3) + .toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", weatherService) + .description("Get current weather for a location") + .inputType(MockWeatherService.Request.class) + .build()) + .build(); + + ChatResponse response = this.chatModel.call( + new Prompt( + List.of(new SystemMessage(systemPrompt), + new UserMessage( + "What's the weather like in San Francisco and should I go for a walk?")), + options)); + + assertThat(response).isNotNull(); + assertThat(response.getResult().getOutput().getText()).isNotEmpty(); + logger.info("System and tools cache response: {}", response.getResult().getOutput().getText()); + + // Anthropic's API doesn't provide cache usage metadata for tool-based + // interactions + // Validate what we can: configuration works and tools are called successfully + AnthropicApi.Usage usage = getAnthropicUsage(response); + if (usage != null) { + // If we get usage metadata, validate cache behavior + boolean cacheCreated = usage.cacheCreationInputTokens() > 0; + boolean cacheRead = usage.cacheReadInputTokens() > 0; + assertThat(cacheCreated || cacheRead) + .withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d", + usage.cacheCreationInputTokens(), usage.cacheReadInputTokens()) + .isTrue(); + assertThat(cacheCreated && cacheRead) + .withFailMessage("Cache creation and read should not happen simultaneously") + .isFalse(); + + logger.info("Cache creation tokens: {}, Cache read tokens: {}", usage.cacheCreationInputTokens(), + usage.cacheReadInputTokens()); + } + else { + logger.debug("Native usage metadata not available for tool-based interactions - this is expected"); + // Validate functional correctness: tools were called and response generated + assertThat(response.getResult().getOutput().getText()).isNotEmpty(); + // Ensure the weather service was actually called (indirect validation) + // Note: Full cache validation would require mocking the Anthropic API + } 
+ } + + @Test + void shouldCacheConversationHistory() { + // Create a conversation ID for this test + String conversationId = "history-cache-test-" + System.currentTimeMillis(); + + // Set up ChatMemory and advisor + ChatMemory chatMemory = MessageWindowChatMemory.builder() + .chatMemoryRepository(new InMemoryChatMemoryRepository()) + .build(); + + MessageChatMemoryAdvisor advisor = MessageChatMemoryAdvisor.builder(chatMemory) + .conversationId(conversationId) + .build(); + + ChatClient chatClient = ChatClient.builder(this.chatModel) + .defaultAdvisors(advisor) + .defaultSystem(loadPrompt("conversation-history-cache-prompt.txt")) + .build(); + + // Build up conversation history + chatClient.prompt() + .user("My name is Alice and I work as a data scientist at TechCorp.") + .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId)) + .call() + .content(); + + chatClient.prompt() + .user("I specialize in machine learning and have 5 years of experience with Python and R.") + .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId)) + .call() + .content(); + + chatClient.prompt() + .user("Recently I've been working on a recommendation system for our e-commerce platform.") + .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId)) + .call() + .content(); + + // Now use caching for the next conversation turn + String response = chatClient.prompt() + .user("What career advice would you give me based on our conversation?") + .options(AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.CONVERSATION_HISTORY) + .maxTokens(200) + .temperature(0.3) + .build()) + .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId)) + .call() + .content(); + + assertThat(response).isNotEmpty(); + assertThat(response.toLowerCase()).contains("alice"); + logger.info("Conversation history cache response: {}", response); + + // Verify the conversation was remembered + List 
memoryMessages = chatMemory.get(conversationId); + assertThat(memoryMessages).hasSizeGreaterThan(6); // At least 4 user + 4 assistant + // messages + } + + @Test + void shouldHandleExtendedTtlCaching() { + String systemPrompt = loadPrompt("extended-ttl-cache-prompt.txt"); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .cacheTtl("1h") // 1-hour TTL requires beta header + .maxTokens(100) + .temperature(0.3) + .build(); + + ChatResponse response = this.chatModel + .call(new Prompt(List.of(new SystemMessage(systemPrompt), new UserMessage("What is 2+2?")), options)); + + assertThat(response).isNotNull(); + assertThat(response.getResult().getOutput().getText()).contains("4"); + logger.info("Extended TTL cache response: {}", response.getResult().getOutput().getText()); + + // Check cache behavior - either cache creation OR cache read should occur + logger.info("DEBUG: About to get usage metadata for extended TTL test"); + AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage(); + logger.info("DEBUG: Got usage metadata for extended TTL test: {}", usage); + assertThat(usage).isNotNull(); + + boolean cacheCreated = usage.cacheCreationInputTokens() > 0; + boolean cacheRead = usage.cacheReadInputTokens() > 0; + assertThat(cacheCreated || cacheRead) + .withFailMessage("Expected either cache creation or cache read tokens, but got creation=%d, read=%d", + usage.cacheCreationInputTokens(), usage.cacheReadInputTokens()) + .isTrue(); + assertThat(cacheCreated && cacheRead) + .withFailMessage("Cache creation and read should not happen simultaneously") + .isFalse(); + + logger.info("Extended TTL - Cache creation tokens: {}, Cache read tokens: {}", usage.cacheCreationInputTokens(), + usage.cacheReadInputTokens()); + } + + @Test + void shouldNotCacheWithNoneStrategy() { + String systemPrompt = "You are a helpful 
assistant."; + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.NONE) // Explicit no caching + .maxTokens(50) + .temperature(0.3) + .build(); + + ChatResponse response = this.chatModel + .call(new Prompt(List.of(new SystemMessage(systemPrompt), new UserMessage("Hello!")), options)); + + assertThat(response).isNotNull(); + assertThat(response.getResult().getOutput().getText()).isNotEmpty(); + logger.info("No cache response: {}", response.getResult().getOutput().getText()); + + // Verify NO cache tokens are created (NONE strategy) + AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage(); + assertThat(usage.cacheCreationInputTokens()).isEqualTo(0); + assertThat(usage.cacheReadInputTokens()).isEqualTo(0); + logger.info("No cache strategy - Cache creation tokens: {}, Cache read tokens: {}", + usage.cacheCreationInputTokens(), usage.cacheReadInputTokens()); + } + + @Test + void shouldHandleMultipleCacheStrategiesInSession() { + // Test that we can switch between different caching strategies + List responses = new ArrayList<>(); + + // First: System only + responses.add(this.chatModel + .call(new Prompt(List.of(new SystemMessage("You are a math tutor."), new UserMessage("What is calculus?")), + AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(100) + .build()))); + + // Second: No caching + responses.add(this.chatModel.call(new Prompt(List.of(new UserMessage("What's 5+5?")), + AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_SONNET_4.getValue()) + .cacheStrategy(AnthropicCacheStrategy.NONE) + .maxTokens(50) + .build()))); + + // Verify all responses + for (int i = 0; i < responses.size(); i++) { + ChatResponse response = responses.get(i); + assertThat(response).isNotNull(); + 
assertThat(response.getResult().getOutput().getText()).isNotEmpty(); + logger.info("Response {}: {}", i + 1, response.getResult().getOutput().getText()); + } + } + +} diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java new file mode 100644 index 00000000000..56ffff3d881 --- /dev/null +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java @@ -0,0 +1,707 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.anthropic; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import okhttp3.mockwebserver.MockResponse; +import okhttp3.mockwebserver.MockWebServer; +import okhttp3.mockwebserver.RecordedRequest; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import org.springframework.ai.anthropic.api.AnthropicApi; +import org.springframework.ai.anthropic.api.AnthropicCacheStrategy; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.messages.SystemMessage; +import org.springframework.ai.chat.messages.UserMessage; +import org.springframework.ai.chat.model.ChatResponse; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.method.MethodToolCallback; +import org.springframework.ai.tool.support.ToolDefinitions; +import org.springframework.util.ReflectionUtils; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Mock tests for Anthropic prompt caching functionality with tool calling validation. + * Tests the wire format and cache control headers without requiring real API calls. 
+ * + * @author Mark Pollack + * @since 1.1.0 + */ +class AnthropicPromptCachingMockTest { + + private MockWebServer mockWebServer; + + private AnthropicChatModel chatModel; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + @BeforeEach + void setUp() throws IOException { + this.mockWebServer = new MockWebServer(); + this.mockWebServer.start(); + + String baseUrl = this.mockWebServer.url("/").toString(); + AnthropicApi anthropicApi = AnthropicApi.builder().apiKey("test-api-key").baseUrl(baseUrl).build(); + this.chatModel = AnthropicChatModel.builder().anthropicApi(anthropicApi).build(); + } + + @AfterEach + void tearDown() throws IOException { + this.mockWebServer.shutdown(); + } + + @Test + void testSystemOnlyCacheStrategy() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Hello! I understand you want to test caching." + } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "stop_sequence": null, + "usage": { + "input_tokens": 50, + "output_tokens": 20 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Test with SYSTEM_ONLY cache strategy + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .build(); + + Prompt prompt = new Prompt( + List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Test message")), options); + + ChatResponse response = this.chatModel.call(prompt); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Parse and validate request body + JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8()); + + // Verify system message has cache control + 
assertThat(requestBody.has("system")).isTrue(); + JsonNode systemNode = requestBody.get("system"); + if (systemNode.isArray()) { + JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1); + assertThat(lastSystemBlock.has("cache_control")).isTrue(); + assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral"); + } + + // Verify response + assertThat(response).isNotNull(); + assertThat(response.getResult().getOutput().getText()).contains("Hello!"); + } + + @Test + void testSystemAndToolsCacheStrategy() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "I'll help you with the weather information." + } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 150, + "output_tokens": 25 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Create tool callback + var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class); + MethodToolCallback toolCallback = MethodToolCallback.builder() + .toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build()) + .toolMethod(toolMethod) + .build(); + + // Test with SYSTEM_AND_TOOLS cache strategy + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .toolCallbacks(List.of(toolCallback)) + .build(); + + ChatClient chatClient = ChatClient.create(this.chatModel); + String response = chatClient.prompt() + .user("What's the weather like in San Francisco?") + .options(options) + .call() + .content(); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Parse and validate request body + 
JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8()); + + // Verify tools array exists and last tool has cache control + assertThat(requestBody.has("tools")).isTrue(); + JsonNode toolsArray = requestBody.get("tools"); + assertThat(toolsArray.isArray()).isTrue(); + assertThat(toolsArray.size()).isGreaterThan(0); + + JsonNode lastTool = toolsArray.get(toolsArray.size() - 1); + assertThat(lastTool.has("cache_control")).isTrue(); + assertThat(lastTool.get("cache_control").get("type").asText()).isEqualTo("ephemeral"); + + // Verify system message also has cache control + if (requestBody.has("system")) { + JsonNode systemNode = requestBody.get("system"); + if (systemNode.isArray()) { + JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1); + assertThat(lastSystemBlock.has("cache_control")).isTrue(); + } + } + + // Verify response + assertThat(response).contains("weather information"); + } + + @Test + void testConversationHistoryCacheStrategy() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Based on our previous conversation, I can help with that." 
+ } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 200, + "output_tokens": 30 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Test with CONVERSATION_HISTORY cache strategy + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.CONVERSATION_HISTORY) + .build(); + + // Create a prompt with conversation history + Prompt prompt = new Prompt(List.of(new UserMessage("Previous question about weather"), + new UserMessage("What about tomorrow's forecast?")), options); + + ChatResponse response = this.chatModel.call(prompt); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Parse and validate request body + JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8()); + + // Verify messages array exists + assertThat(requestBody.has("messages")).isTrue(); + JsonNode messagesArray = requestBody.get("messages"); + assertThat(messagesArray.isArray()).isTrue(); + assertThat(messagesArray.size()).isGreaterThan(1); + + // Verify the second-to-last message has cache control (conversation history) + if (messagesArray.size() >= 2) { + JsonNode secondToLastMessage = messagesArray.get(messagesArray.size() - 2); + assertThat(secondToLastMessage.has("content")).isTrue(); + JsonNode contentArray = secondToLastMessage.get("content"); + if (contentArray.isArray() && contentArray.size() > 0) { + JsonNode lastContentBlock = contentArray.get(contentArray.size() - 1); + assertThat(lastContentBlock.has("cache_control")).isTrue(); + assertThat(lastContentBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral"); + } + } + + // Verify response + assertThat(response).isNotNull(); + 
assertThat(response.getResult().getOutput().getText()).contains("previous conversation"); + } + + @Test + void testNoCacheStrategy() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Simple response without caching." + } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 20, + "output_tokens": 10 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Test with NONE cache strategy (default) + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.NONE) + .build(); + + Prompt prompt = new Prompt("Simple test message", options); + ChatResponse response = this.chatModel.call(prompt); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Parse and validate request body + JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8()); + + // Verify NO cache_control fields exist anywhere + String requestBodyString = requestBody.toString(); + assertThat(requestBodyString).doesNotContain("cache_control"); + + // Verify response + assertThat(response).isNotNull(); + assertThat(response.getResult().getOutput().getText()).contains("Simple response"); + } + + @Test + void testCacheTtlHeader() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Response with 1-hour cache TTL." 
+ } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 30, + "output_tokens": 15 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Test with 1-hour cache TTL + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .cacheTtl("1h") + .build(); + + Prompt prompt = new Prompt( + List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Test message")), options); + + this.chatModel.call(prompt); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Verify the beta header is present for 1-hour cache + assertThat(recordedRequest.getHeader("anthropic-beta")).contains("extended-cache-ttl-2025-04-11"); + } + + @Test + void testFourBreakpointLimitEnforcement() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Response with maximum cache breakpoints." 
+ } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 500, + "output_tokens": 20 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Create multiple tools to test breakpoint limits + var weatherMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class); + var calculateMethod = ReflectionUtils.findMethod(TestTools.class, "calculate", String.class); + var searchMethod = ReflectionUtils.findMethod(TestTools.class, "search", String.class); + + MethodToolCallback weatherTool = MethodToolCallback.builder() + .toolDefinition(ToolDefinitions.builder(weatherMethod).description("Get weather information").build()) + .toolMethod(weatherMethod) + .build(); + + MethodToolCallback calculateTool = MethodToolCallback.builder() + .toolDefinition(ToolDefinitions.builder(calculateMethod).description("Calculate expressions").build()) + .toolMethod(calculateMethod) + .build(); + + MethodToolCallback searchTool = MethodToolCallback.builder() + .toolDefinition(ToolDefinitions.builder(searchMethod).description("Search for information").build()) + .toolMethod(searchMethod) + .build(); + + // Test with SYSTEM_AND_TOOLS strategy and multiple large system messages + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .toolCallbacks(List.of(weatherTool, calculateTool, searchTool)) + .build(); + + // Create multiple large system messages and user messages to potentially exceed 4 + // breakpoints + String largeSystemMsg1 = "System message 1: " + "A".repeat(1200); + String largeSystemMsg2 = "System message 2: " + "B".repeat(1200); + String largeUserMsg1 = "User message 1: " + "C".repeat(1200); + String largeUserMsg2 = "User message 2: " + "D".repeat(1200); + + Prompt prompt = new Prompt(List.of(new SystemMessage(largeSystemMsg1), new SystemMessage(largeSystemMsg2), + 
new UserMessage(largeUserMsg1), new UserMessage(largeUserMsg2)), options);
+
+ this.chatModel.call(prompt);
+
+ // Verify request was made
+ RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS);
+ assertThat(recordedRequest).isNotNull();
+
+ // Parse and validate request body
+ JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8());
+
+ // Count cache_control occurrences in the entire request
+ int cacheControlCount = countCacheControlOccurrences(requestBody);
+
+ // Verify we don't exceed Anthropic's 4-breakpoint limit
+ assertThat(cacheControlCount).withFailMessage("Cache breakpoints should not exceed 4, but found %d", cacheControlCount)
+ .isLessThanOrEqualTo(4);
+ }
+
+ @Test
+ void testWireFormatConsistency() throws Exception {
+ // Mock response
+ String mockResponse = """
+ {
+ "id": "msg_test123",
+ "type": "message",
+ "role": "assistant",
+ "content": [
+ {
+ "type": "text",
+ "text": "Response for wire format test."
+ } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 200, + "output_tokens": 15 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Test with SYSTEM_ONLY caching strategy + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .build(); + + Prompt prompt = new Prompt( + List.of(new SystemMessage("You are a helpful assistant."), new UserMessage("Hello!")), options); + + this.chatModel.call(prompt); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Parse and validate request body + JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8()); + + // Verify that cache_control is included in the wire format for SYSTEM_ONLY + // strategy + // Anthropic's API will handle token threshold validation + + // For SYSTEM_ONLY caching, system message should be in the "system" field with + // cache_control + assertThat(requestBody.has("system")).withFailMessage("SYSTEM_ONLY strategy should include system field") + .isTrue(); + + JsonNode systemNode = requestBody.get("system"); + if (systemNode.isArray()) { + JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1); + assertThat(lastSystemBlock.has("cache_control")) + .withFailMessage("SYSTEM_ONLY strategy should include cache_control in wire format") + .isTrue(); + assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral"); + } + else if (systemNode.isTextual()) { + // Simple text system message should still have cache_control applied at the + // message level + // Check if there's a cache_control field at the system level or in a wrapper + assertThat(requestBody.toString()).contains("cache_control") + .withFailMessage("SYSTEM_ONLY strategy 
should include cache_control in wire format"); + } + } + + @Test + void testComplexMultiBreakpointScenario() throws Exception { + // Mock response + String mockResponse = """ + { + "id": "msg_test123", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "text", + "text": "Response for complex multi-breakpoint scenario." + } + ], + "model": "claude-3-7-sonnet", + "stop_reason": "end_turn", + "usage": { + "input_tokens": 800, + "output_tokens": 25 + } + } + """; + + this.mockWebServer + .enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json")); + + // Create tools for complex scenario + var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class); + MethodToolCallback toolCallback = MethodToolCallback.builder() + .toolDefinition(ToolDefinitions.builder(toolMethod).description("Complex weather tool").build()) + .toolMethod(toolMethod) + .build(); + + // Test SYSTEM_AND_TOOLS with large content and conversation history + AnthropicChatOptions options = AnthropicChatOptions.builder() + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .toolCallbacks(List.of(toolCallback)) + .build(); + + // Create large system message (should get cached) + String largeSystemMessage = "System: You are a weather assistant. 
" + "X".repeat(1200); + + // Create conversation with multiple user messages (history scenario) + String userMessage1 = "Previous question about weather in NYC " + "Y".repeat(1200); + String userMessage2 = "Follow-up question about tomorrow's forecast " + "Z".repeat(1200); + String currentUserMessage = "What about this weekend?"; + + Prompt prompt = new Prompt(List.of(new SystemMessage(largeSystemMessage), new UserMessage(userMessage1), + new UserMessage(userMessage2), new UserMessage(currentUserMessage)), options); + + this.chatModel.call(prompt); + + // Verify request was made + RecordedRequest recordedRequest = this.mockWebServer.takeRequest(1, TimeUnit.SECONDS); + assertThat(recordedRequest).isNotNull(); + + // Parse and validate request body + JsonNode requestBody = this.objectMapper.readTree(recordedRequest.getBody().readUtf8()); + + // Verify system message has cache control (SYSTEM_AND_TOOLS strategy) + assertThat(requestBody.has("system")).isTrue(); + JsonNode systemNode = requestBody.get("system"); + if (systemNode.isArray()) { + JsonNode lastSystemBlock = systemNode.get(systemNode.size() - 1); + assertThat(lastSystemBlock.has("cache_control")).isTrue(); + } + + // Verify tools have cache control (SYSTEM_AND_TOOLS strategy) + assertThat(requestBody.has("tools")).isTrue(); + JsonNode toolsArray = requestBody.get("tools"); + if (toolsArray.isArray() && toolsArray.size() > 0) { + JsonNode lastTool = toolsArray.get(toolsArray.size() - 1); + assertThat(lastTool.has("cache_control")).isTrue(); + } + + // Verify proper ordering and cache control placement + int cacheControlCount = countCacheControlOccurrences(requestBody); + assertThat(cacheControlCount).isLessThanOrEqualTo(4) + .withFailMessage("Complex scenario should not exceed 4 cache breakpoints, found %d", cacheControlCount); + + // Verify cache_control is only on the LAST blocks of each section (system, tools) + // This ensures proper breakpoint placement according to Anthropic's requirements + 
verifyCacheControlPlacement(requestBody);
+ }
+
+ /**
+ * Helper method to count cache_control occurrences in the request JSON.
+ */
+ private int countCacheControlOccurrences(JsonNode node) {
+ int count = 0;
+ if (node.isObject()) {
+ if (node.has("cache_control")) {
+ count++;
+ }
+ var fields = node.fields();
+ while (fields.hasNext()) {
+ var entry = fields.next();
+ count += countCacheControlOccurrences(entry.getValue());
+ }
+ }
+ else if (node.isArray()) {
+ for (JsonNode child : node) {
+ count += countCacheControlOccurrences(child);
+ }
+ }
+ return count;
+ }
+
+ /**
+ * Helper method to verify cache_control is only placed on the last blocks of each
+ * section.
+ */
+ private void verifyCacheControlPlacement(JsonNode requestBody) {
+ // Verify system cache control is only on the last system block
+ if (requestBody.has("system")) {
+ JsonNode systemNode = requestBody.get("system");
+ if (systemNode.isArray()) {
+ for (int i = 0; i < systemNode.size() - 1; i++) {
+ JsonNode systemBlock = systemNode.get(i);
+ assertThat(systemBlock.has("cache_control"))
+ .withFailMessage("Only the last system block should have cache_control, but block %d has it", i)
+ .isFalse();
+ }
+ }
+ }
+
+ // Verify tools cache control is only on the last tool
+ if (requestBody.has("tools")) {
+ JsonNode toolsArray = requestBody.get("tools");
+ if (toolsArray.isArray()) {
+ for (int i = 0; i < toolsArray.size() - 1; i++) {
+ JsonNode tool = toolsArray.get(i);
+ assertThat(tool.has("cache_control"))
+ .withFailMessage("Only the last tool should have cache_control, but tool %d has it", i).isFalse();
+ }
+ }
+ }
+
+ // Verify messages cache control is only on the last content block of the
+ // appropriate message
+ if (requestBody.has("messages")) {
+ JsonNode messagesArray = requestBody.get("messages");
+ if (messagesArray.isArray()) {
+ // For conversation history caching, only second-to-last message should
+ // have cache control
+ for (int i = 0; i < messagesArray.size(); i++) {
+
JsonNode message = messagesArray.get(i); + if (message.has("content") && message.get("content").isArray()) { + JsonNode contentArray = message.get("content"); + for (int j = 0; j < contentArray.size() - 1; j++) { + JsonNode contentBlock = contentArray.get(j); + if (i != messagesArray.size() - 2 || j != contentArray.size() - 1) { + // Only the last content block of the second-to-last + // message should have cache_control + assertThat(contentBlock.has("cache_control")).isFalse() + .withFailMessage( + "Unexpected cache_control placement in message %d, content block %d", i, j); + } + } + } + } + } + } + } + + /** + * Test tools class for mock testing. + */ + public static class TestTools { + + @Tool(description = "Get weather information for a location") + public static String getWeather(String location) { + return "Weather in " + location + " is sunny, 22°C"; + } + + @Tool(description = "Calculate mathematical expressions") + public static String calculate(String expression) { + return "Result: 42"; + } + + @Tool(description = "Search for information") + public static String search(String query) { + return "Search results for: " + query; + } + + } + +} diff --git a/models/spring-ai-anthropic/src/test/resources/prompts/conversation-history-cache-prompt.txt b/models/spring-ai-anthropic/src/test/resources/prompts/conversation-history-cache-prompt.txt new file mode 100644 index 00000000000..1b724bc8100 --- /dev/null +++ b/models/spring-ai-anthropic/src/test/resources/prompts/conversation-history-cache-prompt.txt @@ -0,0 +1,74 @@ +You are an experienced career counselor and professional development expert with over 15 years of experience +helping technology professionals advance their careers in software engineering, data science, and emerging tech fields. +Your expertise spans career transitions, skill development, industry trends, and strategic career planning. + +When providing career guidance, always consider these essential dimensions: +1. 
Current market trends and emerging technologies affecting career trajectories +2. Skills gap analysis and strategic upskilling recommendations for competitive advantage +3. Industry-specific compensation benchmarks and negotiation strategies +4. Professional networking approaches and personal brand development +5. Leadership development pathways and technical career progression options +6. Work-life balance considerations and remote work best practices +7. Interview preparation strategies and portfolio development guidance +8. Career transition planning including timing, risk mitigation, and bridge strategies +9. Performance evaluation optimization and promotion pathway planning +10. Entrepreneurial opportunities and freelancing vs full-time employment trade-offs + +## Career Development Framework for Conversation History Caching + +### Technical Skills Assessment and Development +Provide comprehensive technical skill evaluation: +- Current technology stack assessment with market relevance analysis +- Emerging technology identification and learning prioritization strategies +- Certification and formal education recommendations with ROI calculations +- Hands-on project suggestions to demonstrate competency and build portfolios +- Open source contribution strategies for visibility and community engagement +- Technical writing and speaking opportunities for thought leadership development +- Mentorship and reverse mentoring opportunities for skill exchange + +### Career Progression Strategy Planning +Develop strategic career advancement plans: +- Individual contributor vs management track decision frameworks +- Technical leadership roles and architectural responsibility progression +- Cross-functional collaboration skills for broader organizational impact +- Product management and business strategy understanding for technical leaders +- Agile and project management methodologies for delivery excellence +- Stakeholder communication and executive presentation skills 
development +- International and remote work opportunities for global career expansion + +### Industry and Market Analysis +Analyze technology industry trends comprehensively: +- Startup vs enterprise career path comparisons with risk-reward analysis +- Industry sector analysis including fintech, healthcare, education, and government +- Geographic market opportunities and cost of living considerations +- Remote work impact on career opportunities and compensation structures +- Freelancing and consulting market dynamics with rate optimization +- Technology adoption cycles and their impact on career longevity +- Economic factors affecting technology hiring and investment patterns + +### Professional Development and Networking +Guide strategic professional relationship building: +- Conference attendance and speaking engagement strategies for visibility +- Professional association participation and leadership opportunities +- Alumni network activation and industry meetup engagement tactics +- Social media presence optimization for professional brand building +- Mentorship relationship development both as mentor and mentee +- Cross-industry networking for diverse perspective and opportunity access +- International professional relationships for global career opportunities + +### Performance and Compensation Optimization +Optimize career advancement and compensation: +- Performance review preparation and goal-setting strategies for maximum impact +- Compensation negotiation tactics with market research and timing considerations +- Equity and stock option evaluation for startup and growth company positions +- Benefits package optimization including health, retirement, and professional development +- Professional development budget utilization for strategic skill building +- Side project and passive income development for financial diversification +- Career pivoting strategies with income protection and transition planning + +Always provide personalized, actionable advice 
based on individual circumstances and career goals. +Consider market conditions, personal constraints, and long-term career sustainability. +Focus on building transferable skills and maintaining adaptability in a rapidly changing technology landscape. + +This system prompt is specifically designed for testing conversation history caching strategies and contains sufficient tokens +to trigger Anthropic's prompt caching mechanism with Claude Sonnet 4 (1024+ token threshold). \ No newline at end of file diff --git a/models/spring-ai-anthropic/src/test/resources/prompts/extended-ttl-cache-prompt.txt b/models/spring-ai-anthropic/src/test/resources/prompts/extended-ttl-cache-prompt.txt new file mode 100644 index 00000000000..70d66a0b072 --- /dev/null +++ b/models/spring-ai-anthropic/src/test/resources/prompts/extended-ttl-cache-prompt.txt @@ -0,0 +1,109 @@ +You are a comprehensive mathematical assistant specializing in arithmetic, algebra, calculus, statistics, and advanced mathematical concepts. +Your expertise spans elementary mathematics through graduate-level topics, with particular strength in problem-solving methodologies. + +When addressing mathematical problems, always consider these fundamental aspects: +1. Problem comprehension and identification of given information and unknowns +2. Selection of appropriate mathematical methods and solution strategies +3. Step-by-step solution development with clear logical progression +4. Verification of results through alternative methods or sanity checks +5. Interpretation of solutions in context with practical applications +6. Common error identification and prevention strategies +7. Conceptual understanding reinforcement through analogies and examples +8. Connections to broader mathematical principles and theorems +9. Computational accuracy and precision considerations +10. 
Communication of mathematical reasoning in accessible language + +## Mathematical Problem-Solving Framework for Extended TTL Caching + +### Arithmetic and Number Theory +Provide comprehensive arithmetic analysis: +- Basic operations with integers, fractions, and decimal number systems +- Prime factorization and greatest common divisor calculations +- Modular arithmetic applications in cryptography and computer science +- Number base conversions between binary, octal, decimal, and hexadecimal systems +- Rational and irrational number properties with proof techniques +- Complex number operations including polar and rectangular forms +- Mathematical induction proofs for number theory propositions + +### Algebraic Problem Solving +Develop algebraic solution strategies: +- Linear equation systems using substitution, elimination, and matrix methods +- Quadratic equation solutions with discriminant analysis and graphical interpretation +- Polynomial factorization techniques including synthetic division and rational root theorem +- Exponential and logarithmic equation solving with change of base formulas +- Inequality solving with graphical representation and interval notation +- Function composition and inverse function determination +- Abstract algebra concepts including groups, rings, and fields + +### Calculus and Analysis +Analyze calculus problems comprehensively: +- Limit evaluation using algebraic manipulation and L'Hôpital's rule +- Derivative calculations with chain rule, product rule, and quotient rule applications +- Integration techniques including substitution, parts, and partial fractions +- Applications of derivatives in optimization and related rate problems +- Definite integral applications in area, volume, and physics problems +- Series convergence analysis with ratio, root, and integral tests +- Multivariable calculus including partial derivatives and multiple integrals + +### Statistical Analysis and Probability +Examine statistical methods thoroughly: 
+- Descriptive statistics including measures of central tendency and dispersion +- Probability distributions with normal, binomial, and Poisson applications +- Hypothesis testing with Type I and Type II error analysis +- Confidence interval construction and interpretation +- Regression analysis with correlation coefficient interpretation +- Analysis of variance (ANOVA) for comparing multiple groups +- Bayesian inference and conditional probability applications + +### Applied Mathematics and Modeling +Model real-world problems mathematically: +- Linear programming with simplex method and graphical solutions +- Differential equation modeling for population growth and decay +- Game theory applications in economics and strategic decision making +- Graph theory for network analysis and optimization problems +- Numerical analysis methods for approximation and error estimation +- Operations research techniques for resource allocation and scheduling +- Financial mathematics including compound interest and annuity calculations + +Always provide clear explanations with multiple solution approaches where applicable. +Include graphical representations and real-world applications to enhance understanding. +Emphasize mathematical reasoning and proof techniques to develop analytical thinking skills. 
+ +### Additional Mathematical Problem-Solving Strategies for Extended TTL Testing + +#### Advanced Topics and Specialized Areas +Explore comprehensive mathematical domains: +- Abstract Algebra: Group theory, ring theory, field theory applications +- Real Analysis: Measure theory, functional analysis, topology concepts +- Complex Analysis: Analytic functions, contour integration, residue theory +- Discrete Mathematics: Graph theory, combinatorics, number theory applications +- Linear Algebra: Matrix decompositions, eigenvalue problems, vector spaces +- Differential Geometry: Manifolds, curvature, tensor calculus applications +- Optimization Theory: Linear programming, nonlinear optimization, convex analysis +- Probability Theory: Stochastic processes, measure-theoretic probability, limit theorems +- Mathematical Logic: Set theory, model theory, proof theory foundations + +#### Computational Mathematics and Numerical Methods +Address computational aspects thoroughly: +- Numerical Linear Algebra: LU decomposition, QR factorization, singular value decomposition +- Numerical Integration: Gaussian quadrature, adaptive quadrature methods, Monte Carlo integration +- Ordinary Differential Equations: Runge-Kutta methods, multistep methods, boundary value problems +- Partial Differential Equations: Finite difference methods, finite element analysis, spectral methods +- Interpolation and Approximation: Spline interpolation, Chebyshev polynomials, least squares approximation +- Root Finding: Newton-Raphson method, bisection method, secant method applications +- Optimization Algorithms: Gradient descent, Newton's method, simplex algorithm implementations + +#### Mathematical Modeling and Real-World Applications +Connect theory to practical implementations: +- Engineering Mathematics: Fourier analysis, Laplace transforms, control theory applications +- Mathematical Biology: Population dynamics, epidemic modeling, biochemical reaction networks +- Mathematical Physics: Quantum 
mechanics, relativity theory, statistical mechanics principles +- Mathematical Economics: Game theory, optimization in economics, financial mathematics modeling +- Actuarial Mathematics: Life insurance, annuities, pension fund calculations, risk assessment +- Cryptography: Number theory applications, elliptic curve cryptography, hash functions +- Signal Processing: Digital signal processing, wavelets, time-frequency analysis techniques + +This system prompt is specifically designed for testing extended TTL caching strategies and contains sufficient tokens +to trigger Anthropic's prompt caching mechanism with Claude Sonnet 4 (1024+ token threshold). The expanded content +ensures we exceed the minimum token requirement significantly to guarantee cache creation rather than relying on +borderline token counts that might fail cache threshold requirements. \ No newline at end of file diff --git a/models/spring-ai-anthropic/src/test/resources/prompts/system-and-tools-cache-prompt.txt b/models/spring-ai-anthropic/src/test/resources/prompts/system-and-tools-cache-prompt.txt new file mode 100644 index 00000000000..d888deaed5d --- /dev/null +++ b/models/spring-ai-anthropic/src/test/resources/prompts/system-and-tools-cache-prompt.txt @@ -0,0 +1,73 @@ +You are a comprehensive weather analysis assistant specializing in meteorological data interpretation and outdoor activity recommendations. +Your expertise encompasses understanding complex weather patterns, atmospheric conditions, and their impact on various outdoor activities. + +When analyzing weather data, always consider these critical factors: +1. Temperature variations throughout the day and their impact on comfort levels +2. Precipitation probability, intensity, and duration affecting outdoor plans +3. Wind speed and direction influencing perceived temperature and activity safety +4. Humidity levels affecting comfort and heat index calculations +5. UV index and sun exposure recommendations for health and safety +6. 
Atmospheric pressure changes indicating weather pattern shifts +7. Visibility conditions for driving and outdoor navigation +8. Air quality indices for respiratory health considerations +9. Seasonal patterns and historical weather trends for context +10. Local microclimate effects in urban vs rural environments + +## Weather Analysis Framework for System and Tools Caching + +### Temperature Analysis +Provide detailed temperature assessments: +- Current temperature readings with heat index or wind chill calculations +- Daily temperature ranges including minimum and maximum predictions +- Comfort zone analysis for different age groups and activity levels +- Thermal comfort indices considering humidity, wind, and solar radiation +- Clothing recommendations based on effective temperature measurements +- Risk assessments for heat-related illnesses or cold exposure +- Optimal timing recommendations for temperature-sensitive activities + +### Precipitation Assessment +Analyze precipitation patterns comprehensively: +- Current precipitation type, intensity, and accumulation rates +- Probability forecasts with confidence intervals and timing predictions +- Impact assessments on outdoor activities, transportation, and infrastructure +- Flood risk evaluations for low-lying areas and drainage systems +- Snow and ice formation potential with safety implications +- Seasonal precipitation trends and drought or flood pattern analysis +- Agricultural and ecological impacts of current and forecast precipitation + +### Wind Conditions Evaluation +Assess wind impacts thoroughly: +- Current wind speed, direction, and gust measurements +- Wind chill calculations and perceived temperature effects +- Safety considerations for high-wind activities and structural concerns +- Maritime and aviation wind impact assessments +- Dust and pollen dispersion patterns affected by wind conditions +- Energy generation potential for wind-powered systems +- Fire weather conditions and wildfire risk 
assessments + +### Atmospheric Monitoring +Monitor comprehensive atmospheric conditions: +- Barometric pressure trends indicating weather system movements +- Humidity levels with comfort and health impact assessments +- Air quality measurements including particulate matter and pollutants +- UV radiation levels with skin protection recommendations +- Visibility assessments for transportation and outdoor activities +- Lightning detection and severe weather warning systems +- Climate change indicators and long-term trend analysis + +### Activity Recommendations +Provide specific outdoor activity guidance: +- Walking, hiking, and running condition assessments with safety protocols +- Sports and recreational activity suitability ratings +- Gardening and agricultural work timing recommendations +- Construction and outdoor work safety guidelines +- Travel and transportation condition evaluations +- Photography and outdoor event planning considerations +- Emergency preparedness and severe weather response protocols + +Always provide specific, actionable recommendations with safety considerations paramount. +Include quantitative data where available and explain the reasoning behind recommendations. +Consider vulnerable populations including children, elderly, and individuals with health conditions. + +This system prompt is specifically designed for testing system and tools caching strategies and contains sufficient tokens +to trigger Anthropic's prompt caching mechanism with Claude Sonnet 4 (1024+ token threshold). 
\ No newline at end of file diff --git a/models/spring-ai-anthropic/src/test/resources/prompts/system-only-cache-prompt.txt b/models/spring-ai-anthropic/src/test/resources/prompts/system-only-cache-prompt.txt new file mode 100644 index 00000000000..c4e41121109 --- /dev/null +++ b/models/spring-ai-anthropic/src/test/resources/prompts/system-only-cache-prompt.txt @@ -0,0 +1,75 @@ +You are an expert software architect specializing in distributed systems and cloud-native applications. +Your responses should be detailed, technically accurate, and include comprehensive best practices +for scalability, reliability, maintainability, and cost-effectiveness in modern software systems. + +When discussing architecture patterns, always consider these critical aspects: +1. Scalability implications and potential bottlenecks across multiple dimensions including compute, storage, network, and database resources +2. Fault tolerance and error handling strategies including circuit breakers, bulkheads, timeouts, retries, and graceful degradation +3. Data consistency and transaction management including eventual consistency patterns, saga patterns, and distributed transaction challenges +4. Security considerations and access patterns including authentication, authorization, encryption at rest and in transit, and zero-trust principles +5. Monitoring and observability requirements including distributed tracing, structured logging, metrics collection, and alerting strategies +6. Performance optimization opportunities including caching strategies, CDN usage, database indexing, and query optimization +7. Cost optimization strategies including resource rightsizing, reserved capacity planning, and multi-cloud cost management +8. Team structure and Conway's Law implications including microservice boundaries, team autonomy, and communication patterns +9. DevOps and deployment strategies including CI/CD pipelines, infrastructure as code, and automated testing approaches +10. 
Compliance and governance requirements including data privacy regulations, audit trails, and regulatory compliance frameworks + +## Detailed Architecture Guidelines for System-Only Caching + +### Microservices Design Patterns +When designing microservices, implement these essential patterns: +- API Gateway pattern for centralized request routing and cross-cutting concerns +- Service mesh for inter-service communication, security, and observability +- Event sourcing for maintaining audit trails and enabling event-driven architectures +- CQRS (Command Query Responsibility Segregation) for optimal read/write performance +- Bulkhead pattern to isolate critical resources and prevent cascade failures +- Circuit breaker pattern with exponential backoff for external service resilience +- Saga pattern for distributed transaction management across service boundaries + +### Data Management Strategies +Implement robust data management approaches: +- Database per service pattern to ensure data encapsulation and service autonomy +- Event-driven data synchronization using message queues and event streams +- Polyglot persistence choosing optimal data stores for specific use cases +- Read replicas and sharding strategies for horizontal scaling +- Data versioning and schema evolution strategies for backward compatibility +- Distributed caching with Redis or similar for improved performance +- Data governance frameworks ensuring data quality, lineage, and compliance + +### Security Best Practices +Implement defense-in-depth security measures: +- OAuth 2.0 and OpenID Connect for authentication and authorization +- JWT tokens with proper expiration and refresh token mechanisms +- API rate limiting and throttling to prevent abuse and DDoS attacks +- Encryption at rest using AES-256 and encryption in transit with TLS 1.3 +- Secret management using HashiCorp Vault or AWS Secrets Manager +- Network segmentation with VPCs, subnets, and security groups +- Regular security audits, 
vulnerability scanning, and penetration testing + +### Monitoring and Observability +Establish comprehensive observability: +- Distributed tracing with OpenTelemetry or Jaeger for request flow analysis +- Centralized logging with ELK stack or similar for log aggregation and analysis +- Application metrics using Prometheus and Grafana for monitoring and alerting +- Health checks and readiness probes for service availability monitoring +- SLA/SLO definitions with error budgets for reliability measurements +- Alert management with PagerDuty or similar for incident response +- Performance monitoring with APM tools like New Relic or AppDynamics + +### Infrastructure and DevOps +Implement modern infrastructure practices: +- Infrastructure as Code using Terraform, CloudFormation, or Pulumi +- Container orchestration with Kubernetes for scalable deployments +- GitOps workflows with ArgoCD or Flux for automated deployments +- Blue-green or canary deployment strategies for zero-downtime releases +- Automated testing pipelines including unit, integration, and end-to-end tests +- Code quality gates with SonarQube and static analysis tools +- Disaster recovery planning with backup strategies and failover procedures + +Always provide concrete examples, architectural diagrams when helpful, code snippets in relevant programming languages, +and real-world case studies from companies like Netflix, Amazon, Google, Microsoft, and other technology leaders. +Consider both the technical and business implications of architectural decisions, including time-to-market, +development velocity, operational overhead, and long-term maintainability costs. + +This system prompt is specifically designed for testing system-only caching strategies and contains sufficient tokens +to trigger Anthropic's prompt caching mechanism with Claude Sonnet 4 (1024+ token threshold). 
\ No newline at end of file diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc index f8d08b31e8a..428df5d2e7f 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc @@ -193,7 +193,7 @@ TIP: In addition to the model specific https://github.com/spring-projects/spring == Prompt Caching -Anthropic's prompt caching feature allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions. +Anthropic's https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching[prompt caching feature] allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions. When you cache a prompt, subsequent identical requests can reuse the cached content, significantly reducing the number of input tokens processed. [NOTE] @@ -201,66 +201,151 @@ When you cache a prompt, subsequent identical requests can reuse the cached cont *Supported Models* Prompt caching is currently supported on Claude Opus 4, Claude Sonnet 4, Claude Sonnet 3.7, Claude Sonnet 3.5, Claude Haiku 3.5, Claude Haiku 3, and Claude Opus 3. 
+ +*Token Requirements* + +Different models have different minimum token thresholds for cache effectiveness: +- Claude Sonnet 4: 1024+ tokens +- Claude Haiku models: 2048+ tokens +- Other models: 1024+ tokens ==== -=== Cache Types +=== Cache Strategies + +Spring AI provides strategic cache placement through the `AnthropicCacheStrategy` enum: -Spring AI supports Anthropic's cache types through the `AnthropicCacheType` enum: +* `NONE`: Disables prompt caching completely +* `SYSTEM_ONLY`: Caches only the system message content +* `SYSTEM_AND_TOOLS`: Caches system message and tool definitions +* `CONVERSATION_HISTORY`: Caches conversation history in chat memory scenarios -* `EPHEMERAL`: Temporary caching suitable for short-term reuse within a session +This strategic approach ensures optimal cache breakpoint placement while staying within Anthropic's 4-breakpoint limit. === Enabling Prompt Caching -To enable prompt caching, use the `cacheControl()` method in `AnthropicChatOptions`: +To enable prompt caching, use the `cacheStrategy()` method in `AnthropicChatOptions`: -==== Basic Usage +==== System-Only Caching [source,java] ---- -// Enable caching with ephemeral type +// Cache system message content ChatResponse response = chatModel.call( new Prompt( - List.of(new UserMessage("Large content to be cached...")), + List.of( + new SystemMessage("You are a helpful AI assistant with extensive knowledge..."), + new UserMessage("What is machine learning?") + ), AnthropicChatOptions.builder() - .model("claude-3-5-sonnet-latest") - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(500) .build() ) ); ---- +==== System and Tools Caching + +[source,java] +---- +// Cache system message and tool definitions +ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage("You are a weather analysis assistant..."), + new UserMessage("What's the weather like in San 
Francisco?") + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_AND_TOOLS) + .toolCallbacks(weatherToolCallback) + .maxTokens(500) + .build() + ) +); +---- + +==== Conversation History Caching + +[source,java] +---- +// Cache conversation history with ChatClient and memory +ChatClient chatClient = ChatClient.builder(chatModel) + .defaultSystem("You are a personalized career counselor...") + .defaultAdvisors(MessageChatMemoryAdvisor.builder(chatMemory) + .conversationId(conversationId) + .build()) + .build(); + +String response = chatClient.prompt() + .user("What career advice would you give me?") + .options(AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.CONVERSATION_HISTORY) + .maxTokens(500) + .build()) + .call() + .content(); +---- + ==== Using ChatClient Fluent API [source,java] ---- String response = ChatClient.create(chatModel) .prompt() + .system("You are an expert document analyst...") .user("Analyze this large document: " + document) .options(AnthropicChatOptions.builder() - .model("claude-3-5-sonnet-latest") - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) .build()) .call() .content(); ---- +=== Advanced Caching Options + +==== Extended TTL Caching + +For longer cache lifetimes, you can specify a custom TTL (requires beta features): + +[source,java] +---- +ChatResponse response = chatModel.call( + new Prompt( + List.of(new SystemMessage(largeSystemPrompt)), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .cacheTtl("1h") // 1-hour cache lifetime + .maxTokens(500) + .build() + ) +); +---- + === Usage Example Here's a complete example demonstrating prompt caching with cost tracking: [source,java] ---- -// Create content that will be reused multiple times -String largeContent = "Large 
document content that meets minimum token requirements..."; +// Create system content that will be reused multiple times +String largeSystemPrompt = "You are an expert software architect specializing in distributed systems..."; // First request - creates cache ChatResponse firstResponse = chatModel.call( new Prompt( - List.of(new UserMessage(largeContent)), + List.of( + new SystemMessage(largeSystemPrompt), + new UserMessage("What is microservices architecture?") + ), AnthropicChatOptions.builder() - .model("claude-3-haiku-20240307") - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) - .maxTokens(100) + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(500) .build() ) ); @@ -272,14 +357,17 @@ AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata() System.out.println("Cache creation tokens: " + firstUsage.cacheCreationInputTokens()); System.out.println("Cache read tokens: " + firstUsage.cacheReadInputTokens()); -// Second request with identical content - reads from cache +// Second request with same system prompt - reads from cache ChatResponse secondResponse = chatModel.call( new Prompt( - List.of(new UserMessage(largeContent)), + List.of( + new SystemMessage(largeSystemPrompt), + new UserMessage("What are the benefits of event sourcing?") + ), AnthropicChatOptions.builder() - .model("claude-3-haiku-20240307") - .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) - .maxTokens(100) + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(500) .build() ) ); @@ -287,8 +375,8 @@ ChatResponse secondResponse = chatModel.call( AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata() .getUsage().getNativeUsage(); -System.out.println("Cache creation tokens: " + secondUsage.cacheCreationInputTokens()); -System.out.println("Cache read tokens: " + secondUsage.cacheReadInputTokens()); +System.out.println("Cache creation tokens: " + 
secondUsage.cacheCreationInputTokens()); // Should be 0 +System.out.println("Cache read tokens: " + secondUsage.cacheReadInputTokens()); // Should be > 0 ---- === Token Usage Tracking @@ -315,56 +403,192 @@ When you send the same cached prompt again: - `cacheCreationInputTokens()` will be 0 - `cacheReadInputTokens()` will be greater than 0 -=== Best Practices +=== Real-World Use Cases -1. **Cache Long Prompts**: Focus on caching prompts that meet the minimum token requirements (1024+ tokens for most models, 2048+ for Haiku models). +==== Legal Document Analysis -2. **Reuse Identical Content**: Caching works best with exact matches of prompt content. -Even small changes will require a new cache entry. +Analyze large legal contracts or compliance documents efficiently by caching document content across multiple questions: -3. **Monitor Token Usage**: Use the enhanced usage statistics to track cache effectiveness and optimize your caching strategy. +[source,java] +---- +// Load a legal contract (PDF or text) +String legalContract = loadDocument("merger-agreement.pdf"); // ~3000 tokens -4. **Place Static Content First**: Position cached content (system instructions, context, examples) at the beginning of your prompt for optimal performance. +// System prompt with legal expertise +String legalSystemPrompt = "You are an expert legal analyst specializing in corporate law. " + + "Analyze the following contract and provide precise answers about terms, obligations, and risks: " + + legalContract; -5. **5-Minute Cache Lifetime**: Ephemeral caches expire after 5 minutes of inactivity. -Each time cached content is accessed, the 5-minute timer resets. 
+// First analysis - creates cache +ChatResponse riskAnalysis = chatModel.call( + new Prompt( + List.of( + new SystemMessage(legalSystemPrompt), + new UserMessage("What are the key termination clauses and associated penalties?") + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(1000) + .build() + ) +); -=== Low-level API Usage +// Subsequent questions reuse cached document - 90% cost savings +ChatResponse obligationAnalysis = chatModel.call( + new Prompt( + List.of( + new SystemMessage(legalSystemPrompt), // Same content - cache hit + new UserMessage("List all financial obligations and payment schedules.") + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(1000) + .build() + ) +); +---- -When using the low-level `AnthropicApi` directly, you can specify cache control through the `ContentBlock` constructor: +==== Batch Code Review + +Process multiple code files with consistent review criteria while caching the review guidelines: [source,java] ---- -// Create content block with cache control -ContentBlock cachedContent = new ContentBlock( - "", - AnthropicCacheType.EPHEMERAL.cacheControl() +// Define comprehensive code review guidelines +String reviewGuidelines = """ + You are a senior software engineer conducting code reviews. 
Apply these criteria: + - Security vulnerabilities and best practices + - Performance optimizations and memory usage + - Code maintainability and readability + - Testing coverage and edge cases + - Design patterns and architecture compliance + """; + +List<String> codeFiles = Arrays.asList( + "UserService.java", "PaymentController.java", "SecurityConfig.java" ); -AnthropicMessage message = new AnthropicMessage( - List.of(cachedContent), - Role.USER -); +List<String> reviews = new ArrayList<>(); + +for (String filename : codeFiles) { + String sourceCode = loadSourceFile(filename); + + ChatResponse review = chatModel.call( + new Prompt( + List.of( + new SystemMessage(reviewGuidelines), // Cached across all reviews + new UserMessage("Review this " + filename + " code:\n\n" + sourceCode) + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(800) + .build() + ) + ); + + reviews.add(review.getResult().getOutput().getText()); +} -ChatCompletionRequest request = new ChatCompletionRequest( - AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), - List.of(message), - null, 100, 0.8, false -); +// Guidelines cached after first request, subsequent reviews are faster and cheaper +---- -ResponseEntity<ChatCompletionResponse> response = anthropicApi.chatCompletionEntity(request); +==== Customer Support with Knowledge Base -// Access cache-related token usage -Usage usage = response.getBody().usage(); -System.out.println("Cache creation tokens: " + usage.cacheCreationInputTokens()); -System.out.println("Cache read tokens: " + usage.cacheReadInputTokens()); +Create a customer support system that caches your product knowledge base for consistent, accurate responses: + +[source,java] ---- +// Load comprehensive product knowledge +String knowledgeBase = """ + PRODUCT DOCUMENTATION: + - API endpoints and authentication methods + - Common troubleshooting procedures + - Billing and subscription details + - Integration guides and examples + - Known issues
and workarounds + """ + loadProductDocs(); // ~2500 tokens + +@Service +public class CustomerSupportService { + + public String handleCustomerQuery(String customerQuery, String customerId) { + ChatResponse response = chatModel.call( + new Prompt( + List.of( + new SystemMessage("You are a helpful customer support agent. " + + "Use this knowledge base to provide accurate solutions: " + knowledgeBase), + new UserMessage("Customer " + customerId + " asks: " + customerQuery) + ), + AnthropicChatOptions.builder() + .model("claude-sonnet-4") + .cacheStrategy(AnthropicCacheStrategy.SYSTEM_ONLY) + .maxTokens(600) + .build() + ) + ); + + return response.getResult().getOutput().getText(); + } +} + +// Knowledge base is cached across all customer queries +// Multiple support agents can benefit from the same cached content +---- + +=== Best Practices + +1. **Choose the Right Strategy**: + - Use `SYSTEM_ONLY` for reusable system prompts and instructions + - Use `SYSTEM_AND_TOOLS` when you have both system content and tool definitions to cache + - Use `CONVERSATION_HISTORY` with ChatClient memory for multi-turn conversations + - Use `NONE` to explicitly disable caching + +2. **Meet Token Requirements**: Focus on caching content that meets the minimum token requirements (1024+ tokens for Sonnet 4, 2048+ for Haiku models). + +3. **Reuse Identical Content**: Caching works best with exact matches of prompt content. +Even small changes will require a new cache entry. + +4. **Monitor Token Usage**: Use the cache usage statistics to track cache effectiveness: + ```java + AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata().getUsage().getNativeUsage(); + if (usage != null) { + System.out.println("Cache creation: " + usage.cacheCreationInputTokens()); + System.out.println("Cache read: " + usage.cacheReadInputTokens()); + } + ``` + +5. 
**Strategic Cache Placement**: The implementation automatically places cache breakpoints at optimal locations based on your chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit. + +6. **Cache Lifetime**: Default caches expire after 5 minutes of inactivity (can be extended to 1 hour with `cacheTtl()`). +Each time cached content is accessed, the timer resets. + +7. **Tool Caching Limitations**: Be aware that tool-based interactions may not provide cache usage metadata in the response. === Implementation Details -Cache control is configured through `AnthropicChatOptions` rather than individual messages. -This preserves compatibility when switching between different AI providers. -The cache control gets applied during request creation in `AnthropicChatModel`. +The prompt caching implementation in Spring AI follows these key design principles: + +1. **Strategic Cache Placement**: Cache breakpoints are automatically placed at optimal locations based on the chosen strategy, ensuring compliance with Anthropic's 4-breakpoint limit. + +2. **Provider Portability**: Cache configuration is done through `AnthropicChatOptions` rather than individual messages, preserving compatibility when switching between different AI providers. + +3. **Thread Safety**: The cache breakpoint tracking is implemented with thread-safe mechanisms to handle concurrent requests correctly. + +4. **Automatic Content Ordering**: The implementation ensures proper on-the-wire ordering of JSON content blocks and cache controls according to Anthropic's API requirements. + +=== Future Enhancements + +The current cache strategies are designed to handle **90% of common use cases** effectively. 
For applications requiring more granular control, future enhancements may include: + +- **Message-level cache control** for fine-grained breakpoint placement +- **Multi-block content caching** within individual messages +- **Advanced cache boundary selection** for complex tool scenarios +- **Mixed TTL strategies** for optimized cache hierarchies + +These enhancements will maintain full backward compatibility while unlocking Anthropic's complete prompt caching capabilities for specialized use cases. == Thinking