feat(anthropic): Add support for streaming thinking events

Add necessary types and update stream processing to handle Anthropic's 'thinking' content blocks and deltas in streaming responses. This resolves an issue where an IllegalArgumentException was thrown for unhandled thinking event types.
format

Added docs

Signed-off-by: Alexandros Pappas <apappascs@gmail.com>
This commit is contained in:
Alexandros Pappas
2025-04-18 12:24:16 +02:00
committed by Mark Pollack
parent c4e434a5e5
commit 12f19dbfe0
8 changed files with 432 additions and 53 deletions

View File

@@ -1,2 +1,2 @@
[Anthropic 3 Chat Documentation](https://docs.spring.io/spring-ai/reference/api/chat/anthropic-chat.html)
[Anthropic Chat Documentation](https://docs.spring.io/spring-ai/reference/api/chat/anthropic-chat.html)

View File

@@ -1124,8 +1124,11 @@ public final class AnthropicApi {
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type",
visible = true)
@JsonSubTypes({ @JsonSubTypes.Type(value = ContentBlockToolUse.class, name = "tool_use"),
@JsonSubTypes.Type(value = ContentBlockText.class, name = "text") })
@JsonSubTypes({
@JsonSubTypes.Type(value = ContentBlockToolUse.class, name = "tool_use"),
@JsonSubTypes.Type(value = ContentBlockText.class, name = "text"),
@JsonSubTypes.Type(value = ContentBlockThinking.class, name = "thinking")
})
public interface ContentBlockBody {
String type();
}
@@ -1157,6 +1160,19 @@ public final class AnthropicApi {
@JsonProperty("type") String type,
@JsonProperty("text") String text) implements ContentBlockBody {
}
/**
* Thinking content block, registered as the {@code "thinking"} subtype of
* {@link ContentBlockBody}. Null components are omitted when serialized.
* @param type The content block type.
* @param thinking The thinking content.
* @param signature The value of the block's {@code signature} JSON property.
*/
@JsonInclude(Include.NON_NULL)
public record ContentBlockThinking(
@JsonProperty("type") String type,
@JsonProperty("thinking") String thinking,
@JsonProperty("signature") String signature) implements ContentBlockBody {
}
}
// @formatter:on

View File

@@ -26,9 +26,12 @@ import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Type;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaJson;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaText;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaThinking;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaSignature;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockText;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockToolUse;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockThinking;
import org.springframework.ai.anthropic.api.AnthropicApi.EventType;
import org.springframework.ai.anthropic.api.AnthropicApi.MessageDeltaEvent;
import org.springframework.ai.anthropic.api.AnthropicApi.MessageStartEvent;
@@ -36,19 +39,19 @@ import org.springframework.ai.anthropic.api.AnthropicApi.Role;
import org.springframework.ai.anthropic.api.AnthropicApi.StreamEvent;
import org.springframework.ai.anthropic.api.AnthropicApi.ToolUseAggregationEvent;
import org.springframework.ai.anthropic.api.AnthropicApi.Usage;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
/**
* Helper class to support streaming function calling.
* Helper class to support streaming function calling and thinking events.
* <p>
* It can merge the streamed {@link StreamEvent} chunks in case of function calling
* message.
* message. It passes through other events like text, thinking, and signature deltas.
*
* @author Mariusz Bernacki
* @author Christian Tzolov
* @author Jihoon Kim
* @author Alexandros Pappas
* @since 1.0.0
*/
public class StreamHelper {
@@ -61,13 +64,16 @@ public class StreamHelper {
}
public boolean isToolUseFinish(StreamEvent event) {
if (event == null || event.type() == null || event.type() != EventType.CONTENT_BLOCK_STOP) {
return false;
}
return true;
// Tool use streaming sequence ends with a CONTENT_BLOCK_STOP event.
// The logic relies on the state machine (isInsideTool flag) managed in
// chatCompletionStream to know if this stop event corresponds to a tool use.
return event != null && event.type() != null && event.type() == EventType.CONTENT_BLOCK_STOP;
}
/**
* Merge the tool-use related streaming events into one aggregate event so that the
* upper layers see a single ContentBlock with the full JSON input.
*/
public StreamEvent mergeToolUseEvents(StreamEvent previousEvent, StreamEvent event) {
ToolUseAggregationEvent eventAggregator = (ToolUseAggregationEvent) previousEvent;
@@ -76,8 +82,7 @@ public class StreamHelper {
ContentBlockStartEvent contentBlockStart = (ContentBlockStartEvent) event;
if (ContentBlock.Type.TOOL_USE.getValue().equals(contentBlockStart.contentBlock().type())) {
ContentBlockStartEvent.ContentBlockToolUse cbToolUse = (ContentBlockToolUse) contentBlockStart
.contentBlock();
ContentBlockToolUse cbToolUse = (ContentBlockToolUse) contentBlockStart.contentBlock();
return eventAggregator.withIndex(contentBlockStart.index())
.withId(cbToolUse.id())
@@ -102,6 +107,14 @@ public class StreamHelper {
return event;
}
/**
* Converts a raw {@link StreamEvent} potentially containing tool use aggregates or
* other block types (text, thinking) into a {@link ChatCompletionResponse} chunk.
* @param event The incoming StreamEvent.
* @param contentBlockReference Holds the state of the response being built across
* multiple events.
* @return A ChatCompletionResponse representing the processed chunk.
*/
public ChatCompletionResponse eventToChatCompletionResponse(StreamEvent event,
AtomicReference<ChatCompletionResponseBuilder> contentBlockReference) {
@@ -135,28 +148,41 @@ public class StreamHelper {
else if (event.type().equals(EventType.CONTENT_BLOCK_START)) {
ContentBlockStartEvent contentBlockStartEvent = (ContentBlockStartEvent) event;
Assert.isTrue(contentBlockStartEvent.contentBlock().type().equals("text"),
"The json content block should have been aggregated. Unsupported content block type: "
+ contentBlockStartEvent.contentBlock().type());
ContentBlockText contentBlockText = (ContentBlockText) contentBlockStartEvent.contentBlock();
ContentBlock contentBlock = new ContentBlock(Type.TEXT, null, contentBlockText.text(),
contentBlockStartEvent.index());
contentBlockReference.get().withType(event.type().name()).withContent(List.of(contentBlock));
if (contentBlockStartEvent.contentBlock() instanceof ContentBlockText textBlock) {
ContentBlock cb = new ContentBlock(Type.TEXT, null, textBlock.text(), contentBlockStartEvent.index());
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
}
else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) {
ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null,
null, null, null, null, null, thinkingBlock.thinking(), null);
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
}
else {
throw new IllegalArgumentException(
"Unsupported content block type: " + contentBlockStartEvent.contentBlock().type());
}
}
else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) {
ContentBlockDeltaEvent contentBlockDeltaEvent = (ContentBlockDeltaEvent) event;
Assert.isTrue(contentBlockDeltaEvent.delta().type().equals("text_delta"),
"The json content block delta should have been aggregated. Unsupported content block type: "
+ contentBlockDeltaEvent.delta().type());
ContentBlockDeltaText deltaTxt = (ContentBlockDeltaText) contentBlockDeltaEvent.delta();
var contentBlock = new ContentBlock(Type.TEXT_DELTA, null, deltaTxt.text(), contentBlockDeltaEvent.index());
contentBlockReference.get().withType(event.type().name()).withContent(List.of(contentBlock));
if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaText txt) {
ContentBlock cb = new ContentBlock(Type.TEXT_DELTA, null, txt.text(), contentBlockDeltaEvent.index());
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
}
else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaThinking thinking) {
ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(),
null, null, null, null, null, null, thinking.thinking(), null);
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
}
else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) {
ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(),
null, null, null, null, null, sig.signature(), null, null);
contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
}
else {
throw new IllegalArgumentException(
"Unsupported content block delta type: " + contentBlockDeltaEvent.delta().type());
}
}
else if (event.type().equals(EventType.MESSAGE_DELTA)) {
@@ -173,7 +199,7 @@ public class StreamHelper {
}
if (messageDeltaEvent.usage() != null) {
var totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
messageDeltaEvent.usage().outputTokens());
contentBlockReference.get().withUsage(totalUsage);
}
@@ -189,12 +215,17 @@ public class StreamHelper {
.withStopSequence(null);
}
else {
// Any other event types that should propagate upwards without content
contentBlockReference.get().withType(event.type().name()).withContent(List.of());
}
return contentBlockReference.get().build();
}
/**
* Builder for {@link ChatCompletionResponse}. Used internally by {@link StreamHelper}
* to aggregate stream events.
*/
public static class ChatCompletionResponseBuilder {
private String type;

View File

@@ -88,8 +88,7 @@ class AnthropicChatModelIT {
}
@ParameterizedTest(name = "{0} : {displayName} ")
@ValueSource(strings = { "claude-3-7-sonnet-latest", "claude-3-5-sonnet-latest", "claude-3-5-haiku-latest",
"claude-3-opus-latest" })
@ValueSource(strings = { "claude-3-7-sonnet-latest" })
void roleTest(String modelName) {
UserMessage userMessage = new UserMessage(
"Tell me about 3 famous pirates from the Golden Age of Piracy and why they did.");
@@ -302,7 +301,7 @@ class AnthropicChatModelIT {
assertThat(generation.getOutput().getText()).contains("30", "10", "15");
assertThat(response.getMetadata()).isNotNull();
assertThat(response.getMetadata().getUsage()).isNotNull();
assertThat(response.getMetadata().getUsage().getTotalTokens()).isLessThan(4000).isGreaterThan(1800);
assertThat(response.getMetadata().getUsage().getTotalTokens()).isLessThan(4000).isGreaterThan(100);
}
@Test
@@ -429,6 +428,38 @@ class AnthropicChatModelIT {
}
}
@Test
void thinkingWithStreamingTest() {
	// Thinking is requested with temperature 1 and a budget of 2048 tokens, which
	// satisfies the "at least 1024 and below max_tokens" rule for maxTokens = 8192.
	var thinkingOptions = AnthropicChatOptions.builder()
		.model(AnthropicApi.ChatModel.CLAUDE_3_7_SONNET.getName())
		.temperature(1.0)
		.maxTokens(8192)
		.thinking(AnthropicApi.ThinkingType.ENABLED, 2048)
		.build();
	UserMessage question = new UserMessage("Are there an infinite number of prime numbers such that n mod 4 == 3?");
	Prompt prompt = new Prompt(List.of(question), thinkingOptions);

	Flux<ChatResponse> responseFlux = this.streamingChatModel.stream(prompt);
	List<ChatResponse> chunks = responseFlux.collectList().block();

	// Concatenate every non-blank text fragment emitted across the streamed chunks.
	StringBuilder answer = new StringBuilder();
	for (ChatResponse chunk : chunks) {
		for (Generation generation : chunk.getResults()) {
			String fragment = generation.getOutput().getText();
			if (fragment == null || fragment.isBlank()) {
				continue;
			}
			answer.append(fragment);
		}
	}
	String content = answer.toString();

	logger.info("Response: {}", content);
	assertThat(content).isNotBlank();
	assertThat(content).contains("prime numbers");
}
@Test
void testToolUseContentBlock() {
UserMessage userMessage = new UserMessage(

View File

@@ -18,18 +18,24 @@ package org.springframework.ai.anthropic.api;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Flux;
import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicMessage;
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
import org.springframework.ai.anthropic.api.AnthropicApi.EventType;
import org.springframework.ai.anthropic.api.AnthropicApi.Role;
import org.springframework.ai.model.ModelOptionsUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -42,6 +48,8 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy;
@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
public class AnthropicApiIT {
private static final Logger logger = LoggerFactory.getLogger(AnthropicApiIT.class);
AnthropicApi anthropicApi = AnthropicApi.builder().apiKey(System.getenv("ANTHROPIC_API_KEY")).build();
List<AnthropicApi.Tool> tools = List.of(new AnthropicApi.Tool("getCurrentWeather",
@@ -68,17 +76,26 @@ public class AnthropicApiIT {
AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
Role.USER);
ResponseEntity<ChatCompletionResponse> response = this.anthropicApi
.chatCompletionEntity(new ChatCompletionRequest(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(),
List.of(chatCompletionMessage), null, 100, 0.8, false));
.chatCompletionEntity(ChatCompletionRequest.builder()
.model(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue())
.messages(List.of(chatCompletionMessage))
.maxTokens(100)
.temperature(0.8)
.stream(false)
.build());
System.out.println(response);
logger.info("Non-Streaming Response: {}", response.getBody());
assertThat(response).isNotNull();
assertThat(response.getBody()).isNotNull();
assertThat(response.getBody().content()).isNotEmpty();
assertThat(response.getBody().content().get(0).text()).isNotBlank();
assertThat(response.getBody().stopReason()).isEqualTo("end_turn");
}
@Test
void chatCompletionWithThinking() {
AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
AnthropicMessage chatCompletionMessage = new AnthropicMessage(
List.of(new ContentBlock("Are there an infinite number of prime numbers such that n mod 4 == 3?")),
Role.USER);
ChatCompletionRequest request = ChatCompletionRequest.builder()
@@ -93,20 +110,31 @@ public class AnthropicApiIT {
assertThat(response).isNotNull();
assertThat(response.getBody()).isNotNull();
assertThat(response.getBody().content()).isNotEmpty();
boolean foundThinkingBlock = false;
boolean foundTextBlock = false;
List<ContentBlock> content = response.getBody().content();
for (ContentBlock block : content) {
if (block.type() == ContentBlock.Type.THINKING) {
assertThat(block.thinking()).isNotBlank();
assertThat(block.signature()).isNotBlank();
foundThinkingBlock = true;
}
// Note: Redacted thinking might occur if budget is exceeded or other reasons.
if (block.type() == ContentBlock.Type.REDACTED_THINKING) {
assertThat(block.data()).isNotBlank();
}
if (block.type() == ContentBlock.Type.TEXT) {
assertThat(block.text()).isNotBlank();
foundTextBlock = true;
}
}
assertThat(foundThinkingBlock).isTrue();
assertThat(foundTextBlock).isTrue();
assertThat(response.getBody().stopReason()).isEqualTo("end_turn");
}
@Test
@@ -115,15 +143,125 @@ public class AnthropicApiIT {
AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
Role.USER);
Flux<ChatCompletionResponse> response = this.anthropicApi.chatCompletionStream(new ChatCompletionRequest(
AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, true));
Flux<ChatCompletionResponse> response = this.anthropicApi.chatCompletionStream(ChatCompletionRequest.builder()
.model(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue())
.messages(List.of(chatCompletionMessage))
.maxTokens(100)
.temperature(0.8)
.stream(true)
.build());
assertThat(response).isNotNull();
List<ChatCompletionResponse> bla = response.collectList().block();
assertThat(bla).isNotNull();
List<ChatCompletionResponse> results = response.collectList().block();
assertThat(results).isNotNull().isNotEmpty();
bla.stream().forEach(r -> System.out.println(r));
results.forEach(chunk -> logger.info("Streaming Chunk: {}", chunk));
// Verify the stream contains actual text content deltas
String aggregatedText = results.stream()
.filter(r -> !CollectionUtils.isEmpty(r.content()))
.flatMap(r -> r.content().stream())
.filter(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA)
.map(ContentBlock::text)
.collect(Collectors.joining());
assertThat(aggregatedText).isNotBlank();
// Verify the final state
ChatCompletionResponse lastMeaningfulResponse = results.stream()
.filter(r -> StringUtils.hasText(r.stopReason()))
.reduce((first, second) -> second)
.orElse(results.get(results.size() - 1)); // Fallback to very last if no stop
// StopReason found earlier
assertThat(lastMeaningfulResponse.stopReason()).isEqualTo("end_turn");
assertThat(lastMeaningfulResponse.usage()).isNotNull();
assertThat(lastMeaningfulResponse.usage().outputTokens()).isPositive();
}
@Test
void chatCompletionStreamWithThinking() {
	// Streaming request with thinking enabled (budget 1024, below maxTokens 2048).
	ContentBlock question = new ContentBlock(
			"Are there an infinite number of prime numbers such that n mod 4 == 3?");
	AnthropicMessage userMessage = new AnthropicMessage(List.of(question), Role.USER);
	ChatCompletionRequest request = ChatCompletionRequest.builder()
		.model(AnthropicApi.ChatModel.CLAUDE_3_7_SONNET.getValue())
		.messages(List.of(userMessage))
		.maxTokens(2048)
		.temperature(1.0)
		.stream(true)
		.thinking(new ChatCompletionRequest.ThinkingConfig(AnthropicApi.ThinkingType.ENABLED, 1024))
		.build();

	Flux<ChatCompletionResponse> responseFlux = this.anthropicApi.chatCompletionStream(request);
	assertThat(responseFlux).isNotNull();

	List<ChatCompletionResponse> results = responseFlux.collectList().block();
	assertThat(results).isNotNull().isNotEmpty();
	for (ChatCompletionResponse chunk : results) {
		logger.info("Streaming Thinking Chunk: {}", chunk);
	}

	// Event type names of all chunks, in arrival order.
	List<String> eventTypes = results.stream().map(ChatCompletionResponse::type).collect(Collectors.toList());

	// The stream must contain a MESSAGE_START event, and the first chunk must
	// carry the message id and the assistant role.
	assertThat(eventTypes.contains(EventType.MESSAGE_START.name())).isTrue();
	ChatCompletionResponse firstChunk = results.get(0);
	assertThat(firstChunk.id()).isNotBlank();
	assertThat(firstChunk.role()).isEqualTo(Role.ASSISTANT);

	// Flatten the content blocks of every chunk that actually has content.
	List<ContentBlock> blocks = new ArrayList<>();
	for (ChatCompletionResponse chunk : results) {
		if (!CollectionUtils.isEmpty(chunk.content())) {
			blocks.addAll(chunk.content());
		}
	}

	// Thinking deltas with non-empty thinking text
	boolean foundThinkingDelta = blocks.stream()
		.anyMatch(cb -> cb.type() == ContentBlock.Type.THINKING_DELTA && StringUtils.hasText(cb.thinking()));
	assertThat(foundThinkingDelta).as("Should find THINKING_DELTA content").isTrue();

	// Signature deltas with a non-empty signature
	boolean foundSignatureDelta = blocks.stream()
		.anyMatch(cb -> cb.type() == ContentBlock.Type.SIGNATURE_DELTA && StringUtils.hasText(cb.signature()));
	assertThat(foundSignatureDelta).as("Should find SIGNATURE_DELTA content").isTrue();

	// Text deltas (the actual answer)
	boolean foundTextDelta = blocks.stream()
		.anyMatch(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA && StringUtils.hasText(cb.text()));
	assertThat(foundTextDelta).as("Should find TEXT_DELTA content").isTrue();

	// Join all text deltas into the final answer.
	String aggregatedText = blocks.stream()
		.filter(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA)
		.map(ContentBlock::text)
		.collect(Collectors.joining());
	assertThat(aggregatedText).as("Aggregated text response should not be blank").isNotBlank();
	logger.info("Aggregated Text from Stream: {}", aggregatedText);

	// The last chunk that reports a stop reason holds the final state.
	ChatCompletionResponse finalStateEvent = null;
	for (ChatCompletionResponse chunk : results) {
		if (StringUtils.hasText(chunk.stopReason())) {
			finalStateEvent = chunk;
		}
	}
	assertThat(finalStateEvent).as("Should find an event with stopReason").isNotNull();
	assertThat(finalStateEvent.stopReason()).isEqualTo("end_turn");
	assertThat(finalStateEvent.usage()).isNotNull();
	assertThat(finalStateEvent.usage().outputTokens()).isPositive();
	assertThat(finalStateEvent.usage().inputTokens()).isPositive();

	// Key structural events must be present in the stream.
	assertThat(eventTypes.contains(EventType.CONTENT_BLOCK_START.name()))
		.as("Should find CONTENT_BLOCK_START event")
		.isTrue();
	assertThat(eventTypes.contains(EventType.CONTENT_BLOCK_STOP.name()))
		.as("Should find CONTENT_BLOCK_STOP event")
		.isTrue();
	boolean sawStop = eventTypes.contains(EventType.MESSAGE_STOP.name())
			|| results.stream().anyMatch(r -> StringUtils.hasText(r.stopReason()));
	assertThat(sawStop).as("Should find MESSAGE_STOP or MESSAGE_DELTA with stopReason").isTrue();
}
@Test
@@ -173,8 +311,13 @@ public class AnthropicApiIT {
Role.USER);
AnthropicApi api = AnthropicApi.builder().apiKey("FAKE_KEY_FOR_ERROR_RESPONSE").build();
Flux<ChatCompletionResponse> response = api.chatCompletionStream(new ChatCompletionRequest(
AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, true));
Flux<ChatCompletionResponse> response = api.chatCompletionStream(ChatCompletionRequest.builder()
.model(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue())
.messages(List.of(chatCompletionMessage))
.maxTokens(100)
.temperature(0.8)
.stream(true)
.build());
assertThat(response).isNotNull();

View File

@@ -211,7 +211,7 @@ class AnthropicChatClientIT {
// @formatter:off
String response = ChatClient.create(this.chatModel).prompt()
.user("What's the weather like in San Francisco, Tokyo, and Paris? Use Celsius.")
.user("What's the weather like in San Francisco (California, USA), Tokyo (Japan), and Paris (France)? Use Celsius.")
.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
.inputType(MockWeatherService.Request.class)
.build())
@@ -284,7 +284,7 @@ class AnthropicChatClientIT {
}
@ParameterizedTest(name = "{0} : {displayName} ")
@ValueSource(strings = { "claude-3-opus-latest", "claude-3-5-sonnet-latest", "claude-3-7-sonnet-latest" })
@ValueSource(strings = { "claude-3-7-sonnet-latest", "claude-sonnet-4-0" })
void multiModalityEmbeddedImage(String modelName) throws IOException {
// @formatter:off
@@ -301,7 +301,7 @@ class AnthropicChatClientIT {
}
@ParameterizedTest(name = "{0} : {displayName} ")
@ValueSource(strings = { "claude-3-opus-latest", "claude-3-5-sonnet-latest", "claude-3-7-sonnet-latest" })
@ValueSource(strings = { "claude-3-7-sonnet-latest", "claude-sonnet-4-0" })
void multiModalityImageUrl(String modelName) throws IOException {
// TODO: add url method that wraps the checked exception.

View File

@@ -14,7 +14,7 @@
*** xref:api/chatmodel.adoc[Chat Models]
**** xref:api/chat/comparison.adoc[Chat Models Comparison]
**** xref:api/chat/bedrock-converse.adoc[Amazon Bedrock Converse]
**** xref:api/chat/anthropic-chat.adoc[Anthropic 3]
**** xref:api/chat/anthropic-chat.adoc[Anthropic]
**** xref:api/chat/azure-openai-chat.adoc[Azure OpenAI]
**** xref:api/chat/deepseek-chat.adoc[DeepSeek]
**** xref:api/chat/dmr-chat.adoc[Docker Model Runner]

View File

@@ -1,4 +1,4 @@
= Anthropic 3 Chat
= Anthropic Chat
link:https://www.anthropic.com/[Anthropic Claude] is a family of foundational AI models that can be used in a variety of applications.
For developers and businesses, you can leverage the API access and build directly on top of link:https://www.anthropic.com/api[Anthropic's AI infrastructure].
@@ -191,6 +191,166 @@ ChatResponse response = chatModel.call(
TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions] you can use a portable https://github.com/spring-projects/spring-ai/blob/main/spring-ai-client-chat/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with the https://github.com/spring-projects/spring-ai/blob/main/spring-ai-client-chat/src/main/java/org/springframework/ai/chat/prompt/ChatOptionsBuilder.java[ChatOptionsBuilder#builder()].
== Thinking
Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This feature enables more transparent and detailed problem-solving, particularly for complex questions that require step-by-step reasoning.
[NOTE]
====
*Supported Models*
The thinking feature is supported by the following Claude models:
* Claude 4 models (`claude-opus-4-20250514`, `claude-sonnet-4-20250514`)
* Claude 3.7 Sonnet (`claude-3-7-sonnet-20250219`)
*Model capabilities:*
* *Claude 3.7 Sonnet*: Returns full thinking output. Behavior is consistent but does not support summarized or interleaved thinking.
* *Claude 4 models*: Support summarized thinking, interleaved thinking, and enhanced tool integration.
API request structure is the same across all supported models, but output behavior varies.
====
=== Thinking Configuration
To enable thinking on any supported Claude model, include the following configuration in your request:
==== Required Configuration
1. **Add the `thinking` object**:
- `"type": "enabled"`
- `budget_tokens`: Token limit for reasoning (recommend starting at 1024)
2. **Token budget rules**:
- `budget_tokens` must typically be less than `max_tokens`
- Claude may use fewer tokens than allocated
- Larger budgets increase depth of reasoning but may impact latency
- When using tool use with interleaved thinking (Claude 4 only), this constraint is relaxed, but interleaved thinking is not yet supported in Spring AI.
==== Key Considerations
* **Claude 3.7** returns full thinking content in the response
* **Claude 4** returns a *summarized* version of the model's internal reasoning to reduce latency and protect sensitive content
* **Thinking tokens are billable** as part of output tokens (even if not all are visible in response)
* **Interleaved Thinking** is only available on Claude 4 models and requires the beta header `interleaved-thinking-2025-05-14`
==== Tool Integration and Interleaved Thinking
Claude 4 models support interleaved thinking with tool use, allowing the model to reason between tool calls.
[NOTE]
====
The current Spring AI implementation supports basic thinking and tool use separately, but does not yet support interleaved thinking with tool use (where thinking continues across multiple tool calls).
====
For details on interleaved thinking with tool use, see the https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#extended-thinking-with-tool-use[Anthropic documentation].
=== Non-streaming Example
Here's how to enable thinking in a non-streaming request using the ChatClient API:
[source,java]
----
ChatClient chatClient = ChatClient.create(chatModel);
// For Claude 3.7 Sonnet - explicit thinking configuration required
ChatResponse response = chatClient.prompt()
.options(AnthropicChatOptions.builder()
.model("claude-3-7-sonnet-latest")
.temperature(1.0) // Temperature should be set to 1 when thinking is enabled
.maxTokens(8192)
.thinking(AnthropicApi.ThinkingType.ENABLED, 2048) // Must be ≥1024 && < max_tokens
.build())
.user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
.call()
.chatResponse();
// For Claude 4 models - thinking is enabled by default
ChatResponse response4 = chatClient.prompt()
.options(AnthropicChatOptions.builder()
.model("claude-opus-4-0")
.maxTokens(8192)
// No explicit thinking configuration needed
.build())
.user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
.call()
.chatResponse();
// Process the response which may contain thinking content
for (Generation generation : response.getResults()) {
AssistantMessage message = generation.getOutput();
if (message.getText() != null) {
// Regular text response
System.out.println("Text response: " + message.getText());
}
else if (message.getMetadata().containsKey("signature")) {
// Thinking content
System.out.println("Thinking: " + message.getMetadata().get("thinking"));
System.out.println("Signature: " + message.getMetadata().get("signature"));
}
}
----
=== Streaming Example
You can also use thinking with streaming responses:
[source,java]
----
ChatClient chatClient = ChatClient.create(chatModel);
// For Claude 3.7 Sonnet - explicit thinking configuration
Flux<ChatResponse> responseFlux = chatClient.prompt()
.options(AnthropicChatOptions.builder()
.model("claude-3-7-sonnet-latest")
.temperature(1.0)
.maxTokens(8192)
.thinking(AnthropicApi.ThinkingType.ENABLED, 2048)
.build())
.user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
.stream();
// For Claude 4 models - thinking is enabled by default
Flux<ChatResponse> responseFlux4 = chatClient.prompt()
.options(AnthropicChatOptions.builder()
.model("claude-opus-4-0")
.maxTokens(8192)
// No explicit thinking configuration needed
.build())
.user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
.stream();
// For streaming, you might want to collect just the text responses
String textContent = responseFlux.collectList()
.block()
.stream()
.map(ChatResponse::getResults)
.flatMap(List::stream)
.map(Generation::getOutput)
.map(AssistantMessage::getText)
.filter(text -> text != null && !text.isBlank())
.collect(Collectors.joining());
----
=== Tool Use Integration
Claude 4 models integrate thinking and tool use capabilities:
* *Claude 3.7 Sonnet*: Supports both thinking and tool use, but they operate separately and require more explicit configuration
* *Claude 4 models*: Natively interleave thinking and tool use, providing deeper reasoning during tool interactions
=== Benefits of Using Thinking
The thinking feature provides several benefits:
1. **Transparency**: See the model's reasoning process and how it arrived at its conclusion
2. **Debugging**: Identify where the model might be making logical errors
3. **Education**: Use the step-by-step reasoning as a teaching tool
4. **Complex Problem Solving**: Better results on math, logic, and reasoning tasks
Note that enabling thinking requires a higher token budget, as the thinking process itself consumes tokens from your allocation.
== Tool/Function Calling
You can register custom Java Tools with the `AnthropicChatModel` and have the Anthropic Claude model intelligently choose to output a JSON object containing arguments to call one or many of the registered functions.
@@ -378,5 +538,3 @@ Follow the https://github.com/spring-projects/spring-ai/blob/main/models/spring-
=== Low-level API Examples
* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/api/AnthropicApiIT.java[AnthropicApiIT.java] test provides some general examples of how to use the lightweight library.