feat(anthropic): Add support for streaming thinking events

Add necessary types and update stream processing to handle Anthropic's 'thinking' content blocks and deltas in streaming responses. This resolves an issue where an IllegalArgumentException was thrown for unhandled thinking event types. Also applies formatting and adds documentation. Signed-off-by: Alexandros Pappas <apappascs@gmail.com>
2025-04-18 12:24:16 +02:00
parent c4e434a5e5
commit 12f19dbfe0
8 changed files with 432 additions and 53 deletions
--- a/models/spring-ai-anthropic/README.md
+++ b/models/spring-ai-anthropic/README.md
@@ -1,2 +1,2 @@
-[Anthropic 3 Chat Documentation](https://docs.spring.io/spring-ai/reference/api/chat/anthropic-chat.html)
+[Anthropic Chat Documentation](https://docs.spring.io/spring-ai/reference/api/chat/anthropic-chat.html)

--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java
@@ -1124,8 +1124,11 @@ public final class AnthropicApi {

 		@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "type",
 				visible = true)
-		@JsonSubTypes({ @JsonSubTypes.Type(value = ContentBlockToolUse.class, name = "tool_use"),
-				@JsonSubTypes.Type(value = ContentBlockText.class, name = "text") })
+		@JsonSubTypes({
+				@JsonSubTypes.Type(value = ContentBlockToolUse.class, name = "tool_use"),
+				@JsonSubTypes.Type(value = ContentBlockText.class, name = "text"),
+				@JsonSubTypes.Type(value = ContentBlockThinking.class, name = "thinking")
+		})
 		public interface ContentBlockBody {
 			String type();
 		}
@@ -1157,6 +1160,19 @@ public final class AnthropicApi {
 			@JsonProperty("type") String type,
 			@JsonProperty("text") String text) implements ContentBlockBody {
 		}
+
+		/**
+		 * Thinking content block.
+		 * @param type The content block type.
+		 * @param thinking The thinking content.
+		 */
+		@JsonInclude(Include.NON_NULL)
+		public record ContentBlockThinking(
+			@JsonProperty("type") String type,
+			@JsonProperty("thinking") String thinking,
+			@JsonProperty("signature") String signature) implements ContentBlockBody {
+		}
+		
 	}
 	// @formatter:on

--- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
+++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java
@@ -26,9 +26,12 @@ import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock.Type;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaJson;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaText;
+import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaThinking;
+import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockDeltaEvent.ContentBlockDeltaSignature;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockText;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockToolUse;
+import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlockStartEvent.ContentBlockThinking;
 import org.springframework.ai.anthropic.api.AnthropicApi.EventType;
 import org.springframework.ai.anthropic.api.AnthropicApi.MessageDeltaEvent;
 import org.springframework.ai.anthropic.api.AnthropicApi.MessageStartEvent;
@@ -36,19 +39,19 @@ import org.springframework.ai.anthropic.api.AnthropicApi.Role;
 import org.springframework.ai.anthropic.api.AnthropicApi.StreamEvent;
 import org.springframework.ai.anthropic.api.AnthropicApi.ToolUseAggregationEvent;
 import org.springframework.ai.anthropic.api.AnthropicApi.Usage;
-import org.springframework.util.Assert;
 import org.springframework.util.CollectionUtils;
 import org.springframework.util.StringUtils;

 /**
- * Helper class to support streaming function calling.
+ * Helper class to support streaming function calling and thinking events.
 * <p>
 * It can merge the streamed {@link StreamEvent} chunks in case of function calling
- * message.
+ * message. It passes through other events like text, thinking, and signature deltas.
 *
 * @author Mariusz Bernacki
 * @author Christian Tzolov
 * @author Jihoon Kim
+ * @author Alexandros Pappas
 * @since 1.0.0
 */
 public class StreamHelper {
@@ -61,13 +64,16 @@ public class StreamHelper {
 	}

 	public boolean isToolUseFinish(StreamEvent event) {
-
-		if (event == null || event.type() == null || event.type() != EventType.CONTENT_BLOCK_STOP) {
-			return false;
-		}
-		return true;
+		// Tool use streaming sequence ends with a CONTENT_BLOCK_STOP event.
+		// The logic relies on the state machine (isInsideTool flag) managed in
+		// chatCompletionStream to know if this stop event corresponds to a tool use.
+		return event != null && event.type() != null && event.type() == EventType.CONTENT_BLOCK_STOP;
 	}

+	/**
+	 * Merge the tool-use related streaming events into one aggregate event so that the
+	 * upper layers see a single ContentBlock with the full JSON input.
+	 */
 	public StreamEvent mergeToolUseEvents(StreamEvent previousEvent, StreamEvent event) {

 		ToolUseAggregationEvent eventAggregator = (ToolUseAggregationEvent) previousEvent;
@@ -76,8 +82,7 @@ public class StreamHelper {
 			ContentBlockStartEvent contentBlockStart = (ContentBlockStartEvent) event;

 			if (ContentBlock.Type.TOOL_USE.getValue().equals(contentBlockStart.contentBlock().type())) {
-				ContentBlockStartEvent.ContentBlockToolUse cbToolUse = (ContentBlockToolUse) contentBlockStart
-					.contentBlock();
+				ContentBlockToolUse cbToolUse = (ContentBlockToolUse) contentBlockStart.contentBlock();

 				return eventAggregator.withIndex(contentBlockStart.index())
 					.withId(cbToolUse.id())
@@ -102,6 +107,14 @@ public class StreamHelper {
 		return event;
 	}

+	/**
+	 * Converts a raw {@link StreamEvent} potentially containing tool use aggregates or
+	 * other block types (text, thinking) into a {@link ChatCompletionResponse} chunk.
+	 * @param event The incoming StreamEvent.
+	 * @param contentBlockReference Holds the state of the response being built across
+	 * multiple events.
+	 * @return A ChatCompletionResponse representing the processed chunk.
+	 */
 	public ChatCompletionResponse eventToChatCompletionResponse(StreamEvent event,
 			AtomicReference<ChatCompletionResponseBuilder> contentBlockReference) {

@@ -135,28 +148,41 @@ public class StreamHelper {
 		else if (event.type().equals(EventType.CONTENT_BLOCK_START)) {
 			ContentBlockStartEvent contentBlockStartEvent = (ContentBlockStartEvent) event;

-			Assert.isTrue(contentBlockStartEvent.contentBlock().type().equals("text"),
-					"The json content block should have been aggregated. Unsupported content block type: "
-							+ contentBlockStartEvent.contentBlock().type());
-
-			ContentBlockText contentBlockText = (ContentBlockText) contentBlockStartEvent.contentBlock();
-			ContentBlock contentBlock = new ContentBlock(Type.TEXT, null, contentBlockText.text(),
-					contentBlockStartEvent.index());
-			contentBlockReference.get().withType(event.type().name()).withContent(List.of(contentBlock));
+			if (contentBlockStartEvent.contentBlock() instanceof ContentBlockText textBlock) {
+				ContentBlock cb = new ContentBlock(Type.TEXT, null, textBlock.text(), contentBlockStartEvent.index());
+				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
+			}
+			else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) {
+				ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null,
+						null, null, null, null, null, thinkingBlock.thinking(), null);
+				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
+			}
+			else {
+				throw new IllegalArgumentException(
+						"Unsupported content block type: " + contentBlockStartEvent.contentBlock().type());
+			}
 		}
 		else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) {
-
 			ContentBlockDeltaEvent contentBlockDeltaEvent = (ContentBlockDeltaEvent) event;

-			Assert.isTrue(contentBlockDeltaEvent.delta().type().equals("text_delta"),
-					"The json content block delta should have been aggregated. Unsupported content block type: "
-							+ contentBlockDeltaEvent.delta().type());
-
-			ContentBlockDeltaText deltaTxt = (ContentBlockDeltaText) contentBlockDeltaEvent.delta();
-
-			var contentBlock = new ContentBlock(Type.TEXT_DELTA, null, deltaTxt.text(), contentBlockDeltaEvent.index());
-
-			contentBlockReference.get().withType(event.type().name()).withContent(List.of(contentBlock));
+			if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaText txt) {
+				ContentBlock cb = new ContentBlock(Type.TEXT_DELTA, null, txt.text(), contentBlockDeltaEvent.index());
+				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
+			}
+			else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaThinking thinking) {
+				ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(),
+						null, null, null, null, null, null, thinking.thinking(), null);
+				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
+			}
+			else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) {
+				ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(),
+						null, null, null, null, null, sig.signature(), null, null);
+				contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb));
+			}
+			else {
+				throw new IllegalArgumentException(
+						"Unsupported content block delta type: " + contentBlockDeltaEvent.delta().type());
+			}
 		}
 		else if (event.type().equals(EventType.MESSAGE_DELTA)) {

@@ -173,7 +199,7 @@ public class StreamHelper {
 			}

 			if (messageDeltaEvent.usage() != null) {
-				var totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
+				Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(),
 						messageDeltaEvent.usage().outputTokens());
 				contentBlockReference.get().withUsage(totalUsage);
 			}
@@ -189,12 +215,17 @@ public class StreamHelper {
 				.withStopSequence(null);
 		}
 		else {
+			// Any other event types that should propagate upwards without content
 			contentBlockReference.get().withType(event.type().name()).withContent(List.of());
 		}

 		return contentBlockReference.get().build();
 	}

+	/**
+	 * Builder for {@link ChatCompletionResponse}. Used internally by {@link StreamHelper}
+	 * to aggregate stream events.
+	 */
 	public static class ChatCompletionResponseBuilder {

 		private String type;
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java
@@ -88,8 +88,7 @@ class AnthropicChatModelIT {
 	}

 	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-3-7-sonnet-latest", "claude-3-5-sonnet-latest", "claude-3-5-haiku-latest",
-			"claude-3-opus-latest" })
+	@ValueSource(strings = { "claude-3-7-sonnet-latest" })
 	void roleTest(String modelName) {
 		UserMessage userMessage = new UserMessage(
 				"Tell me about 3 famous pirates from the Golden Age of Piracy and why they did.");
@@ -302,7 +301,7 @@ class AnthropicChatModelIT {
 		assertThat(generation.getOutput().getText()).contains("30", "10", "15");
 		assertThat(response.getMetadata()).isNotNull();
 		assertThat(response.getMetadata().getUsage()).isNotNull();
-		assertThat(response.getMetadata().getUsage().getTotalTokens()).isLessThan(4000).isGreaterThan(1800);
+		assertThat(response.getMetadata().getUsage().getTotalTokens()).isLessThan(4000).isGreaterThan(100);
 	}

 	@Test
@@ -429,6 +428,38 @@ class AnthropicChatModelIT {
 		}
 	}

+	@Test
+	void thinkingWithStreamingTest() {
+		UserMessage userMessage = new UserMessage(
+				"Are there an infinite number of prime numbers such that n mod 4 == 3?");
+
+		var promptOptions = AnthropicChatOptions.builder()
+			.model(AnthropicApi.ChatModel.CLAUDE_3_7_SONNET.getName())
+			.temperature(1.0) // Temperature should be set to 1 when thinking is enabled
+			.maxTokens(8192)
+			.thinking(AnthropicApi.ThinkingType.ENABLED, 2048) // Must be ≥1024 && <
+																// max_tokens
+			.build();
+
+		Flux<ChatResponse> responseFlux = this.streamingChatModel
+			.stream(new Prompt(List.of(userMessage), promptOptions));
+
+		String content = responseFlux.collectList()
+			.block()
+			.stream()
+			.map(ChatResponse::getResults)
+			.flatMap(List::stream)
+			.map(Generation::getOutput)
+			.map(AssistantMessage::getText)
+			.filter(text -> text != null && !text.isBlank())
+			.collect(Collectors.joining());
+
+		logger.info("Response: {}", content);
+
+		assertThat(content).isNotBlank();
+		assertThat(content).contains("prime numbers");
+	}
+
 	@Test
 	void testToolUseContentBlock() {
 		UserMessage userMessage = new UserMessage(
--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java
@@ -18,18 +18,24 @@ package org.springframework.ai.anthropic.api;

 import java.util.ArrayList;
 import java.util.List;
+import java.util.stream.Collectors;

 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import reactor.core.publisher.Flux;

 import org.springframework.ai.anthropic.api.AnthropicApi.AnthropicMessage;
 import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest;
 import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionResponse;
 import org.springframework.ai.anthropic.api.AnthropicApi.ContentBlock;
+import org.springframework.ai.anthropic.api.AnthropicApi.EventType;
 import org.springframework.ai.anthropic.api.AnthropicApi.Role;
 import org.springframework.ai.model.ModelOptionsUtils;
 import org.springframework.http.ResponseEntity;
+import org.springframework.util.CollectionUtils;
+import org.springframework.util.StringUtils;

 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -42,6 +48,8 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy;
@EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+")
 public class AnthropicApiIT {

+	private static final Logger logger = LoggerFactory.getLogger(AnthropicApiIT.class);
+
 	AnthropicApi anthropicApi = AnthropicApi.builder().apiKey(System.getenv("ANTHROPIC_API_KEY")).build();

 	List<AnthropicApi.Tool> tools = List.of(new AnthropicApi.Tool("getCurrentWeather",
@@ -68,17 +76,26 @@ public class AnthropicApiIT {
 		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
 				Role.USER);
 		ResponseEntity<ChatCompletionResponse> response = this.anthropicApi
-			.chatCompletionEntity(new ChatCompletionRequest(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(),
-					List.of(chatCompletionMessage), null, 100, 0.8, false));
+			.chatCompletionEntity(ChatCompletionRequest.builder()
+				.model(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue())
+				.messages(List.of(chatCompletionMessage))
+				.maxTokens(100)
+				.temperature(0.8)
+				.stream(false)
+				.build());

-		System.out.println(response);
+		logger.info("Non-Streaming Response: {}", response.getBody());
 		assertThat(response).isNotNull();
 		assertThat(response.getBody()).isNotNull();
+		assertThat(response.getBody().content()).isNotEmpty();
+		assertThat(response.getBody().content().get(0).text()).isNotBlank();
+		assertThat(response.getBody().stopReason()).isEqualTo("end_turn");
 	}

 	@Test
 	void chatCompletionWithThinking() {
-		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
+		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
+				List.of(new ContentBlock("Are there an infinite number of prime numbers such that n mod 4 == 3?")),
 				Role.USER);

 		ChatCompletionRequest request = ChatCompletionRequest.builder()
@@ -93,20 +110,31 @@ public class AnthropicApiIT {

 		assertThat(response).isNotNull();
 		assertThat(response.getBody()).isNotNull();
+		assertThat(response.getBody().content()).isNotEmpty();
+
+		boolean foundThinkingBlock = false;
+		boolean foundTextBlock = false;

 		List<ContentBlock> content = response.getBody().content();
 		for (ContentBlock block : content) {
 			if (block.type() == ContentBlock.Type.THINKING) {
 				assertThat(block.thinking()).isNotBlank();
 				assertThat(block.signature()).isNotBlank();
+				foundThinkingBlock = true;
 			}
+			// Note: Redacted thinking might occur if budget is exceeded or for other reasons.
 			if (block.type() == ContentBlock.Type.REDACTED_THINKING) {
 				assertThat(block.data()).isNotBlank();
 			}
 			if (block.type() == ContentBlock.Type.TEXT) {
 				assertThat(block.text()).isNotBlank();
+				foundTextBlock = true;
 			}
 		}
+
+		assertThat(foundThinkingBlock).isTrue();
+		assertThat(foundTextBlock).isTrue();
+		assertThat(response.getBody().stopReason()).isEqualTo("end_turn");
 	}

 	@Test
@@ -115,15 +143,125 @@ public class AnthropicApiIT {
 		AnthropicMessage chatCompletionMessage = new AnthropicMessage(List.of(new ContentBlock("Tell me a Joke?")),
 				Role.USER);

-		Flux<ChatCompletionResponse> response = this.anthropicApi.chatCompletionStream(new ChatCompletionRequest(
-				AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, true));
+		Flux<ChatCompletionResponse> response = this.anthropicApi.chatCompletionStream(ChatCompletionRequest.builder()
+			.model(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue())
+			.messages(List.of(chatCompletionMessage))
+			.maxTokens(100)
+			.temperature(0.8)
+			.stream(true)
+			.build());

 		assertThat(response).isNotNull();

-		List<ChatCompletionResponse> bla = response.collectList().block();
-		assertThat(bla).isNotNull();
+		List<ChatCompletionResponse> results = response.collectList().block();
+		assertThat(results).isNotNull().isNotEmpty();

-		bla.stream().forEach(r -> System.out.println(r));
+		results.forEach(chunk -> logger.info("Streaming Chunk: {}", chunk));
+
+		// Verify the stream contains actual text content deltas
+		String aggregatedText = results.stream()
+			.filter(r -> !CollectionUtils.isEmpty(r.content()))
+			.flatMap(r -> r.content().stream())
+			.filter(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA)
+			.map(ContentBlock::text)
+			.collect(Collectors.joining());
+		assertThat(aggregatedText).isNotBlank();
+
+		// Verify the final state
+		ChatCompletionResponse lastMeaningfulResponse = results.stream()
+			.filter(r -> StringUtils.hasText(r.stopReason()))
+			.reduce((first, second) -> second)
+			.orElse(results.get(results.size() - 1)); // Fallback to very last if no stop
+
+		// StopReason found earlier
+		assertThat(lastMeaningfulResponse.stopReason()).isEqualTo("end_turn");
+		assertThat(lastMeaningfulResponse.usage()).isNotNull();
+		assertThat(lastMeaningfulResponse.usage().outputTokens()).isPositive();
+	}
+
+	@Test
+	void chatCompletionStreamWithThinking() {
+		AnthropicMessage chatCompletionMessage = new AnthropicMessage(
+				List.of(new ContentBlock("Are there an infinite number of prime numbers such that n mod 4 == 3?")),
+				Role.USER);
+
+		ChatCompletionRequest request = ChatCompletionRequest.builder()
+			.model(AnthropicApi.ChatModel.CLAUDE_3_7_SONNET.getValue())
+			.messages(List.of(chatCompletionMessage))
+			.maxTokens(2048)
+			.temperature(1.0)
+			.stream(true)
+			.thinking(new ChatCompletionRequest.ThinkingConfig(AnthropicApi.ThinkingType.ENABLED, 1024))
+			.build();
+
+		Flux<ChatCompletionResponse> responseFlux = this.anthropicApi.chatCompletionStream(request);
+
+		assertThat(responseFlux).isNotNull();
+
+		List<ChatCompletionResponse> results = responseFlux.collectList().block();
+		assertThat(results).isNotNull().isNotEmpty();
+
+		results.forEach(chunk -> logger.info("Streaming Thinking Chunk: {}", chunk));
+
+		// Verify MESSAGE_START event exists
+		assertThat(results.stream().anyMatch(r -> EventType.MESSAGE_START.name().equals(r.type()))).isTrue();
+		assertThat(results.get(0).id()).isNotBlank();
+		assertThat(results.get(0).role()).isEqualTo(Role.ASSISTANT);
+
+		// Verify presence of THINKING_DELTA content
+		boolean foundThinkingDelta = results.stream()
+			.filter(r -> !CollectionUtils.isEmpty(r.content()))
+			.flatMap(r -> r.content().stream())
+			.anyMatch(cb -> cb.type() == ContentBlock.Type.THINKING_DELTA && StringUtils.hasText(cb.thinking()));
+		assertThat(foundThinkingDelta).as("Should find THINKING_DELTA content").isTrue();
+
+		// Verify presence of SIGNATURE_DELTA content
+		boolean foundSignatureDelta = results.stream()
+			.filter(r -> !CollectionUtils.isEmpty(r.content()))
+			.flatMap(r -> r.content().stream())
+			.anyMatch(cb -> cb.type() == ContentBlock.Type.SIGNATURE_DELTA && StringUtils.hasText(cb.signature()));
+		assertThat(foundSignatureDelta).as("Should find SIGNATURE_DELTA content").isTrue();
+
+		// Verify presence of TEXT_DELTA content (the actual answer)
+		boolean foundTextDelta = results.stream()
+			.filter(r -> !CollectionUtils.isEmpty(r.content()))
+			.flatMap(r -> r.content().stream())
+			.anyMatch(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA && StringUtils.hasText(cb.text()));
+		assertThat(foundTextDelta).as("Should find TEXT_DELTA content").isTrue();
+
+		// Combine text deltas to check final answer structure
+		String aggregatedText = results.stream()
+			.filter(r -> !CollectionUtils.isEmpty(r.content()))
+			.flatMap(r -> r.content().stream())
+			.filter(cb -> cb.type() == ContentBlock.Type.TEXT_DELTA)
+			.map(ContentBlock::text)
+			.collect(Collectors.joining());
+		assertThat(aggregatedText).as("Aggregated text response should not be blank").isNotBlank();
+		logger.info("Aggregated Text from Stream: {}", aggregatedText);
+
+		// Verify the final state (stop reason and usage)
+		ChatCompletionResponse finalStateEvent = results.stream()
+			.filter(r -> StringUtils.hasText(r.stopReason()))
+			.reduce((first, second) -> second)
+			.orElse(null);
+
+		assertThat(finalStateEvent).as("Should find an event with stopReason").isNotNull();
+		assertThat(finalStateEvent.stopReason()).isEqualTo("end_turn");
+		assertThat(finalStateEvent.usage()).isNotNull();
+		assertThat(finalStateEvent.usage().outputTokens()).isPositive();
+		assertThat(finalStateEvent.usage().inputTokens()).isPositive();
+
+		// Verify presence of key event types
+		assertThat(results.stream().anyMatch(r -> EventType.CONTENT_BLOCK_START.name().equals(r.type())))
+			.as("Should find CONTENT_BLOCK_START event")
+			.isTrue();
+		assertThat(results.stream().anyMatch(r -> EventType.CONTENT_BLOCK_STOP.name().equals(r.type())))
+			.as("Should find CONTENT_BLOCK_STOP event")
+			.isTrue();
+		assertThat(results.stream()
+			.anyMatch(r -> EventType.MESSAGE_STOP.name().equals(r.type()) || StringUtils.hasText(r.stopReason())))
+			.as("Should find MESSAGE_STOP or MESSAGE_DELTA with stopReason")
+			.isTrue();
 	}

 	@Test
@@ -173,8 +311,13 @@ public class AnthropicApiIT {
 				Role.USER);
 		AnthropicApi api = AnthropicApi.builder().apiKey("FAKE_KEY_FOR_ERROR_RESPONSE").build();

-		Flux<ChatCompletionResponse> response = api.chatCompletionStream(new ChatCompletionRequest(
-				AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, true));
+		Flux<ChatCompletionResponse> response = api.chatCompletionStream(ChatCompletionRequest.builder()
+			.model(AnthropicApi.ChatModel.CLAUDE_3_OPUS.getValue())
+			.messages(List.of(chatCompletionMessage))
+			.maxTokens(100)
+			.temperature(0.8)
+			.stream(true)
+			.build());

 		assertThat(response).isNotNull();

--- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientIT.java
+++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/client/AnthropicChatClientIT.java
@@ -211,7 +211,7 @@ class AnthropicChatClientIT {

 		// @formatter:off
 		String response = ChatClient.create(this.chatModel).prompt()
-				.user("What's the weather like in San Francisco, Tokyo, and Paris?  Use Celsius.")
+				.user("What's the weather like in San Francisco (California, USA), Tokyo (Japan), and Paris (France)? Use Celsius.")
 				.toolCallbacks(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 					.inputType(MockWeatherService.Request.class)
 					.build())
@@ -284,7 +284,7 @@ class AnthropicChatClientIT {
 	}

 	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-3-opus-latest", "claude-3-5-sonnet-latest", "claude-3-7-sonnet-latest" })
+	@ValueSource(strings = { "claude-3-7-sonnet-latest", "claude-sonnet-4-0" })
 	void multiModalityEmbeddedImage(String modelName) throws IOException {

 		// @formatter:off
@@ -301,7 +301,7 @@ class AnthropicChatClientIT {
 	}

 	@ParameterizedTest(name = "{0} : {displayName} ")
-	@ValueSource(strings = { "claude-3-opus-latest", "claude-3-5-sonnet-latest", "claude-3-7-sonnet-latest" })
+	@ValueSource(strings = { "claude-3-7-sonnet-latest", "claude-sonnet-4-0" })
 	void multiModalityImageUrl(String modelName) throws IOException {

 		// TODO: add url method that wrapps the checked exception.
--- a/spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc
@@ -14,7 +14,7 @@
 *** xref:api/chatmodel.adoc[Chat Models]
 **** xref:api/chat/comparison.adoc[Chat Models Comparison]
 **** xref:api/chat/bedrock-converse.adoc[Amazon Bedrock Converse]
-**** xref:api/chat/anthropic-chat.adoc[Anthropic 3]
+**** xref:api/chat/anthropic-chat.adoc[Anthropic]
 **** xref:api/chat/azure-openai-chat.adoc[Azure OpenAI]
 **** xref:api/chat/deepseek-chat.adoc[DeepSeek]
 **** xref:api/chat/dmr-chat.adoc[Docker Model Runner]
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc
@@ -1,4 +1,4 @@
-= Anthropic 3 Chat
+= Anthropic Chat

 link:https://www.anthropic.com/[Anthropic Claude] is a family of foundational AI models that can be used in a variety of applications.
 For developers and businesses, you can leverage the API access and build directly on top of link:https://www.anthropic.com/api[Anthropic's AI infrastructure].
@@ -191,6 +191,166 @@ ChatResponse response = chatModel.call(

 TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions] you can use a portable https://github.com/spring-projects/spring-ai/blob/main/spring-ai-client-chat/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with the https://github.com/spring-projects/spring-ai/blob/main/spring-ai-client-chat/src/main/java/org/springframework/ai/chat/prompt/ChatOptionsBuilder.java[ChatOptionsBuilder#builder()].

+== Thinking
+
+Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This feature enables more transparent and detailed problem-solving, particularly for complex questions that require step-by-step reasoning.
+
+[NOTE]
+====
+*Supported Models*
+
+The thinking feature is supported by the following Claude models:
+
+* Claude 4 models (`claude-opus-4-20250514`, `claude-sonnet-4-20250514`)
+* Claude 3.7 Sonnet (`claude-3-7-sonnet-20250219`)
+
+*Model capabilities:*
+
+* *Claude 3.7 Sonnet*: Returns full thinking output. Behavior is consistent but does not support summarized or interleaved thinking.
+* *Claude 4 models*: Support summarized thinking, interleaved thinking, and enhanced tool integration.
+
+API request structure is the same across all supported models, but output behavior varies.
+====
+
+=== Thinking Configuration
+
+To enable thinking on any supported Claude model, include the following configuration in your request:
+
+==== Required Configuration
+
+1. **Add the `thinking` object**:
+- `"type": "enabled"`
+- `budget_tokens`: Token limit for reasoning (recommend starting at 1024)
+
+2. **Token budget rules**:
+- `budget_tokens` must typically be less than `max_tokens`
+- Claude may use fewer tokens than allocated
+- Larger budgets increase depth of reasoning but may impact latency
+- When using tool use with interleaved thinking (Claude 4 only), this constraint is relaxed; however, interleaved thinking is not yet supported in Spring AI.
+
+==== Key Considerations
+
+* **Claude 3.7** returns full thinking content in the response
+* **Claude 4** returns a *summarized* version of the model's internal reasoning to reduce latency and protect sensitive content
+* **Thinking tokens are billable** as part of output tokens (even if not all are visible in response)
+* **Interleaved Thinking** is only available on Claude 4 models and requires the beta header `interleaved-thinking-2025-05-14`
+
+==== Tool Integration and Interleaved Thinking
+
+Claude 4 models support interleaved thinking with tool use, allowing the model to reason between tool calls.
+
+[NOTE]
+====
+The current Spring AI implementation supports basic thinking and tool use separately, but does not yet support interleaved thinking with tool use (where thinking continues across multiple tool calls).
+====
+
+For details on interleaved thinking with tool use, see the https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#extended-thinking-with-tool-use[Anthropic documentation].
+
+=== Non-streaming Example
+
+Here's how to enable thinking in a non-streaming request using the ChatClient API:
+
+[source,java]
+----
+ChatClient chatClient = ChatClient.create(chatModel);
+
+// For Claude 3.7 Sonnet - explicit thinking configuration required
+ChatResponse response = chatClient.prompt()
+    .options(AnthropicChatOptions.builder()
+        .model("claude-3-7-sonnet-latest")
+        .temperature(1.0)  // Temperature should be set to 1 when thinking is enabled
+        .maxTokens(8192)
+        .thinking(AnthropicApi.ThinkingType.ENABLED, 2048)  // Must be ≥1024 && < max_tokens
+        .build())
+    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
+    .call()
+    .chatResponse();
+
+// For Claude 4 models - thinking is enabled by default
+ChatResponse response4 = chatClient.prompt()
+    .options(AnthropicChatOptions.builder()
+        .model("claude-opus-4-0")
+        .maxTokens(8192)
+        // No explicit thinking configuration needed
+        .build())
+    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
+    .call()
+    .chatResponse();
+
+// Process the response which may contain thinking content
+for (Generation generation : response.getResults()) {
+    AssistantMessage message = generation.getOutput();
+    if (message.getText() != null) {
+        // Regular text response
+        System.out.println("Text response: " + message.getText());
+    }
+    else if (message.getMetadata().containsKey("signature")) {
+        // Thinking content
+        System.out.println("Thinking: " + message.getMetadata().get("thinking"));
+        System.out.println("Signature: " + message.getMetadata().get("signature"));
+    }
+}
+----
+
+=== Streaming Example
+
+You can also use thinking with streaming responses:
+
+[source,java]
+----
+ChatClient chatClient = ChatClient.create(chatModel);
+
+// For Claude 3.7 Sonnet - explicit thinking configuration
+Flux<ChatResponse> responseFlux = chatClient.prompt()
+    .options(AnthropicChatOptions.builder()
+        .model("claude-3-7-sonnet-latest")
+        .temperature(1.0)
+        .maxTokens(8192)
+        .thinking(AnthropicApi.ThinkingType.ENABLED, 2048)
+        .build())
+    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
+    .stream();
+
+// For Claude 4 models - thinking is enabled by default
+Flux<ChatResponse> responseFlux4 = chatClient.prompt()
+    .options(AnthropicChatOptions.builder()
+        .model("claude-opus-4-0")
+        .maxTokens(8192)
+        // No explicit thinking configuration needed
+        .build())
+    .user("Are there an infinite number of prime numbers such that n mod 4 == 3?")
+    .stream();
+
+// For streaming, you might want to collect just the text responses
+String textContent = responseFlux.collectList()
+    .block()
+    .stream()
+    .map(ChatResponse::getResults)
+    .flatMap(List::stream)
+    .map(Generation::getOutput)
+    .map(AssistantMessage::getText)
+    .filter(text -> text != null && !text.isBlank())
+    .collect(Collectors.joining());
+----
+
+=== Tool Use Integration
+
+Claude models differ in how they combine thinking and tool use:
+
+* *Claude 3.7 Sonnet*: Supports both thinking and tool use, but they operate separately and require more explicit configuration
+* *Claude 4 models*: Natively interleave thinking and tool use, providing deeper reasoning during tool interactions
+
+=== Benefits of Using Thinking
+
+The thinking feature provides several benefits:
+
+1. **Transparency**: See the model's reasoning process and how it arrived at its conclusion
+2. **Debugging**: Identify where the model might be making logical errors
+3. **Education**: Use the step-by-step reasoning as a teaching tool
+4. **Complex Problem Solving**: Better results on math, logic, and reasoning tasks
+
+Note that enabling thinking requires a higher token budget, as the thinking process itself consumes tokens from your allocation.
+
 == Tool/Function Calling

 You can register custom Java Tools with the `AnthropicChatModel` and have the Anthropic Claude model intelligently choose to output a JSON object containing arguments to call one or many of the registered functions.
@@ -378,5 +538,3 @@ Follow the https://github.com/spring-projects/spring-ai/blob/main/models/spring-

 === Low-level API Examples
 * The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/chat/api/AnthropicApiIT.java[AnthropicApiIT.java] test provides some general examples how to use the lightweight library.
-
-