- Changes to includeThoughts() after Gemini 3 Pro model release

ddobrin · ddobrin · commit 6f29fc8b4869 · 2025-11-19T15:42:06.000-05:00
- documented latest change
- documented outstanding chapter on cached content support

Signed-off-by: ddobrin <ddobrin@google.com>
diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-google-genai/src/test/java/org/springframework/ai/model/google/genai/autoconfigure/chat/GoogleGenAiPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-google-genai/src/test/java/org/springframework/ai/model/google/genai/autoconfigure/chat/GoogleGenAiPropertiesTests.java
@@ -131,6 +131,25 @@ void extendedUsageMetadataDefaultBinding() {
 		});
 	}
 
+	@Test
+	void includeThoughtsPropertiesBinding() {
+		this.contextRunner.withPropertyValues("spring.ai.google.genai.chat.options.include-thoughts=true")
+			.run(context -> {
+				GoogleGenAiChatProperties chatProperties = context.getBean(GoogleGenAiChatProperties.class);
+				assertThat(chatProperties.getOptions().getIncludeThoughts()).isTrue();
+			});
+	}
+
+	@Test
+	void includeThoughtsDefaultBinding() {
+		// Verify that include-thoughts stays unset (null) when the property is not specified
+		this.contextRunner.run(context -> {
+			GoogleGenAiChatProperties chatProperties = context.getBean(GoogleGenAiChatProperties.class);
+			// Should be null when not set
+			assertThat(chatProperties.getOptions().getIncludeThoughts()).isNull();
+		});
+	}
+
 	@Configuration
 	@EnableConfigurationProperties({ GoogleGenAiConnectionProperties.class, GoogleGenAiChatProperties.class,
 			GoogleGenAiEmbeddingConnectionProperties.class })
diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatModel.java
@@ -281,9 +281,30 @@ else if (message instanceof UserMessage userMessage) {
 		}
 		else if (message instanceof AssistantMessage assistantMessage) {
 			List<Part> parts = new ArrayList<>();
+
+			// Check if there are thought signatures to restore
+			List<byte[]> thoughtSignatures = null;
+			if (assistantMessage.getMetadata() != null
+					&& assistantMessage.getMetadata().containsKey("thoughtSignatures")) {
+				Object signaturesObj = assistantMessage.getMetadata().get("thoughtSignatures");
+				if (signaturesObj instanceof List) {
+					thoughtSignatures = (List<byte[]>) signaturesObj;
+				}
+			}
+
+			// Add text part, potentially with thought signature
 			if (StringUtils.hasText(assistantMessage.getText())) {
-				parts.add(Part.fromText(assistantMessage.getText()));
+				Part.Builder partBuilder = Part.builder().text(assistantMessage.getText());
+				// If we have thought signatures, apply the first one to this text part
+				if (thoughtSignatures != null && !thoughtSignatures.isEmpty()) {
+					partBuilder.thoughtSignature(thoughtSignatures.get(0));
+					// Remove the used signature
+					thoughtSignatures = thoughtSignatures.subList(1, thoughtSignatures.size());
+				}
+				parts.add(partBuilder.build());
 			}
+
+			// Add function call parts
 			if (!CollectionUtils.isEmpty(assistantMessage.getToolCalls())) {
 				parts.addAll(assistantMessage.getToolCalls()
 					.stream()
@@ -295,6 +316,16 @@ else if (message instanceof AssistantMessage assistantMessage) {
 						.build())
 					.toList());
 			}
+
+			// Emit every thought signature that was not attached to the text part above
+			// as its own signature-only part, so that no signature is dropped.
+			// This handles the case where the model returned only thoughts without text.
+			if (thoughtSignatures != null && !thoughtSignatures.isEmpty()) {
+				for (byte[] signature : thoughtSignatures) {
+					parts.add(Part.builder().thoughtSignature(signature).build());
+				}
+			}
+
 			return parts;
 		}
 		else if (message instanceof ToolResponseMessage toolResponseMessage) {
@@ -601,8 +632,22 @@ protected List<Generation> responseCandidateToGeneration(Candidate candidate) {
 		int candidateIndex = candidate.index().orElse(0);
 		FinishReason candidateFinishReason = candidate.finishReason().orElse(new FinishReason(FinishReason.Known.STOP));
 
-		Map<String, Object> messageMetadata = Map.of("candidateIndex", candidateIndex, "finishReason",
-				candidateFinishReason);
+		Map<String, Object> messageMetadata = new HashMap<>();
+		messageMetadata.put("candidateIndex", candidateIndex);
+		messageMetadata.put("finishReason", candidateFinishReason);
+
+		// Extract thought signatures from response parts if present
+		if (candidate.content().isPresent() && candidate.content().get().parts().isPresent()) {
+			List<Part> parts = candidate.content().get().parts().get();
+			List<byte[]> thoughtSignatures = parts.stream()
+				.filter(part -> part.thoughtSignature().isPresent())
+				.map(part -> part.thoughtSignature().get())
+				.toList();
+
+			if (!thoughtSignatures.isEmpty()) {
+				messageMetadata.put("thoughtSignatures", thoughtSignatures);
+			}
+		}
 
 		ChatGenerationMetadata chatGenerationMetadata = ChatGenerationMetadata.builder()
 			.finishReason(candidateFinishReason.toString())
@@ -713,10 +758,19 @@ GeminiRequest createGeminiRequest(Prompt prompt) {
 		if (requestOptions.getPresencePenalty() != null) {
 			configBuilder.presencePenalty(requestOptions.getPresencePenalty().floatValue());
 		}
-		if (requestOptions.getThinkingBudget() != null) {
-			configBuilder
-				.thinkingConfig(ThinkingConfig.builder().thinkingBudget(requestOptions.getThinkingBudget()).build());
+
+		// Build thinking config if either thinkingBudget or includeThoughts is set
+		if (requestOptions.getThinkingBudget() != null || requestOptions.getIncludeThoughts() != null) {
+			ThinkingConfig.Builder thinkingBuilder = ThinkingConfig.builder();
+			if (requestOptions.getThinkingBudget() != null) {
+				thinkingBuilder.thinkingBudget(requestOptions.getThinkingBudget());
+			}
+			if (requestOptions.getIncludeThoughts() != null) {
+				thinkingBuilder.includeThoughts(requestOptions.getIncludeThoughts());
+			}
+			configBuilder.thinkingConfig(thinkingBuilder.build());
 		}
+
 		if (requestOptions.getLabels() != null && !requestOptions.getLabels().isEmpty()) {
 			configBuilder.labels(requestOptions.getLabels());
 		}
@@ -1065,7 +1119,9 @@ public enum ChatModel implements ChatModelDescription {
 		 * See: <a href=
 		 * "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash-lite">gemini-2.5-flash-lite</a>
 		 */
-		GEMINI_2_5_FLASH_LIGHT("gemini-2.5-flash-lite");
+		GEMINI_2_5_FLASH_LIGHT("gemini-2.5-flash-lite"),
+
+		GEMINI_3_PRO_PREVIEW("gemini-3-pro-preview");
 
 		public final String value;
 
diff --git a/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java b/models/spring-ai-google-genai/src/main/java/org/springframework/ai/google/genai/GoogleGenAiChatOptions.java
@@ -113,6 +113,19 @@ public class GoogleGenAiChatOptions implements ToolCallingChatOptions {
 	 */
 	private @JsonProperty("thinkingBudget") Integer thinkingBudget;
 
+	/**
+	 * Optional. Whether to include thoughts in the response.
+	 * When true, thoughts are returned if the model supports them and thoughts are available.
+	 *
+	 * <p><strong>IMPORTANT:</strong> For Gemini 3 Pro with function calling,
+	 * this MUST be set to true to avoid validation errors. Thought signatures
+	 * are automatically propagated in multi-turn conversations to maintain context.
+	 *
+	 * <p>Note: Enabling thoughts increases token usage and API costs.
+	 * This is part of the thinkingConfig in GenerationConfig.
+	 */
+	private @JsonProperty("includeThoughts") Boolean includeThoughts;
+
 	/**
 	 * Optional. Whether to include extended usage metadata in responses.
 	 * When true, includes thinking tokens, cached content, tool-use tokens, and modality details.
@@ -206,6 +219,7 @@ public static GoogleGenAiChatOptions fromOptions(GoogleGenAiChatOptions fromOpti
 		options.setInternalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled());
 		options.setToolContext(fromOptions.getToolContext());
 		options.setThinkingBudget(fromOptions.getThinkingBudget());
+		options.setIncludeThoughts(fromOptions.getIncludeThoughts());
 		options.setLabels(fromOptions.getLabels());
 		options.setIncludeExtendedUsageMetadata(fromOptions.getIncludeExtendedUsageMetadata());
 		options.setCachedContentName(fromOptions.getCachedContentName());
@@ -357,6 +371,14 @@ public void setThinkingBudget(Integer thinkingBudget) {
 		this.thinkingBudget = thinkingBudget;
 	}
 
+	public Boolean getIncludeThoughts() {
+		return this.includeThoughts;
+	}
+
+	public void setIncludeThoughts(Boolean includeThoughts) {
+		this.includeThoughts = includeThoughts;
+	}
+
 	public Boolean getIncludeExtendedUsageMetadata() {
 		return this.includeExtendedUsageMetadata;
 	}
@@ -448,6 +470,7 @@ public boolean equals(Object o) {
 				&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
 				&& Objects.equals(this.presencePenalty, that.presencePenalty)
 				&& Objects.equals(this.thinkingBudget, that.thinkingBudget)
+				&& Objects.equals(this.includeThoughts, that.includeThoughts)
 				&& Objects.equals(this.maxOutputTokens, that.maxOutputTokens) && Objects.equals(this.model, that.model)
 				&& Objects.equals(this.responseMimeType, that.responseMimeType)
 				&& Objects.equals(this.toolCallbacks, that.toolCallbacks)
@@ -460,21 +483,22 @@ public boolean equals(Object o) {
 	@Override
 	public int hashCode() {
 		return Objects.hash(this.stopSequences, this.temperature, this.topP, this.topK, this.candidateCount,
-				this.frequencyPenalty, this.presencePenalty, this.thinkingBudget, this.maxOutputTokens, this.model,
-				this.responseMimeType, this.toolCallbacks, this.toolNames, this.googleSearchRetrieval,
-				this.safetySettings, this.internalToolExecutionEnabled, this.toolContext, this.labels);
+				this.frequencyPenalty, this.presencePenalty, this.thinkingBudget, this.includeThoughts,
+				this.maxOutputTokens, this.model, this.responseMimeType, this.toolCallbacks, this.toolNames,
+				this.googleSearchRetrieval, this.safetySettings, this.internalToolExecutionEnabled, this.toolContext,
+				this.labels);
 	}
 
 	@Override
 	public String toString() {
 		return "GoogleGenAiChatOptions{" + "stopSequences=" + this.stopSequences + ", temperature=" + this.temperature
 				+ ", topP=" + this.topP + ", topK=" + this.topK + ", frequencyPenalty=" + this.frequencyPenalty
 				+ ", presencePenalty=" + this.presencePenalty + ", thinkingBudget=" + this.thinkingBudget
-				+ ", candidateCount=" + this.candidateCount + ", maxOutputTokens=" + this.maxOutputTokens + ", model='"
-				+ this.model + '\'' + ", responseMimeType='" + this.responseMimeType + '\'' + ", toolCallbacks="
-				+ this.toolCallbacks + ", toolNames=" + this.toolNames + ", googleSearchRetrieval="
-				+ this.googleSearchRetrieval + ", safetySettings=" + this.safetySettings + ", labels=" + this.labels
-				+ '}';
+				+ ", includeThoughts=" + this.includeThoughts + ", candidateCount=" + this.candidateCount
+				+ ", maxOutputTokens=" + this.maxOutputTokens + ", model='" + this.model + '\'' + ", responseMimeType='"
+				+ this.responseMimeType + '\'' + ", toolCallbacks=" + this.toolCallbacks + ", toolNames="
+				+ this.toolNames + ", googleSearchRetrieval=" + this.googleSearchRetrieval + ", safetySettings="
+				+ this.safetySettings + ", labels=" + this.labels + '}';
 	}
 
 	@Override
@@ -602,6 +626,11 @@ public Builder thinkingBudget(Integer thinkingBudget) {
 			return this;
 		}
 
+		public Builder includeThoughts(Boolean includeThoughts) {
+			this.options.setIncludeThoughts(includeThoughts);
+			return this;
+		}
+
 		public Builder includeExtendedUsageMetadata(Boolean includeExtendedUsageMetadata) {
 			this.options.setIncludeExtendedUsageMetadata(includeExtendedUsageMetadata);
 			return this;
diff --git a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatModelObservationApiKeyIT.java b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiChatModelObservationApiKeyIT.java
@@ -64,7 +64,7 @@ void beforeEach() {
 	void observationForChatOperation() {
 
 		var options = GoogleGenAiChatOptions.builder()
-			.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH.getValue())
+			.model(GoogleGenAiChatModel.ChatModel.GEMINI_3_PRO_PREVIEW.getValue())
 			.temperature(0.7)
 			.stopSequences(List.of("this-is-the-end"))
 			.maxOutputTokens(2048)
@@ -86,7 +86,7 @@ void observationForChatOperation() {
 	void observationForStreamingOperation() {
 
 		var options = GoogleGenAiChatOptions.builder()
-			.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH.getValue())
+			.model(GoogleGenAiChatModel.ChatModel.GEMINI_3_PRO_PREVIEW.getValue())
 			.temperature(0.7)
 			.stopSequences(List.of("this-is-the-end"))
 			.maxOutputTokens(2048)
@@ -126,7 +126,7 @@ private void validate(ChatResponseMetadata responseMetadata) {
 					AiProvider.GOOGLE_GENAI_AI.value())
 			.hasLowCardinalityKeyValue(
 					ChatModelObservationDocumentation.LowCardinalityKeyNames.REQUEST_MODEL.asString(),
-					GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH.getValue())
+					GoogleGenAiChatModel.ChatModel.GEMINI_3_PRO_PREVIEW.getValue())
 			.hasHighCardinalityKeyValue(
 					ChatModelObservationDocumentation.HighCardinalityKeyNames.REQUEST_MAX_TOKENS.asString(), "2048")
 			.hasHighCardinalityKeyValue(
@@ -174,8 +174,9 @@ public GoogleGenAiChatModel vertexAiEmbedding(Client genAiClient, TestObservatio
 			return GoogleGenAiChatModel.builder()
 				.genAiClient(genAiClient)
 				.observationRegistry(observationRegistry)
-				.defaultOptions(
-						GoogleGenAiChatOptions.builder().model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH).build())
+				.defaultOptions(GoogleGenAiChatOptions.builder()
+					.model(GoogleGenAiChatModel.ChatModel.GEMINI_3_PRO_PREVIEW)
+					.build())
 				.build();
 		}
 
diff --git a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiRetryTests.java b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/GoogleGenAiRetryTests.java
@@ -61,7 +61,7 @@ public void setUp() {
 				GoogleGenAiChatOptions.builder()
 					.temperature(0.7)
 					.topP(1.0)
-					.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH.getValue())
+					.model(GoogleGenAiChatModel.ChatModel.GEMINI_3_PRO_PREVIEW.getValue())
 					.build(),
 				this.retryTemplate);
 
diff --git a/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/tool/GoogleGenAiChatModelToolCallingIT.java b/models/spring-ai-google-genai/src/test/java/org/springframework/ai/google/genai/tool/GoogleGenAiChatModelToolCallingIT.java
@@ -104,7 +104,6 @@ public void functionCallTestInferredOpenApiSchema() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = GoogleGenAiChatOptions.builder()
-			.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH)
 			.toolCallbacks(List.of(
 					FunctionToolCallback.builder("get_current_weather", new MockWeatherService())
 						.description("Get the current weather in a given location.")
@@ -125,7 +124,7 @@ public void functionCallTestInferredOpenApiSchema() {
 
 		assertThat(chatResponse.getMetadata()).isNotNull();
 		assertThat(chatResponse.getMetadata().getUsage()).isNotNull();
-		assertThat(chatResponse.getMetadata().getUsage().getTotalTokens()).isGreaterThan(150).isLessThan(330);
+		assertThat(chatResponse.getMetadata().getUsage().getTotalTokens()).isGreaterThan(150).isLessThan(500);
 
 		ChatResponse response2 = this.chatModel
 			.call(new Prompt("What is the payment status for transaction 696?", promptOptions));
@@ -145,7 +144,6 @@ public void functionCallTestInferredOpenApiSchemaStream() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = GoogleGenAiChatOptions.builder()
-			.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH)
 			.toolCallbacks(List.of(FunctionToolCallback.builder("getCurrentWeather", new MockWeatherService())
 				.description("Get the current weather in a given location")
 				.inputType(MockWeatherService.Request.class)
@@ -178,7 +176,6 @@ public void functionCallUsageTestInferredOpenApiSchemaStreamFlash20() {
 		List<Message> messages = new ArrayList<>(List.of(userMessage));
 
 		var promptOptions = GoogleGenAiChatOptions.builder()
-			.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH)
 			.toolCallbacks(List.of(
 					FunctionToolCallback.builder("get_current_weather", new MockWeatherService())
 						.description("Get the current weather in a given location.")
@@ -200,7 +197,7 @@ public void functionCallUsageTestInferredOpenApiSchemaStreamFlash20() {
 		assertThat(chatResponse).isNotNull();
 		assertThat(chatResponse.getMetadata()).isNotNull();
 		assertThat(chatResponse.getMetadata().getUsage()).isNotNull();
-		assertThat(chatResponse.getMetadata().getUsage().getTotalTokens()).isGreaterThan(150).isLessThan(330);
+		assertThat(chatResponse.getMetadata().getUsage().getTotalTokens()).isGreaterThan(150).isLessThan(500);
 
 	}
 
@@ -271,7 +268,7 @@ public GoogleGenAiChatModel vertexAiEmbedding(Client genAiClient) {
 			return GoogleGenAiChatModel.builder()
 				.genAiClient(genAiClient)
 				.defaultOptions(GoogleGenAiChatOptions.builder()
-					.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_0_FLASH)
+					.model(GoogleGenAiChatModel.ChatModel.GEMINI_2_5_FLASH)
 					.temperature(0.9)
 					.build())
 				.build();
diff --git a/pom.xml b/pom.xml
@@ -288,7 +288,7 @@
 		<onnxruntime.version>1.19.2</onnxruntime.version>
 		<oci-sdk-version>3.63.1</oci-sdk-version>
 		<com.google.cloud.version>26.60.0</com.google.cloud.version>
-		<com.google.genai.version>1.17.0</com.google.genai.version>
+		<com.google.genai.version>1.28.0</com.google.genai.version>
 		<ibm.sdk.version>9.20.0</ibm.sdk.version>
 		<jsonschema.version>4.38.0</jsonschema.version>
 		<swagger-annotations.version>2.2.30</swagger-annotations.version>
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/google-genai-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/google-genai-chat.adoc