Skip to content

Commit 1d52835

Browse files
committed
Provide detailed Exception message when token count exceeds max
Closes #4835 Signed-off-by: John Blum <[email protected]>
1 parent 374c09e commit 1d52835

File tree

3 files changed

+93
-3
lines changed

3 files changed

+93
-3
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright 2023-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.embedding;
18+
19+
import org.springframework.ai.document.Document;
20+
21+
/**
22+
* {@link RuntimeException} thrown when the token count of the provided content exceeds the configured maximum.
23+
*
24+
* @author John Blum
25+
* @see IllegalArgumentException
26+
* @since 1.1.0
27+
*/
28+
@SuppressWarnings("unused")
29+
public class MaxTokenCountExceededException extends IllegalArgumentException {
30+
31+
public static MaxTokenCountExceededException because(Document document, int tokenCount, int maxTokenCount) {
32+
String message = "Tokens [%d] from Document [%s] exceeds the configured maximum number of input tokens allowed [%d]"
33+
.formatted(tokenCount, document.getId(), maxTokenCount);
34+
return new MaxTokenCountExceededException(message);
35+
}
36+
37+
public MaxTokenCountExceededException() {
38+
39+
}
40+
41+
public MaxTokenCountExceededException(String message) {
42+
super(message);
43+
}
44+
45+
public MaxTokenCountExceededException(Throwable cause) {
46+
super(cause);
47+
}
48+
49+
public MaxTokenCountExceededException(String message, Throwable cause) {
50+
super(message, cause);
51+
}
52+
}

spring-ai-commons/src/main/java/org/springframework/ai/embedding/TokenCountBatchingStrategy.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
* @author Laura Trotta
5252
* @author Jihoon Kim
5353
* @author Yanming Zhou
54+
* @author John Blum
5455
* @since 1.0.0
5556
*/
5657
public class TokenCountBatchingStrategy implements BatchingStrategy {
@@ -148,8 +149,7 @@ public List<List<Document>> batch(List<Document> documents) {
148149
int tokenCount = this.tokenCountEstimator
149150
.estimate(document.getFormattedContent(this.contentFormatter, this.metadataMode));
150151
if (tokenCount > this.maxInputTokenCount) {
151-
throw new IllegalArgumentException(
152-
"Tokens in a single document exceeds the maximum number of allowed input tokens");
152+
throw MaxTokenCountExceededException.because(document, tokenCount, this.maxInputTokenCount);
153153
}
154154
documentTokens.put(document, tokenCount);
155155
}

spring-ai-commons/src/test/java/org/springframework/ai/embedding/TokenCountBatchingStrategyTests.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,30 @@
2222

2323
import org.junit.jupiter.api.Test;
2424

25+
import org.springframework.ai.document.ContentFormatter;
2526
import org.springframework.ai.document.Document;
27+
import org.springframework.ai.document.MetadataMode;
28+
import org.springframework.ai.tokenizer.TokenCountEstimator;
2629
import org.springframework.core.io.DefaultResourceLoader;
2730
import org.springframework.core.io.Resource;
2831

2932
import static org.assertj.core.api.Assertions.assertThat;
33+
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
3034
import static org.assertj.core.api.Assertions.assertThatThrownBy;
35+
import static org.mockito.ArgumentMatchers.any;
36+
import static org.mockito.ArgumentMatchers.anyString;
37+
import static org.mockito.ArgumentMatchers.eq;
38+
import static org.mockito.Mockito.doReturn;
39+
import static org.mockito.Mockito.mock;
40+
import static org.mockito.Mockito.times;
41+
import static org.mockito.Mockito.verify;
42+
import static org.mockito.Mockito.verifyNoMoreInteractions;
3143

3244
/**
33-
* Basic unit test for {@link TokenCountBatchingStrategy}.
45+
* Basic unit tests for {@link TokenCountBatchingStrategy}.
3446
*
3547
* @author Soby Chacko
48+
* @author John Blum
3649
*/
3750
public class TokenCountBatchingStrategyTests {
3851

@@ -54,4 +67,29 @@ void batchEmbeddingWithLargeDocumentExceedsMaxTokenSize() throws IOException {
5467
.isInstanceOf(IllegalArgumentException.class);
5568
}
5669

70+
@Test
71+
void documentTokenCountExceedsConfiguredMaxTokenCount() {
72+
73+
Document mockDocument = mock(Document.class);
74+
ContentFormatter mockContentFormatter = mock(ContentFormatter.class);
75+
TokenCountEstimator mockTokenCountEstimator = mock(TokenCountEstimator.class);
76+
77+
doReturn("123abc").when(mockDocument).getId();
78+
doReturn(10).when(mockTokenCountEstimator).estimate(anyString());
79+
doReturn("test").when(mockDocument).getFormattedContent(any(), any());
80+
81+
TokenCountBatchingStrategy batchingStrategy = new TokenCountBatchingStrategy(mockTokenCountEstimator, 9, 0.0d,
82+
mockContentFormatter, MetadataMode.EMBED);
83+
84+
assertThatExceptionOfType(MaxTokenCountExceededException.class)
85+
.isThrownBy(() -> batchingStrategy.batch(List.of(mockDocument)))
86+
.withMessage("Tokens [10] from Document [123abc] exceeds the configured maximum number of input tokens allowed [9]")
87+
.withNoCause();
88+
89+
verify(mockDocument, times(1)).getId();
90+
verify(mockDocument, times(1)).getFormattedContent(eq(mockContentFormatter), eq(MetadataMode.EMBED));
91+
verify(mockTokenCountEstimator, times(1)).estimate(eq("test"));
92+
verifyNoMoreInteractions(mockDocument, mockTokenCountEstimator);
93+
}
94+
5795
}

0 commit comments

Comments
 (0)