Skip to content

Context limit and pruning updates #6668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jul 18, 2025
15 changes: 2 additions & 13 deletions core/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1712,23 +1712,12 @@ export interface CompleteOnboardingPayload {
apiKey?: string;
}

export type PruningStatus = "deleted-last-input" | "pruned" | "not-pruned";

export interface CompiledMessagesResult {
compiledChatMessages: ChatMessage[];
pruningStatus: PruningStatus;
didPrune: boolean;
contextPercentage: number;
}

export interface MessageOption {
precompiled: boolean;
}

export type WarningMessageLevel = "warning" | "fatal";

export type WarningCategory = "exceeded-context-length" | "deleted-last-input";

export interface WarningMessage {
message: string;
level: WarningMessageLevel;
category: WarningCategory;
}
67 changes: 31 additions & 36 deletions core/llm/countTokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
CompiledMessagesResult,
MessageContent,
MessagePart,
PruningStatus,
Tool,
} from "../index.js";
import { autodetectTemplateType } from "./autodetect.js";
Expand Down Expand Up @@ -363,9 +362,6 @@ function getTokenCountingBufferSafety(contextLength: number) {
}

const MIN_RESPONSE_TOKENS = 1000;
function getMinResponseTokens(maxTokens: number) {
return Math.min(MIN_RESPONSE_TOKENS, maxTokens);
}

function pruneRawPromptFromTop(
modelName: string,
Expand Down Expand Up @@ -425,6 +421,8 @@ function compileChatMessages({
supportsImages: boolean;
tools?: Tool[];
}): CompiledMessagesResult {
let didPrune = false;

let msgsCopy: ChatMessage[] = msgs.map((m) => ({ ...m }));

// If images not supported, convert MessagePart[] to string
Expand All @@ -446,21 +444,14 @@ function compileChatMessages({

msgsCopy = addSpaceToAnyEmptyMessages(msgsCopy);

while (msgsCopy.length > 1) {
if (isUserOrToolMsg(msgsCopy.at(-1))) {
break;
}
msgsCopy.pop();
}

// // Extract the tool sequence from the end of the message array
// const toolSequence = extractToolSequence(msgsCopy);
// Extract the tool sequence from the end of the message array
const toolSequence = extractToolSequence(msgsCopy);

// // Count tokens for all messages in the tool sequence
// let lastMessagesTokens = 0;
// for (const msg of toolSequence) {
// lastMessagesTokens += countChatMessageTokens(modelName, msg);
// }
// Count tokens for all messages in the tool sequence
let lastMessagesTokens = 0;
for (const msg of toolSequence) {
lastMessagesTokens += countChatMessageTokens(modelName, msg);
}

// System message
let systemMsgTokens = 0;
Expand All @@ -475,7 +466,7 @@ function compileChatMessages({
}

const countingSafetyBuffer = getTokenCountingBufferSafety(contextLength);
const minOutputTokens = getMinResponseTokens(maxTokens);
const minOutputTokens = Math.min(MIN_RESPONSE_TOKENS, maxTokens);

let inputTokensAvailable = contextLength;

Expand All @@ -486,19 +477,19 @@ function compileChatMessages({
// Non-negotiable messages
inputTokensAvailable -= toolTokens;
inputTokensAvailable -= systemMsgTokens;
// inputTokensAvailable -= lastMessagesTokens;
inputTokensAvailable -= lastMessagesTokens;

// Make sure there's enough context for the non-excludable items
if (inputTokensAvailable < 0) {
throw new Error(
`Not enough context available to include the system message, last user message, and tools.
There must be at least ${minOutputTokens} tokens remaining for output.
Request had the following token counts:
- contextLength: ${contextLength}
- counting safety buffer: ${countingSafetyBuffer}
- tools: ~${toolTokens}
- system message: ~${systemMsgTokens}
- max output tokens: ${maxTokens}`,
There must be at least ${minOutputTokens} tokens remaining for output.
Request had the following token counts:
- contextLength: ${contextLength}
- counting safety buffer: ${countingSafetyBuffer}
- tools: ~${toolTokens}
- system message: ~${systemMsgTokens}
- max output tokens: ${maxTokens}`,
);
}

Expand All @@ -513,12 +504,10 @@ function compileChatMessages({
};
});

let pruningStatus: PruningStatus = "not-pruned";

while (historyWithTokens.length > 0 && currentTotal > inputTokensAvailable) {
const message = historyWithTokens.shift()!;
currentTotal -= message.tokens;
pruningStatus = "pruned";
didPrune = true;

// At this point make sure no latent tool response without corresponding call
while (historyWithTokens[0]?.role === "tool") {
Expand All @@ -527,18 +516,24 @@ function compileChatMessages({
}
}

if (historyWithTokens.length === 0) {
pruningStatus = "deleted-last-input";
}

// Now reassemble
const reassembled: ChatMessage[] = [];
if (systemMsg) {
reassembled.push(systemMsg);
}
reassembled.push(...historyWithTokens.map(({ tokens, ...rest }) => rest));

return { compiledChatMessages: reassembled, pruningStatus };
reassembled.push(...toolSequence);

const inputTokens =
currentTotal + systemMsgTokens + toolTokens + lastMessagesTokens;
const availableTokens =
contextLength - countingSafetyBuffer - minOutputTokens;
const contextPercentage = inputTokens / availableTokens;
return {
compiledChatMessages: reassembled,
didPrune,
contextPercentage,
};
}

export {
Expand Down
2 changes: 1 addition & 1 deletion core/llm/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -940,7 +940,7 @@ export abstract class BaseLLM implements ILLM {
let messages = _messages;

// If not precompiled, compile the chat messages
if (!messageOptions || messageOptions.precompiled === false) {
if (!messageOptions?.precompiled) {
const { compiledChatMessages } = compileChatMessages({
modelName: completionOptions.model,
msgs: _messages,
Expand Down
3 changes: 0 additions & 3 deletions gui/src/components/StepContainer/StepContainer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ export default function StepContainer(props: StepContainerProps) {
const historyItemAfterThis = useAppSelector(
(state) => state.session.history[props.index + 1],
);
const warningMessage = useAppSelector(
(state) => state.session.warningMessage,
);
const uiConfig = useAppSelector(selectUIConfig);

// Calculate dimming and indicator state based on latest summary index
Expand Down
77 changes: 77 additions & 0 deletions gui/src/components/mainInput/ContextStatus.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import { useAppDispatch, useAppSelector } from "../../redux/hooks";
import { saveCurrentSession } from "../../redux/thunks/session";
import { ToolTip } from "../gui/Tooltip";

/**
 * Small vertical gauge showing how much of the context window is in use.
 *
 * Reads `contextPercentage` (a ratio, multiplied by 100 for display), `isPruned`
 * and `history` from the session slice. Renders nothing while the conversation
 * is not pruned and usage is below 60%. Otherwise it renders a bar whose colour
 * reflects the state (error when pruned, warning above 80%, neutral otherwise)
 * plus a tooltip with the percentage and, when there is history, a link that
 * saves the current session and opens a new one.
 */
const ContextStatus = () => {
  const dispatch = useAppDispatch();
  const contextPercentage = useAppSelector(
    (state) => state.session.contextPercentage,
  );
  const history = useAppSelector((state) => state.session.history);
  const isPruned = useAppSelector((state) => state.session.isPruned);

  // The store value is not validated here: treat undefined/NaN/Infinity as 0
  // so the label never reads "NaN%" and the visibility check below stays
  // meaningful (NaN < 60 is false, which would otherwise force a render).
  const ratio =
    typeof contextPercentage === "number" && Number.isFinite(contextPercentage)
      ? contextPercentage
      : 0;
  const percent = Math.round(ratio * 100);

  // The fill is absolutely positioned inside a fixed-size box, so a height
  // above 100% would draw outside the gauge; clamp it to the box.
  const fillPercent = Math.min(100, Math.max(0, percent));

  if (!isPruned && percent < 60) {
    return null;
  }

  const barColorClass = isPruned
    ? "bg-error"
    : percent > 80
      ? "bg-warning"
      : "bg-description";

  return (
    <div>
      <ToolTip
        id="context-status"
        closeEvents={{
          // blur: false,
          mouseleave: true,
          click: true,
          mouseup: false,
        }}
        clickable
      >
        <div className="flex flex-col gap-0">
          <span className="inline-block">
            {`${percent}% of context filled`}
          </span>
          {isPruned && (
            <span className="inline-block">
              {`Oldest messages are being removed`}
            </span>
          )}
          {history.length > 0 && (
            <div>
              <span className="inline-block">Start a</span>{" "}
              <span
                className="inline-block cursor-pointer underline"
                onClick={() => {
                  // Fire-and-forget: the thunk's promise is intentionally not awaited.
                  void dispatch(
                    saveCurrentSession({
                      openNewSession: true,
                      generateTitle: false,
                    }),
                  );
                }}
              >
                New Session
              </span>
            </div>
          )}
        </div>
      </ToolTip>
      <div
        data-tooltip-id="context-status"
        className="border-description-muted relative h-[14px] w-[7px] rounded-[1px] border-[0.5px] border-solid md:h-[10px] md:w-[5px]"
      >
        <div
          className={`transition-height absolute bottom-0 left-0 w-full duration-300 ease-in-out ${barColorClass}`}
          style={{ height: `${fillPercent}%` }}
        ></div>
      </div>
    </div>
  );
};

export default ContextStatus;
19 changes: 0 additions & 19 deletions gui/src/components/mainInput/ContinueInputBox.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ import { GradientBorder } from "./GradientBorder";
import { ToolbarOptions } from "./InputToolbar";
import { Lump } from "./Lump";
import { TipTapEditor } from "./TipTapEditor";
import WarningMessageBox from "./WarningMessageBox";
import EditConfigAction from "./WarningMessageBox/Actions/EditConfigAction";
import NewSessionAction from "./WarningMessageBox/Actions/NewSessionAction";

interface ContinueInputBoxProps {
isLastUserInput: boolean;
Expand Down Expand Up @@ -43,9 +40,6 @@ const EDIT_DISALLOWED_CONTEXT_PROVIDERS = [

function ContinueInputBox(props: ContinueInputBoxProps) {
const isStreaming = useAppSelector((state) => state.session.isStreaming);
const warningMessage = useAppSelector(
(state) => state.session.warningMessage,
);
const availableSlashCommands = useAppSelector(
selectSlashCommandComboBoxInputs,
);
Expand Down Expand Up @@ -125,19 +119,6 @@ function ContinueInputBox(props: ContinueInputBoxProps) {
/>
</div>
)}
{props.isLastUserInput &&
warningMessage &&
(warningMessage.category === "exceeded-context-length" ? (
<WarningMessageBox
warningMessage={warningMessage}
actions={[NewSessionAction]}
/>
) : (
<WarningMessageBox
warningMessage={warningMessage}
actions={[EditConfigAction]}
/>
))}
</div>
);
}
Expand Down
48 changes: 48 additions & 0 deletions gui/src/components/mainInput/InlineErrorMessage.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { useContext } from "react";
import { IdeMessengerContext } from "../../context/IdeMessenger";
import { useAppDispatch, useAppSelector } from "../../redux/hooks";
import { setInlineErrorMessage } from "../../redux/slices/sessionSlice";

/**
 * Identifiers for inline error messages. "out-of-context" is the only member
 * declared here; it is the value the InlineErrorMessage component in this file
 * checks for before rendering its "Message exceeds context limit." notice.
 */
export type InlineErrorMessageType = "out-of-context";

/**
 * Inline notice rendered when the session's `inlineErrorMessage` is
 * "out-of-context". Offers two actions: posting "config/openProfile" through
 * the IDE messenger, and hiding the notice by clearing the stored message.
 * Renders nothing for any other value.
 */
export default function InlineErrorMessage() {
  const dispatch = useAppDispatch();
  const ideMessenger = useContext(IdeMessengerContext);
  const inlineErrorMessage = useAppSelector(
    (state) => state.session.inlineErrorMessage,
  );

  // Only the "out-of-context" error has an inline presentation.
  if (inlineErrorMessage !== "out-of-context") {
    return null;
  }

  const openConfig = () => {
    ideMessenger.post("config/openProfile", {
      profileId: undefined,
    });
  };

  const hideMessage = () => {
    dispatch(setInlineErrorMessage(undefined));
  };

  return (
    <div className="border-border relative m-2 flex flex-col rounded-md border border-solid bg-transparent p-4">
      <p className="thread-message text-error text-center">
        Message exceeds context limit.
      </p>
      <div className="text-description flex flex-row items-center justify-center gap-1.5 px-3">
        <div
          className="cursor-pointer text-xs hover:underline"
          onClick={openConfig}
        >
          {/* Full label where the "xs" variant applies, short label otherwise */}
          <span className="xs:flex hidden">Open config</span>
          <span className="xs:hidden">Config</span>
        </div>
        |
        <span
          className="cursor-pointer text-xs hover:underline"
          onClick={hideMessage}
        >
          Hide
        </span>
      </div>
    </div>
  );
}
3 changes: 2 additions & 1 deletion gui/src/components/mainInput/InputToolbar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import ModelSelect from "../modelSelection/ModelSelect";
import { ModeSelect } from "../ModeSelect";
import { Button } from "../ui";
import { useFontSize } from "../ui/font";
import ContextStatus from "./ContextStatus";
import HoverItem from "./InputToolbar/HoverItem";

export interface ToolbarOptions {
Expand Down Expand Up @@ -170,6 +171,7 @@ function InputToolbar(props: InputToolbarProps) {
fontSize: tinyFont,
}}
>
{!isInEdit && <ContextStatus />}
{!props.toolbarOptions?.hideUseCodebase && !isInEdit && (
<div
className={`${toolsSupported ? "md:flex" : "int:flex"} hover:underline" hidden transition-colors duration-200`}
Expand Down Expand Up @@ -209,7 +211,6 @@ function InputToolbar(props: InputToolbarProps) {
</span>
</HoverItem>
)}

<Button
data-tooltip-id="enter-tooltip"
variant={props.isMainInput ? "primary" : "secondary"}
Expand Down

This file was deleted.

Loading
Loading