Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions apps/x/packages/core/src/agents/context-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
 * Minimal structural shape of a chat message as used by this module.
 *
 * Only `role` and `content` are named explicitly; any additional properties
 * a concrete message type carries are admitted by the index signature.
 */
interface AnyMessage {
  content: unknown;
  role: string;
  [key: string]: unknown;
}

/**
 * Rough token estimate for `text`: one token per four characters
 * (UTF-16 code units, as counted by `String.prototype.length`), rounded up
 * so that any non-empty string costs at least one token.
 *
 * This is a character-count heuristic, not a real tokenizer.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const rawEstimate = text.length / charsPerToken;
  return Math.ceil(rawEstimate);
}

/**
 * Flattens a message's `content` into a single string for token estimation.
 *
 * - String content is returned unchanged.
 * - Array content contributes one piece per element: the element's string
 *   `text` property if present, otherwise its string `content` property,
 *   otherwise an empty string. Pieces are joined with a single space.
 * - Any other content yields an empty string.
 */
function getMessageText(message: AnyMessage): string {
  const { content } = message;

  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces: string[] = [];
  for (const part of content as unknown[]) {
    let piece = "";
    if (typeof part === "object" && part !== null) {
      const fields = part as Record<string, unknown>;
      // `text` wins over `content` when both are strings.
      const candidate =
        typeof fields["text"] === "string" ? fields["text"] : fields["content"];
      if (typeof candidate === "string") {
        piece = candidate;
      }
    }
    // Parts without usable text still occupy a slot, so separators are kept.
    pieces.push(piece);
  }
  return pieces.join(" ");
}
/**
 * Trims a conversation so its estimated token count stays within `maxTokens`.
 *
 * Behaviour:
 * - Every message with `role === "system"` is always kept, and all system
 *   messages are placed at the front of the returned array.
 * - The remaining messages are walked from newest to oldest and kept while
 *   they fit the remaining budget. The first message that does not fit, and
 *   everything older than it, is dropped.
 * - Any `role === "tool"` messages left at the start of the kept window are
 *   dropped as well, so the result never begins with a tool result whose
 *   originating tool call was cut off.
 * - If the budget cannot accommodate even the newest non-system message, only
 *   the system messages are returned.
 *
 * The input array is not mutated. When anything was dropped, a one-line
 * summary is written via `console.log`.
 *
 * @param messages Full conversation history.
 * @param maxTokens Token budget (default: 80,000 — conservative cross-model limit).
 * @param systemText Optional system prompt text to deduct from the budget upfront.
 * @returns A (possibly shorter) messages array that fits within the budget.
 */
export function truncateMessagesToFit<T extends AnyMessage>(
  messages: T[],
  maxTokens = 80_000,
  systemText = "",
): T[] {
  // Single pass: split by role and total the system-side token cost.
  const systemMessages: T[] = [];
  const otherMessages: T[] = [];
  let systemTokens = estimateTokens(systemText);
  for (const msg of messages) {
    if (msg.role === "system") {
      systemMessages.push(msg);
      systemTokens += estimateTokens(getMessageText(msg));
    } else {
      otherMessages.push(msg);
    }
  }

  let availableTokens = maxTokens - systemTokens;

  // Walk newest -> oldest, tracking only the index of the oldest message that
  // still fits. This avoids the repeated `unshift` of the naive approach,
  // which is quadratic on long histories.
  let firstKept = otherMessages.length;
  for (let i = otherMessages.length - 1; i >= 0; i--) {
    const msgTokens = estimateTokens(getMessageText(otherMessages[i]));
    // Written as a negated `<=` so a NaN budget stops immediately.
    if (!(msgTokens <= availableTokens)) {
      // Budget exhausted — drop this and all older messages.
      break;
    }
    availableTokens -= msgTokens;
    firstKept = i;
  }

  // Never start with an unmatched tool result (AI SDK requirement): skip every
  // leading "tool" message in the kept window.
  const fitting = otherMessages.slice(firstKept);
  const firstNonTool = fitting.findIndex((msg) => msg.role !== "tool");
  const truncatedMessages = firstNonTool === -1 ? [] : fitting.slice(firstNonTool);

  const finalMessages = [...systemMessages, ...truncatedMessages];
  if (finalMessages.length < messages.length) {
    const dropped = messages.length - finalMessages.length;
    console.log(
      `[context-utils] Truncated ${dropped} oldest message(s) to fit context window ` +
        `(budget: ${maxTokens} tokens, system: ${systemTokens} tokens).`,
    );
  }

  return finalMessages;
}
5 changes: 4 additions & 1 deletion apps/x/packages/core/src/agents/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import { getRaw as getLabelingAgentRaw } from "../knowledge/labeling_agent.js";
import { getRaw as getNoteTaggingAgentRaw } from "../knowledge/note_tagging_agent.js";
import { getRaw as getInlineTaskAgentRaw } from "../knowledge/inline_task_agent.js";
import { getRaw as getAgentNotesAgentRaw } from "../knowledge/agent_notes_agent.js";
import { truncateMessagesToFit } from "./context-utils.js";

const AGENT_NOTES_DIR = path.join(WorkDir, 'knowledge', 'Agent Notes');
const WORKDIR_CONFIG_FILE = path.join(WorkDir, 'config', 'workdir.json');
Expand Down Expand Up @@ -1285,7 +1286,9 @@ async function* streamLlm(
signal?: AbortSignal,
analytics?: StreamLlmAnalytics,
): AsyncGenerator<z.infer<typeof LlmStepStreamEvent>, void, unknown> {
const converted = convertFromMessages(messages);

const truncated = truncateMessagesToFit(messages);
const converted = convertFromMessages(truncated);
console.log(`! SENDING payload to model: `, JSON.stringify(converted))
const { fullStream } = streamText({
model,
Expand Down