
Commit 3e58160

Replace token estimation with token usage from the last API response
1 parent dcd6d84

6 files changed: +28 -140 lines

esbuild.js (-6 lines)

```diff
@@ -29,12 +29,6 @@ const copyWasmFiles = {
 	name: "copy-wasm-files",
 	setup(build) {
 		build.onEnd(() => {
-			// tiktoken
-			fs.copyFileSync(
-				path.join(__dirname, "node_modules", "tiktoken", "tiktoken_bg.wasm"),
-				path.join(__dirname, "dist", "tiktoken_bg.wasm")
-			)
-
 			// tree sitter
 			const sourceDir = path.join(__dirname, "node_modules", "web-tree-sitter")
 			const targetDir = path.join(__dirname, "dist")
```

package-lock.json (+3 -48 lines)

Generated file; not rendered by default.

package.json (-2 lines)

```diff
@@ -134,7 +134,6 @@
 	"dependencies": {
 		"@anthropic-ai/bedrock-sdk": "^0.10.2",
 		"@anthropic-ai/sdk": "^0.26.0",
-		"@anthropic-ai/tokenizer": "^0.0.4",
 		"@anthropic-ai/vertex-sdk": "^0.4.1",
 		"@types/clone-deep": "^4.0.4",
 		"@vscode/codicons": "^0.0.36",
@@ -145,7 +144,6 @@
 		"diff": "^5.2.0",
 		"execa": "^9.3.0",
 		"globby": "^14.0.2",
-		"image-size": "^1.1.1",
 		"openai": "^4.54.0",
 		"os-name": "^6.0.0",
 		"p-wait-for": "^5.0.2",
```

src/ClaudeDev.ts (+20 -11 lines)

```diff
@@ -24,8 +24,8 @@ import { getApiMetrics } from "./shared/getApiMetrics"
 import { HistoryItem } from "./shared/HistoryItem"
 import { Tool, ToolName } from "./shared/Tool"
 import { ClaudeAskResponse } from "./shared/WebviewMessage"
-import { findLastIndex } from "./utils"
-import { isWithinContextWindow, truncateHalfConversation } from "./utils/context-management"
+import { findLast, findLastIndex } from "./utils"
+import { truncateHalfConversation } from "./utils/context-management"
 import { regexSearchFiles } from "./utils/ripgrep"
 
 const SYSTEM_PROMPT =
@@ -1304,15 +1304,24 @@ The following additional instructions are provided by the user. They should be f
 ${this.customInstructions.trim()}
 `
 		}
-		const isPromptWithinContextWindow = isWithinContextWindow(
-			this.api.getModel().info.contextWindow,
-			systemPrompt,
-			tools,
-			this.apiConversationHistory
-		)
-		if (!isPromptWithinContextWindow) {
-			const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
-			await this.overwriteApiConversationHistory(truncatedMessages)
+
+		// Check last API request metrics to see if we need to truncate
+		const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished")
+		if (lastApiReqFinished && lastApiReqFinished.text) {
+			const {
+				tokensIn,
+				tokensOut,
+				cacheWrites,
+				cacheReads,
+			}: { tokensIn?: number; tokensOut?: number; cacheWrites?: number; cacheReads?: number } = JSON.parse(
+				lastApiReqFinished.text
+			)
+			const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+			const isCloseToContextWindowLimit = totalTokens >= this.api.getModel().info.contextWindow * 0.8
+			if (isCloseToContextWindowLimit) {
+				const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
+				await this.overwriteApiConversationHistory(truncatedMessages)
+			}
 		}
 		const { message, userCredits } = await this.api.createMessage(
 			systemPrompt,
```
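To make the new check concrete, here is a self-contained sketch of the arithmetic. The field names (`tokensIn`, `tokensOut`, `cacheWrites`, `cacheReads`) come from the destructuring above; the payload values are invented, and the 200,000-token context window is only an example figure (Claude 3.5 Sonnet's documented window):

```ts
// Hypothetical "api_req_finished" payload; field names match the
// destructuring in ClaudeDev.ts, but the numbers are made up.
const text = JSON.stringify({ tokensIn: 120_000, tokensOut: 4_000, cacheWrites: 40_000, cacheReads: 0 })

const { tokensIn, tokensOut, cacheWrites, cacheReads } = JSON.parse(text)
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0) // 164_000

// With an assumed 200,000-token context window, the 80% threshold is
// 160,000, so this request's usage (164,000) would trigger truncation.
const contextWindow = 200_000
const shouldTruncate = totalTokens >= contextWindow * 0.8 // true
```

Because the usage numbers come straight from the previous API response, they account for exactly what the provider billed, including prompt-cache reads and writes, with no client-side tokenizer involved.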

src/utils/array-helpers.ts (+5 lines)

```diff
@@ -15,3 +15,8 @@ export function findLastIndex<T>(array: Array<T>, predicate: (value: T, index: n
 	}
 	return -1
 }
+
+export function findLast<T>(array: Array<T>, predicate: (value: T, index: number, obj: T[]) => boolean): T | undefined {
+	const index = findLastIndex(array, predicate)
+	return index === -1 ? undefined : array[index]
+}
```
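The new helper simply delegates to the existing `findLastIndex`. A quick usage sketch (the sample array is mine, not from the repo):

```ts
const nums = [1, 2, 3, 4, 5]

findLast(nums, (n) => n % 2 === 0) // => 4, the last even element
findLast(nums, (n) => n > 10) // => undefined, since nothing matches
```

Native `Array.prototype.findLast` only landed in ES2023, so a hand-rolled version keeps the code compatible with older TypeScript lib targets.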

src/utils/context-management.ts (-73 lines)

```diff
@@ -1,26 +1,4 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import { countTokens } from "@anthropic-ai/tokenizer"
-import { Buffer } from "buffer"
-import sizeOf from "image-size"
-
-export function isWithinContextWindow(
-	contextWindow: number,
-	systemPrompt: string,
-	tools: Anthropic.Messages.Tool[],
-	messages: Anthropic.Messages.MessageParam[]
-): boolean {
-	const adjustedContextWindow = contextWindow * 0.75 // Buffer to account for tokenizer differences
-	// counting tokens is expensive, so we first try to estimate before doing a more accurate calculation
-	const estimatedTotalMessageTokens = countTokens(systemPrompt + JSON.stringify(tools) + JSON.stringify(messages))
-	if (estimatedTotalMessageTokens <= adjustedContextWindow) {
-		return true
-	}
-	const systemPromptTokens = countTokens(systemPrompt)
-	const toolsTokens = countTokens(JSON.stringify(tools))
-	let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
-	let accurateTotalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
-	return accurateTotalMessageTokens <= availableTokens
-}
 
 /*
 We can't implement a dynamically updating sliding window as it would break prompt cache
@@ -46,54 +24,3 @@ export function truncateHalfConversation(
 
 	return truncatedMessages
 }
-
-function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
-	if (typeof message.content === "string") {
-		return countTokens(message.content)
-	} else if (Array.isArray(message.content)) {
-		return message.content.reduce((sum, item) => {
-			if (typeof item === "string") {
-				return sum + countTokens(item)
-			} else if (item.type === "text") {
-				return sum + countTokens(item.text)
-			} else if (item.type === "image") {
-				return sum + estimateImageTokens(item.source.data)
-			} else if (item.type === "tool_use") {
-				return sum + countTokens(JSON.stringify(item.input))
-			} else if (item.type === "tool_result") {
-				if (Array.isArray(item.content)) {
-					return (
-						sum +
-						item.content.reduce((contentSum, contentItem) => {
-							if (contentItem.type === "text") {
-								return contentSum + countTokens(contentItem.text)
-							} else if (contentItem.type === "image") {
-								return contentSum + estimateImageTokens(contentItem.source.data)
-							}
-							return contentSum + countTokens(JSON.stringify(contentItem))
-						}, 0)
-					)
-				} else {
-					return sum + countTokens(item.content || "")
-				}
-			} else {
-				return sum + countTokens(JSON.stringify(item))
-			}
-		}, 0)
-	} else {
-		return countTokens(JSON.stringify(message.content))
-	}
-}
-
-function estimateImageTokens(base64: string): number {
-	const base64Data = base64.split(";base64,").pop()
-	if (base64Data) {
-		const buffer = Buffer.from(base64Data, "base64")
-		const dimensions = sizeOf(buffer)
-		if (dimensions.width && dimensions.height) {
-			// "you can estimate the number of tokens used through this algorithm: tokens = (width px * height px)/750"
-			return Math.ceil((dimensions.width * dimensions.height) / 750)
-		}
-	}
-	return countTokens(base64)
-}
```
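`truncateHalfConversation` itself is unchanged by this commit; its body sits outside the hunk, so only its surrounding context appears above. For orientation, here is a minimal sketch of the kind of cache-friendly truncation the file's comment alludes to, under my own assumptions (not shown in this diff): the first message is always kept, and an even number of the oldest remaining messages is dropped.

```ts
import { Anthropic } from "@anthropic-ai/sdk"

// Illustrative sketch only; the real truncateHalfConversation in
// src/utils/context-management.ts may differ in its details.
function truncateHalfConversationSketch(
	messages: Anthropic.Messages.MessageParam[]
): Anthropic.Messages.MessageParam[] {
	// Keep the first message so the cached prompt prefix stays valid,
	// then drop an even number (about half) of the oldest messages so
	// user/assistant alternation and tool_use/tool_result pairing survive.
	const messagesToRemove = Math.floor(messages.length / 4) * 2
	return [messages[0], ...messages.slice(messagesToRemove + 1)]
}
```

For reference, the deleted `estimateImageTokens` implemented Anthropic's documented approximation tokens ≈ (width × height) / 750 (roughly 1,000 tokens for a 1000×750 image); with usage now read from the API response, that client-side estimate is no longer needed.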
