
Commit 3e58160

Replace token estimation with token usage from the last API response
1 parent dcd6d84

6 files changed: +28 -140 lines

esbuild.js (-6 lines)

```diff
@@ -29,12 +29,6 @@ const copyWasmFiles = {
 	name: "copy-wasm-files",
 	setup(build) {
 		build.onEnd(() => {
-			// tiktoken
-			fs.copyFileSync(
-				path.join(__dirname, "node_modules", "tiktoken", "tiktoken_bg.wasm"),
-				path.join(__dirname, "dist", "tiktoken_bg.wasm")
-			)
-
 			// tree sitter
 			const sourceDir = path.join(__dirname, "node_modules", "web-tree-sitter")
 			const targetDir = path.join(__dirname, "dist")
```

package-lock.json (+3 -48 lines)

Generated file; not rendered by default.

package.json (-2 lines)

```diff
@@ -134,7 +134,6 @@
 	"dependencies": {
 		"@anthropic-ai/bedrock-sdk": "^0.10.2",
 		"@anthropic-ai/sdk": "^0.26.0",
-		"@anthropic-ai/tokenizer": "^0.0.4",
 		"@anthropic-ai/vertex-sdk": "^0.4.1",
 		"@types/clone-deep": "^4.0.4",
 		"@vscode/codicons": "^0.0.36",
@@ -145,7 +144,6 @@
 		"diff": "^5.2.0",
 		"execa": "^9.3.0",
 		"globby": "^14.0.2",
-		"image-size": "^1.1.1",
 		"openai": "^4.54.0",
 		"os-name": "^6.0.0",
 		"p-wait-for": "^5.0.2",
```

src/ClaudeDev.ts (+20 -11 lines)

```diff
@@ -24,8 +24,8 @@ import { getApiMetrics } from "./shared/getApiMetrics"
 import { HistoryItem } from "./shared/HistoryItem"
 import { Tool, ToolName } from "./shared/Tool"
 import { ClaudeAskResponse } from "./shared/WebviewMessage"
-import { findLastIndex } from "./utils"
-import { isWithinContextWindow, truncateHalfConversation } from "./utils/context-management"
+import { findLast, findLastIndex } from "./utils"
+import { truncateHalfConversation } from "./utils/context-management"
 import { regexSearchFiles } from "./utils/ripgrep"
 
 const SYSTEM_PROMPT =
@@ -1304,15 +1304,24 @@ The following additional instructions are provided by the user. They should be f
 ${this.customInstructions.trim()}
 `
 		}
-		const isPromptWithinContextWindow = isWithinContextWindow(
-			this.api.getModel().info.contextWindow,
-			systemPrompt,
-			tools,
-			this.apiConversationHistory
-		)
-		if (!isPromptWithinContextWindow) {
-			const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
-			await this.overwriteApiConversationHistory(truncatedMessages)
+
+		// Check last API request metrics to see if we need to truncate
+		const lastApiReqFinished = findLast(this.claudeMessages, (m) => m.say === "api_req_finished")
+		if (lastApiReqFinished && lastApiReqFinished.text) {
+			const {
+				tokensIn,
+				tokensOut,
+				cacheWrites,
+				cacheReads,
+			}: { tokensIn?: number; tokensOut?: number; cacheWrites?: number; cacheReads?: number } = JSON.parse(
+				lastApiReqFinished.text
+			)
+			const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0)
+			const isCloseToContextWindowLimit = totalTokens >= this.api.getModel().info.contextWindow * 0.8
+			if (isCloseToContextWindowLimit) {
+				const truncatedMessages = truncateHalfConversation(this.apiConversationHistory)
+				await this.overwriteApiConversationHistory(truncatedMessages)
+			}
 		}
 		const { message, userCredits } = await this.api.createMessage(
 			systemPrompt,
```
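To make the new check concrete, here is a self-contained sketch of the arithmetic. The field names (`tokensIn`, `tokensOut`, `cacheWrites`, `cacheReads`) come from the destructuring above; the payload values are invented, and the 200,000-token context window is only an example figure (Claude 3.5 Sonnet's documented window):

```ts
// Hypothetical "api_req_finished" payload; field names match the
// destructuring in ClaudeDev.ts, but the numbers are made up.
const text = JSON.stringify({ tokensIn: 120_000, tokensOut: 4_000, cacheWrites: 40_000, cacheReads: 0 })

const { tokensIn, tokensOut, cacheWrites, cacheReads } = JSON.parse(text)
const totalTokens = (tokensIn || 0) + (tokensOut || 0) + (cacheWrites || 0) + (cacheReads || 0) // 164_000

// With an assumed 200,000-token context window, the 80% threshold is
// 160,000, so this request's usage (164,000) would trigger truncation.
const contextWindow = 200_000
const shouldTruncate = totalTokens >= contextWindow * 0.8 // true
```

Because the usage numbers come straight from the previous API response, they account for exactly what the provider billed, including prompt-cache reads and writes, with no client-side tokenizer involved.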

src/utils/array-helpers.ts (+5 lines)

```diff
@@ -15,3 +15,8 @@ export function findLastIndex<T>(array: Array<T>, predicate: (value: T, index: n
 	}
 	return -1
 }
+
+export function findLast<T>(array: Array<T>, predicate: (value: T, index: number, obj: T[]) => boolean): T | undefined {
+	const index = findLastIndex(array, predicate)
+	return index === -1 ? undefined : array[index]
+}
```
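The new helper simply delegates to the existing `findLastIndex`. A quick usage sketch (the sample array is mine, not from the repo):

```ts
const nums = [1, 2, 3, 4, 5]

findLast(nums, (n) => n % 2 === 0) // => 4, the last even element
findLast(nums, (n) => n > 10) // => undefined, since nothing matches
```

Native `Array.prototype.findLast` only landed in ES2023, so a hand-rolled version keeps the code compatible with older TypeScript lib targets.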

src/utils/context-management.ts (-73 lines)

```diff
@@ -1,26 +1,4 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import { countTokens } from "@anthropic-ai/tokenizer"
-import { Buffer } from "buffer"
-import sizeOf from "image-size"
-
-export function isWithinContextWindow(
-	contextWindow: number,
-	systemPrompt: string,
-	tools: Anthropic.Messages.Tool[],
-	messages: Anthropic.Messages.MessageParam[]
-): boolean {
-	const adjustedContextWindow = contextWindow * 0.75 // Buffer to account for tokenizer differences
-	// counting tokens is expensive, so we first try to estimate before doing a more accurate calculation
-	const estimatedTotalMessageTokens = countTokens(systemPrompt + JSON.stringify(tools) + JSON.stringify(messages))
-	if (estimatedTotalMessageTokens <= adjustedContextWindow) {
-		return true
-	}
-	const systemPromptTokens = countTokens(systemPrompt)
-	const toolsTokens = countTokens(JSON.stringify(tools))
-	let availableTokens = adjustedContextWindow - systemPromptTokens - toolsTokens
-	let accurateTotalMessageTokens = messages.reduce((sum, message) => sum + countMessageTokens(message), 0)
-	return accurateTotalMessageTokens <= availableTokens
-}
 
 /*
 We can't implement a dynamically updating sliding window as it would break prompt cache
@@ -46,54 +24,3 @@ export function truncateHalfConversation(
 
 	return truncatedMessages
 }
-
-function countMessageTokens(message: Anthropic.Messages.MessageParam): number {
-	if (typeof message.content === "string") {
-		return countTokens(message.content)
-	} else if (Array.isArray(message.content)) {
-		return message.content.reduce((sum, item) => {
-			if (typeof item === "string") {
-				return sum + countTokens(item)
-			} else if (item.type === "text") {
-				return sum + countTokens(item.text)
-			} else if (item.type === "image") {
-				return sum + estimateImageTokens(item.source.data)
-			} else if (item.type === "tool_use") {
-				return sum + countTokens(JSON.stringify(item.input))
-			} else if (item.type === "tool_result") {
-				if (Array.isArray(item.content)) {
-					return (
-						sum +
-						item.content.reduce((contentSum, contentItem) => {
-							if (contentItem.type === "text") {
-								return contentSum + countTokens(contentItem.text)
-							} else if (contentItem.type === "image") {
-								return contentSum + estimateImageTokens(contentItem.source.data)
-							}
-							return contentSum + countTokens(JSON.stringify(contentItem))
-						}, 0)
-					)
-				} else {
-					return sum + countTokens(item.content || "")
-				}
-			} else {
-				return sum + countTokens(JSON.stringify(item))
-			}
-		}, 0)
-	} else {
-		return countTokens(JSON.stringify(message.content))
-	}
-}
-
-function estimateImageTokens(base64: string): number {
-	const base64Data = base64.split(";base64,").pop()
-	if (base64Data) {
-		const buffer = Buffer.from(base64Data, "base64")
-		const dimensions = sizeOf(buffer)
-		if (dimensions.width && dimensions.height) {
-			// "you can estimate the number of tokens used through this algorithm: tokens = (width px * height px)/750"
-			return Math.ceil((dimensions.width * dimensions.height) / 750)
-		}
-	}
-	return countTokens(base64)
-}
```
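`truncateHalfConversation` itself is unchanged by this commit; its body sits outside the hunk, so only its surrounding context appears above. For orientation, here is a minimal sketch of the kind of cache-friendly truncation the file's comment alludes to, under my own assumptions (not shown in this diff): the first message is always kept, and an even number of the oldest remaining messages is dropped.

```ts
import { Anthropic } from "@anthropic-ai/sdk"

// Illustrative sketch only; the real truncateHalfConversation in
// src/utils/context-management.ts may differ in its details.
function truncateHalfConversationSketch(
	messages: Anthropic.Messages.MessageParam[]
): Anthropic.Messages.MessageParam[] {
	// Keep the first message so the cached prompt prefix stays valid,
	// then drop an even number (about half) of the oldest messages so
	// user/assistant alternation and tool_use/tool_result pairing survive.
	const messagesToRemove = Math.floor(messages.length / 4) * 2
	return [messages[0], ...messages.slice(messagesToRemove + 1)]
}
```

For reference, the deleted `estimateImageTokens` implemented Anthropic's documented approximation tokens ≈ (width × height) / 750 (roughly 1,000 tokens for a 1000×750 image); with usage now read from the API response, that client-side estimate is no longer needed.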
