Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions Sources/ManifoldInference/Services/ContextWindowManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,60 @@ public enum ContextWindowManager {
return HeuristicTokenizer().tokenCount(text)
}

// Fixed per-part token costs for non-text modalities.
//
// A part's true token footprint isn't visible from the `[ChatMessage]` value
// alone: `.content` only exposes `.text` parts, so an image/audio part would
// otherwise estimate as **zero**, letting a multimodal turn sail past the
// compression trigger and overflow at generation.
//
// These values are coarse placeholders, not provider-accurate counts (a
// single image is ~85–1.5k tokens depending on resolution / provider). They
// are deliberately sized to be "clearly non-zero, never catastrophically
// wrong" so the budget math notices the part exists. A backend that knows its
// real tiling cost should account for that at the wire layer, not here.

/// Flat token estimate charged for each `.image` part.
static let imagePartTokenEstimate: Int = 768

/// Flat token estimate charged for each `.audio` part.
static let audioPartTokenEstimate: Int = 384

/// Flat token estimate charged for each `.generatedMedia` part.
static let generatedMediaPartTokenEstimate: Int = 256

/// Estimates how many tokens a single `MessagePart` contributes.
///
/// Parts fall into two groups:
///
/// - Text-bearing parts are reduced to a string and measured with the
///   string overload of `estimateTokenCount`:
///   - `.text` / `.thinking`: the text itself.
///   - `.toolCall`: the tool name and its arguments, separated by a space.
///   - `.toolResult`: the non-nil `content` / `dialog` values, joined by a
///     space.
/// - Media parts (`.image`, `.audio`, `.generatedMedia`) are charged a fixed
///   estimate (``imagePartTokenEstimate``, ``audioPartTokenEstimate``,
///   ``generatedMediaPartTokenEstimate``), since the payload's true token
///   footprint isn't derivable here.
///
/// - Parameters:
///   - part: The message part to measure.
///   - tokenizer: Optional tokenizer forwarded to the string overload;
///     defaults to `nil`.
/// - Returns: The estimated token count for `part`.
public static func estimateTokenCount(_ part: MessagePart, tokenizer: TokenizerProvider? = nil) -> Int {
    // Media parts return their flat estimate immediately; every other case
    // only decides *which* string gets measured.
    let measuredText: String
    switch part {
    case let .text(body):
        measuredText = body
    case let .thinking(reasoning, _):
        measuredText = reasoning
    case .image:
        return imagePartTokenEstimate
    case .audio:
        return audioPartTokenEstimate
    case .generatedMedia:
        return generatedMediaPartTokenEstimate
    case let .toolCall(invocation):
        measuredText = "\(invocation.toolName) \(invocation.arguments)"
    case let .toolResult(outcome):
        measuredText = [outcome.content, outcome.dialog]
            .compactMap { $0 }
            .joined(separator: " ")
    }
    return estimateTokenCount(measuredText, tokenizer: tokenizer)
}

/// Estimates the token cost of an entire message as the sum of the
/// per-part estimates over `message.contentParts`.
///
/// Prefer this to `estimateTokenCount(message.content, …)`: `.content`
/// discards everything but `.text` parts, so the string overload silently
/// under-counts (to zero) image/audio/tool-only messages and lets them
/// overflow the window.
///
/// - Parameters:
///   - message: The message whose content parts are measured.
///   - tokenizer: Optional tokenizer forwarded to each per-part estimate;
///     defaults to `nil`.
/// - Returns: The summed estimate; `0` when the message has no content parts.
public static func estimateTokenCount(_ message: ChatMessage, tokenizer: TokenizerProvider? = nil) -> Int {
    var runningTotal = 0
    for part in message.contentParts {
        runningTotal += estimateTokenCount(part, tokenizer: tokenizer)
    }
    return runningTotal
}

/// Resolves the effective context size from available sources.
///
/// Priority: session override > model metadata > backend capabilities > default.
Expand Down Expand Up @@ -121,7 +175,8 @@ public enum ContextWindowManager {
var usedTokens = 0

for i in stride(from: messages.count - 1, through: 0, by: -1) {
let messageTokens = estimateTokenCount(messages[i].content, tokenizer: tokenizer)
// Sum across all parts — `.content` would miss image/audio/tool parts.
let messageTokens = estimateTokenCount(messages[i], tokenizer: tokenizer)
if usedTokens + messageTokens > available && firstKeptIndex < messages.endIndex {
break
}
Expand All @@ -141,7 +196,7 @@ public enum ContextWindowManager {
tokenizer: TokenizerProvider? = nil
) -> ContextBudget {
let systemTokens = estimateTokenCount(systemPrompt ?? "", tokenizer: tokenizer)
let messageTokens = messages.reduce(0) { $0 + estimateTokenCount($1.content, tokenizer: tokenizer) }
let messageTokens = messages.reduce(0) { $0 + estimateTokenCount($1, tokenizer: tokenizer) }
let availableForHistory = maxTokens - systemTokens - responseBuffer

return ContextBudget(
Expand Down
11 changes: 11 additions & 0 deletions Sources/ManifoldRuntime/Protocols/CompressionPolicy.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@ import ManifoldInference
/// Compression failures are logged and do not abort the turn — the existing
/// history is preserved.
///
/// ## Per-message pins are not yet threaded through this seam
///
/// `compress(history:sessionID:generate:)` passes only a `[ChatMessage]` and
/// the `sessionID` — **not** the set of user-pinned message IDs. The data
/// already exists (`ChatSession.pinnedMessageIDsRaw` / `pinnedMessageIDs` on
/// the session record), but honoring pins inside a policy requires a
/// protocol-signature change to carry the pinned-ID set (a new parameter or a
/// session handle). Until that lands, ``DefaultCompressionPolicy`` treats only
/// `.system`-role and `.memory`-kind records as load-bearing; explicit
/// per-message pins are not preserved across compression.
///
/// ## v0.26.0 Migration
///
/// The `shouldCompress` signature gained a `contextUtilization` parameter in v0.26.0.
Expand Down
Loading