Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 214 additions & 1 deletion packages/core/src/core/compression/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
*/

import { describe, it, expect } from 'vitest';
import type { IContent } from '../../services/history/IContent.js';
import type { IContent, MediaBlock } from '../../services/history/IContent.js';
import {
adjustForToolCallBoundary,
findForwardValidSplitPoint,
Expand Down Expand Up @@ -65,6 +65,38 @@ function toolResponseMsg(
],
};
}
/**
 * Test helper: builds a base64-encoded media block fixture.
 *
 * @param mimeType - MIME type stored on the block.
 * @param filename - Optional filename; stays `undefined` when omitted.
 * @param data - Payload string; defaults to 'base64data'.
 * @param caption - Optional caption; stays `undefined` when omitted.
 * @returns A block with `type` 'media' and `encoding` 'base64'.
 */
function mediaBlock(
  mimeType: string,
  filename?: string,
  data = 'base64data',
  caption?: string,
): MediaBlock {
  const fixture: MediaBlock = {
    type: 'media',
    mimeType,
    filename,
    data,
    encoding: 'base64',
    caption,
  };
  return fixture;
}

/**
 * Test helper: builds a human message made of one leading text block
 * followed by the given media blocks, in the order supplied.
 *
 * @param text - Content of the leading text block.
 * @param mediaBlocks - Media blocks appended after the text block.
 */
function humanMsgWithMedia(
  text: string,
  ...mediaBlocks: MediaBlock[]
): IContent {
  const leadingText = { type: 'text' as const, text };
  const message: IContent = {
    speaker: 'human',
    blocks: [leadingText, ...mediaBlocks],
  };
  return message;
}

/**
 * Test helper: builds a human message whose blocks are exactly the given
 * media blocks (no text block is added).
 *
 * @param mediaBlocks - Media blocks that make up the whole message.
 */
function humanMsgOnlyMedia(...mediaBlocks: MediaBlock[]): IContent {
  const message: IContent = { speaker: 'human', blocks: mediaBlocks };
  return message;
}

// ---------------------------------------------------------------------------
// adjustForToolCallBoundary
Expand Down Expand Up @@ -620,4 +652,185 @@ describe('sanitizeHistoryForCompression', () => {
const result = sanitizeHistoryForCompression([msg]);
expect(result[0].speaker).toBe('human');
});

// Media block tests (Issue #1875)
it('converts media blocks to text placeholders with filename', () => {
  // A lone PDF attachment should collapse into a single text placeholder.
  const pdfOnly = humanMsgOnlyMedia(
    mediaBlock('application/pdf', 'document.pdf'),
  );
  const sanitized = sanitizeHistoryForCompression([pdfOnly]);

  expect(sanitized).toHaveLength(1);
  const [message] = sanitized;
  expect(message.speaker).toBe('human');
  expect(message.blocks).toHaveLength(1);

  const [placeholder] = message.blocks;
  expect(placeholder.type).toBe('text');
  expect((placeholder as { text: string }).text).toBe(
    '[Attached PDF: document.pdf]',
  );
});

it('converts media blocks to text placeholders using mimeType when no filename', () => {
  // With neither caption nor filename, the MIME type is the identifier.
  const unnamedImage = humanMsgOnlyMedia(mediaBlock('image/png'));
  const sanitized = sanitizeHistoryForCompression([unnamedImage]);

  expect(sanitized).toHaveLength(1);
  const [placeholder] = sanitized[0].blocks;
  expect(placeholder.type).toBe('text');
  expect((placeholder as { text: string }).text).toBe(
    '[Attached image: image/png]',
  );
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

it('converts media blocks with empty filename to text placeholders using mimeType', () => {
  // An empty-string filename must be treated like a missing one.
  const blankName = humanMsgOnlyMedia(mediaBlock('image/png', ''));
  const sanitized = sanitizeHistoryForCompression([blankName]);

  expect(sanitized).toHaveLength(1);
  const [placeholder] = sanitized[0].blocks;
  expect(placeholder.type).toBe('text');
  expect((placeholder as { text: string }).text).toBe(
    '[Attached image: image/png]',
  );
});

it('handles different media categories (image, pdf, audio, video, unknown)', () => {
  // One attachment per category, all carried by a single human message.
  const attachments = [
    mediaBlock('image/jpeg', 'photo.jpg'),
    mediaBlock('application/pdf', 'report.pdf'),
    mediaBlock('audio/mp3', 'song.mp3'),
    mediaBlock('video/mp4', 'movie.mp4'),
    mediaBlock('application/octet-stream', 'data.bin'),
  ];

  const result = sanitizeHistoryForCompression([
    humanMsgOnlyMedia(...attachments),
  ]);
  const texts = result[0].blocks.map((b) => (b as { text: string }).text);

  // Exact, ordered comparison: independent `toContain` checks would still
  // pass if placeholders were reordered, duplicated, or if an unconverted
  // block slipped through alongside them.
  expect(texts).toEqual([
    '[Attached image: photo.jpg]',
    '[Attached PDF: report.pdf]',
    '[Attached audio: song.mp3]',
    '[Attached video: movie.mp4]',
    '[Attached unknown: data.bin]',
  ]);
});

it('handles mixed content with text + media blocks in same message', () => {
  // Local reader for the text payload of an already-sanitized block.
  const textOf = (block: unknown): string => (block as { text: string }).text;

  const sanitized = sanitizeHistoryForCompression([
    humanMsgWithMedia(
      'Please analyze this document',
      mediaBlock('application/pdf', 'report.pdf'),
    ),
  ]);

  const { blocks } = sanitized[0];
  expect(blocks).toHaveLength(2);

  // The original text block comes first, followed by the media placeholder.
  const [textPart, mediaPart] = blocks;
  expect(textPart.type).toBe('text');
  expect(textOf(textPart)).toBe('Please analyze this document');
  expect(mediaPart.type).toBe('text');
  expect(textOf(mediaPart)).toBe('[Attached PDF: report.pdf]');
});

it('converts messages with only media blocks properly', () => {
  // A media-only message survives as one message with one text block.
  const screenshotOnly = humanMsgOnlyMedia(
    mediaBlock('image/png', 'screenshot.png'),
  );
  const sanitized = sanitizeHistoryForCompression([screenshotOnly]);

  expect(sanitized).toHaveLength(1);
  const [message] = sanitized;
  expect(message.speaker).toBe('human');
  expect(message.blocks).toHaveLength(1);

  const [placeholder] = message.blocks;
  expect(placeholder.type).toBe('text');
  expect((placeholder as { text: string }).text).toBe(
    '[Attached image: screenshot.png]',
  );
});

it('does not change speaker for media block messages (unlike tool messages)', () => {
  // Only tool messages are re-tagged; a media-only message keeps its speaker.
  const aiWithImage: IContent = {
    speaker: 'ai',
    blocks: [mediaBlock('image/jpeg', 'photo.jpg')],
  };

  const [sanitized] = sanitizeHistoryForCompression([aiWithImage]);
  expect(sanitized.speaker).toBe('ai');

  const [placeholder] = sanitized.blocks;
  expect(placeholder.type).toBe('text');
  expect((placeholder as { text: string }).text).toBe(
    '[Attached image: photo.jpg]',
  );
});

it('handles mixed tool and media blocks in same message', () => {
  // Local reader for the text payload of an already-sanitized block.
  const textOf = (block: unknown): string => (block as { text: string }).text;

  // An AI turn carrying narration, an attachment, and a tool call.
  const aiTurn: IContent = {
    speaker: 'ai',
    blocks: [
      { type: 'text', text: 'Analyzing file and document' },
      mediaBlock('application/pdf', 'document.pdf'),
      {
        type: 'tool_call',
        id: 'c1',
        name: 'read_file',
        parameters: { path: '/tmp/test' },
      },
    ],
  };

  const [sanitized] = sanitizeHistoryForCompression([aiTurn]);
  expect(sanitized.blocks).toHaveLength(3);

  const [narration, attachment, toolCall] = sanitized.blocks;
  expect(textOf(narration)).toBe('Analyzing file and document');
  expect(textOf(attachment)).toBe('[Attached PDF: document.pdf]');
  expect(textOf(toolCall)).toContain('[Tool Call: read_file]');
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.

it('prefers media caption over filename in compression placeholders', () => {
  // Both caption and filename are set; the caption must be the identifier.
  const captioned = mediaBlock(
    'image/png',
    'diagram.png',
    'base64data',
    'Architecture diagram',
  );
  const sanitized = sanitizeHistoryForCompression([
    humanMsgOnlyMedia(captioned),
  ]);

  expect(sanitized).toHaveLength(1);
  const { blocks } = sanitized[0];
  expect(blocks).toHaveLength(1);
  expect((blocks[0] as { text: string }).text).toBe(
    '[Attached image: Architecture diagram]',
  );
});

it('re-tags tool speaker to human and placeholderizes media blocks in tool messages', () => {
  // Local reader for the text payload of an already-sanitized block.
  const textOf = (block: unknown): string => (block as { text: string }).text;

  // A tool message carrying both a tool response and an attached image.
  const toolTurn: IContent = {
    speaker: 'tool',
    blocks: [
      {
        type: 'tool_response',
        callId: 'c1',
        toolName: 'read_file',
        result: 'file contents here',
      },
      mediaBlock('image/png', 'screenshot.png'),
    ],
  };

  const [sanitized] = sanitizeHistoryForCompression([toolTurn]);
  expect(sanitized.speaker).toBe('human');
  expect(sanitized.blocks).toHaveLength(2);

  const [toolResult, attachment] = sanitized.blocks;
  expect(textOf(toolResult)).toContain('[Tool Result: read_file]');
  expect(textOf(toolResult)).toContain('file contents here');
  expect(textOf(attachment)).toBe('[Attached image: screenshot.png]');
});
});
48 changes: 40 additions & 8 deletions packages/core/src/core/compression/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
import type {
ContentBlock,
IContent,
MediaBlock,
TextBlock,
} from '../../services/history/IContent.js';
import type { IProvider } from '../../providers/IProvider.js';
import { classifyMediaBlock } from '../../providers/utils/mediaUtils.js';

/**
* Aggregate text from content blocks, handling spacing between text and
Expand Down Expand Up @@ -337,15 +339,40 @@ export async function runVerificationPass(
}

/**
 * Convert a MediaBlock to a concise text placeholder for compression.
 * This prevents provider-specific media types (like PDF "file" parts) from
 * reaching the compression LLM call, which would cause 400 errors on providers
 * that don't support certain media types.
 *
 * Format: [Attached <category>: <caption | filename | mimeType | unknown>]
 * The identifier prefers caption for accessibility/context, then falls back to
 * filename, mimeType, and finally "unknown".
 *
 * @param media - The media block to describe.
 * @returns The bracketed placeholder string.
 */
export function mediaBlockToCompressionPlaceholder(media: MediaBlock): string {
  const category = classifyMediaBlock(media);
  // `||` rather than `??` is deliberate: an empty or whitespace-only caption
  // or filename must fall through to the next candidate.
  const identifier =
    media.caption?.trim() ||
    media.filename?.trim() ||
    media.mimeType ||
    'unknown';
  // Capitalize PDF label for display, keep other categories as-is
  const label = category === 'pdf' ? 'PDF' : category;
  return `[Attached ${label}: ${identifier}]`;
}

/**
* Convert tool_call, tool_response, and media blocks to plain text representations
* so the compression request doesn't trip provider-specific validation errors.
*
* - Tool blocks: Anthropic's strict tool_use / tool_result pairing validation
* would reject orphaned tool blocks (from interrupted loops).
* - Media blocks: Providers like Kimi don't support certain media types (e.g.,
* PDF "file" parts) and would return 400 errors.
*
* Messages whose speaker is 'tool' are re-tagged as 'human' since they
* no longer carry structural tool_result blocks. Messages with media blocks
* keep their original speaker since media is not speaker-specific. All other
* block types pass through unchanged.
*/
export function sanitizeHistoryForCompression(
messages: readonly IContent[],
Expand All @@ -354,7 +381,8 @@ export function sanitizeHistoryForCompression(
const hasToolBlocks = msg.blocks.some(
(b) => b.type === 'tool_call' || b.type === 'tool_response',
);
if (!hasToolBlocks && msg.speaker !== 'tool') {
const hasMediaBlocks = msg.blocks.some((b) => b.type === 'media');
if (!hasToolBlocks && !hasMediaBlocks && msg.speaker !== 'tool') {
return msg;
}

Expand Down Expand Up @@ -390,6 +418,10 @@ export function sanitizeHistoryForCompression(
}
return { type: 'text', text } as TextBlock;
}
if (block.type === 'media') {
const text = mediaBlockToCompressionPlaceholder(block);
return { type: 'text', text } as TextBlock;
}
return block;
})
.filter((b): b is ContentBlock => b !== null);
Expand Down
Loading