From 1ccbe1da09443bed4f34a986d86fc1f30e2e8093 Mon Sep 17 00:00:00 2001 From: gus Date: Thu, 7 May 2026 22:24:58 -0300 Subject: [PATCH] fix(privacy): strip takes fence from get_page / get_versions when token carries an allow-list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.28.6 (#563) introduced the per-token takes-holder allow-list: an OAuth token carries `permissions.takes_holders` and `takes_list` / `takes_search` / `think.gather` filter take rows server-side via `WHERE t.holder = ANY($allowList)` in both engines. But take rows are stored in two places per the explicit contract in `extract-takes.ts:5-13` ("markdown is canonical, the takes table is a derived index"): the structured `takes` table AND inline in `pages.compiled_truth` between `` markers as a markdown table whose `who` column IS the holder. A read-only token whose `takes_holders` is `["world"]` (the documented default-deny posture from migrate.ts:1221) can call `get_page ` and recover every non-`world` claim verbatim from the body — private hunches, founder bets, non-public sourcing notes. `get_versions` has the same shape: snapshots persist historical compiled_truth verbatim, so a caller blocked at `get_page` falls through to /history. The team already shipped a complementary fix in `chunkers/recursive.ts:49` (stripTakesFence applied before the body is chunked, so `query` results don't leak fence content). Migration v38 documents this as a "complementary fix" — the page-CRUD surface was missed. Fix strips the fence at the op layer when `ctx.takesHoldersAllowList` is set (i.e. the remote MCP path). Local CLI callers leave the field unset and keep seeing the full fence. const visibleBody = ctx.takesHoldersAllowList ? { ...page, compiled_truth: stripTakesFence(page.compiled_truth) } : page; Same shape on `get_versions` over every snapshot in the array. Re-rendering the fence with allow-list-filtered rows would require joining the takes table per version_id and inverts the markdown-canonical contract; whole-fence strip is the conservative posture that closes the leak. A future allow-list-aware re-render is an additive change that won't break the contract pinned by these tests. Test coverage in `test/takes-mcp-allowlist.serial.test.ts`: - get_page with allow-list strips fence; surrounding body kept. - get_page without allow-list (local CLI) keeps fence (back-compat). - get_page fuzzy resolution path also strips for remote tokens. - get_versions with allow-list strips fence on every snapshot. - get_versions without allow-list returns historical content intact. The pre-fix R12 PoC reported `LEAKED garry hidden take? YES` and `LEAKED brain hidden take? YES`; post-fix the same PoC reports `no` for both holders and "bypass did not reproduce". --- src/core/operations.ts | 23 +++++- test/takes-mcp-allowlist.serial.test.ts | 98 +++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) diff --git a/src/core/operations.ts b/src/core/operations.ts index 5a78cf5dc..c22b9a2fa 100644 --- a/src/core/operations.ts +++ b/src/core/operations.ts @@ -16,6 +16,7 @@ import { dedupResults } from './search/dedup.ts'; import { captureEvalCandidate, isEvalCaptureEnabled, isEvalScrubEnabled } from './eval-capture.ts'; import type { HybridSearchMeta } from './types.ts'; import { extractPageLinks, isAutoLinkEnabled, isAutoTimelineEnabled, parseTimelineEntries, makeResolver, type UnresolvedFrontmatterRef } from './link-extraction.ts'; +import { stripTakesFence } from './takes-fence.ts'; import * as db from './db.ts'; // --- Types --- @@ -350,7 +351,19 @@ const get_page: Operation = { } const tags = await ctx.engine.getTags(page.slug); - return { ...page, tags, ...(resolved_slug ? { resolved_slug } : {}) }; + // Privacy boundary for the per-token takes-holder allow-list (v0.28.6). + // takes_list / takes_search / think.gather filter rows by holder at the + // SQL layer, but takes are also rendered as a markdown table inside the + // page body between TAKES_FENCE markers — `extract-takes.ts` ("markdown + // is canonical, the takes table is a derived index"). A read-only token + // restricted to e.g. `world` could call `get_page ` and recover + // every non-`world` claim verbatim from the body. Strip the fence here + // when the caller carries an allow-list (i.e. the remote MCP path). + // Local CLI callers leave takesHoldersAllowList unset and see the fence. + const visibleBody = ctx.takesHoldersAllowList + ? { ...page, compiled_truth: stripTakesFence(page.compiled_truth) } + : page; + return { ...visibleBody, tags, ...(resolved_slug ? { resolved_slug } : {}) }; }, scope: 'read', cliHints: { name: 'get', positional: ['slug'] }, @@ -1251,7 +1264,13 @@ const get_versions: Operation = { slug: { type: 'string', required: true }, }, handler: async (ctx, p) => { - return ctx.engine.getVersions(p.slug as string); + const versions = await ctx.engine.getVersions(p.slug as string); + // Same takes-allow-list privacy boundary as get_page. Snapshots persist + // historical compiled_truth verbatim, including the takes fence, so + // a remote token bypassing get_page via /history would re-introduce + // the same leak across every prior version. + if (!ctx.takesHoldersAllowList) return versions; + return versions.map(v => ({ ...v, compiled_truth: stripTakesFence(v.compiled_truth) })); }, scope: 'read', cliHints: { name: 'history', positional: ['slug'] }, diff --git a/test/takes-mcp-allowlist.serial.test.ts b/test/takes-mcp-allowlist.serial.test.ts index 6a1748e1a..d168a376a 100644 --- a/test/takes-mcp-allowlist.serial.test.ts +++ b/test/takes-mcp-allowlist.serial.test.ts @@ -15,6 +15,7 @@ import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; import { PGLiteEngine } from '../src/core/pglite-engine.ts'; import { dispatchToolCall } from '../src/mcp/dispatch.ts'; +import { TAKES_FENCE_BEGIN, TAKES_FENCE_END } from '../src/core/takes-fence.ts'; let engine: PGLiteEngine; let alicePageId: number; @@ -105,6 +106,103 @@ describe('per-token takes-holder allow-list — takes_search', () => { }); }); +// --------------------------------------------------------------------------- +// Page-body channel: get_page / get_versions must respect the same allow-list. +// Take rows are stored in TWO places per the extract-takes contract: the +// `takes` table (filtered by the SQL `holder = ANY($allowList)` clause) and +// inline in `pages.compiled_truth` between TAKES_FENCE markers as a markdown +// table. Without a strip on the page-CRUD path, a `world`-only token reading +// `get_page ` recovers every non-`world` claim verbatim from the body. +// --------------------------------------------------------------------------- + +describe('per-token takes-holder allow-list — get_page body channel', () => { + const SLUG = 'people/bob-example'; + const FENCE_BODY = + '## Takes\n\n' + + `${TAKES_FENCE_BEGIN}\n` + + '\n| # | claim | kind | who | weight | since | source |\n' + + '|---|---|---|---|---|---|---|\n' + + '| 1 | CEO of Widget | fact | world | 1.0 | 2017-01 | Crustdata |\n' + + '| 2 | Strong technical founder | take | garry | 0.85 | 2026-04-29 | OH |\n' + + '| 3 | Seemed burned out in last OH | hunch | brain | 0.4 | 2026-05-01 | private |\n\n' + + `${TAKES_FENCE_END}\n` + + '\nFooter content stays.\n'; + + beforeAll(async () => { + await engine.putPage(SLUG, { title: 'Bob', type: 'person', compiled_truth: FENCE_BODY }); + }); + + test('remote token with allow-list strips fence from compiled_truth', async () => { + const result = await dispatchToolCall(engine, 'get_page', { slug: SLUG }, { + remote: true, + takesHoldersAllowList: ['world'], + }); + const page = parseResult(result) as { compiled_truth: string }; + expect(page.compiled_truth).not.toContain(TAKES_FENCE_BEGIN); + expect(page.compiled_truth).not.toContain(TAKES_FENCE_END); + expect(page.compiled_truth).not.toContain('Strong technical founder'); + expect(page.compiled_truth).not.toContain('Seemed burned out'); + expect(page.compiled_truth).not.toContain('| garry |'); + expect(page.compiled_truth).not.toContain('| brain |'); + // Surrounding body kept intact. + expect(page.compiled_truth).toContain('Footer content stays.'); + }); + + test('local CLI (no allow-list) preserves the fence — backwards compatibility', async () => { + const result = await dispatchToolCall(engine, 'get_page', { slug: SLUG }, { + remote: false, + }); + const page = parseResult(result) as { compiled_truth: string }; + expect(page.compiled_truth).toContain(TAKES_FENCE_BEGIN); + expect(page.compiled_truth).toContain('Seemed burned out'); + }); + + test('fuzzy resolution path also strips for remote token', async () => { + const result = await dispatchToolCall(engine, 'get_page', { slug: 'people/bob-example', fuzzy: true }, { + remote: true, + takesHoldersAllowList: ['world', 'garry'], + }); + const page = parseResult(result) as { compiled_truth: string }; + // Allow-list does not yet re-render filtered rows; whole fence is stripped. + // Pinned so future re-rendering work is an additive change, not a silent + // semantic flip. + expect(page.compiled_truth).not.toContain(TAKES_FENCE_BEGIN); + expect(page.compiled_truth).not.toContain('Strong technical founder'); + }); +}); + +describe('per-token takes-holder allow-list — get_versions body channel', () => { + const SLUG = 'people/carol-example'; + const FENCE_BODY = + `${TAKES_FENCE_BEGIN}\n| # | claim | kind | who |\n|---|---|---|---|\n| 1 | private hunch | hunch | brain |\n${TAKES_FENCE_END}\n`; + + beforeAll(async () => { + await engine.putPage(SLUG, { title: 'Carol', type: 'person', compiled_truth: FENCE_BODY }); + await engine.createVersion(SLUG); // snapshot now has the fence + }); + + test('remote token with allow-list strips fence from every snapshot', async () => { + const result = await dispatchToolCall(engine, 'get_versions', { slug: SLUG }, { + remote: true, + takesHoldersAllowList: ['world'], + }); + const versions = parseResult(result) as Array<{ compiled_truth: string }>; + expect(versions.length).toBeGreaterThan(0); + for (const v of versions) { + expect(v.compiled_truth).not.toContain(TAKES_FENCE_BEGIN); + expect(v.compiled_truth).not.toContain('private hunch'); + } + }); + + test('local CLI sees historical takes in snapshots', async () => { + const result = await dispatchToolCall(engine, 'get_versions', { slug: SLUG }, { + remote: false, + }); + const versions = parseResult(result) as Array<{ compiled_truth: string }>; + expect(versions.some(v => v.compiled_truth.includes('private hunch'))).toBe(true); + }); +}); + describe('think op — read-only on remote callers (Lane D landed)', () => { test('remote save/take is forced read-only via remote_persisted_blocked flag', async () => { // Without ANTHROPIC_API_KEY, runThink returns gather-only result with NO_ANTHROPIC_API_KEY warning.