From 52170f0a1c2e2050a8f1134ec67fb76256d0753b Mon Sep 17 00:00:00 2001 From: Scott Rozen Date: Fri, 15 May 2026 19:32:37 -0500 Subject: [PATCH] Phase 6: audit logging for LLM + tool invocations - New audit_log table with immutability triggers (UPDATE/DELETE blocked at the DB layer); migration in backend/migrations/audit_log.sql and appended to backend/schema.sql. - backend/src/lib/audit.ts: AuditEntry shape, recordAudit() fire-and-forget insert, hashContent() SHA-256 helper, AUDIT_LOG_ENABLED feature flag. - Tool dispatcher (lib/tools/registry.ts) records a tool_call row per invocation with duration, input/output hashes, and resolved document IDs from args + side effects; errors are recorded then re-thrown. - streamChatWithTools wraps the per-provider stream and records an llm_call row on success or error. Audit context flows through runLLMStream and the tabular generate path. - GET /audit-log returns the caller's own entries with filters (project_id, event_type, from, to, limit, offset). - Unit tests cover hashContent determinism, recordAudit insert shape, feature-flag no-op, and error swallowing. 
--- backend/migrations/audit_log.sql | 66 +++++++++++++++++ backend/schema.sql | 64 +++++++++++++++++ backend/src/index.ts | 2 + backend/src/lib/audit.ts | 90 +++++++++++++++++++++++ backend/src/lib/chatTools.ts | 9 +++ backend/src/lib/llm/index.ts | 59 ++++++++++++++- backend/src/lib/llm/types.ts | 17 +++++ backend/src/lib/tools/registry.ts | 115 +++++++++++++++++++++++++++--- backend/src/routes/audit.ts | 93 ++++++++++++++++++++++++ backend/src/routes/chat.ts | 1 + backend/src/routes/projectChat.ts | 1 + backend/src/routes/tabular.ts | 10 +++ backend/tests/unit/audit.test.ts | 95 ++++++++++++++++++++++++ 13 files changed, 608 insertions(+), 14 deletions(-) create mode 100644 backend/migrations/audit_log.sql create mode 100644 backend/src/lib/audit.ts create mode 100644 backend/src/routes/audit.ts create mode 100644 backend/tests/unit/audit.test.ts diff --git a/backend/migrations/audit_log.sql b/backend/migrations/audit_log.sql new file mode 100644 index 000000000..f8587f526 --- /dev/null +++ b/backend/migrations/audit_log.sql @@ -0,0 +1,66 @@ +-- Phase 6: append-only audit log. +-- +-- Every LLM call and every tool invocation lands here. Required before paid +-- data connectors so we can prove who saw what data and which model handled +-- it. Updates and deletes are blocked at the DB layer — entries are immutable +-- once written. +-- +-- user_email is denormalized for forensic retention: if a user row is later +-- deleted, the FK on user_id is declared ON DELETE SET NULL (the audit row is +-- never removed) and the email still appears so investigators can identify the actor.
+
+CREATE TABLE IF NOT EXISTS public.audit_log (
+  id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
+  user_id uuid REFERENCES auth.users(id) ON DELETE SET NULL,
+  user_email text,
+  event_type text NOT NULL CHECK (event_type IN (
+    'llm_call',
+    'tool_call',
+    'connector_fetch',
+    'document_upload',
+    'document_download'
+  )),
+  model text,
+  provider text,
+  tool_name text,
+  connector_id text,
+  project_id uuid REFERENCES public.projects(id) ON DELETE SET NULL,
+  document_ids uuid[],
+  source_license_scopes text[],
+  routing_policy_applied jsonb,
+  input_hash text,
+  output_hash text,
+  input_tokens integer,
+  output_tokens integer,
+  duration_ms integer,
+  status text NOT NULL CHECK (status IN ('success', 'error', 'blocked')),
+  error_message text,
+  created_at timestamptz NOT NULL DEFAULT now()
+);
+
+CREATE INDEX IF NOT EXISTS idx_audit_log_user_created
+  ON public.audit_log(user_id, created_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_audit_log_project_created
+  ON public.audit_log(project_id, created_at DESC)
+  WHERE project_id IS NOT NULL;
+
+CREATE INDEX IF NOT EXISTS idx_audit_log_event_type
+  ON public.audit_log(event_type);
+
+-- Immutability guard shared by the UPDATE and DELETE triggers below.
+--
+-- DELETE is always rejected. UPDATE is rejected unless it is exactly the
+-- change the ON DELETE SET NULL foreign keys make: user_id and/or project_id
+-- going to NULL while every other column stays identical. PostgreSQL carries
+-- out SET NULL as an UPDATE on this table and row-level triggers fire for it,
+-- so an unconditional RAISE would make it impossible to delete any user or
+-- project that has audit rows.
+CREATE OR REPLACE FUNCTION public.prevent_audit_log_modification()
+RETURNS TRIGGER AS $$
+BEGIN
+  -- NEW is only inspected for UPDATE; it carries no row for DELETE.
+  IF TG_OP = 'UPDATE' THEN
+    IF (NEW.user_id IS NULL OR NEW.user_id IS NOT DISTINCT FROM OLD.user_id)
+      AND (NEW.project_id IS NULL OR NEW.project_id IS NOT DISTINCT FROM OLD.project_id)
+      -- Compare every remaining column at once so columns added later are
+      -- protected without having to touch this function.
+      AND (to_jsonb(NEW) - 'user_id'::text - 'project_id'::text)
+        = (to_jsonb(OLD) - 'user_id'::text - 'project_id'::text)
+    THEN
+      RETURN NEW;
+    END IF;
+  END IF;
+  RAISE EXCEPTION 'audit_log entries are immutable';
+END;
+$$ LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS audit_log_no_update ON public.audit_log;
+CREATE TRIGGER audit_log_no_update BEFORE UPDATE ON public.audit_log
+  FOR EACH ROW EXECUTE PROCEDURE public.prevent_audit_log_modification();
+
+DROP TRIGGER IF EXISTS audit_log_no_delete ON public.audit_log;
+CREATE TRIGGER audit_log_no_delete BEFORE DELETE ON public.audit_log
+  FOR EACH ROW EXECUTE PROCEDURE public.prevent_audit_log_modification();
+
+REVOKE ALL ON public.audit_log FROM anon, authenticated;
diff --git a/backend/schema.sql b/backend/schema.sql
index c5eaeef64..d9fc309bf 100644
--- a/backend/schema.sql
+++ b/backend/schema.sql
@@ -365,3 +365,67 @@ revoke all on
public.tabular_cells from anon, authenticated;
 revoke all on public.tabular_review_chats from anon, authenticated;
 revoke all on public.tabular_review_chat_messages from anon, authenticated;
 revoke all on public.user_api_keys from anon, authenticated;
+
+-- ---------------------------------------------------------------------------
+-- Audit log (Phase 6)
+-- ---------------------------------------------------------------------------
+--
+-- Append-only record of LLM calls, tool invocations, and connector fetches.
+-- See backend/migrations/audit_log.sql for the canonical migration and the
+-- rationale for immutability + denormalized user_email.
+
+create table if not exists public.audit_log (
+  id uuid primary key default gen_random_uuid(),
+  user_id uuid references auth.users(id) on delete set null,
+  user_email text,
+  event_type text not null check (event_type in (
+    'llm_call',
+    'tool_call',
+    'connector_fetch',
+    'document_upload',
+    'document_download'
+  )),
+  model text,
+  provider text,
+  tool_name text,
+  connector_id text,
+  project_id uuid references public.projects(id) on delete set null,
+  document_ids uuid[],
+  source_license_scopes text[],
+  routing_policy_applied jsonb,
+  input_hash text,
+  output_hash text,
+  input_tokens integer,
+  output_tokens integer,
+  duration_ms integer,
+  status text not null check (status in ('success', 'error', 'blocked')),
+  error_message text,
+  created_at timestamptz not null default now()
+);
+
+create index if not exists idx_audit_log_user_created
+  on public.audit_log(user_id, created_at desc);
+
+create index if not exists idx_audit_log_project_created
+  on public.audit_log(project_id, created_at desc)
+  where project_id is not null;
+
+create index if not exists idx_audit_log_event_type
+  on public.audit_log(event_type);
+
+-- Immutability guard shared by the update and delete triggers.
+--
+-- Delete is always rejected. Update is rejected unless it is exactly the
+-- change the "on delete set null" foreign keys make: user_id and/or
+-- project_id going to null while every other column stays identical.
+-- PostgreSQL carries out "set null" as an update on this table and row-level
+-- triggers fire for it, so an unconditional raise would make it impossible to
+-- delete any user or project that has audit rows.
+create or replace function public.prevent_audit_log_modification()
+returns trigger as $$
+begin
+  -- new is only inspected for update; it carries no row for delete.
+  if tg_op = 'UPDATE' then
+    if (new.user_id is null or new.user_id is not distinct from old.user_id)
+      and (new.project_id is null or new.project_id is not distinct from old.project_id)
+      -- Compare every remaining column at once so columns added later are
+      -- protected without having to touch this function.
+      and (to_jsonb(new) - 'user_id'::text - 'project_id'::text)
+        = (to_jsonb(old) - 'user_id'::text - 'project_id'::text)
+    then
+      return new;
+    end if;
+  end if;
+  raise exception 'audit_log entries are immutable';
+end;
+$$ language plpgsql;
+
+drop
trigger if exists audit_log_no_update on public.audit_log; +create trigger audit_log_no_update before update on public.audit_log + for each row execute procedure public.prevent_audit_log_modification(); + +drop trigger if exists audit_log_no_delete on public.audit_log; +create trigger audit_log_no_delete before delete on public.audit_log + for each row execute procedure public.prevent_audit_log_modification(); + +revoke all on public.audit_log from anon, authenticated; diff --git a/backend/src/index.ts b/backend/src/index.ts index 2bbded663..69b0b2cc3 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -12,6 +12,7 @@ import { tabularRouter } from "./routes/tabular"; import { workflowsRouter } from "./routes/workflows"; import { userRouter } from "./routes/user"; import { downloadsRouter } from "./routes/downloads"; +import { auditRouter } from "./routes/audit"; try { encryptionKey(); @@ -126,6 +127,7 @@ app.use("/workflows", workflowsRouter); app.use("/user", userRouter); app.use("/users", userRouter); app.use("/download", downloadsRouter); +app.use("/audit-log", auditRouter); app.get("/health", (_req, res) => res.json({ ok: true })); diff --git a/backend/src/lib/audit.ts b/backend/src/lib/audit.ts new file mode 100644 index 000000000..4d66bf545 --- /dev/null +++ b/backend/src/lib/audit.ts @@ -0,0 +1,90 @@ +import { createHash } from "crypto"; +import type { createServerSupabase } from "./supabase"; + +export type AuditEventType = + | "llm_call" + | "tool_call" + | "connector_fetch" + | "document_upload" + | "document_download"; + +export type AuditStatus = "success" | "error" | "blocked"; + +export type LicenseScope = "public" | "licensed" | "internal"; + +export interface AuditEntry { + eventType: AuditEventType; + userId: string; + userEmail?: string; + model?: string; + provider?: string; + toolName?: string; + connectorId?: string; + projectId?: string | null; + documentIds?: string[]; + sourceLicenseScopes?: LicenseScope[]; + 
routingPolicyApplied?: Record<string, unknown>;
+    inputHash?: string;
+    outputHash?: string;
+    inputTokens?: number;
+    outputTokens?: number;
+    durationMs?: number;
+    status: AuditStatus;
+    errorMessage?: string;
+}
+
+// Client type returned by createServerSupabase(); only `.from().insert()` is
+// used in this module.
+type Db = ReturnType<typeof createServerSupabase>;
+
+/**
+ * Return the lowercase hex SHA-256 digest of `content`, encoded as UTF-8.
+ * Lets the audit log fingerprint prompts and outputs without storing them.
+ */
+export function hashContent(content: string): string {
+    return createHash("sha256").update(content, "utf8").digest("hex");
+}
+
+/**
+ * Audit logging is on unless AUDIT_LOG_ENABLED is set to one of
+ * 0 / false / no / off (case-insensitive, surrounding whitespace ignored).
+ * An unset variable, or any other value, leaves it enabled.
+ */
+function isAuditEnabled(): boolean {
+    const raw = process.env.AUDIT_LOG_ENABLED;
+    if (raw === undefined) return true;
+    return !/^(0|false|no|off)$/i.test(raw.trim());
+}
+
+/**
+ * Insert a single audit_log row. Fire-and-forget semantics: errors are
+ * logged but never propagated — audit failures must not break the user
+ * request. Callers may still `await` to ensure ordering within a turn.
+ *
+ * @param entry Audit fields in camelCase; mapped to snake_case columns here.
+ *              Missing optionals and empty arrays are written as NULL.
+ * @param db    Supabase client used for the insert.
+ * @returns     Resolves to undefined in every case, including failure.
+ */
+export async function recordAudit(entry: AuditEntry, db: Db): Promise<void> {
+    if (!isAuditEnabled()) return;
+
+    try {
+        const row = {
+            user_id: entry.userId,
+            user_email: entry.userEmail ?? null,
+            event_type: entry.eventType,
+            model: entry.model ?? null,
+            provider: entry.provider ?? null,
+            tool_name: entry.toolName ?? null,
+            connector_id: entry.connectorId ?? null,
+            project_id: entry.projectId ?? null,
+            document_ids: entry.documentIds?.length ? entry.documentIds : null,
+            source_license_scopes: entry.sourceLicenseScopes?.length
+                ? entry.sourceLicenseScopes
+                : null,
+            routing_policy_applied: entry.routingPolicyApplied ?? null,
+            input_hash: entry.inputHash ?? null,
+            output_hash: entry.outputHash ?? null,
+            input_tokens: entry.inputTokens ?? null,
+            output_tokens: entry.outputTokens ?? null,
+            duration_ms: entry.durationMs ?? null,
+            status: entry.status,
+            error_message: entry.errorMessage ?? null,
+        };
+
+        const { error } = await db.from("audit_log").insert(row);
+        if (error) {
+            console.error("[audit] insert failed:", error.message);
+        }
+    } catch (err) {
+        console.error(
+            "[audit] unexpected failure:",
+            err instanceof Error ?
err.message : String(err), + ); + } +} diff --git a/backend/src/lib/chatTools.ts b/backend/src/lib/chatTools.ts index 94c72dd0d..337b7b637 100644 --- a/backend/src/lib/chatTools.ts +++ b/backend/src/lib/chatTools.ts @@ -416,6 +416,7 @@ export async function runLLMStream(params: { docStore: DocStore; docIndex: DocIndex; userId: string; + userEmail?: string; db: ReturnType; write: (s: string) => void; workflowStore?: WorkflowStore; @@ -435,6 +436,7 @@ export async function runLLMStream(params: { docStore, docIndex, userId, + userEmail, db, write, workflowStore, @@ -453,6 +455,7 @@ export async function runLLMStream(params: { const toolCtx: ToolContext = { userId, + userEmail, db, docStore, docIndex, @@ -550,6 +553,12 @@ export async function runLLMStream(params: { maxIterations: 10, apiKeys, enableThinking: true, + audit: { + userId, + userEmail, + projectId: projectId ?? null, + db, + }, callbacks: { onContentDelta: (delta) => { iterText += delta; diff --git a/backend/src/lib/llm/index.ts b/backend/src/lib/llm/index.ts index fb5ddbf66..1d84dbe61 100644 --- a/backend/src/lib/llm/index.ts +++ b/backend/src/lib/llm/index.ts @@ -3,17 +3,70 @@ import { streamGemini, completeGeminiText } from "./gemini"; import { streamOpenAI, completeOpenAIText } from "./openai"; import { providerForModel } from "./models"; import type { StreamChatParams, StreamChatResult, UserApiKeys } from "./types"; +import { recordAudit, hashContent } from "../audit"; export * from "./types"; export * from "./models"; +function summarizeInputForAudit(params: StreamChatParams): string { + const messageSummary = params.messages + .map((m) => `${m.role}:${m.content}`) + .join("\n---\n"); + return `${params.systemPrompt}\n---\n${messageSummary}`; +} + export async function streamChatWithTools( params: StreamChatParams, ): Promise { const provider = providerForModel(params.model); - if (provider === "claude") return streamClaude(params); - if (provider === "openai") return streamOpenAI(params); - return 
streamGemini(params); + const startedAt = Date.now(); + try { + const result = + provider === "claude" + ? await streamClaude(params) + : provider === "openai" + ? await streamOpenAI(params) + : await streamGemini(params); + + if (params.audit) { + await recordAudit( + { + eventType: "llm_call", + userId: params.audit.userId, + userEmail: params.audit.userEmail, + projectId: params.audit.projectId ?? null, + model: params.model, + provider, + inputHash: hashContent(summarizeInputForAudit(params)), + outputHash: hashContent(result.fullText ?? ""), + durationMs: Date.now() - startedAt, + status: "success", + }, + params.audit.db, + ); + } + return result; + } catch (err) { + if (params.audit) { + await recordAudit( + { + eventType: "llm_call", + userId: params.audit.userId, + userEmail: params.audit.userEmail, + projectId: params.audit.projectId ?? null, + model: params.model, + provider, + inputHash: hashContent(summarizeInputForAudit(params)), + durationMs: Date.now() - startedAt, + status: "error", + errorMessage: + err instanceof Error ? err.message : String(err), + }, + params.audit.db, + ); + } + throw err; + } } export async function completeText(params: { diff --git a/backend/src/lib/llm/types.ts b/backend/src/lib/llm/types.ts index 55f987eff..4cf5b9301 100644 --- a/backend/src/lib/llm/types.ts +++ b/backend/src/lib/llm/types.ts @@ -1,3 +1,5 @@ +import type { createServerSupabase } from "../supabase"; + // Shared types for the LLM provider adapter. // Callers always speak OpenAI-style tools + { role, content } messages; each // provider translates internally. @@ -65,6 +67,21 @@ export type StreamChatParams = { * the reintroduction plan). */ documentFilenames?: string[]; + /** + * Optional audit metadata. When provided, the LLM adapter records one + * audit_log row per streamChatWithTools call (success or error). Omit in + * call sites that have no user context (rare — most callers should pass + * it through). 
The tool dispatcher logs its own tool_call rows + * independently, so missing this only suppresses the llm_call row. + */ + audit?: LlmAuditContext; +}; + +export type LlmAuditContext = { + userId: string; + userEmail?: string; + projectId?: string | null; + db: ReturnType; }; export type StreamChatResult = { diff --git a/backend/src/lib/tools/registry.ts b/backend/src/lib/tools/registry.ts index bbba9e64e..43b0b0552 100644 --- a/backend/src/lib/tools/registry.ts +++ b/backend/src/lib/tools/registry.ts @@ -16,6 +16,7 @@ import { readTableCells } from "./readTableCells"; import { generateDocxTool } from "./generateDocx"; import { editDocument } from "./editDocument"; import { replicateDocument } from "./replicateDocument"; +import { recordAudit, hashContent } from "../audit"; // Tools are added here in the order they should be advertised to the model. // Order matters: some providers weight tool selection by position. @@ -34,6 +35,52 @@ const TOOL_REGISTRY = [ export type ToolName = (typeof TOOL_REGISTRY)[number]["name"]; +const UUID_RE = + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + +function extractDocumentIds(args: Record): string[] | undefined { + const candidates: unknown[] = []; + const direct = ["document_id", "doc_id", "source_document_id"]; + for (const key of direct) { + if (args[key] !== undefined) candidates.push(args[key]); + } + const arrayKeys = ["document_ids", "doc_ids"]; + for (const key of arrayKeys) { + const value = args[key]; + if (Array.isArray(value)) candidates.push(...value); + } + const ids = candidates.filter( + (v): v is string => typeof v === "string" && UUID_RE.test(v), + ); + return ids.length ? 
Array.from(new Set(ids)) : undefined; +} + +function collectAuditDocumentIds( + sideEffects: ToolSideEffects | undefined, + args: Record, +): string[] | undefined { + const ids = new Set(); + const fromArgs = extractDocumentIds(args); + if (fromArgs) for (const id of fromArgs) ids.add(id); + if (sideEffects) { + for (const r of sideEffects.docsRead ?? []) { + if (r.document_id) ids.add(r.document_id); + } + for (const r of sideEffects.docsCreated ?? []) { + if (r.document_id) ids.add(r.document_id); + } + for (const r of sideEffects.docsEdited ?? []) { + if (r.document_id) ids.add(r.document_id); + } + for (const r of sideEffects.docsReplicated ?? []) { + for (const c of r.copies ?? []) { + if (c.document_id) ids.add(c.document_id); + } + } + } + return ids.size ? Array.from(ids) : undefined; +} + const TOOLS_BY_NAME: ReadonlyMap = new Map( TOOL_REGISTRY.map((t) => [t.name, t]), ); @@ -105,17 +152,63 @@ export async function runToolCalls( /* ignore — tool sees empty args */ } - const result = await tool.execute(args, tc.id, ctx); - toolResults.push(result.toolResult); - - const s = result.sideEffects; - if (!s) continue; - if (s.docsRead) docsRead.push(...s.docsRead); - if (s.docsFound) docsFound.push(...s.docsFound); - if (s.docsCreated) docsCreated.push(...s.docsCreated); - if (s.docsReplicated) docsReplicated.push(...s.docsReplicated); - if (s.workflowsApplied) workflowsApplied.push(...s.workflowsApplied); - if (s.docsEdited) docsEdited.push(...s.docsEdited); + const startedAt = Date.now(); + let result: ToolExecutionResult | undefined; + let auditStatus: "success" | "error" = "success"; + let auditError: string | undefined; + try { + result = await tool.execute(args, tc.id, ctx); + toolResults.push(result.toolResult); + + const s = result.sideEffects; + if (s) { + if (s.docsRead) docsRead.push(...s.docsRead); + if (s.docsFound) docsFound.push(...s.docsFound); + if (s.docsCreated) docsCreated.push(...s.docsCreated); + if (s.docsReplicated) + 
docsReplicated.push(...s.docsReplicated); + if (s.workflowsApplied) + workflowsApplied.push(...s.workflowsApplied); + if (s.docsEdited) docsEdited.push(...s.docsEdited); + } + } catch (err) { + auditStatus = "error"; + auditError = + err instanceof Error ? err.message : String(err); + // Re-throw after recording — preserves existing error semantics. + await recordAudit( + { + eventType: "tool_call", + userId: ctx.userId, + userEmail: ctx.userEmail, + toolName: tool.name, + projectId: ctx.projectId ?? null, + documentIds: extractDocumentIds(args), + inputHash: hashContent(tc.function.arguments || ""), + durationMs: Date.now() - startedAt, + status: auditStatus, + errorMessage: auditError, + }, + ctx.db, + ); + throw err; + } + + await recordAudit( + { + eventType: "tool_call", + userId: ctx.userId, + userEmail: ctx.userEmail, + toolName: tool.name, + projectId: ctx.projectId ?? null, + documentIds: collectAuditDocumentIds(result.sideEffects, args), + inputHash: hashContent(tc.function.arguments || ""), + outputHash: hashContent(result.toolResult.content ?? 
""), + durationMs: Date.now() - startedAt, + status: "success", + }, + ctx.db, + ); } return { diff --git a/backend/src/routes/audit.ts b/backend/src/routes/audit.ts new file mode 100644 index 000000000..dfba72c7b --- /dev/null +++ b/backend/src/routes/audit.ts @@ -0,0 +1,93 @@ +import { Router } from "express"; +import { requireAuth } from "../middleware/auth"; +import { createServerSupabase } from "../lib/supabase"; + +export const auditRouter = Router(); + +const EVENT_TYPES = new Set([ + "llm_call", + "tool_call", + "connector_fetch", + "document_upload", + "document_download", +]); + +function parseTimestamp(value: unknown): string | null { + if (typeof value !== "string" || !value.trim()) return null; + const date = new Date(value); + if (Number.isNaN(date.getTime())) return null; + return date.toISOString(); +} + +function parseIntInRange( + value: unknown, + fallback: number, + min: number, + max: number, +): number { + if (typeof value !== "string" || !value.trim()) return fallback; + const parsed = Number.parseInt(value, 10); + if (!Number.isFinite(parsed)) return fallback; + return Math.min(max, Math.max(min, parsed)); +} + +// GET /audit-log +// User-scoped — every caller sees only their own rows. 
+// Canonical 8-4-4-4-12 hex UUID. audit_log.project_id is a uuid column, so
+// any other value would be rejected by the database, not by this route.
+const PROJECT_ID_RE =
+    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+
+/**
+ * List the authenticated caller's audit entries, newest first.
+ *
+ * Query parameters (all optional):
+ *   project_id  UUID; blank is treated as "no filter", non-UUID is a 400
+ *   event_type  one of EVENT_TYPES, otherwise 400
+ *   from / to   inclusive created_at bounds, anything `new Date()` can parse
+ *   limit       1..1000, default 100
+ *   offset      0..1_000_000, default 0
+ *
+ * Responds with { entries, limit, offset, total }, where total is the exact
+ * count of rows matching the filters (null if the client reports none).
+ */
+auditRouter.get("/", requireAuth, async (req, res) => {
+    const userId = res.locals.userId as string;
+    const db = createServerSupabase();
+
+    const projectIdRaw = req.query.project_id;
+    const eventTypeRaw = req.query.event_type;
+    const fromRaw = req.query.from;
+    const toRaw = req.query.to;
+
+    if (projectIdRaw !== undefined && typeof projectIdRaw !== "string") {
+        return void res
+            .status(400)
+            .json({ detail: "project_id must be a string" });
+    }
+    // Reject malformed ids here: sent to the uuid column they would fail in
+    // the database and reach the client as a 500 with the raw driver error.
+    const projectId =
+        typeof projectIdRaw === "string" ? projectIdRaw.trim() : "";
+    if (projectId && !PROJECT_ID_RE.test(projectId)) {
+        return void res
+            .status(400)
+            .json({ detail: "project_id must be a UUID" });
+    }
+    if (
+        eventTypeRaw !== undefined &&
+        (typeof eventTypeRaw !== "string" || !EVENT_TYPES.has(eventTypeRaw))
+    ) {
+        return void res.status(400).json({ detail: "invalid event_type" });
+    }
+    const from = parseTimestamp(fromRaw);
+    if (fromRaw !== undefined && from === null) {
+        return void res.status(400).json({ detail: "from is not a valid date" });
+    }
+    const to = parseTimestamp(toRaw);
+    if (toRaw !== undefined && to === null) {
+        return void res.status(400).json({ detail: "to is not a valid date" });
+    }
+
+    const limit = parseIntInRange(req.query.limit, 100, 1, 1000);
+    const offset = parseIntInRange(req.query.offset, 0, 0, 1_000_000);
+
+    // The user_id filter is what scopes results to the caller; every other
+    // filter only narrows within it. The secondary sort on id keeps offset
+    // pagination stable when several rows share a created_at value.
+    let query = db
+        .from("audit_log")
+        .select("*", { count: "exact" })
+        .eq("user_id", userId)
+        .order("created_at", { ascending: false })
+        .order("id", { ascending: false })
+        .range(offset, offset + limit - 1);
+
+    if (projectId) {
+        query = query.eq("project_id", projectId);
+    }
+    if (typeof eventTypeRaw === "string") {
+        query = query.eq("event_type", eventTypeRaw);
+    }
+    if (from) query = query.gte("created_at", from);
+    if (to) query = query.lte("created_at", to);
+
+    const { data, error, count } = await query;
+    if (error) return void res.status(500).json({ detail: error.message });
+
+    res.json({
+        entries: data ?? [],
+        limit,
+        offset,
+        total: count ??
null, + }); +}); diff --git a/backend/src/routes/chat.ts b/backend/src/routes/chat.ts index 9d002af8f..b85c8e9a5 100644 --- a/backend/src/routes/chat.ts +++ b/backend/src/routes/chat.ts @@ -558,6 +558,7 @@ chatRouter.post("/", requireAuth, async (req, res) => { docStore, docIndex, userId, + userEmail, db, write, workflowStore, diff --git a/backend/src/routes/projectChat.ts b/backend/src/routes/projectChat.ts index 388e86639..cca40cef5 100644 --- a/backend/src/routes/projectChat.ts +++ b/backend/src/routes/projectChat.ts @@ -161,6 +161,7 @@ projectChatRouter.post("/", requireAuth, async (req, res) => { docStore, docIndex, userId, + userEmail, db, write, workflowStore, diff --git a/backend/src/routes/tabular.ts b/backend/src/routes/tabular.ts index 27af28834..20046ae0d 100644 --- a/backend/src/routes/tabular.ts +++ b/backend/src/routes/tabular.ts @@ -944,6 +944,13 @@ tabularRouter.post("/:reviewId/generate", requireAuth, async (req, res) => { ); }, api_keys, + { + userId, + userEmail, + projectId: + (review.project_id as string | null) ?? 
null, + db, + }, ); } catch (err) { console.error( @@ -1321,6 +1328,7 @@ tabularRouter.post("/:reviewId/chat", requireAuth, async (req, res) => { docStore: new Map(), docIndex: {}, userId, + userEmail, db, write, tabularStore, @@ -1588,6 +1596,7 @@ async function queryTabularAllColumns( columns: Column[], onResult: (columnIndex: number, result: CellResult) => Promise, apiKeys?: import("../lib/llm").UserApiKeys, + audit?: import("../lib/llm").LlmAuditContext, ): Promise { const columnsDesc = columns .map((col) => { @@ -1650,6 +1659,7 @@ Rules: messages: [{ role: "user", content: USER }], tools: [], apiKeys, + audit, callbacks: { onContentDelta: (delta) => { contentBuffer += delta; diff --git a/backend/tests/unit/audit.test.ts b/backend/tests/unit/audit.test.ts new file mode 100644 index 000000000..f3755f60f --- /dev/null +++ b/backend/tests/unit/audit.test.ts @@ -0,0 +1,95 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; +import { hashContent, recordAudit, type AuditEntry } from "../../src/lib/audit"; + +function makeDb(insertImpl?: (row: unknown) => { error: unknown }) { + const insertedRows: unknown[] = []; + const insert = vi.fn(async (row: unknown) => { + insertedRows.push(row); + return insertImpl ? 
insertImpl(row) : { error: null }; + }); + const from = vi.fn(() => ({ insert })); + return { db: { from } as never, insert, insertedRows }; +} + +const baseEntry: AuditEntry = { + eventType: "tool_call", + userId: "11111111-1111-1111-1111-111111111111", + toolName: "read_document", + status: "success", +}; + +beforeEach(() => { + delete process.env.AUDIT_LOG_ENABLED; +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("hashContent", () => { + it("is deterministic for the same input", () => { + expect(hashContent("hello")).toBe(hashContent("hello")); + }); + + it("differs for different inputs", () => { + expect(hashContent("a")).not.toBe(hashContent("b")); + }); + + it("returns a 64-char hex string", () => { + const out = hashContent("anything"); + expect(out).toMatch(/^[0-9a-f]{64}$/); + }); +}); + +describe("recordAudit", () => { + it("inserts a row with the expected shape", async () => { + const { db, insert, insertedRows } = makeDb(); + await recordAudit( + { + ...baseEntry, + userEmail: "u@example.com", + projectId: "22222222-2222-2222-2222-222222222222", + documentIds: ["33333333-3333-3333-3333-333333333333"], + inputHash: hashContent("in"), + outputHash: hashContent("out"), + durationMs: 42, + }, + db, + ); + expect(insert).toHaveBeenCalledTimes(1); + const row = insertedRows[0] as Record; + expect(row.event_type).toBe("tool_call"); + expect(row.user_id).toBe(baseEntry.userId); + expect(row.user_email).toBe("u@example.com"); + expect(row.tool_name).toBe("read_document"); + expect(row.project_id).toBe("22222222-2222-2222-2222-222222222222"); + expect(row.document_ids).toEqual([ + "33333333-3333-3333-3333-333333333333", + ]); + expect(row.duration_ms).toBe(42); + expect(row.status).toBe("success"); + }); + + it("is a no-op when AUDIT_LOG_ENABLED is false", async () => { + process.env.AUDIT_LOG_ENABLED = "false"; + const { db, insert } = makeDb(); + await recordAudit(baseEntry, db); + expect(insert).not.toHaveBeenCalled(); + }); + + it("swallows 
insert errors without throwing", async () => { + const errSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const { db } = makeDb(() => ({ error: { message: "boom" } })); + await expect(recordAudit(baseEntry, db)).resolves.toBeUndefined(); + expect(errSpy).toHaveBeenCalled(); + }); + + it("nulls out empty optional arrays", async () => { + const { db, insertedRows } = makeDb(); + await recordAudit(baseEntry, db); + const row = insertedRows[0] as Record; + expect(row.document_ids).toBeNull(); + expect(row.source_license_scopes).toBeNull(); + expect(row.user_email).toBeNull(); + }); +});