From 64a1422c4403f335ede5042b82bcd0510284f02f Mon Sep 17 00:00:00 2001 From: shawn Date: Tue, 19 May 2026 23:37:24 +0100 Subject: [PATCH] feat: structured JSON logger with per-request id and access log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously every backend log was a free-form console.log/error, so there was no way to (a) correlate the streaming error you see in a client with the server-side line that produced it, or (b) filter on status / route / user_id from a log aggregator. The blog post about mikeoss called out the absence of structured logging, request correlation, and error context, so this PR puts the floor in. Three pieces: 1. backend/src/lib/logger.ts — ~80-line zero-dep JSON line logger. - logger.{debug,info,warn,error}({ fields }, msg) emits one JSON object per call to stdout/stderr. - level is controlled by LOG_LEVEL env var, defaulting to "info" in production and "debug" elsewhere. - logger.child({ fields }) returns a logger that prefixes every event with the given fields — used by the request middleware to bind request_id. - errFields(err) helper extracts name/message/stack from an Error. - Kept hand-written instead of pulling in pino because we only need ~30 lines today; the call sites won't change if it's later swapped. 2. backend/src/middleware/requestContext.ts — assigns a per-request id (from inbound X-Request-Id if present, else a UUID), echoes it on the response, parks a child logger on res.locals.log, and on response close emits one access-log line with method/route/status/ elapsed_ms/user_id. Wired into index.ts before the rate limiters. 3. Demonstrate adoption: replace console.error in the two streaming error paths (POST /chat, POST /projects/:projectId/chat) and the generate-title handler. Each unhandled error log now ships with the request_id so it correlates to the access log line. Left the debug-style console.log calls in chatTools.ts / documents.ts alone — they're a separate, broader cleanup. The X-Request-Id header is echoed on every response so a curl user or a frontend retry can grep their request out of the logs. --- backend/src/index.ts | 6 +- backend/src/lib/logger.ts | 86 ++++++++++++++++++++++++ backend/src/middleware/requestContext.ts | 42 ++++++++++++ backend/src/routes/chat.ts | 13 +++- backend/src/routes/projectChat.ts | 7 +- 5 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 backend/src/lib/logger.ts create mode 100644 backend/src/middleware/requestContext.ts diff --git a/backend/src/index.ts b/backend/src/index.ts index 07b3b8490..07c0d6d94 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -11,6 +11,8 @@ import { tabularRouter } from "./routes/tabular"; import { workflowsRouter } from "./routes/workflows"; import { userRouter } from "./routes/user"; import { downloadsRouter } from "./routes/downloads"; +import { requestContext } from "./middleware/requestContext"; +import { logger } from "./lib/logger"; const app = express(); const PORT = process.env.PORT ?? 3001; @@ -95,6 +97,8 @@ app.use( }), ); +app.use(requestContext()); + app.use(generalLimiter); app.use(express.json({ limit: "50mb" })); @@ -122,5 +126,5 @@ app.use("/download", downloadsRouter); app.get("/health", (_req, res) => res.json({ ok: true })); app.listen(PORT, () => { - console.log(`Mike backend running on port ${PORT}`); + logger.info({ port: PORT }, "backend listening"); }); diff --git a/backend/src/lib/logger.ts b/backend/src/lib/logger.ts new file mode 100644 index 000000000..771bc09e3 --- /dev/null +++ b/backend/src/lib/logger.ts @@ -0,0 +1,86 @@ +/** + * Tiny zero-dependency structured logger. Every call emits one JSON + * line to stdout/stderr, which is the shape any log aggregator (CW + * Logs, Loki, Datadog) is happy to ingest. Fields are merged into the + * top-level object so a query like `{level=error route=/chat err_msg=*}` + * works without parsing message strings. + * + * We didn't pull in pino because we only need ~30 lines, and adding a + * dep + a bunch of transport configuration is overkill for the current + * single-process backend. If the project grows into multi-process or + * async transports, replace the body of this file with pino — the + * call sites won't have to change. + */ + +type LogLevel = "debug" | "info" | "warn" | "error"; + +type Fields = Record; + +const LEVEL_RANK: Record = { + debug: 10, + info: 20, + warn: 30, + error: 40, +}; + +function resolveMinLevel(): LogLevel { + const raw = (process.env.LOG_LEVEL ?? "").toLowerCase(); + if (raw === "debug" || raw === "info" || raw === "warn" || raw === "error") + return raw; + return process.env.NODE_ENV === "production" ? "info" : "debug"; +} + +const MIN_LEVEL = resolveMinLevel(); + +function serializeError(err: unknown): Fields { + if (err instanceof Error) { + return { + err_name: err.name, + err_msg: err.message, + err_stack: err.stack, + }; + } + return { err: typeof err === "string" ? err : JSON.stringify(err) }; +} + +function emit(level: LogLevel, fields: Fields, msg?: string) { + if (LEVEL_RANK[level] < LEVEL_RANK[MIN_LEVEL]) return; + const merged: Fields = { + ts: new Date().toISOString(), + level, + ...fields, + }; + if (msg !== undefined) merged.msg = msg; + const line = JSON.stringify(merged); + if (level === "error" || level === "warn") { + process.stderr.write(line + "\n"); + } else { + process.stdout.write(line + "\n"); + } +} + +export type Logger = { + debug(fields: Fields, msg?: string): void; + info(fields: Fields, msg?: string): void; + warn(fields: Fields, msg?: string): void; + error(fields: Fields, msg?: string): void; + /** Returns a child logger that prefixes every event with `fields`. */ + child(fields: Fields): Logger; +}; + +function make(baseFields: Fields): Logger { + return { + debug: (fields, msg) => emit("debug", { ...baseFields, ...fields }, msg), + info: (fields, msg) => emit("info", { ...baseFields, ...fields }, msg), + warn: (fields, msg) => emit("warn", { ...baseFields, ...fields }, msg), + error: (fields, msg) => emit("error", { ...baseFields, ...fields }, msg), + child: (fields) => make({ ...baseFields, ...fields }), + }; +} + +export const logger: Logger = make({}); + +/** Helper for `catch (err)` blocks. */ +export function errFields(err: unknown): Fields { + return serializeError(err); +} diff --git a/backend/src/middleware/requestContext.ts b/backend/src/middleware/requestContext.ts new file mode 100644 index 000000000..7813a154a --- /dev/null +++ b/backend/src/middleware/requestContext.ts @@ -0,0 +1,42 @@ +import { randomUUID } from "crypto"; +import type { NextFunction, Request, Response } from "express"; +import { logger } from "../lib/logger"; + +/** + * Adds a per-request id, an access log line on response close, and a + * request-scoped logger on `res.locals.log`. The id is taken from the + * inbound `X-Request-Id` header when present so it can stitch with + * upstream traces; otherwise a UUID is generated. It's echoed back on + * the response so a curl user can grep their request out of the logs. + */ +export function requestContext() { + return (req: Request, res: Response, next: NextFunction) => { + const inbound = req.header("x-request-id"); + const requestId = inbound && inbound.length <= 200 ? inbound : randomUUID(); + res.setHeader("X-Request-Id", requestId); + + const startNs = process.hrtime.bigint(); + const reqLog = logger.child({ request_id: requestId }); + res.locals.requestId = requestId; + res.locals.log = reqLog; + + res.on("close", () => { + const elapsedMs = Number(process.hrtime.bigint() - startNs) / 1e6; + // route is set by Express after match; fall back to the raw url + // when the response closed before a route was attached. + const route = req.route?.path ?? req.originalUrl ?? req.url; + reqLog.info( + { + method: req.method, + route, + status: res.statusCode, + elapsed_ms: Math.round(elapsedMs), + user_id: (res.locals.userId as string | undefined) ?? null, + }, + "request", + ); + }); + + next(); + }; +} diff --git a/backend/src/routes/chat.ts b/backend/src/routes/chat.ts index 9a39e0a9b..24e7c2c2e 100644 --- a/backend/src/routes/chat.ts +++ b/backend/src/routes/chat.ts @@ -13,6 +13,7 @@ import { import { completeText } from "../lib/llm"; import { getUserApiKeys, getUserModelSettings } from "../lib/userSettings"; import { checkProjectAccess } from "../lib/access"; +import { errFields, logger, type Logger } from "../lib/logger"; export const chatRouter = Router(); @@ -414,7 +415,8 @@ chatRouter.post("/:chatId/generate-title", requireAuth, async (req, res) => { res.json({ title }); } catch (err) { - console.error("[generate-title]", err); + const log = (res.locals.log as Logger | undefined) ?? logger; + log.error({ route: "generate-title", ...errFields(err) }, "title generation failed"); res.status(500).json({ detail: "Failed to generate title" }); } }); @@ -500,7 +502,11 @@ chatRouter.post("/", requireAuth, async (req, res) => { .select("id, title") .single(); if (error || !newChat) { - console.error("[chat/stream] failed to create chat", error); + const log = (res.locals.log as Logger | undefined) ?? logger; + log.error( + { route: "chat/stream", supabase_error: error?.message }, + "failed to create chat", + ); return void res .status(500) .json({ detail: "Failed to create chat" }); @@ -594,7 +600,8 @@ chatRouter.post("/", requireAuth, async (req, res) => { .eq("id", chatId); } } catch (err) { - console.error("[chat/stream] error:", err); + const log = (res.locals.log as Logger | undefined) ?? logger; + log.error({ route: "chat/stream", ...errFields(err) }, "stream error"); try { write( `data: ${JSON.stringify({ type: "error", message: "Stream error" })}\n\n`, diff --git a/backend/src/routes/projectChat.ts b/backend/src/routes/projectChat.ts index 5e2996152..310dd0c8d 100644 --- a/backend/src/routes/projectChat.ts +++ b/backend/src/routes/projectChat.ts @@ -13,6 +13,7 @@ import { } from "../lib/chatTools"; import { getUserApiKeys } from "../lib/userSettings"; import { checkProjectAccess } from "../lib/access"; +import { errFields, logger, type Logger } from "../lib/logger"; const PROJECT_SYSTEM_PROMPT_EXTRA = `PROJECT CONTEXT: You are operating within a project folder that contains a collection of legal documents the user has organised for a single matter. The user's questions will usually refer to one or more documents in this project — your job is to find the relevant files to work on. Use list_documents to see what is available and fetch_documents / read_document to pull in any documents you need before answering. @@ -186,7 +187,11 @@ projectChatRouter.post("/", requireAuth, async (req, res) => { .eq("id", chatId); } } catch (err) { - console.error("[project-chat/stream] error:", err); + const log = (res.locals.log as Logger | undefined) ?? logger; + log.error( + { route: "project-chat/stream", project_id: projectId, ...errFields(err) }, + "stream error", + ); try { write( `data: ${JSON.stringify({ type: "error", message: "Stream error" })}\n\n`,