|
| 1 | +#!/usr/bin/env -S deno run --allow-run="ps,pgrep,kill" |
| 2 | + |
// Monitors a Node.js process for high resident memory usage and, once a threshold is reached, repeatedly
// sends SIGUSR2 signals so the target process can perform diagnostic actions. The delay between checks starts
// at the configured interval and doubles after each signal while usage stays at or above the threshold.
// Designed to help capture data around sudden RealtimeServer memory spikes.
// SF dotnet should be running with the environment variable `SF_SIGUSR2_ACTION=resourceUsage`, as
// interpreted by `src/RealtimeServer/common/diagnostics.ts`.
// This runs on the SF server, but more investigation would be needed to run it on a Windows machine.
| 9 | + |
| 10 | +// @ts-ignore Deno provides this module resolution at runtime. |
| 11 | +import { parseArgs } from "jsr:@std/cli/parse-args"; |
| 12 | + |
| 13 | +// Help IDE. |
| 14 | +declare const Deno: any; |
| 15 | + |
/** Settings parsed from the command line that control the monitor. */
interface CliOptions {
  /** Resident set size, in MiB, at or above which SIGUSR2 is sent to the monitored process. */
  thresholdMib: number;
  /** Starting delay between checks, in seconds. The monitor returns to this value whenever it resets its delay. */
  intervalSeconds: number;
}
| 20 | + |
/**
 * Watches for the RealtimeServer process and, while its RSS is at or above a threshold, sends SIGUSR2 with
 * exponential backoff.
 *
 * Each pass of the loop waits `currentIntervalSeconds`, locates the process with `pgrep`, and reads its resident
 * set size with `ps`. At or above the threshold it sends SIGUSR2 and doubles the wait. Otherwise (below the
 * threshold, process not found, or RSS unreadable) the wait is reset to the configured starting interval.
 */
class RtsMon {
  /** Seconds to wait before the next check. Doubled after each over-threshold pass; reset otherwise. */
  private currentIntervalSeconds: number;

  /** @param options Threshold and starting interval to monitor with. */
  constructor(private readonly options: CliOptions) {
    this.currentIntervalSeconds = options.intervalSeconds;
  }

  /** Runs the monitoring loop. The loop has no exit condition, so the returned promise does not resolve. */
  async monitor(): Promise<void> {
    Program.log(
      `Monitoring RealtimeServer resource usage. Threshold: ${this.options.thresholdMib} MiB. Starting interval: ${this.options.intervalSeconds} s`
    );
    while (true) {
      await this.delay();
      const pid: number | undefined = await this.findRealtimeServerPid();
      if (pid == null) {
        Program.log(`RealtimeServer not found. Waiting for it to start.`);
        this.resetDelay();
        continue;
      }

      const rssMib: number | undefined = await this.readRssMib(pid);
      if (rssMib == null) {
        // The process may have exited between the pgrep and ps calls. Say so rather than skipping silently.
        Program.log(`Could not read RSS of pid ${pid}. Will retry.`);
        this.resetDelay();
        continue;
      }

      if (rssMib >= this.options.thresholdMib) {
        await this.sendSignal(pid);
        this.currentIntervalSeconds *= 2;
        Program.log(
          `RSS ${rssMib.toFixed(1)} MiB >= threshold (${this.options.thresholdMib} MiB). Increasing interval to ${
            this.currentIntervalSeconds
          } s`
        );
      } else {
        if (this.currentIntervalSeconds > this.options.intervalSeconds) {
          // Memory usage came back down below the threshold since last check. Collect one more report.
          await this.sendSignal(pid);
        }
        Program.log(`RSS ${rssMib.toFixed(1)} MiB (below threshold ${this.options.thresholdMib} MiB).`);
        this.resetDelay();
      }
    }
  }

  /** Waits for the current interval. */
  private async delay(): Promise<void> {
    const ms: number = this.currentIntervalSeconds * 1000;
    await new Promise(resolve => setTimeout(resolve, ms));
  }

  /** Returns the wait to the configured starting interval. */
  private resetDelay(): void {
    this.currentIntervalSeconds = this.options.intervalSeconds;
  }

  /**
   * Sends SIGUSR2 to `pid` using the `kill` command. Failures are logged, not thrown, so the monitoring loop
   * keeps running.
   */
  private async sendSignal(pid: number): Promise<void> {
    try {
      const { code, stderr } = await this.runCommand("kill", ["-SIGUSR2", String(pid)]);
      if (code !== 0) {
        // A non-zero exit does not throw, so it has to be checked before reporting success.
        const detail: string = new TextDecoder().decode(stderr).trim();
        const suffix: string = detail.length > 0 ? `: ${detail}` : "";
        Program.logError(`Failed to send SIGUSR2 to pid ${pid}: kill exited with code ${code}${suffix}`);
        return;
      }
      Program.log(`Sent SIGUSR2 to pid ${pid}`);
    } catch (e) {
      Program.logError(`Failed to send SIGUSR2 to pid ${pid}: ${RtsMon.describeError(e)}`);
    }
  }

  /**
   * Reads the resident set size of `pid` with `ps`.
   * @returns The RSS in MiB, or undefined if `ps` failed or its output could not be parsed as a number.
   */
  private async readRssMib(pid: number): Promise<number | undefined> {
    try {
      const { code, stdout } = await this.runCommand("ps", ["--quick-pid", String(pid), "--no-headers", "-o", "rss"]);
      if (code !== 0) return undefined;
      const text: string = new TextDecoder().decode(stdout).trim();
      const kib: number = Number.parseInt(text, 10);
      if (Number.isNaN(kib)) return undefined;
      return kib / 1024; // convert to MiB
    } catch (e) {
      Program.logError(`Failed to run ps for pid ${pid}: ${RtsMon.describeError(e)}`);
      return undefined;
    }
  }

  /**
   * Looks up the RealtimeServer process with `pgrep`, matching the full command line against
   * `node .* --port 5002`.
   * @returns The first pid reported, or undefined if nothing matched or the lookup failed.
   */
  private async findRealtimeServerPid(): Promise<number | undefined> {
    try {
      const { code, stdout, stderr } = await this.runCommand("pgrep", ["--full", "--", "node .* --port 5002"]);
      // pgrep exits with 1 when no process matched; higher codes mean pgrep itself failed.
      if (code === 1) return undefined;
      if (code !== 0) {
        const detail: string = new TextDecoder().decode(stderr).trim();
        const suffix: string = detail.length > 0 ? `: ${detail}` : "";
        Program.logError(`pgrep exited with code ${code}${suffix}`);
        return undefined;
      }
      const lines: string[] = new TextDecoder()
        .decode(stdout)
        .split("\n")
        .map(line => line.trim())
        .filter(line => line.length > 0);
      const [first] = lines;
      if (first === undefined) return undefined;
      const pid: number = Number.parseInt(first, 10);
      if (Number.isNaN(pid)) return undefined;
      if (lines.length > 1) {
        Program.log(`Warning: Multiple RealtimeServer processes found. Picking one of them.`);
      }
      return pid;
    } catch (e) {
      Program.logError(`Failed to run pgrep: ${RtsMon.describeError(e)}`);
      return undefined;
    }
  }

  /**
   * Runs `cmd` with `args` and waits for it to finish.
   * @returns The exit code together with the captured stdout and stderr bytes.
   */
  private async runCommand(
    cmd: string,
    args: string[]
  ): Promise<{ code: number; stdout: Uint8Array; stderr: Uint8Array }> {
    const command = new Deno.Command(cmd, { args });
    return await command.output();
  }

  /** Produces a printable message from a caught value, which is not guaranteed to be an Error. */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }
}
| 124 | + |
/** Handles running the program: parses command-line arguments, starts the monitor, and provides logging. */
class Program {
  /** Name included in every log line. */
  static programName: string = "rtsmon";

  /**
   * Parses `Deno.args`, installs a SIGINT handler that exits with status 0, and runs the monitor.
   * Any error thrown along the way is logged and the process exits with status 1.
   */
  async main(): Promise<void> {
    try {
      const options: CliOptions = this.parse(Deno.args);
      const watcher: RtsMon = new RtsMon(options);
      Deno.addSignalListener("SIGINT", () => {
        Program.log("Received SIGINT. Exiting.");
        Deno.exit(0);
      });
      await watcher.monitor();
    } catch (e) {
      // A caught value is not guaranteed to be an Error, so narrow it before reading `message`.
      Program.logError(e instanceof Error ? e.message : String(e));
      Deno.exit(1);
    }
  }

  /** Writes `message` to stdout, prefixed with an ISO timestamp and the program name. */
  static log(message: string): void {
    const timestamp: string = new Date().toISOString();
    console.log(`${timestamp} ${Program.programName}: ${message}`);
  }

  /** Writes `message` to stderr, prefixed with an ISO timestamp and the program name. */
  static logError(message: string): void {
    const timestamp: string = new Date().toISOString();
    console.error(`${timestamp} ${Program.programName}: ${message}`);
  }

  /**
   * Converts raw command-line arguments into validated options.
   *
   * Exits with status 1 on an unknown option or any positional argument, and with status 0 after printing
   * usage for `--help`.
   * @throws Error if `--threshold-mib` or `--interval-seconds` is not a finite number greater than zero.
   */
  private parse(args: string[]): CliOptions {
    const parseOptions = {
      boolean: ["help"],
      default: { "threshold-mib": 1.5 * 1024, "interval-seconds": 10 }
    };
    const parsed = parseArgs(args, parseOptions);
    const allowed: Set<string> = new Set(["threshold-mib", "interval-seconds", "help", "_"]);
    for (const key of Object.keys(parsed)) {
      if (!allowed.has(key)) {
        Program.logError(`Unexpected argument: ${key}`);
        Deno.exit(1);
      }
    }
    if (parsed._.length > 0) {
      Program.logError(`Unexpected arguments: ${parsed._.join(", ")}`);
      Deno.exit(1);
    }
    if (parsed.help === true) {
      Program.log(`Usage: watch-for-rts-spike.mts [--threshold-mib N] [--interval-seconds N]`);
      Program.log(`Defaults: ${JSON.stringify(parseOptions.default)}`);
      Deno.exit(0);
    }

    const thresholdMib: number = this.toPositiveNumber(parsed["threshold-mib"], "threshold-mib");
    const intervalSeconds: number = this.toPositiveNumber(parsed["interval-seconds"], "interval-seconds");
    return { thresholdMib, intervalSeconds };
  }

  /**
   * Returns `value` if it is a finite number greater than zero.
   *
   * A zero or negative interval would make the monitor loop run with no delay, so such values are rejected here.
   * @throws Error if `value` is not a number, is not finite, or is not greater than zero.
   */
  private toPositiveNumber(value: unknown, name: string): number {
    const num: number = this.toNumber(value, name);
    if (!Number.isFinite(num) || num <= 0) {
      throw new Error(`${name} must be a positive number`);
    }
    return num;
  }

  /**
   * Returns `value` if it is already a number.
   * @throws Error if `value` has any other type.
   */
  private toNumber(value: unknown, name: string): number {
    if (typeof value === "number") return value;
    throw new Error(`${name} must be a number`);
  }
}
| 191 | + |
// Script entry point: build the program and run it until it exits.
const program = new Program();
await program.main();
0 commit comments