Skip to content

Commit fd66912

Browse files
committed
tool: Add watch-for-rts-spike
1 parent dd3c323 commit fd66912

File tree

1 file changed

+192
-0
lines changed

1 file changed

+192
-0
lines changed

scripts/watch-for-rts-spike.mts

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
#!/usr/bin/env -S deno run --allow-run="ps,pgrep,kill"
2+
3+
// Monitors a Node.js process for high resident memory usage and, once a threshold is exceeded, repeatedly
4+
// sends SIGUSR2 signals (starting at the configured interval and doubling it after each signal) so the target process can perform diagnostic actions.
5+
// Designed to help capture data around sudden RealtimeServer memory spikes.
6+
// SF dotnet should be running with environment `SF_SIGUSR2_ACTION=resourceUsage`, as
7+
// interpreted by `src/RealtimeServer/common/diagnostics.ts`.
8+
// This runs on the SF server, but more investigation would be needed to run on a Windows machine.
9+
10+
// @ts-ignore Deno provides this module resolution at runtime.
11+
import { parseArgs } from "jsr:@std/cli/parse-args";
12+
13+
// Declare the Deno runtime global as `any` so editors without Deno type definitions do not flag its uses.
14+
declare const Deno: any;
15+
16+
/** Settings parsed from the command line that control the monitor. */
interface CliOptions {
  /** Resident set size, in MiB, at or above which SIGUSR2 is sent to the target process. */
  thresholdMib: number;
  /** Base delay between polls, in seconds. The monitor doubles it while memory stays at or above the threshold. */
  intervalSeconds: number;
}
20+
21+
/**
 * Watches for the RealtimeServer process and, while its RSS is at or above a threshold, sends SIGUSR2 with
 * exponential backoff.
 *
 * Each poll looks up the process with `pgrep`, reads its resident set size with `ps`, and signals it with `kill`.
 * The poll interval starts at `options.intervalSeconds`, doubles after every over-threshold poll (there is no upper
 * bound), and returns to the starting value once memory drops below the threshold or the process cannot be read.
 */
class RtsMon {
  /** Seconds to wait before the next poll. */
  private currentIntervalSeconds: number;

  /**
   * @param options Threshold (MiB) and starting poll interval (seconds).
   */
  constructor(private readonly options: CliOptions) {
    this.currentIntervalSeconds = options.intervalSeconds;
  }

  /**
   * Polls forever. The returned promise never resolves; failures of the external commands are handled
   * inside the helpers and surface here as `undefined` results.
   */
  async monitor(): Promise<void> {
    Program.log(
      `Monitoring RealtimeServer resource usage. Threshold: ${this.options.thresholdMib} MiB. Starting interval: ${this.options.intervalSeconds} s`
    );
    while (true) {
      await this.delay();
      const pid: number | undefined = await this.findRealtimeServerPid();
      if (pid == null) {
        Program.log(`RealtimeServer not found. Waiting for it to start.`);
        this.resetDelay();
        continue;
      }

      const rssMib: number | undefined = await this.readRssMib(pid);
      if (rssMib == null) {
        // The process may have exited between pgrep and ps; start over at the base interval.
        this.resetDelay();
        continue;
      }

      if (rssMib >= this.options.thresholdMib) {
        await this.sendSignal(pid);
        this.currentIntervalSeconds *= 2;
        Program.log(
          `RSS ${rssMib.toFixed(1)} MiB >= threshold (${this.options.thresholdMib} MiB). Increasing interval to ${
            this.currentIntervalSeconds
          } s`
        );
      } else {
        if (this.currentIntervalSeconds > this.options.intervalSeconds) {
          // Memory usage came back down below the threshold since last check. Collect one more report.
          await this.sendSignal(pid);
        }
        Program.log(`RSS ${rssMib.toFixed(1)} MiB (below threshold ${this.options.thresholdMib} MiB).`);
        this.resetDelay();
      }
    }
  }

  /** Waits for the current poll interval. */
  private async delay(): Promise<void> {
    const ms = this.currentIntervalSeconds * 1000;
    await new Promise(resolve => setTimeout(resolve, ms));
  }

  /** Returns the poll interval to the configured starting value. */
  private resetDelay(): void {
    this.currentIntervalSeconds = this.options.intervalSeconds;
  }

  /**
   * Sends SIGUSR2 to `pid` via the `kill` command. Failures are logged, never thrown, so that a failed
   * signal does not stop the monitor loop.
   */
  private async sendSignal(pid: number): Promise<void> {
    try {
      const { code, stderr } = await this.runCommand("kill", ["-SIGUSR2", String(pid)]);
      if (code !== 0) {
        // output() resolves even when the command exits non-zero, so the exit code must be checked explicitly.
        const detail: string = new TextDecoder().decode(stderr).trim();
        Program.logError(`Failed to send SIGUSR2 to pid ${pid}: kill exited with code ${code}. ${detail}`.trim());
        return;
      }
      Program.log(`Sent SIGUSR2 to pid ${pid}`);
    } catch (e) {
      const reason: string = e instanceof Error ? e.message : String(e);
      Program.logError(`Failed to send SIGUSR2 to pid ${pid}: ${reason}`);
    }
  }

  /**
   * Reads the resident set size of `pid` using `ps -o rss`.
   * @returns RSS in MiB, or `undefined` when `ps` fails or its output is not a number.
   */
  private async readRssMib(pid: number): Promise<number | undefined> {
    try {
      const { code, stdout } = await this.runCommand("ps", ["--quick-pid", String(pid), "--no-headers", "-o", "rss"]);
      if (code !== 0) return undefined;
      const text: string = new TextDecoder().decode(stdout).trim();
      const kib: number = Number.parseInt(text, 10);
      if (Number.isNaN(kib)) return undefined;
      return kib / 1024; // convert to MiB
    } catch {
      return undefined;
    }
  }

  /**
   * Looks up the RealtimeServer process by matching its full command line with `pgrep`.
   * @returns The first matching pid, or `undefined` when nothing matches or `pgrep` fails.
   */
  private async findRealtimeServerPid(): Promise<number | undefined> {
    try {
      const { code, stdout } = await this.runCommand("pgrep", ["--full", "--", "node .* --port 5002"]);
      if (code !== 0) return undefined;
      const pidLines: string[] = new TextDecoder()
        .decode(stdout)
        .split("\n")
        .map(line => line.trim())
        .filter(line => line.length > 0);
      const [first] = pidLines;
      if (first == null) return undefined;
      const pid: number = Number.parseInt(first, 10);
      if (Number.isNaN(pid)) return undefined;
      if (pidLines.length > 1) {
        Program.log(`Warning: Multiple RealtimeServer processes found. Picking one of them.`);
      }
      return pid;
    } catch {
      return undefined;
    }
  }

  /**
   * Runs an external command and collects its exit code and output.
   * The returned promise rejects if the command cannot be spawned; a non-zero exit code does not reject.
   */
  private async runCommand(
    cmd: string,
    args: string[]
  ): Promise<{ code: number; stdout: Uint8Array; stderr: Uint8Array }> {
    const command = new Deno.Command(cmd, { args });
    return await command.output();
  }
}
124+
125+
/** Handles running the program: argument parsing, logging, and starting the monitor. */
class Program {
  /** Name written after the timestamp on every log line. */
  static programName: string = "rtsmon";

  /**
   * Parses the command line, installs a SIGINT handler, and runs the monitor.
   * Any error thrown along the way is logged and the process exits with status 1.
   */
  async main(): Promise<void> {
    try {
      const options: CliOptions = this.parse(Deno.args);
      const watcher: RtsMon = new RtsMon(options);
      Deno.addSignalListener("SIGINT", () => {
        Program.log("Received SIGINT. Exiting.");
        Deno.exit(0);
      });
      await watcher.monitor();
    } catch (e) {
      Program.logError(e instanceof Error ? e.message : String(e));
      Deno.exit(1);
    }
  }

  /** Writes a timestamped message to stdout. */
  static log(message: string): void {
    const timestamp: string = new Date().toISOString();
    console.log(`${timestamp} ${Program.programName}: ${message}`);
  }

  /** Writes a timestamped message to stderr. */
  static logError(message: string): void {
    const timestamp: string = new Date().toISOString();
    console.error(`${timestamp} ${Program.programName}: ${message}`);
  }

  /**
   * Converts raw command-line arguments into `CliOptions`.
   *
   * Exits with status 1 on unknown flags or positional arguments, and with status 0 after printing usage
   * for `--help`.
   *
   * @param args Raw arguments, without the program name.
   * @returns The validated options.
   * @throws Error when an option value is not a finite number, or when the interval is not greater than 0.
   */
  private parse(args: string[]): CliOptions {
    const parseOptions = {
      boolean: ["help"],
      default: { "threshold-mib": 1.5 * 1024, "interval-seconds": 10 }
    };
    const parsed = parseArgs(args, parseOptions);
    const allowed: Set<string> = new Set(["threshold-mib", "interval-seconds", "help", "_"]);
    for (const key of Object.keys(parsed)) {
      if (!allowed.has(key)) {
        Program.logError(`Unexpected argument: ${key}`);
        Deno.exit(1);
      }
    }
    if (parsed._.length > 0) {
      Program.logError(`Unexpected arguments: ${parsed._.join(", ")}`);
      Deno.exit(1);
    }
    if (parsed.help === true) {
      Program.log(`Usage: watch-for-rts-spike.mts [--threshold-mib N] [--interval-seconds N]`);
      Program.log(`Defaults: ${JSON.stringify(parseOptions.default)}`);
      Deno.exit(0);
    }

    const thresholdMib: number = this.toNumber(parsed["threshold-mib"], "threshold-mib");
    const intervalSeconds: number = this.toNumber(parsed["interval-seconds"], "interval-seconds");
    if (intervalSeconds <= 0) {
      // Doubling a zero interval stays zero, which would make the monitor spawn pgrep/ps in a tight loop.
      throw new Error(`interval-seconds must be greater than 0`);
    }
    return { thresholdMib, intervalSeconds };
  }

  /**
   * Checks that an option value is a finite number.
   * @param value Value as produced by the argument parser.
   * @param name Option name, used in the error message.
   * @throws Error when `value` is not a number, or is NaN or infinite.
   */
  private toNumber(value: unknown, name: string): number {
    if (typeof value === "number" && Number.isFinite(value)) return value;
    throw new Error(`${name} must be a number`);
  }
}
191+
192+
// Script entry point: create the Program and await its main() at module top level.
await new Program().main();

0 commit comments

Comments
 (0)