Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 60 additions & 10 deletions electron/gateway/supervisor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,53 @@ async function getListeningProcessIds(port: number): Promise<string[]> {
return [...new Set(stdout.trim().split(/\r?\n/).map((value) => value.trim()).filter(Boolean))];
}

/**
 * Best-effort shutdown of the `openclaw-gateway` systemd user service.
 *
 * Returns immediately on any platform other than Linux, and also when
 * `systemctl --user is-active` does not report exactly `active`. Failures are
 * logged through `logger` and never propagated to the caller.
 *
 * @returns A promise that resolves once the stop attempt (and the fixed pause
 *   that follows it) has finished.
 */
async function stopSystemdGatewayService(): Promise<void> {
  if (process.platform !== 'linux') {
    return;
  }

  try {
    const childProcess = await import('child_process');

    // Runs one shell command with a timeout. `onExit` is invoked inside the
    // exec callback and its return value becomes the promise's result.
    const run = <T>(
      command: string,
      timeoutMs: number,
      onExit: (failure: Error | null, stdout: string) => T,
    ): Promise<T> =>
      new Promise<T>((settle) => {
        childProcess.exec(command, { timeout: timeoutMs }, (failure, stdout) => {
          settle(onExit(failure, stdout));
        });
      });

    // Any exec error, or any output other than "active", counts as not running.
    const isActive = await run(
      'systemctl --user is-active openclaw-gateway',
      5000,
      (failure, stdout) => !failure && stdout.trim() === 'active',
    );
    if (!isActive) {
      return;
    }

    logger.info('Stopping systemd user service openclaw-gateway to prevent auto-respawn');
    await run('systemctl --user stop openclaw-gateway', 10000, (failure) => {
      if (failure) {
        // A failed stop is only reported; execution continues either way.
        logger.warn(`Failed to stop systemd gateway service: ${failure.message}`);
        return;
      }
      logger.info('Successfully stopped systemd gateway service');
    });

    // Fixed 2 s pause after the stop attempt.
    // NOTE(review): presumably gives the service time to exit and release its
    // port before the caller continues — confirm.
    await new Promise<void>((wake) => {
      setTimeout(wake, 2000);
    });
  } catch (err) {
    logger.warn('Error while stopping systemd gateway service:', err);
  }
}

async function terminateOrphanedProcessIds(port: number, pids: string[]): Promise<void> {
logger.info(`Found orphaned process listening on port ${port} (PIDs: ${pids.join(', ')}), attempting to kill...`);

if (process.platform === 'darwin') {
await unloadLaunchctlGatewayService();
} else if (process.platform === 'linux') {
await stopSystemdGatewayService();
}

for (const pid of pids) {
Expand Down Expand Up @@ -242,19 +284,27 @@ export async function findExistingGatewayProcess(options: {
const { port, ownedPid } = options;

try {
try {
const pids = await getListeningProcessIds(port);
if (pids.length > 0 && (!ownedPid || !pids.includes(String(ownedPid)))) {
await terminateOrphanedProcessIds(port, pids);
if (process.platform === 'win32') {
await waitForPortFree(port, 10000);
}
return null;
const pids = await getListeningProcessIds(port).catch(() => [] as string[]);

if (pids.length > 0 && (!ownedPid || !pids.includes(String(ownedPid)))) {
// Something non-owned is listening — probe before killing so we can
// adopt an externally-managed gateway (e.g. systemd) instead of
// entering a kill-restart loop.
const ready = await probeGatewayReady(port, 5000);
if (ready) {
logger.info(`Adopting existing external gateway on port ${port}`);
return { port };
}
Comment on lines +293 to +297
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P1] Validate external gateway identity before adopting listener

This adoption branch trusts any process on the port that answers the WebSocket readiness probe, but that probe is unauthenticated and only checks for a connect.challenge event; once adopted, the startup flow proceeds to connect() and sends the gateway auth token in the handshake. In practice, a local process that binds the port and mimics the challenge can now receive credentials, whereas the previous behavior killed non-owned listeners first. Please require a stronger identity check (for example, verified ownership/service identity or authenticated probe) before returning { port } here.

Useful? React with 👍 / 👎.


// Not a healthy gateway — safe to terminate.
await terminateOrphanedProcessIds(port, pids);
if (process.platform === 'win32') {
await waitForPortFree(port, 10000);
}
} catch (err) {
logger.warn('Error checking for existing process on port:', err);
return null;
}

// Either no process is listening or it's our own — probe as before.
const ready = await probeGatewayReady(port, 5000);
return ready ? { port } : null;
} catch {
Expand Down
79 changes: 79 additions & 0 deletions tests/unit/gateway-supervisor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ const originalPlatform = process.platform;
// Shared mock functions for this test file. They are created inside
// vi.hoisted() so that they already exist when the vi.mock() factories in this
// file reference them (Vitest moves vi.mock() calls to the top of the module).
const {
mockExec,
mockCreateServer,
mockProbeGatewayReady,
} = vi.hoisted(() => ({
mockExec: vi.fn(),
mockCreateServer: vi.fn(),
mockProbeGatewayReady: vi.fn(),
}));

vi.mock('electron', () => ({
Expand All @@ -34,6 +36,10 @@ vi.mock('net', () => ({
createServer: mockCreateServer,
}));

// Replace the gateway readiness probe with a controllable mock so each test
// decides whether the listener on the port looks like a ready gateway.
vi.mock('@electron/gateway/ws-client', () => ({
probeGatewayReady: mockProbeGatewayReady,
}));

class MockUtilityChild extends EventEmitter {
pid?: number;
kill = vi.fn();
Expand Down Expand Up @@ -74,6 +80,8 @@ describe('gateway supervisor process cleanup', () => {
},
};
});

mockProbeGatewayReady.mockResolvedValue(false);
});

afterEach(() => {
Expand Down Expand Up @@ -124,6 +132,9 @@ describe('gateway supervisor process cleanup', () => {
return {} as never;
});

// Probe fails — orphaned process is not a healthy gateway
mockProbeGatewayReady.mockResolvedValue(false);

const result = await findExistingGatewayProcess({ port: 18789 });
expect(result).toBeNull();

Expand All @@ -134,4 +145,72 @@ describe('gateway supervisor process cleanup', () => {
);
expect(mockCreateServer).toHaveBeenCalled();
});

// Verifies the adoption path: a non-owned listener that answers the readiness
// probe is returned as-is and nothing is done to terminate it.
it('adopts an external gateway when WebSocket probe succeeds', async () => {
  setPlatform('linux');
  // Stub process.kill so a signal-based termination attempt is both harmless
  // and observable.
  // NOTE(review): the sibling systemd test stubs process.kill too, which
  // suggests termination signals are sent through it — confirm.
  const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);

  try {
    // Forget calls recorded by earlier tests so the negative assertions below
    // only describe this test's behavior.
    mockExec.mockClear();

    const { findExistingGatewayProcess } = await import('@electron/gateway/supervisor');

    mockExec.mockImplementation((cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => {
      if (cmd.includes('lsof')) {
        // A single non-owned PID is listening on the port.
        cb(null, '5555\n');
        return {} as never;
      }
      cb(null, '');
      return {} as never;
    });

    // Healthy external gateway is running
    mockProbeGatewayReady.mockResolvedValue(true);

    const result = await findExistingGatewayProcess({ port: 18789 });
    expect(result).toEqual({ port: 18789 });

    // Should NOT have attempted to kill the process
    expect(mockExec).not.toHaveBeenCalledWith(
      expect.stringContaining('SIGTERM'),
      expect.anything(),
      expect.anything(),
    );
    // On Linux the termination path starts by querying/stopping the systemd
    // user service, so any systemctl call means adoption was bypassed.
    expect(mockExec).not.toHaveBeenCalledWith(
      expect.stringContaining('systemctl'),
      expect.anything(),
      expect.anything(),
    );
    expect(killSpy).not.toHaveBeenCalled();
  } finally {
    // Always put the real process.kill back, even when an assertion fails.
    killSpy.mockRestore();
  }
});

// Verifies the Linux cleanup path: when the listener is not a ready gateway,
// the systemd user service is queried and stopped as part of terminating it.
it('stops systemd service before killing orphan on Linux', { timeout: 15000 }, async () => {
  vi.useFakeTimers();
  const killSpy = vi.spyOn(process, 'kill').mockImplementation(() => true);

  try {
    setPlatform('linux');
    const { findExistingGatewayProcess } = await import('@electron/gateway/supervisor');

    mockExec.mockImplementation((cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => {
      if (cmd.includes('lsof')) {
        // A single non-owned PID is listening on the port.
        cb(null, '7777\n');
        return {} as never;
      }
      if (cmd.includes('systemctl --user is-active')) {
        // Report the user service as running so the stop command is issued.
        cb(null, 'active');
        return {} as never;
      }
      cb(null, '');
      return {} as never;
    });

    // Probe fails — not a healthy gateway
    mockProbeGatewayReady.mockResolvedValue(false);

    const resultPromise = findExistingGatewayProcess({ port: 18789 });
    // Advance past all internal setTimeout delays
    await vi.advanceTimersByTimeAsync(10000);
    const result = await resultPromise;
    expect(result).toBeNull();

    expect(mockExec).toHaveBeenCalledWith(
      'systemctl --user is-active openclaw-gateway',
      expect.objectContaining({ timeout: 5000 }),
      expect.any(Function),
    );
    expect(mockExec).toHaveBeenCalledWith(
      'systemctl --user stop openclaw-gateway',
      expect.objectContaining({ timeout: 10000 }),
      expect.any(Function),
    );
  } finally {
    // Run cleanup even when an assertion throws, so fake timers and the
    // process.kill stub cannot leak into later tests.
    killSpy.mockRestore();
    vi.useRealTimers();
  }
});
});
Loading