diff --git a/electron/gateway/supervisor.ts b/electron/gateway/supervisor.ts
index 34173575..b1ebf420 100644
--- a/electron/gateway/supervisor.ts
+++ b/electron/gateway/supervisor.ts
@@ -194,11 +194,62 @@ async function getListeningProcessIds(port: number): Promise<string[]> {
   return [...new Set(stdout.trim().split(/\r?\n/).map((value) => value.trim()).filter(Boolean))];
 }
 
+/**
+ * Stops the `openclaw-gateway` systemd user service when it is active, so the
+ * service manager does not respawn the gateway after its process is killed.
+ *
+ * Does nothing off Linux or when `systemctl --user is-active` does not report
+ * `active`. Best-effort: failures are logged as warnings, never thrown.
+ */
+async function stopSystemdGatewayService(): Promise<void> {
+  if (process.platform !== 'linux') return;
+
+  try {
+    const cp = await import('child_process');
+
+    // Any exec error (including a non-zero exit) is treated as "not active".
+    const active = await new Promise<boolean>((resolve) => {
+      cp.exec(
+        'systemctl --user is-active openclaw-gateway',
+        { timeout: 5000 },
+        (err, stdout) => {
+          resolve(!err && stdout.trim() === 'active');
+        },
+      );
+    });
+
+    if (!active) return;
+
+    logger.info('Stopping systemd user service openclaw-gateway to prevent auto-respawn');
+    await new Promise<void>((resolve) => {
+      cp.exec(
+        'systemctl --user stop openclaw-gateway',
+        { timeout: 10000 },
+        (err) => {
+          if (err) {
+            logger.warn(`Failed to stop systemd gateway service: ${err.message}`);
+          } else {
+            logger.info('Successfully stopped systemd gateway service');
+          }
+          resolve();
+        },
+      );
+    });
+
+    // Fixed 2 s pause after the stop attempt (runs whether or not the stop succeeded).
+    await new Promise((resolve) => setTimeout(resolve, 2000));
+  } catch (err) {
+    logger.warn('Error while stopping systemd gateway service:', err);
+  }
+}
+
 async function terminateOrphanedProcessIds(port: number, pids: string[]): Promise<void> {
   logger.info(`Found orphaned process listening on port ${port} (PIDs: ${pids.join(', ')}), attempting to kill...`);
 
   if (process.platform === 'darwin') {
     await unloadLaunchctlGatewayService();
+  } else if (process.platform === 'linux') {
+    await stopSystemdGatewayService();
   }
 
   for (const pid of pids) {
@@ -242,19 +293,31 @@ export async function findExistingGatewayProcess(options: {
   const { port, ownedPid } = options;
 
   try {
-    try {
-      const pids = await getListeningProcessIds(port);
-      if (pids.length > 0 && (!ownedPid || !pids.includes(String(ownedPid)))) {
-        await terminateOrphanedProcessIds(port, pids);
-        if (process.platform === 'win32') {
-          await waitForPortFree(port, 10000);
-        }
-        return null;
+    // A failed port lookup is logged and treated as "nothing listening".
+    const pids = await getListeningProcessIds(port).catch((err: unknown) => {
+      logger.warn('Error checking for existing process on port:', err);
+      return [] as string[];
+    });
+
+    if (pids.length > 0 && (!ownedPid || !pids.includes(String(ownedPid)))) {
+      // Something non-owned is listening — probe before killing so we can
+      // adopt an externally-managed gateway (e.g. systemd) instead of
+      // entering a kill-restart loop.
+      const ready = await probeGatewayReady(port, 5000);
+      if (ready) {
+        logger.info(`Adopting existing external gateway on port ${port}`);
+        return { port };
+      }
+
+      // Not a healthy gateway — safe to terminate.
+      await terminateOrphanedProcessIds(port, pids);
+      if (process.platform === 'win32') {
+        await waitForPortFree(port, 10000);
       }
-    } catch (err) {
-      logger.warn('Error checking for existing process on port:', err);
+      return null;
     }
 
+    // Either no process is listening or it's our own — probe as before.
     const ready = await probeGatewayReady(port, 5000);
     return ready ? { port } : null;
   } catch {
diff --git a/tests/unit/gateway-supervisor.test.ts b/tests/unit/gateway-supervisor.test.ts
index e2b9e81d..786da823 100644
--- a/tests/unit/gateway-supervisor.test.ts
+++ b/tests/unit/gateway-supervisor.test.ts
@@ -6,9 +6,11 @@ const originalPlatform = process.platform;
 const {
   mockExec,
   mockCreateServer,
+  mockProbeGatewayReady,
 } = vi.hoisted(() => ({
   mockExec: vi.fn(),
   mockCreateServer: vi.fn(),
+  mockProbeGatewayReady: vi.fn(),
 }));
 
 vi.mock('electron', () => ({
@@ -34,6 +36,10 @@ vi.mock('net', () => ({
   createServer: mockCreateServer,
 }));
 
+vi.mock('@electron/gateway/ws-client', () => ({
+  probeGatewayReady: mockProbeGatewayReady,
+}));
+
 class MockUtilityChild extends EventEmitter {
   pid?: number;
   kill = vi.fn();
@@ -74,6 +80,8 @@ describe('gateway supervisor process cleanup', () => {
         },
       };
     });
+
+    mockProbeGatewayReady.mockResolvedValue(false);
   });
 
   afterEach(() => {
@@ -124,6 +132,9 @@ describe('gateway supervisor process cleanup', () => {
       return {} as never;
     });
 
+    // Probe fails — orphaned process is not a healthy gateway
+    mockProbeGatewayReady.mockResolvedValue(false);
+
     const result = await findExistingGatewayProcess({ port: 18789 });
 
     expect(result).toBeNull();
@@ -134,4 +145,77 @@ describe('gateway supervisor process cleanup', () => {
     );
     expect(mockCreateServer).toHaveBeenCalled();
   });
+
+  it('adopts an external gateway when WebSocket probe succeeds', async () => {
+    setPlatform('linux');
+    const { findExistingGatewayProcess } = await import('@electron/gateway/supervisor');
+
+    mockExec.mockImplementation((cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => {
+      if (cmd.includes('lsof')) {
+        cb(null, '5555\n');
+        return {} as never;
+      }
+      cb(null, '');
+      return {} as never;
+    });
+
+    // Healthy external gateway is running
+    mockProbeGatewayReady.mockResolvedValue(true);
+
+    const result = await findExistingGatewayProcess({ port: 18789 });
+    expect(result).toEqual({ port: 18789 });
+
+    // Should NOT have attempted to kill the process
+    expect(mockExec).not.toHaveBeenCalledWith(
+      expect.stringContaining('SIGTERM'),
+      expect.anything(),
+      expect.anything(),
+    );
+  });
+
+  it('stops systemd service before killing orphan on Linux', { timeout: 15000 }, async () => {
+    vi.useFakeTimers();
+    try {
+      vi.spyOn(process, 'kill').mockImplementation(() => true);
+      setPlatform('linux');
+      const { findExistingGatewayProcess } = await import('@electron/gateway/supervisor');
+
+      mockExec.mockImplementation((cmd: string, _opts: object, cb: (err: Error | null, stdout: string) => void) => {
+        if (cmd.includes('lsof')) {
+          cb(null, '7777\n');
+          return {} as never;
+        }
+        if (cmd.includes('systemctl --user is-active')) {
+          cb(null, 'active');
+          return {} as never;
+        }
+        cb(null, '');
+        return {} as never;
+      });
+
+      // Probe fails — not a healthy gateway
+      mockProbeGatewayReady.mockResolvedValue(false);
+
+      const resultPromise = findExistingGatewayProcess({ port: 18789 });
+      // Advance past all internal setTimeout delays
+      await vi.advanceTimersByTimeAsync(10000);
+      const result = await resultPromise;
+      expect(result).toBeNull();
+
+      expect(mockExec).toHaveBeenCalledWith(
+        'systemctl --user is-active openclaw-gateway',
+        expect.objectContaining({ timeout: 5000 }),
+        expect.any(Function),
+      );
+      expect(mockExec).toHaveBeenCalledWith(
+        'systemctl --user stop openclaw-gateway',
+        expect.objectContaining({ timeout: 10000 }),
+        expect.any(Function),
+      );
+    } finally {
+      // Restore real timers even if an assertion above throws, so fake timers
+      // cannot leak into later tests.
+      vi.useRealTimers();
+    }
+  });
 });