diff --git a/electron/gateway/startup-recovery.ts b/electron/gateway/startup-recovery.ts
index cc4c3b02e..012e55ac4 100644
--- a/electron/gateway/startup-recovery.ts
+++ b/electron/gateway/startup-recovery.ts
@@ -22,6 +22,14 @@ const TRANSIENT_START_ERROR_PATTERNS: RegExp[] = [
   /Port \d+ still occupied after \d+ms/i,
 ];
 
+/**
+ * Patterns that indicate the gateway is already managed by systemd and cannot
+ * be started as a child process. Retrying is pointless in this scenario.
+ */
+const SYSTEMD_CONFLICT_PATTERNS: RegExp[] = [
+  /already running under systemd/i,
+];
+
 function normalizeLogLine(value: string): string {
   return value.trim();
 }
@@ -75,6 +83,24 @@ export function isTransientGatewayStartError(error: unknown): boolean {
   return TRANSIENT_START_ERROR_PATTERNS.some((pattern) => pattern.test(errorText));
 }
 
+/**
+ * Returns true when the gateway stderr indicates it is already supervised by
+ * systemd. In that case ClawX cannot own the process and retrying startup
+ * will only produce the same result.
+ */
+export function isSystemdConflictSignal(text: string): boolean {
+  const normalized = text.trim();
+  if (!normalized) return false;
+  return SYSTEMD_CONFLICT_PATTERNS.some((pattern) => pattern.test(normalized));
+}
+
+/**
+ * Returns true when any startup stderr line signals a systemd conflict.
+ */
+export function hasSystemdConflictSignal(startupStderrLines: string[]): boolean {
+  return startupStderrLines.some(isSystemdConflictSignal);
+}
+
 export type GatewayStartupRecoveryAction = 'repair' | 'retry' | 'fail';
 
 export function getGatewayStartupRecoveryAction(options: {
@@ -84,6 +110,13 @@ export function getGatewayStartupRecoveryAction(options: {
   attempt: number;
   maxAttempts: number;
 }): GatewayStartupRecoveryAction {
+  // If the gateway reports it's already managed by systemd, retrying will not
+  // help. Fail immediately so the user gets a clear error state instead of
+  // a long retry loop.
+  if (hasSystemdConflictSignal(options.startupStderrLines)) {
+    return 'fail';
+  }
+
   if (shouldAttemptConfigAutoRepair(
     options.startupError,
     options.startupStderrLines,
diff --git a/electron/gateway/startup-stderr.ts b/electron/gateway/startup-stderr.ts
index 4be0a4615..c1adf1a90 100644
--- a/electron/gateway/startup-stderr.ts
+++ b/electron/gateway/startup-stderr.ts
@@ -38,6 +38,14 @@ export function classifyGatewayStderrMessage(message: string): GatewayStderrClas
     return { level: 'debug', normalized: msg };
   }
 
+  // The gateway binary reports this when it detects a systemd supervisor.
+  // ClawX fails fast instead of retrying, so log it at debug level to avoid
+  // repeating identical lines before the process exits. Matched
+  // case-insensitively, like SYSTEMD_CONFLICT_PATTERNS in startup-recovery.ts.
+  if (/already running under systemd/i.test(msg)) {
+    return { level: 'debug', normalized: msg };
+  }
+
   return { level: 'warn', normalized: msg };
 }
 
diff --git a/tests/unit/gateway-startup-recovery.test.ts b/tests/unit/gateway-startup-recovery.test.ts
index 2692385ce..764b4fcb3 100644
--- a/tests/unit/gateway-startup-recovery.test.ts
+++ b/tests/unit/gateway-startup-recovery.test.ts
@@ -3,6 +3,9 @@ import {
   hasInvalidConfigFailureSignal,
   isInvalidConfigSignal,
   shouldAttemptConfigAutoRepair,
+  isSystemdConflictSignal,
+  hasSystemdConflictSignal,
+  getGatewayStartupRecoveryAction,
 } from '@electron/gateway/startup-recovery';
 
 describe('gateway startup recovery heuristics', () => {
@@ -48,5 +51,60 @@ describe('gateway startup recovery heuristics', () => {
     expect(isInvalidConfigSignal('Run: openclaw doctor --fix')).toBe(true);
     expect(isInvalidConfigSignal('Gateway ready after 3 attempts')).toBe(false);
   });
+
+  describe('systemd conflict detection', () => {
+    it('detects already-running-under-systemd signal', () => {
+      expect(isSystemdConflictSignal('2026-03-27T13:08:36.125+11:00 [gateway] already running under systemd; waiting 5000ms before retrying startup')).toBe(true);
+      expect(isSystemdConflictSignal('already running under systemd')).toBe(true);
+      expect(isSystemdConflictSignal('ALREADY RUNNING UNDER SYSTEMD')).toBe(true);
+    });
+
+    it('does not false-positive on unrelated messages', () => {
+      expect(isSystemdConflictSignal('Gateway process exited (code=1)')).toBe(false);
+      expect(isSystemdConflictSignal('WebSocket closed before handshake')).toBe(false);
+      expect(isSystemdConflictSignal('')).toBe(false);
+    });
+
+    it('hasSystemdConflictSignal returns true when any line matches', () => {
+      const lines = [
+        'Starting gateway...',
+        '[gateway] already running under systemd; waiting 5000ms before retrying startup',
+        'Retrying...',
+      ];
+      expect(hasSystemdConflictSignal(lines)).toBe(true);
+    });
+
+    it('hasSystemdConflictSignal returns false when no lines match', () => {
+      const lines = ['Gateway ready', 'Listening on port 18789'];
+      expect(hasSystemdConflictSignal(lines)).toBe(false);
+    });
+
+    it('getGatewayStartupRecoveryAction returns fail immediately on systemd conflict', () => {
+      const stderrLines = [
+        '[gateway] already running under systemd; waiting 5000ms before retrying startup',
+      ];
+      // Should fail even on the first attempt and even for an error that would
+      // normally be classified as transient.
+      const action = getGatewayStartupRecoveryAction({
+        startupError: new Error('Gateway process exited before becoming ready (code=1)'),
+        startupStderrLines: stderrLines,
+        configRepairAttempted: false,
+        attempt: 1,
+        maxAttempts: 3,
+      });
+      expect(action).toBe('fail');
+    });
+
+    it('getGatewayStartupRecoveryAction still retries transient errors without systemd signal', () => {
+      const action = getGatewayStartupRecoveryAction({
+        startupError: new Error('Gateway process exited before becoming ready (code=1)'),
+        startupStderrLines: [],
+        configRepairAttempted: false,
+        attempt: 1,
+        maxAttempts: 3,
+      });
+      expect(action).toBe('retry');
+    });
+  });
 });
 