Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions electron/gateway/startup-recovery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ const TRANSIENT_START_ERROR_PATTERNS: RegExp[] = [
/Port \d+ still occupied after \d+ms/i,
];

/**
 * Stderr patterns emitted when the gateway is already supervised by systemd.
 *
 * A match means the gateway cannot be started as a child process, so a
 * startup retry would only hit the same condition again. Matching is
 * case-insensitive.
 */
const SYSTEMD_CONFLICT_PATTERNS: RegExp[] = [/already running under systemd/i];

/**
 * Strips leading and trailing whitespace from a log line.
 *
 * @param value - Raw log line text.
 * @returns The same text without surrounding whitespace.
 */
function normalizeLogLine(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
Expand Down Expand Up @@ -75,6 +83,24 @@ export function isTransientGatewayStartError(error: unknown): boolean {
return TRANSIENT_START_ERROR_PATTERNS.some((pattern) => pattern.test(errorText));
}

/**
 * Tells whether a single piece of gateway stderr text reports that the
 * gateway is already supervised by systemd.
 *
 * When that is the case ClawX cannot own the process, and retrying startup
 * would only yield the same result.
 *
 * @param text - Stderr text to inspect; surrounding whitespace is ignored.
 * @returns `true` if the trimmed text matches any systemd-conflict pattern;
 *   `false` for empty or whitespace-only text, or when nothing matches.
 */
export function isSystemdConflictSignal(text: string): boolean {
  const line = text.trim();
  if (line.length === 0) {
    return false;
  }

  const matchesLine = (pattern: RegExp): boolean => pattern.test(line);
  return SYSTEMD_CONFLICT_PATTERNS.some(matchesLine);
}

/**
 * Scans the collected startup stderr lines for a systemd conflict.
 *
 * @param startupStderrLines - Stderr lines captured during gateway startup.
 * @returns `true` as soon as one line is recognised by
 *   `isSystemdConflictSignal`; `false` if none is (including an empty list).
 */
export function hasSystemdConflictSignal(startupStderrLines: string[]): boolean {
  const reportsConflict = (line: string): boolean => isSystemdConflictSignal(line);
  return startupStderrLines.some(reportsConflict);
}

export type GatewayStartupRecoveryAction = 'repair' | 'retry' | 'fail';

export function getGatewayStartupRecoveryAction(options: {
Expand All @@ -84,6 +110,13 @@ export function getGatewayStartupRecoveryAction(options: {
attempt: number;
maxAttempts: number;
}): GatewayStartupRecoveryAction {
// If the gateway reports it's already managed by systemd, retrying will not
// help. Fail immediately so the user gets a clear error state instead of
// a long retry loop.
if (hasSystemdConflictSignal(options.startupStderrLines)) {
return 'fail';
}

if (shouldAttemptConfigAutoRepair(
options.startupError,
options.startupStderrLines,
Expand Down
8 changes: 8 additions & 0 deletions electron/gateway/startup-stderr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ export function classifyGatewayStderrMessage(message: string): GatewayStderrClas
return { level: 'debug', normalized: msg };
}

// The gateway binary reports this when it detects a systemd supervisor.
// ClawX will fail fast rather than retry, so downgrade to debug to avoid
// flooding the log with repeated identical lines during the (brief) window
// before the process exits.
if (msg.includes('already running under systemd')) {
return { level: 'debug', normalized: msg };
}

return { level: 'warn', normalized: msg };
}

Expand Down
58 changes: 58 additions & 0 deletions tests/unit/gateway-startup-recovery.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
hasInvalidConfigFailureSignal,
isInvalidConfigSignal,
shouldAttemptConfigAutoRepair,
isSystemdConflictSignal,
hasSystemdConflictSignal,
getGatewayStartupRecoveryAction,
} from '@electron/gateway/startup-recovery';

Check failure on line 9 in tests/unit/gateway-startup-recovery.test.ts

View workflow job for this annotation

GitHub Actions / check

'getGatewayStartupRecoveryAction' is already defined

describe('gateway startup recovery heuristics', () => {
it('detects invalid-config signal from stderr lines', () => {
Expand Down Expand Up @@ -48,5 +51,60 @@
expect(isInvalidConfigSignal('Run: openclaw doctor --fix')).toBe(true);
expect(isInvalidConfigSignal('Gateway ready after 3 attempts')).toBe(false);
});

// Covers the systemd-conflict helpers (isSystemdConflictSignal,
// hasSystemdConflictSignal) and the way getGatewayStartupRecoveryAction
// reacts to a systemd-conflict line in the startup stderr.
describe('systemd conflict detection', () => {
it('detects already-running-under-systemd signal', () => {
// A timestamped, prefixed log line, the bare phrase, and an upper-case
// variant must all be recognised.
expect(isSystemdConflictSignal('2026-03-27T13:08:36.125+11:00 [gateway] already running under systemd; waiting 5000ms before retrying startup')).toBe(true);
expect(isSystemdConflictSignal('already running under systemd')).toBe(true);
expect(isSystemdConflictSignal('ALREADY RUNNING UNDER SYSTEMD')).toBe(true);
});

it('does not false-positive on unrelated messages', () => {
// Other failure messages and the empty string must not be classified as a
// systemd conflict.
expect(isSystemdConflictSignal('Gateway process exited (code=1)')).toBe(false);
expect(isSystemdConflictSignal('WebSocket closed before handshake')).toBe(false);
expect(isSystemdConflictSignal('')).toBe(false);
});

it('hasSystemdConflictSignal returns true when any line matches', () => {
// Only the middle line carries the signal; one match is enough.
const lines = [
'Starting gateway...',
'[gateway] already running under systemd; waiting 5000ms before retrying startup',
'Retrying...',
];
expect(hasSystemdConflictSignal(lines)).toBe(true);
});

it('hasSystemdConflictSignal returns false when no lines match', () => {
const lines = ['Gateway ready', 'Listening on port 18789'];
expect(hasSystemdConflictSignal(lines)).toBe(false);
});

it('getGatewayStartupRecoveryAction returns fail immediately on systemd conflict', () => {
const stderrLines = [
'[gateway] already running under systemd; waiting 5000ms before retrying startup',
];
// Should fail even on the first attempt and even for an error that would
// normally be classified as transient.
const action = getGatewayStartupRecoveryAction({
startupError: new Error('Gateway process exited before becoming ready (code=1)'),
startupStderrLines: stderrLines,
configRepairAttempted: false,
attempt: 1,
maxAttempts: 3,
});
expect(action).toBe('fail');
});

it('getGatewayStartupRecoveryAction still retries transient errors without systemd signal', () => {
// Same error and attempt counters as the previous case, but with no stderr
// lines: the expected action is 'retry' instead of 'fail'.
const action = getGatewayStartupRecoveryAction({
startupError: new Error('Gateway process exited before becoming ready (code=1)'),
startupStderrLines: [],
configRepairAttempted: false,
attempt: 1,
maxAttempts: 3,
});
expect(action).toBe('retry');
});
});
});

Loading