From 96dae9f3e0eeebcbb6477ec8ea2bc0908abb6b3e Mon Sep 17 00:00:00 2001 From: Octopus Date: Fri, 3 Apr 2026 09:53:30 +0800 Subject: [PATCH] fix: guard auth-profiles.json writes with gateway-compatible file lock (#758) ClawX's writeAuthProfiles() was doing a plain fs.writeFile() while the OpenClaw gateway uses a .lock sidecar file (acquireFileLock / withFileLock from file-lock-B4wypLkV.js) for exclusive access to the same file. This created a race where the gateway's locked READ-MODIFY-WRITE could overwrite a provider key that ClawX had just saved (or vice-versa), causing the second and subsequent providers to silently disappear from auth-profiles.json. Add withAuthProfilesLock() that mirrors the gateway's protocol: - Lock path: auth-profiles.json.lock (sidecar file) - Acquire: fs.open(lockPath, 'wx') -- atomic, throws EEXIST on collision - Lock body: JSON { pid, createdAt } for stale detection - Stale: PID no longer alive OR lock older than 30 s -> remove and retry - Release: fs.rm(lockPath, { force: true }) in a finally block Wrap the read-modify-write loops in saveProviderKeyToOpenClaw, saveOAuthTokenToOpenClaw, removeProviderKeyFromOpenClaw, and removeProviderFromOpenClaw with this lock so every Electron-side auth-profiles write participates in the same mutual-exclusion protocol as the gateway. Add three tests: lock sidecar is cleaned up on success, stale lock (dead PID) is auto-cleared, and concurrent saves within the same process are serialised so neither key is lost. --- electron/utils/openclaw-auth.ts | 178 ++++++++++++++++++++++++------- tests/unit/openclaw-auth.test.ts | 64 +++++++++++ 2 files changed, 201 insertions(+), 41 deletions(-) diff --git a/electron/utils/openclaw-auth.ts b/electron/utils/openclaw-auth.ts index b3037996f..7c8c29929 100644 --- a/electron/utils/openclaw-auth.ts +++ b/electron/utils/openclaw-auth.ts @@ -8,7 +8,7 @@ * equivalents could stall for 500 ms – 2 s+ per call, causing "Not * Responding" hangs. */ -import { access, mkdir, readFile, writeFile } from 'fs/promises'; +import { access, mkdir, open, readFile, rm, writeFile } from 'fs/promises'; import { constants, readdirSync, readFileSync, existsSync } from 'fs'; import { join } from 'path'; import { homedir } from 'os'; @@ -193,6 +193,94 @@ async function writeAuthProfiles(store: AuthProfilesStore, agentId = 'main'): Pr await writeJsonFile(getAuthProfilesPath(agentId), store); } +/** + * Check whether a process ID is still running. + * Returns false if the process is gone (ESRCH) or we get a definitive + * "no such process" signal; returns true on EPERM (process exists but + * we cannot signal it) or any other unexpected error (conservative). + */ +function isPidAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (err: unknown) { + return (err as NodeJS.ErrnoException).code === 'EPERM'; + } +} + +/** + * Run `fn` while holding the `.lock` sidecar file that the OpenClaw gateway + * uses to guard concurrent writes to auth-profiles.json. Both sides must + * participate in this protocol so their reads and writes are serialised. + * + * Protocol (mirrors node_modules/openclaw/dist/file-lock-B4wypLkV.js): + * lock path = `${auth-profiles.json}.lock` + * acquire = fs.open(lockPath, 'wx') — atomic EEXIST on collision + * lock body = JSON { pid, createdAt } — used for stale detection + * stale = PID no longer alive OR lock file older than 30 s + * release = fs.rm(lockPath, { force: true }) + */ +async function withAuthProfilesLock(agentId: string, fn: () => Promise): Promise { + const authPath = getAuthProfilesPath(agentId); + const lockPath = `${authPath}.lock`; + const staleMs = 30_000; + const maxAttempts = 10; + const baseDelayMs = 50; + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + let handle: Awaited> | null = null; + try { + // Ensure parent dir exists before trying to create the lock file + await ensureDir(join(authPath, '..')); + handle = await open(lockPath, 'wx'); + await handle.writeFile( + JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() }), + 'utf-8', + ); + await handle.close(); + handle = null; + + try { + return await fn(); + } finally { + await rm(lockPath, { force: true }).catch(() => {}); + } + } catch (err: unknown) { + if (handle) { + await handle.close().catch(() => {}); + handle = null; + } + if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err; + + // Lock file already exists — check if it is stale + const isStale = await (async (): Promise => { + try { + const raw = await readFile(lockPath, 'utf-8'); + const parsed = JSON.parse(raw) as { pid?: number; createdAt?: string }; + if (typeof parsed.pid === 'number' && !isPidAlive(parsed.pid)) return true; + if (typeof parsed.createdAt === 'string') { + const age = Date.now() - Date.parse(parsed.createdAt); + if (!Number.isFinite(age) || age > staleMs) return true; + } + return false; + } catch { + return true; // unreadable / malformed lock → treat as stale + } + })(); + + if (isStale) { + await rm(lockPath, { force: true }).catch(() => {}); + continue; // retry immediately after clearing stale lock + } + + if (attempt >= maxAttempts - 1) break; + await new Promise((resolve) => setTimeout(resolve, baseDelayMs * (attempt + 1))); + } + } + + throw new Error(`auth-profiles lock timeout for agent "${agentId}"`); +} + // ── Agent Discovery ────────────────────────────────────────────── async function discoverAgentIds(): Promise { @@ -376,29 +464,31 @@ export async function saveOAuthTokenToOpenClaw( if (agentIds.length === 0) agentIds.push('main'); for (const id of agentIds) { - const store = await readAuthProfiles(id); - const profileId = `${provider}:default`; - - store.profiles[profileId] = { - type: 'oauth', - provider, - access: token.access, - refresh: token.refresh, - expires: token.expires, - email: token.email, - projectId: token.projectId, - }; - - if (!store.order) store.order = {}; - if (!store.order[provider]) store.order[provider] = []; - if (!store.order[provider].includes(profileId)) { - store.order[provider].push(profileId); - } + await withAuthProfilesLock(id, async () => { + const store = await readAuthProfiles(id); + const profileId = `${provider}:default`; + + store.profiles[profileId] = { + type: 'oauth', + provider, + access: token.access, + refresh: token.refresh, + expires: token.expires, + email: token.email, + projectId: token.projectId, + }; + + if (!store.order) store.order = {}; + if (!store.order[provider]) store.order[provider] = []; + if (!store.order[provider].includes(profileId)) { + store.order[provider].push(profileId); + } - if (!store.lastGood) store.lastGood = {}; - store.lastGood[provider] = profileId; + if (!store.lastGood) store.lastGood = {}; + store.lastGood[provider] = profileId; - await writeAuthProfiles(store, id); + await writeAuthProfiles(store, id); + }); } console.log(`Saved OAuth token for provider "${provider}" to OpenClaw auth-profiles (agents: ${agentIds.join(', ')})`); } @@ -445,21 +535,23 @@ export async function saveProviderKeyToOpenClaw( if (agentIds.length === 0) agentIds.push('main'); for (const id of agentIds) { - const store = await readAuthProfiles(id); - const profileId = `${provider}:default`; + await withAuthProfilesLock(id, async () => { + const store = await readAuthProfiles(id); + const profileId = `${provider}:default`; - store.profiles[profileId] = { type: 'api_key', provider, key: apiKey }; + store.profiles[profileId] = { type: 'api_key', provider, key: apiKey }; - if (!store.order) store.order = {}; - if (!store.order[provider]) store.order[provider] = []; - if (!store.order[provider].includes(profileId)) { - store.order[provider].push(profileId); - } + if (!store.order) store.order = {}; + if (!store.order[provider]) store.order[provider] = []; + if (!store.order[provider].includes(profileId)) { + store.order[provider].push(profileId); + } - if (!store.lastGood) store.lastGood = {}; - store.lastGood[provider] = profileId; + if (!store.lastGood) store.lastGood = {}; + store.lastGood[provider] = profileId; - await writeAuthProfiles(store, id); + await writeAuthProfiles(store, id); + }); } console.log(`Saved API key for provider "${provider}" to OpenClaw auth-profiles (agents: ${agentIds.join(', ')})`); } @@ -475,10 +567,12 @@ export async function removeProviderKeyFromOpenClaw( if (agentIds.length === 0) agentIds.push('main'); for (const id of agentIds) { - const store = await readAuthProfiles(id); - if (removeProfileFromStore(store, `${provider}:default`, 'api_key')) { - await writeAuthProfiles(store, id); - } + await withAuthProfilesLock(id, async () => { + const store = await readAuthProfiles(id); + if (removeProfileFromStore(store, `${provider}:default`, 'api_key')) { + await writeAuthProfiles(store, id); + } + }); } console.log(`Removed API key for provider "${provider}" from OpenClaw auth-profiles (agents: ${agentIds.join(', ')})`); } @@ -491,10 +585,12 @@ export async function removeProviderFromOpenClaw(provider: string): Promise { + const store = await readAuthProfiles(id); + if (removeProfilesForProvider(store, provider)) { + await writeAuthProfiles(store, id); + } + }); } // 2. Remove from models.json (per-agent model registry used by pi-ai directly) diff --git a/tests/unit/openclaw-auth.test.ts b/tests/unit/openclaw-auth.test.ts index 6a08d7d40..af5f3cebe 100644 --- a/tests/unit/openclaw-auth.test.ts +++ b/tests/unit/openclaw-auth.test.ts @@ -562,3 +562,67 @@ describe('auth-backed provider discovery', () => { await expect(getActiveOpenClawProviders()).resolves.toEqual(new Set()); }); }); + +describe('auth-profiles file-lock protocol', () => { + beforeEach(async () => { + vi.resetModules(); + vi.restoreAllMocks(); + await rm(testHome, { recursive: true, force: true }); + await rm(testUserData, { recursive: true, force: true }); + }); + + it('removes the .lock sidecar file after a successful write', async () => { + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + const { saveProviderKeyToOpenClaw } = await import('@electron/utils/openclaw-auth'); + await saveProviderKeyToOpenClaw('moonshot', 'sk-moon', 'main'); + + const lockPath = join(testHome, '.openclaw', 'agents', 'main', 'agent', 'auth-profiles.json.lock'); + await expect(readFile(lockPath, 'utf8')).rejects.toThrow(); + + logSpy.mockRestore(); + }); + + it('clears a stale lock (dead PID) and proceeds normally', async () => { + const agentDir = join(testHome, '.openclaw', 'agents', 'main', 'agent'); + await mkdir(agentDir, { recursive: true }); + const lockPath = join(agentDir, 'auth-profiles.json.lock'); + + // Plant a lock file referencing a PID that cannot possibly be alive + await writeFile( + lockPath, + JSON.stringify({ pid: 999_999_999, createdAt: new Date().toISOString() }), + 'utf8', + ); + + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + const { saveProviderKeyToOpenClaw } = await import('@electron/utils/openclaw-auth'); + await saveProviderKeyToOpenClaw('anthropic', 'sk-ant', 'main'); + + const store = await readAuthProfiles('main'); + expect((store.profiles as Record)['anthropic:default'].key).toBe('sk-ant'); + + // Stale lock should have been removed + await expect(readFile(lockPath, 'utf8')).rejects.toThrow(); + + logSpy.mockRestore(); + }); + + it('concurrent writes to the same agent are serialised — no key is lost', async () => { + const logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + const { saveProviderKeyToOpenClaw } = await import('@electron/utils/openclaw-auth'); + + // Two simultaneous saves; without locking the second read sees an empty + // store and overwrites the first key when it writes back. + await Promise.all([ + saveProviderKeyToOpenClaw('openai', 'sk-openai', 'main'), + saveProviderKeyToOpenClaw('anthropic', 'sk-anthropic', 'main'), + ]); + + const store = await readAuthProfiles('main'); + const profiles = store.profiles as Record; + expect(profiles['openai:default']?.key).toBe('sk-openai'); + expect(profiles['anthropic:default']?.key).toBe('sk-anthropic'); + + logSpy.mockRestore(); + }); +});