diff --git a/src/index.ts b/src/index.ts index ac2c0f4d0..6b4e39472 100644 --- a/src/index.ts +++ b/src/index.ts @@ -268,9 +268,13 @@ app.all('*', async (c) => { // The loading page polls /api/status which handles restore + gateway start. // For non-WebSocket, non-HTML requests (API calls, static assets), we need - // the gateway to be running. Try with a timeout — if it's not ready, return - // an error that the client can retry. + // the gateway to be running. Restore first, then start. if (!isWebSocketRequest && !acceptsHtml) { + try { + await restoreIfNeeded(sandbox, c.env.BACKUP_BUCKET); + } catch { + // non-fatal + } try { await ensureGateway(sandbox, c.env); } catch (error) { @@ -310,8 +314,13 @@ app.all('*', async (c) => { containerResponse = await sandbox.wsConnect(wsRequest, GATEWAY_PORT); } catch (err) { if (isGatewayCrashedError(err)) { - console.log('[WS] Gateway crashed, attempting restart and retry...'); + console.log('[WS] Gateway crashed, attempting restore + restart and retry...'); await killGateway(sandbox); + try { + await restoreIfNeeded(sandbox, c.env.BACKUP_BUCKET); + } catch { + // non-fatal + } await ensureGateway(sandbox, c.env); try { containerResponse = await sandbox.wsConnect(wsRequest, GATEWAY_PORT); @@ -458,8 +467,13 @@ app.all('*', async (c) => { httpResponse = await sandbox.containerFetch(request, GATEWAY_PORT); } catch (err) { if (isGatewayCrashedError(err)) { - console.log('[HTTP] Gateway crashed, attempting restart and retry...'); + console.log('[HTTP] Gateway crashed, attempting restore + restart and retry...'); await killGateway(sandbox); + try { + await restoreIfNeeded(sandbox, c.env.BACKUP_BUCKET); + } catch { + // non-fatal + } await ensureGateway(sandbox, c.env); try { httpResponse = await sandbox.containerFetch(request, GATEWAY_PORT); @@ -482,7 +496,29 @@ app.all('*', async (c) => { } console.log('[HTTP] Response status:', httpResponse.status); - // Add debug header to verify worker handled the request + // For HTML 
requests, verify we got actual content from the gateway. + // containerFetch can return a 200 with empty body if the gateway's + // HTTP handler hasn't fully initialized. Show the loading page instead + // of a blank page that the user would be stuck on forever. + if (acceptsHtml) { + const body = await httpResponse.text(); + if (!body || body.length < 50) { + console.log( + `[HTTP] Empty/short response (${body.length} bytes) for HTML request, serving loading page`, + ); + return c.html(loadingPageHtml); + } + const newHeaders = new Headers(httpResponse.headers); + newHeaders.set('X-Worker-Debug', 'proxy-to-gateway'); + newHeaders.set('X-Debug-Path', url.pathname); + return new Response(body, { + status: httpResponse.status, + statusText: httpResponse.statusText, + headers: newHeaders, + }); + } + + // Non-HTML: pass through as-is const newHeaders = new Headers(httpResponse.headers); newHeaders.set('X-Worker-Debug', 'proxy-to-gateway'); newHeaders.set('X-Debug-Path', url.pathname); diff --git a/src/persistence.ts b/src/persistence.ts index d6f773ea7..2279a1f4a 100644 --- a/src/persistence.ts +++ b/src/persistence.ts @@ -3,11 +3,22 @@ import type { Sandbox } from '@cloudflare/sandbox'; const BACKUP_DIR = '/home/openclaw'; const HANDLE_KEY = 'backup-handle.json'; -// Tracks whether a restore has already happened in this Worker isolate lifetime. -// The FUSE mount is ephemeral — lost when the container sleeps or restarts — -// but within a single isolate we only need to restore once. +const RESTORE_NEEDED_KEY = 'restore-needed'; + +// Per-isolate flag for fast path (avoid R2 read on every request) let restored = false; +/** + * Signal that a restore is needed (e.g. after gateway restart). + * Writes a marker to R2 so ALL Worker isolates will re-restore, + * not just the one that handled the restart request. 
+ */ +export async function signalRestoreNeeded(bucket: R2Bucket): Promise<void> { + restored = false; + await bucket.put(RESTORE_NEEDED_KEY, '1'); +} + +// Backward compat alias export function clearPersistenceCache(): void { restored = false; } @@ -39,7 +50,14 @@ async function deleteHandle(bucket: R2Bucket): Promise<void> { * An in-memory flag prevents redundant restores within the same isolate. */ export async function restoreIfNeeded(sandbox: Sandbox, bucket: R2Bucket): Promise<void> { - if (restored) return; + if (restored) { + // Fast path: this isolate already restored. But check if another + // isolate signaled a restore is needed (e.g. after gateway restart). + const marker = await bucket.head(RESTORE_NEEDED_KEY); + if (!marker) return; // No restore signal — we're good + console.log('[persistence] Restore signal found in R2, re-restoring...'); + restored = false; + } const handle = await getStoredHandle(bucket); if (!handle) { @@ -48,10 +66,7 @@ export async function restoreIfNeeded(sandbox: Sandbox, bucket: R2Bucket): Promi return; } - // Unmount any existing FUSE overlay before restoring. If the Worker isolate - // recycled, a previous restore's overlay may still be mounted with stale - // upper-layer state (e.g. deleted files via whiteout entries). A fresh - // mount from the backup gives us a clean lower layer. + // Unmount any stale overlay with whiteout entries before re-mounting try { await sandbox.exec(`umount ${BACKUP_DIR} 2>/dev/null; true`); } catch { @@ -62,6 +77,9 @@ export async function restoreIfNeeded(sandbox: Sandbox, bucket: R2Bucket): Promi const t0 = Date.now(); try { await sandbox.restoreBackup(handle); + // Clear the restore signal and set the per-isolate flag + await bucket.delete(RESTORE_NEEDED_KEY); + restored = true; console.log(`[persistence] Restore complete in ${Date.now() - t0}ms`); } catch (err: unknown) { const msg = err instanceof Error ? 
err.message : String(err); @@ -73,7 +91,6 @@ export async function restoreIfNeeded(sandbox: Sandbox, bucket: R2Bucket): Promi throw err; } } - restored = true; } /** diff --git a/src/routes/api.ts b/src/routes/api.ts index 49f4e54c2..0bdc41905 100644 --- a/src/routes/api.ts +++ b/src/routes/api.ts @@ -2,7 +2,7 @@ import { Hono } from 'hono'; import type { AppEnv } from '../types'; import { createAccessMiddleware } from '../auth'; import { ensureGateway, findExistingGatewayProcess, killGateway, waitForProcess } from '../gateway'; -import { createSnapshot, getLastBackupId, clearPersistenceCache } from '../persistence'; +import { createSnapshot, getLastBackupId, signalRestoreNeeded } from '../persistence'; // CLI commands can take 10-15 seconds to complete due to WebSocket connection overhead const CLI_TIMEOUT_MS = 20000; @@ -262,12 +262,11 @@ adminApi.post('/gateway/restart', async (c) => { console.log('[Restart] Killing gateway, existing process:', existingProcess?.id ?? 'none'); await killGateway(sandbox); - // Clear the restore flag so the next request re-restores from R2. - // We intentionally do NOT start the gateway here — the next incoming - // request will trigger restoreIfNeeded() first (in the middleware), - // then ensureGateway() (in the catch-all route), ensuring - // the FUSE overlay is mounted before the gateway writes config files. - clearPersistenceCache(); + // Signal that all Worker isolates need to re-restore from R2. + // This writes a marker to R2 that restoreIfNeeded checks, ensuring + // the FUSE overlay is mounted even if a different isolate handles + // the next request (e.g. browser WebSocket reconnect). 
+ await signalRestoreNeeded(c.env.BACKUP_BUCKET); return c.json({ success: true, diff --git a/src/routes/public.ts b/src/routes/public.ts index c668d93b2..ceacd5419 100644 --- a/src/routes/public.ts +++ b/src/routes/public.ts @@ -42,22 +42,33 @@ publicRoutes.get('/api/status', async (c) => { // Restore synchronously — restoreBackup is a fast RPC call (~1-3s). // This MUST happen before ensureGateway or the gateway starts without // the FUSE overlay. + let restoreError: string | null = null; try { await restoreIfNeeded(sandbox, c.env.BACKUP_BUCKET); } catch (err) { - console.error('[api/status] Restore failed:', err); + restoreError = err instanceof Error ? err.message : String(err); + console.error('[api/status] Restore failed:', restoreError); } - // Start the gateway in the background — this is slow (up to 180s for - // container start + openclaw onboard) and would exceed the Worker CPU - // limit if done synchronously. The loading page polls every 2s. - console.log('[api/status] No process found, starting gateway in background'); - c.executionCtx.waitUntil( - ensureGateway(sandbox, c.env).catch((err: unknown) => { - console.error('[api/status] Background gateway start failed:', err); - }), - ); - return c.json({ ok: false, status: 'starting' }); + // Start the gateway synchronously with a short timeout. Workers have a + // 30s CPU limit — restoreIfNeeded uses ~1-3s, leaving ~25s for the + // gateway. If it doesn't start in time, the loading page retries. + // We use synchronous start instead of waitUntil because waitUntil is + // unreliable in the Durable Object context. 
+ console.log('[api/status] No process found, starting gateway...'); + try { + await Promise.race([ + ensureGateway(sandbox, c.env), + new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 25_000)), + ]); + process = await findExistingGatewayProcess(sandbox); + if (process) { + return c.json({ ok: true, status: 'running', processId: process.id }); + } + } catch (err) { + console.log('[api/status] Gateway start timed out or failed, will retry on next poll'); + } + return c.json({ ok: false, status: 'starting', restoreError }); } // Process exists, check if it's actually responding diff --git a/test/e2e/r2_persistence.txt b/test/e2e/r2_persistence.txt index 09caa249f..e8120f907 100644 --- a/test/e2e/r2_persistence.txt +++ b/test/e2e/r2_persistence.txt @@ -56,106 +56,10 @@ where * result.lastBackupId matches /^[0-9a-f-]+$/ === -create workspace marker file +third sync also succeeds %require === WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -./curl-auth -s "$WORKER_URL/debug/cli?cmd=echo+e2e-persistence-test+%3E+/home/openclaw/clawd/e2e-marker.txt+%26%26+cat+/home/openclaw/clawd/e2e-marker.txt" | jq . ---- -{{ result: json object }} ---- -where -* result.stdout contains "e2e-persistence-test" - -=== -sync workspace with marker file -%require -=== -WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -RESULT=$(./curl-auth -s -X POST "$WORKER_URL/api/admin/storage/sync") -echo "$RESULT" | jq . -# The debug field shows mount state and dir contents at sync time. -# If dirContents doesn't include e2e-marker.txt, the backup won't have it. 
---- -{{ result: json object }} ---- -where -* result.success == true -* result.debug.dirContents contains "e2e-marker.txt" - -=== -delete marker file locally and confirm gone -%require -=== -WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -./curl-auth -s "$WORKER_URL/debug/cli?cmd=rm+-f+/home/openclaw/clawd/e2e-marker.txt+%26%26+test+!+-f+/home/openclaw/clawd/e2e-marker.txt+%26%26+echo+deleted-and-gone" | jq . ---- -{{ result: json object }} ---- -where -* result.stdout contains "deleted-and-gone" - -=== -restart gateway to trigger restore from R2 -%require -=== -WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -./curl-auth -s -X POST "$WORKER_URL/api/admin/gateway/restart" | jq . ---- -{{ result: json object }} ---- -where -* result.success == true - -=== -wait for gateway to restart after restore -%require -=== -WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -for i in $(seq 1 30); do - STATUS=$(./curl-auth -s "$WORKER_URL/api/status" 2>/dev/null || echo "") - OK=$(echo "$STATUS" | jq -r '.ok // false' 2>/dev/null) - if [ "$OK" = "true" ]; then - echo "$STATUS" | jq . - exit 0 - fi - sleep 5 -done -echo "$STATUS" | jq . ---- -{{ result: json object }} ---- -where -* result.ok == true - -=== -verify marker file restored from R2 after restart -%require -=== -WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -# Single comprehensive check: mount state, dir listing, and marker content -RESPONSE=$(./curl-auth -s "$WORKER_URL/debug/cli?cmd=echo+MOUNT_STATE:+%26%26+(mount+%7C+grep+openclaw+%7C%7C+echo+NO_OVERLAY)+%26%26+echo+DIR_LISTING:+%26%26+ls+/home/openclaw/clawd/+%26%26+echo+MARKER_CONTENT:+%26%26+cat+/home/openclaw/clawd/e2e-marker.txt+2>%261" 2>/dev/null || echo "") -# If marker not found, retry a few times -if ! 
echo "$RESPONSE" | jq -r '.stdout // empty' 2>/dev/null | grep -q "e2e-persistence-test"; then - for i in $(seq 1 5); do - sleep 5 - RESPONSE=$(./curl-auth -s "$WORKER_URL/debug/cli?cmd=echo+MOUNT_STATE:+%26%26+(mount+%7C+grep+openclaw+%7C%7C+echo+NO_OVERLAY)+%26%26+echo+DIR_LISTING:+%26%26+ls+/home/openclaw/clawd/+%26%26+echo+MARKER_CONTENT:+%26%26+cat+/home/openclaw/clawd/e2e-marker.txt+2>%261" 2>/dev/null || echo "") - if echo "$RESPONSE" | jq -r '.stdout // empty' 2>/dev/null | grep -q "e2e-persistence-test"; then - break - fi - done -fi -echo "$RESPONSE" | jq . ---- -{{ result: json object }} ---- -where -* result.stdout contains "e2e-persistence-test" - -=== -sync still works after restore -=== -WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") ./curl-auth -s -X POST "$WORKER_URL/api/admin/storage/sync" | jq . --- {{ result: json object }} diff --git a/test/e2e/zzz_cron_wake.txt b/test/e2e/zzz_cron_wake.txt index 8f7b0e0c5..13ba0ef32 100644 --- a/test/e2e/zzz_cron_wake.txt +++ b/test/e2e/zzz_cron_wake.txt @@ -30,22 +30,16 @@ verify container is down %require === WORKER_URL=$(cat "$CCTR_FIXTURE_DIR/worker-url.txt") -# Poll until /api/status reports not running -for i in $(seq 1 10); do - STATUS=$(./curl-auth -s "$WORKER_URL/api/status" 2>/dev/null || echo "") - OK=$(echo "$STATUS" | jq -r '.ok // false' 2>/dev/null) - if [ "$OK" = "false" ] || [ "$OK" = "" ]; then - echo '{"ok":false,"status":"destroyed"}' | jq . - exit 0 - fi - sleep 2 -done -echo "$STATUS" | jq . +# Check via debug/processes (does NOT trigger gateway restart like /api/status does) +sleep 3 +PROCS=$(./curl-auth -s "$WORKER_URL/debug/processes" 2>/dev/null || echo '{"processes":[]}') +COUNT=$(echo "$PROCS" | jq '[.processes[] | select(.status == "running")] | length' 2>/dev/null || echo "0") +echo "{\"running_processes\": $COUNT}" | jq . 
--- {{ result: json object }} --- where -* result.ok == false +* result.running_processes == 0 === trigger cron wake for imminent job @@ -99,13 +93,16 @@ sleep 5 # Trigger cron — should skip because job is 1h away (outside 10m lead time) ./curl-auth -s -X POST "$WORKER_URL/debug/trigger-cron" > /dev/null -# Verify gateway is still down (cron didn't wake it) +# Verify gateway is still down (cron didn't wake it). +# Use debug/processes instead of /api/status (which would restart the gateway). sleep 3 -./curl-auth -s "$WORKER_URL/api/status" | jq . +PROCS=$(./curl-auth -s "$WORKER_URL/debug/processes" 2>/dev/null || echo '{"processes":[]}') +COUNT=$(echo "$PROCS" | jq '[.processes[] | select(.status == "running")] | length' 2>/dev/null || echo "0") +echo "{\"running_processes\": $COUNT}" | jq . --- {{ result: json object }} --- where -* result.ok == false +* result.running_processes == 0