diff --git a/.bumpy/fix-no-tty-session.md b/.bumpy/fix-no-tty-session.md new file mode 100644 index 00000000..0aee7fce --- /dev/null +++ b/.bumpy/fix-no-tty-session.md @@ -0,0 +1,5 @@ +--- +varlock: patch +--- + +fix biometric session scoping for non-TTY processes diff --git a/.bumpy/ts-check.md b/.bumpy/ts-check.md new file mode 100644 index 00000000..c38d9223 --- /dev/null +++ b/.bumpy/ts-check.md @@ -0,0 +1,5 @@ +--- +env-spec-language: none +--- + +fix ts check issue diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 502a63ae..ef4e6de3 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -49,7 +49,7 @@ jobs: - name: ESLint run: bun run lint - name: TypeScript type check - run: bun run typecheck + run: bun run typecheck:all - name: Build libraries run: bun run build:libs - name: Run tests diff --git a/AGENTS.md b/AGENTS.md index 453fa8da..c6d700da 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,11 +35,6 @@ - This monorepo uses **bumpy** (`@varlock/bumpy`) for version management - Changeset files live in `.bumpy/` and are created with `bunx @varlock/bumpy add` (or `bun run bumpy:add`) - Standard bump types: `major`, `minor`, `patch` -- **Isolated bump types**: `minor-isolated` and `patch-isolated` are natively supported - - These suppress dependency propagation — the package itself gets bumped but dependents are **not** automatically bumped - - Use **`minor-isolated`** for minor bumps that don't affect the library API consumed by dependents (e.g., CLI-only features in `varlock` that plugins/integrations don't depend on). This is the most common use case — because all packages are still on `0.x`, `^0.y.z` ranges treat minor bumps as out-of-range, which would otherwise cascade bumps to all dependents. 
- - `patch-isolated` exists but is rarely needed — patch bumps on `0.x` stay within `^` ranges and don't cascade - - `major-isolated` is intentionally **not** supported (major bumps must propagate to keep semver ranges valid) - Non-interactive changeset creation (for CI/AI): `bumpy add --packages "pkg:minor" --message "description" --name "changeset-name"` - Bump files are only required when publishable packages have changed (based on `changedFilePatterns` in `.bumpy/_config.json`). Changes to CI workflows, root config files, scripts, docs, etc. do **not** require a bump file — bumpy's pre-push hook will not block in that case. diff --git a/packages/encryption-binary-rust/src/ipc.rs b/packages/encryption-binary-rust/src/ipc.rs index 56f4af47..57b51571 100644 --- a/packages/encryption-binary-rust/src/ipc.rs +++ b/packages/encryption-binary-rust/src/ipc.rs @@ -129,8 +129,8 @@ impl IpcServer { continue; } - // Get peer TTY identity - let tty_id = get_peer_tty_id(&stream); + // Get peer session identity + let tty_id = get_peer_session_id(&stream); std::thread::spawn(move || { handle_client(stream, handler, on_activity, running, tty_id); @@ -398,10 +398,10 @@ fn verify_unix_client(_stream: &UnixStream) -> bool { true } -// ── Peer TTY identity (Linux) ──────────────────────────────────── +// ── Peer session identity (Linux) ─────────────────────────────── #[cfg(target_os = "linux")] -fn get_peer_tty_id(stream: &UnixStream) -> Option { +fn get_peer_session_id(stream: &UnixStream) -> Option { use nix::sys::socket::{getsockopt, sockopt::PeerCredentials}; use std::os::fd::AsFd; @@ -412,19 +412,15 @@ fn get_peer_tty_id(stream: &UnixStream) -> Option { return None; } - // Read the process's controlling terminal from /proc - get_tty_for_pid(pid as u32) + // Prefer TTY-based identity, fall back to process tree + get_tty_session_id(pid as u32) + .or_else(|| get_ptree_session_id(pid as u32)) } +/// TTY-based session identity: tty device + session leader start time. 
#[cfg(target_os = "linux")] -fn get_tty_for_pid(pid: u32) -> Option { - // Read /proc//stat to get the tty_nr field (field 7, 0-indexed 6) - let stat = std::fs::read_to_string(format!("/proc/{pid}/stat")).ok()?; - - // The stat line format is: pid (comm) state ppid pgrp session tty_nr ... - // comm can contain spaces and parens, so find the last ')' first - let after_comm = stat.rfind(')')? + 2; - let fields: Vec<&str> = stat[after_comm..].split_whitespace().collect(); +fn get_tty_session_id(pid: u32) -> Option { + let fields = parse_proc_stat(pid)?; // After the closing paren: state(0) ppid(1) pgrp(2) session(3) tty_nr(4) let tty_nr: u32 = fields.get(4)?.parse().ok()?; @@ -443,20 +439,61 @@ fn get_tty_for_pid(pid: u32) -> Option { let minor = (tty_nr & 0xff) | ((tty_nr >> 12) & 0xfff00); let tty_name = format!("tty{major}:{minor}"); - Some(format!("{tty_name}:{start_time}")) + Some(format!("tty:{tty_name}:{start_time}")) } +/// Process-tree-based session identity for non-TTY processes. +/// Mirrors the macOS Swift daemon logic: walks the ancestry chain up to PID 1, +/// then uses the grandchild of the root as a stable scope key. #[cfg(target_os = "linux")] -fn get_process_start_time(pid: u32) -> Option { +fn get_ptree_session_id(pid: u32) -> Option { + let mut chain: Vec = vec![pid]; + let mut current = pid; + + for _ in 0..64 { + let ppid = get_parent_pid(current)?; + if ppid <= 1 { + break; + } + chain.push(ppid); + current = ppid; + } + + // Need at least 4 levels for a meaningful intermediate ancestor + if chain.len() < 4 { + return None; + } + + let scope_pid = chain[chain.len() - 3]; + let start_time = get_process_start_time(scope_pid).unwrap_or(0); + Some(format!("ptree:{scope_pid}:{start_time}")) +} + +/// Parse /proc//stat and return the fields after the comm closing paren. +#[cfg(target_os = "linux")] +fn parse_proc_stat(pid: u32) -> Option> { let stat = std::fs::read_to_string(format!("/proc/{pid}/stat")).ok()?; let after_comm = stat.rfind(')')? 
+ 2; - let fields: Vec<&str> = stat[after_comm..].split_whitespace().collect(); + Some(stat[after_comm..].split_whitespace().map(|s| s.to_string()).collect()) +} + +/// Get the PPID for a given process from /proc. +#[cfg(target_os = "linux")] +fn get_parent_pid(pid: u32) -> Option { + let fields = parse_proc_stat(pid)?; + // After comm: state(0) ppid(1) + fields.get(1)?.parse().ok() +} + +#[cfg(target_os = "linux")] +fn get_process_start_time(pid: u32) -> Option { + let fields = parse_proc_stat(pid)?; // Field 19 after comm is starttime (in clock ticks since boot) fields.get(19)?.parse().ok() } #[cfg(not(any(target_os = "linux", target_os = "windows")))] -fn get_peer_tty_id(_stream: &UnixStream) -> Option { +fn get_peer_session_id(_stream: &UnixStream) -> Option { None } @@ -728,3 +765,59 @@ fn send_windows_response( Ok(()) } + +// ── Tests ─────────────────────────────────────────────────────── + +#[cfg(test)] +#[cfg(target_os = "linux")] +mod tests { + use super::*; + + #[test] + fn test_parse_proc_stat_self() { + let fields = parse_proc_stat(std::process::id()).expect("should parse own /proc/stat"); + // Should have at least 20 fields (we read up to field 19 for starttime) + assert!(fields.len() >= 20, "expected >=20 fields, got {}", fields.len()); + // Field 0 is state (single char like R, S, etc.) + assert_eq!(fields[0].len(), 1); + // Field 1 is ppid (should be > 0) + let ppid: u32 = fields[1].parse().expect("ppid should be a number"); + assert!(ppid > 0); + } + + #[test] + fn test_get_parent_pid() { + let ppid = get_parent_pid(std::process::id()).expect("should get own ppid"); + assert!(ppid > 1, "test process ppid should be > 1"); + } + + #[test] + fn test_get_process_start_time() { + let st = get_process_start_time(std::process::id()).expect("should get own start time"); + assert!(st > 0); + } + + #[test] + fn test_get_ptree_session_id_self() { + // The test runner process should have a deep enough chain + // (cargo test → test binary → ... 
→ init), but the exact depth + // depends on the environment. Just verify it returns Some or None + // without panicking, and if Some, has the right format. + if let Some(id) = get_ptree_session_id(std::process::id()) { + assert!(id.starts_with("ptree:"), "expected ptree: prefix, got {id}"); + let parts: Vec<&str> = id.split(':').collect(); + assert_eq!(parts.len(), 3, "expected ptree:pid:starttime, got {id}"); + let _pid: u32 = parts[1].parse().expect("pid should be a number"); + let _st: u64 = parts[2].parse().expect("start time should be a number"); + } + } + + #[test] + fn test_get_tty_session_id_format() { + // May or may not have a TTY depending on how tests are run. + // Just verify it doesn't panic and has correct format if present. + if let Some(id) = get_tty_session_id(std::process::id()) { + assert!(id.starts_with("tty:"), "expected tty: prefix, got {id}"); + } + } +} diff --git a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/IPCServer.swift b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/IPCServer.swift index 7cc333a0..73db6c59 100644 --- a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/IPCServer.swift +++ b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/IPCServer.swift @@ -16,7 +16,7 @@ final class IPCServer { private var isRunning = false /// Handler for incoming messages. Second parameter is the peer's TTY identity (nil if unknown). - var messageHandler: ((_ message: [String: Any], _ ttyId: String?) -> [String: Any])? + var messageHandler: ((_ message: [String: Any], _ sessionId: String?) -> [String: Any])? /// Called after accept (new client) and after each successfully parsed JSON message. var onConnectionActivity: (() -> Void)? 
@@ -41,10 +41,21 @@ final class IPCServer { guard lockFD >= 0 else { throw IPCError.socketCreationFailed("Failed to create lock file: \(String(cString: strerror(errno)))") } - guard flock(lockFD, LOCK_EX | LOCK_NB) == 0 else { + if flock(lockFD, LOCK_EX | LOCK_NB) != 0 { + // Lock held by another process (possibly stuck in UE state). + // Delete the lock file and reopen — the new file gets a fresh inode + // so the old process's flock (tied to the old inode) doesn't block us. close(lockFD) - lockFD = -1 - throw IPCError.socketCreationFailed("Another daemon instance holds the lock") + unlink(lockPath) + lockFD = open(lockPath, O_CREAT | O_RDWR, 0o600) + guard lockFD >= 0 else { + throw IPCError.socketCreationFailed("Failed to recreate lock file: \(String(cString: strerror(errno)))") + } + guard flock(lockFD, LOCK_EX | LOCK_NB) == 0 else { + close(lockFD) + lockFD = -1 + throw IPCError.socketCreationFailed("Another daemon instance holds the lock") + } } // Clean up any stale socket file (safe now — we hold the lock) @@ -169,12 +180,12 @@ final class IPCServer { } } - // Resolve the peer's TTY identity once per connection - let ttyId: String? + // Resolve the peer's session identity once per connection + let sessionId: String? if let peerPid = getPeerPid(fd: fd) { - ttyId = getTtyIdentifier(forPid: peerPid) + sessionId = getSessionIdentifier(forPid: peerPid) } else { - ttyId = nil + sessionId = nil } while isRunning { @@ -206,7 +217,7 @@ final class IPCServer { onConnectionActivity?() // Handle message with the peer's TTY identity - let response = messageHandler?(json, ttyId) ?? ["error": "No handler"] + let response = messageHandler?(json, sessionId) ?? ["error": "No handler"] sendResponse(fd: fd, id: json["id"] as? 
String, response: response) } } diff --git a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/PeerIdentity.swift b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/PeerIdentity.swift index 9660dd7e..55c4d646 100644 --- a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/PeerIdentity.swift +++ b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/PeerIdentity.swift @@ -30,43 +30,94 @@ private func getProcessInfo(pid: pid_t) -> kinfo_proc? { return info } -/// Get a stable TTY identifier for a process. +/// Get the PPID for a given process. +private func getParentPid(pid: pid_t) -> pid_t? { + guard let info = getProcessInfo(pid: pid) else { return nil } + let ppid = info.kp_eproc.e_ppid + return ppid > 0 ? ppid : nil +} + +/// Get the start time (seconds since epoch) for a given process. +private func getStartTime(pid: pid_t) -> Int { + guard let info = getProcessInfo(pid: pid) else { return 0 } + return Int(info.kp_proc.p_starttime.tv_sec) +} + +/// Get a stable session identifier for a process. +/// +/// Prefers the controlling TTY (combined with the session leader's start time +/// to prevent TTY device reuse attacks). When no TTY is available (e.g., +/// processes spawned by VSCode/Cursor extensions, background agents, etc.), +/// walks up the process tree to find a stable ancestor for session scoping. /// -/// Combines the TTY device name with the session leader's start time. -/// The session leader is the shell process that owns the TTY (its PID equals -/// the session ID). Using its start time prevents TTY device reuse attacks -/// (where a new terminal is allocated the same /dev/ttysNNN after the old one closed). +/// The no-TTY algorithm finds the "app root" (ancestor whose PPID is 1/launchd), +/// then uses the grandchild of that app root in the peer's ancestry chain. 
+/// This scopes sessions narrowly — e.g., for Cursor, each Claude Code instance +/// gets its own session, while a malicious extension in the same window cannot +/// piggyback. If the tree is too shallow (peer is a direct child or grandchild +/// of the app root), returns nil (no caching, fresh auth each time). /// -/// Returns nil if the process has no controlling TTY (detached, CI, etc). -func getTtyIdentifier(forPid pid: pid_t) -> String? { +/// Returns nil if no stable identity can be determined. +func getSessionIdentifier(forPid pid: pid_t) -> String? { guard let info = getProcessInfo(pid: pid) else { return nil } + // e_tdev is dev_t (Int32). NODEV is -1 in signed representation + // (0xFFFFFFFF unsigned). Comparing Int32(-1) != UInt32.max is true in + // Swift's BinaryInteger comparison, so we must compare in the same type. let ttyDev = info.kp_eproc.e_tdev - // NODEV (0xFFFFFFFF) or 0 means no controlling tty - guard ttyDev != UInt32.max, ttyDev != 0 else { return nil } - - // Convert device number to name (e.g., "ttys003") - guard let namePtr = devname(dev_t(ttyDev), S_IFCHR) else { return nil } - let ttyName = String(cString: namePtr) - - // Get the session leader's start time for uniqueness. - // getsid() returns the session leader PID (the shell that owns the TTY), - // which is stable across all processes launched from the same terminal. - // (e_tpgid is the *foreground process group*, which changes on every command.) 
- let sessionLeaderPid = getsid(pid) - var startTimestamp: Int = 0 - - if sessionLeaderPid > 0, let leaderInfo = getProcessInfo(pid: sessionLeaderPid) { - startTimestamp = Int(leaderInfo.kp_proc.p_starttime.tv_sec) + let hasTty = ttyDev > 0 + + if hasTty { + // TTY-based identity: device name + session leader start time + guard let namePtr = devname(dev_t(ttyDev), S_IFCHR) else { return nil } + let ttyName = String(cString: namePtr) + + let sessionLeaderPid = getsid(pid) + var startTimestamp: Int = 0 + if sessionLeaderPid > 0, let leaderInfo = getProcessInfo(pid: sessionLeaderPid) { + startTimestamp = Int(leaderInfo.kp_proc.p_starttime.tv_sec) + } + if startTimestamp == 0 { + startTimestamp = Int(info.kp_proc.p_starttime.tv_sec) + } + + return "tty:\(ttyName):\(startTimestamp)" } - // If we couldn't get the session leader start time, fall back to the - // connecting process's own start time (less ideal but still unique per session) - if startTimestamp == 0 { - startTimestamp = Int(info.kp_proc.p_starttime.tv_sec) + // No TTY — walk up the process tree to find a scoping ancestor. + // + // Build the ancestry chain from the peer up to (but not including) PID 1. + // Example chain for Claude in Cursor: + // [node/bun, zsh, claude, extension-host, Cursor] + // indices: 0 1 2 3 4 + // + // The last element is the "app root" (PPID=1). + // We use the element at index (count - 3) — the grandchild of the app root. + // This gives us per-tool scoping (e.g., the Claude binary), which is narrow + // enough that other extensions can't piggyback, but stable across multiple + // commands spawned by that tool. + // + // If the chain is too short (< 4 elements), we can't determine a stable + // intermediate ancestor, so we return nil (no caching). 
+ + var chain: [pid_t] = [pid] + var current = pid + // Walk up with a depth limit to avoid infinite loops + for _ in 0..<64 { + guard let ppid = getParentPid(pid: current), ppid > 1 else { break } + chain.append(ppid) + current = ppid } - return "\(ttyName):\(startTimestamp)" + // Need at least 4 levels: peer → scope-target → app-child → app-root + // so that scope-target is a meaningful intermediate process + guard chain.count >= 4 else { return nil } + + // The grandchild of the app root: 2 levels below the last element + let scopePid = chain[chain.count - 3] + let startTime = getStartTime(pid: scopePid) + + return "ptree:\(scopePid):\(startTime)" } // MARK: - Process Verification diff --git a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/SessionManager.swift b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/SessionManager.swift index fc2e9b8c..5049a9c0 100644 --- a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/SessionManager.swift +++ b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/SessionManager.swift @@ -2,23 +2,30 @@ import Foundation import LocalAuthentication import AppKit -/// Manages biometric authentication sessions for the daemon, scoped per-TTY. +/// Manages biometric authentication sessions for the daemon, scoped per-session. /// -/// Each terminal must independently authenticate via Touch ID. This prevents -/// rogue processes in other terminals from piggybacking on an existing session. +/// Each terminal or parent application must independently authenticate via +/// Touch ID. This prevents rogue processes from piggybacking on an existing +/// session. Sessions are identified by TTY device (for terminal processes) +/// or by a stable ancestor PID (for GUI-spawned processes like VSCode extensions). /// /// Biometric reuse timeout is handled by macOS via `touchIDAuthenticationAllowableReuseDuration`. 
-/// This manager handles per-TTY scoping, explicit invalidation (lock command), +/// This manager handles per-session scoping, explicit invalidation (lock command), /// and system events (sleep, screen lock). final class SessionManager { /// How long Touch ID stays unlocked per terminal before re-prompting (seconds). /// Passed to macOS via `touchIDAuthenticationAllowableReuseDuration`. static let sessionTimeout: TimeInterval = 300 // 5 minutes + /// Max time to wait for evaluatePolicy (biometric prompt) before giving up. + /// Prevents the daemon from hanging forever if the prompt is dismissed oddly + /// or the Secure Enclave stops responding. + static let biometricTimeoutSeconds: TimeInterval = 60 + /// How long the daemon stays alive with no connections at all static let daemonInactivityTimeout: TimeInterval = 1800 // 30 minutes - /// Per-TTY cached LAContext (macOS owns the timeout via reuse duration) + /// Per-session cached LAContext (macOS owns the timeout via reuse duration) private var contexts: [String: LAContext] = [:] private let queue = DispatchQueue(label: "dev.varlock.session") @@ -38,22 +45,20 @@ final class SessionManager { // MARK: - Public API - /// Get or create an authenticated LAContext for the given TTY. - /// On first call per TTY, triggers Touch ID. Subsequent calls within the + /// Get or create an authenticated LAContext for the given session. + /// On first call per session, triggers Touch ID. Subsequent calls within the /// reuse duration return the cached context without re-prompting. /// - /// Processes without a controlling TTY (detached, background, etc.) always - /// require fresh authentication — they never share or cache sessions, since - /// there's no stable identity to scope the session to. - func getAuthenticatedContext(ttyId: String?) throws -> LAContext { + /// Processes with no identifiable session always require fresh authentication. + func getAuthenticatedContext(sessionId: String?) 
throws -> LAContext { return try queue.sync { - // For processes with a TTY, check for cached context - if let key = ttyId, let context = contexts[key] { + // Check for cached context from a previous auth in this session + if let key = sessionId, let context = contexts[key] { resetDaemonTimer() return context } - // Need fresh auth (always for no-TTY, or first time for this TTY) + // Need fresh auth (first time for this session, or always for unidentifiable callers) let context = LAContext() context.touchIDAuthenticationAllowableReuseDuration = SessionManager.sessionTimeout @@ -80,16 +85,19 @@ final class SessionManager { semaphore.signal() } - semaphore.wait() + let waitResult = semaphore.wait(timeout: .now() + SessionManager.biometricTimeoutSeconds) + if waitResult == .timedOut { + context.invalidate() + throw EnclaveError.biometricFailed("Biometric prompt timed out after \(Int(SessionManager.biometricTimeoutSeconds))s") + } if let error = evalError { throw EnclaveError.biometricFailed(error.localizedDescription) } - // Only cache if the process has a TTY identity to scope the session to. - // No-TTY callers get a fresh context every time — there's no stable - // identity to prevent session sharing across unrelated processes. - if let key = ttyId { + // Only cache if the process has a session identity to scope to. + // Unidentifiable callers get a fresh context every time. + if let key = sessionId { contexts[key] = context } resetDaemonTimer() @@ -98,7 +106,7 @@ final class SessionManager { } } - /// Invalidate all TTY sessions (used by lock command, sleep/lock events). + /// Invalidate all sessions (used by lock command, sleep/lock events). func invalidateAllSessions() { queue.sync { for (_, context) in contexts { @@ -116,17 +124,17 @@ final class SessionManager { } } - /// Whether the given TTY has a cached session. - /// Always returns false for no-TTY callers (they never cache). + /// Whether the given session has a cached context. 
+ /// Always returns false for unidentifiable callers (they never cache). /// Note: the session may still re-prompt if macOS's reuse duration has expired. - func isSessionWarm(ttyId: String?) -> Bool { - guard let key = ttyId else { return false } + func isSessionWarm(sessionId: String?) -> Bool { + guard let key = sessionId else { return false } return queue.sync { return contexts[key] != nil } } - /// Whether any TTY has a cached session. + /// Whether any session has a cached context. func hasAnySessions() -> Bool { return queue.sync { return !contexts.isEmpty diff --git a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/main.swift b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/main.swift index cb64fa1a..65fb6d2c 100644 --- a/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/main.swift +++ b/packages/encryption-binary-swift/swift/Sources/VarlockEnclave/main.swift @@ -7,19 +7,24 @@ func jsonOutput(_ dict: [String: Any]) { guard let data = try? JSONSerialization.data(withJSONObject: dict), let str = String(data: data, encoding: .utf8) else { fputs("{\"error\":\"Failed to serialize output\"}\n", stderr) - exit(1) + _exit(1) } print(str) } func jsonError(_ message: String) -> Never { jsonOutput(["error": message]) - exit(1) + // Flush stdout since _exit() won't do it for us + fflush(stdout) + // Use _exit to skip framework cleanup — LocalAuthentication teardown can + // hang in the kernel (UE state) if the Secure Enclave is unresponsive. + _exit(1) } func jsonSuccess(_ result: [String: Any]) -> Never { jsonOutput(["ok": true].merging(result) { _, new in new }) - exit(0) + fflush(stdout) + _exit(0) } // MARK: - CLI Parsing @@ -174,7 +179,9 @@ case "daemon": if let pidPath = pidPath { try? FileManager.default.removeItem(atPath: pidPath) } - exit(0) + // Use _exit to skip framework cleanup — LocalAuthentication teardown + // can hang in the kernel (UE state) if Secure Enclave is unresponsive. 
+ _exit(0) } sessionManager.onDaemonTimeout = { @@ -185,8 +192,8 @@ case "daemon": sessionManager.noteIpcActivity() } - // Handle IPC messages (ttyId is resolved from the peer's controlling terminal) - server.messageHandler = { message, ttyId in + // Handle IPC messages (sessionId is resolved from the peer's TTY or process tree) + server.messageHandler = { message, sessionId in guard let action = message["action"] as? String else { return ["error": "Missing action"] } @@ -202,7 +209,7 @@ case "daemon": let keyId = (payload["keyId"] as? String) ?? defaultKeyId do { - let context = try sessionManager.getAuthenticatedContext(ttyId: ttyId) + let context = try sessionManager.getAuthenticatedContext(sessionId: sessionId) let decrypted = try SecureEnclaveManager.decrypt( payload: ciphertext, keyId: keyId, @@ -221,8 +228,8 @@ case "daemon": return [ "result": [ "pong": true, - "sessionWarm": sessionManager.isSessionWarm(ttyId: ttyId), - "ttyId": ttyId as Any, + "sessionWarm": sessionManager.isSessionWarm(sessionId: sessionId), + "sessionId": sessionId as Any, ], ] @@ -312,7 +319,7 @@ case "daemon": // Password reads require biometric gate do { - _ = try sessionManager.getAuthenticatedContext(ttyId: ttyId) + _ = try sessionManager.getAuthenticatedContext(sessionId: sessionId) } catch { return ["error": error.localizedDescription] } @@ -414,7 +421,8 @@ case "help", "--help", "-h": All output is JSON. Errors return {"error": "message"}. """ print(help) - exit(0) + fflush(stdout) + _exit(0) default: jsonError("Unknown command: \(command). 
Run with --help for usage.") diff --git a/packages/varlock/src/cli/commands/reveal.command.ts b/packages/varlock/src/cli/commands/reveal.command.ts index 9972cd91..4ae4f5eb 100644 --- a/packages/varlock/src/cli/commands/reveal.command.ts +++ b/packages/varlock/src/cli/commands/reveal.command.ts @@ -22,7 +22,8 @@ export const commandSpec = define({ path: { type: 'string', short: 'p', - description: 'Path to a specific .env file or directory to use as the entry point', + multiple: true, + description: 'Path to a specific .env file or directory to use as the entry point (can be specified multiple times)', }, env: { type: 'string', @@ -129,7 +130,7 @@ export const commandFn: TypedGunshiCommandFn = async (ctx) = const envGraph = await loadVarlockEnvGraph({ currentEnvFallback: ctx.values.env, - entryFilePath: ctx.values.path, + entryFilePaths: ctx.values.path, }); checkForSchemaErrors(envGraph); diff --git a/packages/varlock/src/lib/local-encrypt/daemon-client.ts b/packages/varlock/src/lib/local-encrypt/daemon-client.ts index a96a8001..0b94785d 100644 --- a/packages/varlock/src/lib/local-encrypt/daemon-client.ts +++ b/packages/varlock/src/lib/local-encrypt/daemon-client.ts @@ -20,12 +20,79 @@ import { getUserVarlockDir } from '../user-config-dir'; import { resolveNativeBinary } from './binary-resolver'; import type { KeychainItemMeta, KeychainItemRef } from './types'; +/** Timeout for daemon IPC messages that don't involve user interaction */ +const SEND_TIMEOUT_MS = 30_000; +/** + * Timeout for messages that may trigger biometric auth (Touch ID). + * Must exceed the Swift-side biometric timeout (60s) so the TS client + * doesn't kill the daemon while Touch ID is still waiting for the user. + * Killing mid-biometric can leave the process stuck in kernel UE state. 
+ */ +const BIOMETRIC_TIMEOUT_MS = 90_000; +/** Timeout for interactive messages (GUI dialogs for secret input, keychain picker) */ +const INTERACTIVE_TIMEOUT_MS = 5 * 60_000; +/** How long to wait for SIGTERM before escalating to SIGKILL */ +const KILL_GRACE_MS = 2_000; + function debug(msg: string) { if (process.env.VARLOCK_DEBUG) { process.stderr.write(`[varlock:daemon-client] ${msg}\n`); } } +/** + * Kill a daemon process, escalating from SIGTERM to SIGKILL if it doesn't + * die within KILL_GRACE_MS. Handles the case where the process is already dead. + * + * Returns true if the process is confirmed dead, false if it's stuck in an + * unkillable state (e.g. macOS UE/uninterruptible Secure Enclave wait). + * Callers should clean up state files and proceed regardless — a zombie + * with no socket file is effectively dead. + */ +function killDaemonProcess(pid: number): boolean { + try { + process.kill(pid, 'SIGTERM'); + } catch { + return true; // already dead + } + + // Poll briefly to see if SIGTERM was effective + const start = Date.now(); + while (Date.now() - start < KILL_GRACE_MS) { + try { + process.kill(pid, 0); + } catch { + return true; // process exited + } + // Sleep synchronously in small increments; Atomics.wait blocks without spinning (this is a rare recovery path) + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 100); + } + + // Still alive — force kill + debug(`daemon pid ${pid} didn't respond to SIGTERM, sending SIGKILL`); + try { + process.kill(pid, 'SIGKILL'); + } catch { + return true; // already dead + } + + // Give SIGKILL a moment to take effect + const killStart = Date.now(); + while (Date.now() - killStart < 500) { + try { + process.kill(pid, 0); + } catch { + return true; // process exited + } + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 50); + } + + // Process is unkillable (UE state — stuck in kernel, e.g. Secure Enclave). 
+ debug(`daemon pid ${pid} is unkillable (likely in uninterruptible kernel wait) — proceeding anyway`); + return false; +} + function getSocketDir(): string { return path.join(getUserVarlockDir(), 'local-encrypt'); } @@ -38,6 +105,10 @@ function getSocketPath(): string { return path.join(getSocketDir(), 'daemon.sock'); } +function getLockPath(): string { + return `${getSocketPath()}.lock`; +} + function getPidPath(): string { return path.join(getSocketDir(), 'daemon.pid'); } @@ -46,6 +117,24 @@ function getDaemonInfoPath(): string { return path.join(getSocketDir(), 'daemon.info'); } +/** All state files that should be cleaned up when resetting daemon state */ +function getDaemonStateFiles(): Array { + const files = [getPidPath(), getDaemonInfoPath()]; + if (process.platform !== 'win32') { + files.push(getSocketPath(), getLockPath()); + } + return files; +} + +/** Remove all daemon state files, ignoring errors */ +function cleanupDaemonFiles(): void { + for (const file of getDaemonStateFiles()) { + try { + fs.unlinkSync(file); + } catch { /* ignore */ } + } +} + /** * Check whether the currently running daemon was spawned from the same binary * we would spawn now. Compares the resolved binary path and its mtime against @@ -95,7 +184,10 @@ function checkDaemonBinaryStale(): number | undefined { process.kill(pid, 0); // verify process is alive return pid; } catch { - return undefined; // stale PID or process already gone + // Process already gone — clean up stale files so spawnDaemon starts clean + debug('stale PID file points to dead process — cleaning up'); + cleanupDaemonFiles(); + return undefined; } } @@ -161,22 +253,8 @@ export class DaemonClient { const stalePid = this.spawnedInThisProcess ? 
undefined : checkDaemonBinaryStale(); if (stalePid) { debug(`killing stale daemon (pid ${stalePid}) — binary has been updated`); - try { - process.kill(stalePid, 'SIGTERM'); - } catch { - // already gone - } - // Clean up so spawnDaemon doesn't think a daemon is still running - for (const file of [getPidPath(), getDaemonInfoPath()]) { - try { - fs.unlinkSync(file); - } catch { /* ignore */ } - } - if (process.platform !== 'win32') { - try { - fs.unlinkSync(socketPath); - } catch { /* ignore */ } - } + killDaemonProcess(stalePid); + cleanupDaemonFiles(); } else { try { await this.connectToSocket(socketPath); @@ -188,9 +266,10 @@ export class DaemonClient { try { await this.spawnDaemon(); - } catch { + } catch (err) { // Another process may have won the race to spawn the daemon. // Wait briefly for it to be ready, then try connecting. + debug(`spawnDaemon failed: ${err instanceof Error ? err.message : err}`); await new Promise((r) => { setTimeout(r, 1000); }); @@ -199,16 +278,18 @@ export class DaemonClient { } async decrypt(ciphertext: string, keyId = 'varlock-default'): Promise { - await this.ensureConnected(); - const result = await this.sendMessage({ - action: 'decrypt', - payload: { ciphertext, keyId }, + return this.withRetry(async () => { + await this.ensureConnected(); + const result = await this.sendMessage({ + action: 'decrypt', + payload: { ciphertext, keyId }, + }, BIOMETRIC_TIMEOUT_MS); + if (typeof result === 'string') return result; + if (result && typeof result === 'object' && 'error' in result) { + throw new Error(String(result.error)); + } + return String(result); }); - if (typeof result === 'string') return result; - if (result && typeof result === 'object' && 'error' in result) { - throw new Error(String(result.error)); - } - return String(result); } async promptSecret(opts?: { @@ -216,68 +297,80 @@ export class DaemonClient { message?: string; keyId?: string; }): Promise { - await this.ensureConnected(); - try { - const result = await 
this.sendMessage({ - action: 'prompt-secret', - payload: { - itemKey: opts?.itemKey, - message: opts?.message, - keyId: opts?.keyId, - }, - }); - if (result && typeof result === 'object' && 'ciphertext' in result) { - return result.ciphertext as string; + return this.withRetry(async () => { + await this.ensureConnected(); + try { + const result = await this.sendMessage({ + action: 'prompt-secret', + payload: { + itemKey: opts?.itemKey, + message: opts?.message, + keyId: opts?.keyId, + }, + }, INTERACTIVE_TIMEOUT_MS); + if (result && typeof result === 'object' && 'ciphertext' in result) { + return result.ciphertext as string; + } + return undefined; + } catch (err) { + if (err instanceof Error && err.message === 'cancelled') return undefined; + throw err; } - return undefined; - } catch (err) { - if (err instanceof Error && err.message === 'cancelled') return undefined; - throw err; - } + }); } async invalidateSession(): Promise { - await this.ensureConnected(); - await this.sendMessage({ action: 'invalidate-session' }); + return this.withRetry(async () => { + await this.ensureConnected(); + await this.sendMessage({ action: 'invalidate-session' }); + }); } async keychainGet(opts: { service?: string; account?: string; keychain?: string; field?: string }): Promise { - await this.ensureConnected(); - const result = await this.sendMessage({ - action: 'keychain-get', - payload: opts, + return this.withRetry(async () => { + await this.ensureConnected(); + // Password reads may trigger biometric; metadata field reads won't, + // but we use the biometric timeout for both since it's harmless. 
+ const result = await this.sendMessage({ + action: 'keychain-get', + payload: opts, + }, BIOMETRIC_TIMEOUT_MS); + if (typeof result === 'string') return result; + if (result && typeof result === 'object' && 'error' in result) { + throw new Error(String(result.error)); + } + return String(result); }); - if (typeof result === 'string') return result; - if (result && typeof result === 'object' && 'error' in result) { - throw new Error(String(result.error)); - } - return String(result); } async keychainSearch(opts?: { query?: string; keychain?: string }): Promise> { - await this.ensureConnected(); - const result = await this.sendMessage({ - action: 'keychain-search', - payload: opts ?? {}, + return this.withRetry(async () => { + await this.ensureConnected(); + const result = await this.sendMessage({ + action: 'keychain-search', + payload: opts ?? {}, + }); + return (result ?? []) as Array; }); - return (result ?? []) as Array; } async keychainPick(opts?: { itemKey?: string }): Promise { - await this.ensureConnected(); - try { - const result = await this.sendMessage({ - action: 'keychain-pick', - payload: { itemKey: opts?.itemKey }, - }); - if (result && typeof result === 'object' && 'service' in result) { - return result as KeychainItemRef; + return this.withRetry(async () => { + await this.ensureConnected(); + try { + const result = await this.sendMessage({ + action: 'keychain-pick', + payload: { itemKey: opts?.itemKey }, + }, INTERACTIVE_TIMEOUT_MS); + if (result && typeof result === 'object' && 'service' in result) { + return result as KeychainItemRef; + } + return undefined; + } catch (err) { + if (err instanceof Error && err.message === 'cancelled') return undefined; + throw err; } - return undefined; - } catch (err) { - if (err instanceof Error && err.message === 'cancelled') return undefined; - throw err; - } + }); } cleanup(): void { @@ -293,6 +386,45 @@ export class DaemonClient { // -- Private -- + /** + * Run an async operation, and on recoverable failure 
(timeout, connection + * closed) clean up, reconnect to the daemon, and retry once. + */ + private async withRetry(fn: () => Promise): Promise { + try { + return await fn(); + } catch (err) { + const msg = err instanceof Error ? err.message : ''; + const recoverable = msg.includes('timed out') + || msg.includes('connection closed') + || msg.includes('Not connected'); + if (!recoverable) throw err; + + debug(`recoverable error, reconnecting: ${msg}`); + this.forceCleanup(); + await this.ensureConnected(); + return await fn(); + } + } + + /** + * Aggressive cleanup: kill the daemon process if we know its PID, + * then reset client state so the next ensureConnected spawns fresh. + */ + private forceCleanup(): void { + this.cleanup(); + this.spawnedInThisProcess = false; // allow stale-binary check on reconnect + + // Try to kill the daemon by PID so we don't reconnect to a broken process + try { + const pid = parseInt(fs.readFileSync(getPidPath(), 'utf-8').trim(), 10); + killDaemonProcess(pid); + } catch { /* no PID file or already dead */ } + + // Remove stale files so spawnDaemon starts clean + cleanupDaemonFiles(); + } + private connectToSocket(socketPath: string): Promise { return new Promise((resolve, reject) => { const socket = new net.Socket(); @@ -322,6 +454,12 @@ export class DaemonClient { socket.on('close', () => { this.isConnected = false; this.socket = null; + // Reject all pending messages so callers don't hang + for (const { reject: rej } of this.messageQueue.values()) { + rej(new Error('Daemon connection closed')); + } + this.messageQueue.clear(); + this.buffer = Buffer.alloc(0); }); socket.connect(socketPath); @@ -355,7 +493,7 @@ export class DaemonClient { } } - private sendMessage(message: Record): Promise { + private sendMessage(message: Record, timeoutMs = SEND_TIMEOUT_MS): Promise { return new Promise((resolve, reject) => { if (!this.isConnected || !this.socket) { reject(new Error('Not connected to daemon')); @@ -370,7 +508,22 @@ export class 
DaemonClient { const lengthBuf = Buffer.alloc(4); lengthBuf.writeUInt32LE(messageBytes.length, 0); - this.messageQueue.set(messageId, { resolve, reject }); + // Timeout to prevent hanging forever on a stuck daemon + const timeout = setTimeout(() => { + this.messageQueue.delete(messageId); + reject(new Error(`Daemon message timed out after ${timeoutMs}ms (action: ${message.action})`)); + }, timeoutMs); + + this.messageQueue.set(messageId, { + resolve: (value) => { + clearTimeout(timeout); + resolve(value); + }, + reject: (err) => { + clearTimeout(timeout); + reject(err); + }, + }); this.socket.write(Buffer.concat([lengthBuf, messageBytes])); }); } @@ -396,35 +549,25 @@ export class DaemonClient { try { const pid = parseInt(fs.readFileSync(pidPath, 'utf-8').trim(), 10); process.kill(pid, 0); // Throws if process doesn't exist - // Process is alive — wait briefly and let ensureConnected retry - await new Promise((r) => { - setTimeout(r, 500); - }); - return; + + // Process is alive — verify it's actually responsive on the socket + try { + await this.connectToSocket(socketPath); + return; // daemon is alive and accepting connections + } catch { + // Alive but socket unresponsive — kill it and respawn + debug(`daemon pid ${pid} alive but socket unresponsive — killing`); + killDaemonProcess(pid); + } } catch { // Stale PID file — clean up both PID and socket } } // Clean up stale files before spawning - // On Windows, named pipes don't leave files — only clean PID and Unix sockets - if (!isWindows) { - for (const file of [socketPath, pidPath, getDaemonInfoPath()]) { - if (fs.existsSync(file)) { - fs.unlinkSync(file); - } - } - // Verify socket file is actually gone - if (fs.existsSync(socketPath)) { - throw new Error(`Failed to clean up stale socket file: ${socketPath}`); - } - } else { - // Clean PID + info files on Windows (named pipes don't leave socket files) - for (const file of [pidPath, getDaemonInfoPath()]) { - if (fs.existsSync(file)) { - fs.unlinkSync(file); - 
} - } + cleanupDaemonFiles(); + if (!isWindows && fs.existsSync(socketPath)) { + throw new Error(`Failed to clean up stale socket file: ${socketPath}`); } return new Promise((resolve, reject) => { diff --git a/packages/varlock/src/lib/local-encrypt/index.ts b/packages/varlock/src/lib/local-encrypt/index.ts index 091a89f2..5265af4c 100644 --- a/packages/varlock/src/lib/local-encrypt/index.ts +++ b/packages/varlock/src/lib/local-encrypt/index.ts @@ -30,23 +30,57 @@ function debug(msg: string) { } /** - * Get a TTY identifier for session scoping. - * Reads the controlling terminal from /proc/self/fd/0 or falls back to PID. + * Get a session identifier for biometric session scoping (WSL only). + * Prefers the controlling terminal; falls back to a stable ancestor PID + * found by walking the process tree (mirrors the macOS Swift daemon logic). */ -let _cachedTtyId: string | undefined; -function getSelfTtyId(): string { - if (_cachedTtyId) return _cachedTtyId; +let _cachedSessionId: string | undefined; +function getSelfSessionId(): string { + if (_cachedSessionId) return _cachedSessionId; try { const ttyPath = fs.readlinkSync('/proc/self/fd/0'); if (ttyPath && ttyPath.startsWith('/dev/')) { - _cachedTtyId = ttyPath; + _cachedSessionId = ttyPath; return ttyPath; } } catch { // Not available } - _cachedTtyId = `pid:${process.pid}`; - return _cachedTtyId; + // No TTY — walk the process tree to find a stable ancestor. + // Uses the same grandchild-of-app-root logic as the macOS daemon: + // build ancestry chain up to PID 1, then use the process 2 levels + // below the top (the grandchild of the app root). 
+ try { + const chain: Array = [process.pid]; + let current = process.pid; + for (let i = 0; i < 64; i++) { + const stat = fs.readFileSync(`/proc/${current}/stat`, 'utf-8'); + const fields = stat.split(') '); + if (fields.length < 2) break; + const ppid = parseInt(fields[1].split(' ')[1], 10); + if (!ppid || ppid <= 1) break; + chain.push(ppid); + current = ppid; + } + if (chain.length >= 4) { + const scopePid = chain[chain.length - 3]; + // Include start time for PID-reuse resistance (field 21 after comm in /proc/stat) + let startTime = 0; + try { + const scopeStat = fs.readFileSync(`/proc/${scopePid}/stat`, 'utf-8'); + const scopeFields = scopeStat.split(') '); + if (scopeFields.length >= 2) { + startTime = parseInt(scopeFields[1].split(' ')[19], 10) || 0; + } + } catch { /* ignore */ } + _cachedSessionId = `ptree:${scopePid}:${startTime}`; + return _cachedSessionId; + } + } catch { + // Not available + } + _cachedSessionId = `pid:${process.pid}`; + return _cachedSessionId; } let _wslDaemonPrestartAttempted = false; @@ -346,10 +380,10 @@ export async function decryptValue(ciphertext: string, keyId: string = DEFAULT_K } // Use spawnSync with stdin to avoid exposing ciphertext or session // identity in process listings (visible via tasklist/procfs). - // Stdin JSON includes both the data and the TTY ID for session scoping. + // Stdin JSON includes both the data and the session ID for session scoping. const stdinPayload = JSON.stringify({ data: ciphertext, - ttyId: getSelfTtyId(), + ttyId: getSelfSessionId(), }); const runViaDaemon = (timeout: number) => spawnSync(binaryPath, ['decrypt', '--key-id', keyId, '--data-stdin', '--via-daemon'], { input: stdinPayload, diff --git a/packages/vscode-plugin/vitest.config.ts b/packages/vscode-plugin/vitest.config.mts similarity index 100% rename from packages/vscode-plugin/vitest.config.ts rename to packages/vscode-plugin/vitest.config.mts