From 5a4dc98e9a23bab7a2bb7f530832abc1ba8837bf Mon Sep 17 00:00:00 2001 From: Yunfan Yang Date: Wed, 3 Jun 2026 18:02:27 -0400 Subject: [PATCH 1/6] Register non-recursive watchers for Linux --- crates/repo_metadata/src/local_model.rs | 129 +++++++++++++-- crates/repo_metadata/src/local_model_tests.rs | 152 +++++++++++++++++- 2 files changed, 271 insertions(+), 10 deletions(-) diff --git a/crates/repo_metadata/src/local_model.rs b/crates/repo_metadata/src/local_model.rs index 7e81451264..2993abb61b 100644 --- a/crates/repo_metadata/src/local_model.rs +++ b/crates/repo_metadata/src/local_model.rs @@ -49,6 +49,7 @@ use crate::telemetry::RepoMetadataTelemetryEvent; use crate::{gitignores_for_directory, matches_gitignores, RepoMetadataError}; cfg_if::cfg_if! { if #[cfg(feature = "local_fs")] { + use std::collections::HashSet; use notify_debouncer_full::notify::RecursiveMode; use crate::entry::repo_watch_filter; use crate::repositories::{DetectedRepositories, DetectedRepositoriesEvent}; @@ -200,6 +201,12 @@ pub struct LocalRepoMetadataModel { /// alias rather than using a one-to-one bidirectional map. #[cfg(feature = "local_fs")] symlink_targets: HashMap>, + /// For lazy-loaded (non-git) roots that are watched non-recursively (Linux + /// only), the set of directories currently registered with the watcher + /// under each root — the root plus every subdirectory expanded so far. + /// Used to unregister all per-directory watches when the root is torn down. + #[cfg(feature = "local_fs")] + lazy_watched_dirs: HashMap>, } #[derive(Debug, Clone, Default)] @@ -295,6 +302,8 @@ impl LocalRepoMetadataModel { standing_query_definitions: StandingQueryDefinitions::default(), #[cfg(feature = "local_fs")] symlink_targets: HashMap::new(), + #[cfg(feature = "local_fs")] + lazy_watched_dirs: HashMap::new(), }; cfg_if::cfg_if! { if #[cfg(feature = "local_fs")] { @@ -560,6 +569,30 @@ impl LocalRepoMetadataModel { |model, (mutations, discovered_results, removed_roots, repo_path, lazy_load), ctx| { + // For lazy (non-git) roots watched non-recursively, collect the + // directories added by this batch so we can watch them after the + // tree mutations are applied (keeps live-created dirs fresh). + let new_dirs: Vec = if model + .lazy_watched_dirs + .contains_key(&repo_path) + { + mutations + .iter() + .filter_map(|mutation| { + let path = match mutation { + FileTreeMutation::AddDirectorySubtree { + dir_path, .. + } => dir_path, + FileTreeMutation::AddEmptyDirectory { path, .. } => path, + _ => return None, + }; + StandardizedPath::try_from_local(path).ok() + }) + .collect() + } else { + Vec::new() + }; + if let Some(IndexedRepoState::Indexed(state)) = model.repositories.get_mut(&repo_path) { @@ -583,7 +616,7 @@ impl LocalRepoMetadataModel { }); if !standing_delta.is_empty() { ctx.emit(RepositoryMetadataEvent::StandingQueryResultsUpdated { - path: repo_path, + path: repo_path.clone(), delta: standing_delta, }); } @@ -593,6 +626,12 @@ impl LocalRepoMetadataModel { }); } } + + // Watch any newly added directories now loaded under this lazy + // root. No-ops for git repos / recursively-watched roots. + for dir in new_dirs { + model.watch_lazy_subdir(&repo_path, &dir, ctx); + } }, ); } @@ -642,10 +681,17 @@ impl LocalRepoMetadataModel { } /// Adds or updates a repository's file tree state. + /// + /// `recursive` controls how the root is registered with the filesystem + /// watcher: git repos register recursively (and rely on gitignore pruning), + /// while lazy-loaded non-git roots on Linux register the root + /// non-recursively and watch expanded subdirectories individually. + #[cfg_attr(not(feature = "local_fs"), allow(unused_variables))] fn add_repository_internal( &mut self, repo_path: StandardizedPath, state: FileTreeState, + recursive: bool, ctx: &mut ModelContext, ) -> Result<(), RepoMetadataError> { let local_path = repo_path @@ -674,11 +720,16 @@ impl LocalRepoMetadataModel { // skills). let gitignores = crate::gitignores_for_directory(&watch_path); let force_included_paths = self.force_included_paths.clone(); + let recursive_mode = if recursive { + RecursiveMode::Recursive + } else { + RecursiveMode::NonRecursive + }; watcher.update(ctx, |watcher, _ctx| { std::mem::drop(watcher.register_path( &watch_path, repo_watch_filter(gitignores, force_included_paths), - RecursiveMode::Recursive, + recursive_mode, )); }); } @@ -711,11 +762,19 @@ impl LocalRepoMetadataModel { #[cfg(feature = "local_fs")] { if let Some(ref watcher) = self.watcher { - if let Some(local_path) = repo_path.to_local_path() { - watcher.update(ctx, |watcher, _ctx| { - std::mem::drop(watcher.unregister_path(&local_path)); - }); - } + // Lazy (non-git) roots watched non-recursively track every + // per-directory watch they registered (root + expanded + // subdirs); unregister all of them. Otherwise just the root. + let watched_dirs = self.lazy_watched_dirs.remove(repo_path); + let paths_to_unregister: Vec = match watched_dirs { + Some(dirs) => dirs.iter().filter_map(|dir| dir.to_local_path()).collect(), + None => repo_path.to_local_path().into_iter().collect(), + }; + watcher.update(ctx, |watcher, _ctx| { + for path in &paths_to_unregister { + std::mem::drop(watcher.unregister_path(path)); + } + }); } } @@ -823,8 +882,20 @@ impl LocalRepoMetadataModel { let state = FileTreeState::new_lazy_loaded(root_entry); self.standing_results.insert(path.clone(), standing_results); - self.add_repository_internal(path.clone(), state, ctx)?; + // On Linux, watch lazy (non-git) roots non-recursively to avoid + // registering an inotify watch for every directory in the subtree. + // Subdirectories get their own non-recursive watch as they are expanded + // (see `load_directory`). macOS/Windows watch a whole tree with a single + // OS handle, so recursive watching stays cheap there. + let recursive = !cfg!(target_os = "linux"); + self.add_repository_internal(path.clone(), state, recursive, ctx)?; self.lazy_loaded_paths.insert(path.clone(), 1); + if !recursive { + // Track the root so teardown can unregister it along with any + // per-directory watches added as subdirectories are expanded. + self.lazy_watched_dirs + .insert(path.clone(), HashSet::from([path.clone()])); + } Ok(()) } @@ -866,6 +937,11 @@ impl LocalRepoMetadataModel { .load_at_path(dir_path, &mut gitignores) .map_err(RepoMetadataError::BuildTree)?; + // For lazy (non-git) roots watched non-recursively on Linux, start + // watching the directory we just expanded so its direct children stay + // fresh. No-ops for git repos and recursively-watched roots. + self.watch_lazy_subdir(repo_root, dir_path, ctx); + ctx.emit(RepositoryMetadataEvent::FileTreeEntryUpdated { path: repo_root.clone(), update_type: MetadataUpdateType::FullReplace, @@ -873,6 +949,41 @@ impl LocalRepoMetadataModel { Ok(()) } + /// Registers a non-recursive watch on `dir_path` for a lazy (non-git) root + /// that is watched per-directory, recording it so it can be unregistered on + /// teardown. No-ops unless `repo_root` is such a tracked lazy root (only + /// populated on Linux) or the directory is already watched. + #[cfg(feature = "local_fs")] + fn watch_lazy_subdir( + &mut self, + repo_root: &StandardizedPath, + dir_path: &StandardizedPath, + ctx: &mut ModelContext, + ) { + let Some(watched) = self.lazy_watched_dirs.get_mut(repo_root) else { + return; + }; + if !watched.insert(dir_path.clone()) { + // Already watching this directory. + return; + } + let Some(ref watcher) = self.watcher else { + return; + }; + let Some(local_path) = dir_path.to_local_path() else { + return; + }; + let gitignores = crate::gitignores_for_directory(&local_path); + let force_included_paths = self.force_included_paths.clone(); + watcher.update(ctx, |watcher, _ctx| { + std::mem::drop(watcher.register_path( + &local_path, + repo_watch_filter(gitignores, force_included_paths), + RecursiveMode::NonRecursive, + )); + }); + } + /// Checks whether the parent directory of `path` is loaded in the given entry. fn is_parent_loaded_in_entry(entry: &FileTreeEntry, path: &StandardizedPath) -> bool { let Some(parent) = path.parent() else { @@ -1307,7 +1418,7 @@ impl LocalRepoMetadataModel { FileTreeState::new(root_entry, gitignores_for_build, Some(repository_handle)); if let Err(e) = - model.add_repository_internal(std_repo_path.clone(), state, ctx) + model.add_repository_internal(std_repo_path.clone(), state, true, ctx) { log::warn!("Failed to add repository {repo_path_str}: {e:?}"); // On failure, mark the repository as failed so waiters are notified. diff --git a/crates/repo_metadata/src/local_model_tests.rs b/crates/repo_metadata/src/local_model_tests.rs index 595b458235..08d93515a5 100644 --- a/crates/repo_metadata/src/local_model_tests.rs +++ b/crates/repo_metadata/src/local_model_tests.rs @@ -41,6 +41,8 @@ impl LocalRepoMetadataModel { standing_query_definitions: Default::default(), #[cfg(feature = "local_fs")] symlink_targets: Default::default(), + #[cfg(feature = "local_fs")] + lazy_watched_dirs: Default::default(), } } } @@ -111,7 +113,12 @@ fn repository_indexed_waits_for_pending_repo() { model_handle.update(&mut app, |model, ctx| { model - .add_repository_internal(repo_path.clone(), empty_repo_state(&repo_path), ctx) + .add_repository_internal( + repo_path.clone(), + empty_repo_state(&repo_path), + true, + ctx, + ) .expect("repository should index"); }); @@ -2036,6 +2043,7 @@ fn test_repository_operations_with_standardized_paths() { let result1 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&real_repo).unwrap(), state.clone(), + true, ctx, ); assert!(result1.is_ok()); @@ -2044,6 +2052,7 @@ fn test_repository_operations_with_standardized_paths() { let result2 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&symlink_repo).unwrap(), state.clone(), + true, ctx, ); assert!(result2.is_ok()); @@ -2052,6 +2061,7 @@ fn test_repository_operations_with_standardized_paths() { let result3 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&relative_repo).unwrap(), state.clone(), + true, ctx, ); assert!(result3.is_ok()); @@ -2118,3 +2128,143 @@ fn test_standardized_path_edge_cases() { assert!(debug_str.contains("StandardizedPath")); }); } + +/// On Linux, a lazy (non-git) root is watched non-recursively, so only the root +/// itself should be tracked initially. On other platforms the root is watched +/// recursively and nothing is tracked for per-directory teardown. +#[cfg(feature = "local_fs")] +#[test] +fn index_lazy_loaded_path_tracks_only_root() { + VirtualFS::test("lazy_root_tracking", |dirs, mut vfs| { + vfs.mkdir("workspace/sub") + .with_files(vec![Stub::FileWithContent("workspace/file.txt", "x")]); + let root = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace")).unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + model_handle.update(&mut app, |model, ctx| { + model + .index_lazy_loaded_path(&root, ctx) + .expect("should index lazy path"); + }); + + model_handle.read(&app, |model, _ctx| { + assert!(model.is_lazy_loaded_path(&root)); + if cfg!(target_os = "linux") { + let watched = model + .lazy_watched_dirs + .get(&root) + .expect("lazy root should be tracked on linux"); + assert_eq!(watched.len(), 1); + assert!(watched.contains(&root)); + } else { + assert!(model.lazy_watched_dirs.is_empty()); + } + }); + }); + }); +} + +/// Expanding a subdirectory of a lazy root should add a per-directory watch for +/// it on Linux (so its children stay fresh) while leaving non-Linux untouched. +#[cfg(feature = "local_fs")] +#[test] +fn load_directory_tracks_expanded_subdir_for_lazy_root() { + VirtualFS::test("lazy_load_subdir_tracking", |dirs, mut vfs| { + vfs.mkdir("workspace/sub/inner"); + let root = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace")).unwrap(); + let sub = StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace/sub")) + .unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + model_handle.update(&mut app, |model, ctx| { + model + .index_lazy_loaded_path(&root, ctx) + .expect("should index lazy path"); + model + .load_directory(&root, &sub, ctx) + .expect("should load subdir"); + }); + + model_handle.read(&app, |model, _ctx| { + if cfg!(target_os = "linux") { + let watched = model + .lazy_watched_dirs + .get(&root) + .expect("lazy root should be tracked on linux"); + assert!(watched.contains(&root)); + assert!(watched.contains(&sub)); + } else { + assert!(model.lazy_watched_dirs.is_empty()); + } + }); + }); + }); +} + +/// Indexing a git repo registers a recursive watch and must not populate the +/// lazy per-directory tracking map on any platform. +#[cfg(feature = "local_fs")] +#[test] +fn recursive_repo_does_not_track_lazy_watched_dirs() { + VirtualFS::test("recursive_repo_no_lazy_tracking", |dirs, mut vfs| { + vfs.mkdir("repo/src"); + let repo_path = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("repo")).unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + model_handle.update(&mut app, |model, ctx| { + model + .add_repository_internal( + repo_path.clone(), + empty_repo_state(&repo_path), + /* recursive */ true, + ctx, + ) + .expect("repo should index"); + }); + + model_handle.read(&app, |model, _ctx| { + assert!(model.lazy_watched_dirs.is_empty()); + assert!(!model.is_lazy_loaded_path(&repo_path)); + }); + }); + }); +} + +/// Tearing down a lazy root clears all of its tracked per-directory watches and +/// removes the repository state. +#[cfg(feature = "local_fs")] +#[test] +fn remove_lazy_loaded_path_clears_tracked_watches() { + VirtualFS::test("lazy_remove_clears_tracking", |dirs, mut vfs| { + vfs.mkdir("workspace/sub"); + let root = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace")).unwrap(); + let sub = StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace/sub")) + .unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + model_handle.update(&mut app, |model, ctx| { + model + .index_lazy_loaded_path(&root, ctx) + .expect("should index lazy path"); + model + .load_directory(&root, &sub, ctx) + .expect("should load subdir"); + model.remove_lazy_loaded_path(&root, ctx); + }); + + model_handle.read(&app, |model, _ctx| { + assert!(!model.lazy_watched_dirs.contains_key(&root)); + assert!(!model.is_lazy_loaded_path(&root)); + assert!(model.repository_state(&root).is_none()); + }); + }); + }); +} From 51f8d203631e74d45859d49ea6543866d79c1fbf Mon Sep 17 00:00:00 2001 From: Yunfan Yang Date: Wed, 3 Jun 2026 18:24:46 -0400 Subject: [PATCH 2/6] self-review --- crates/repo_metadata/src/local_model.rs | 133 ++++++++++++------ crates/repo_metadata/src/local_model_tests.rs | 68 +++++---- 2 files changed, 128 insertions(+), 73 deletions(-) diff --git a/crates/repo_metadata/src/local_model.rs b/crates/repo_metadata/src/local_model.rs index 2993abb61b..4b46208187 100644 --- a/crates/repo_metadata/src/local_model.rs +++ b/crates/repo_metadata/src/local_model.rs @@ -4,9 +4,7 @@ //! This module provides a singleton model that manages repository metadata across //! all repositories tracked by Warp. -use std::collections::HashMap; -#[cfg(feature = "local_fs")] -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -49,7 +47,6 @@ use crate::telemetry::RepoMetadataTelemetryEvent; use crate::{gitignores_for_directory, matches_gitignores, RepoMetadataError}; cfg_if::cfg_if! { if #[cfg(feature = "local_fs")] { - use std::collections::HashSet; use notify_debouncer_full::notify::RecursiveMode; use crate::entry::repo_watch_filter; use crate::repositories::{DetectedRepositories, DetectedRepositoriesEvent}; @@ -167,6 +164,26 @@ impl IndexedRepoState { } } +/// How a repository's root is registered with the filesystem watcher. +#[derive(Debug)] +enum RepoWatchMode { + /// A single recursive watch on the root covers the whole subtree. Used for + /// git repos (which rely on gitignore pruning) and, on macOS/Windows, for + /// lazy non-git roots where a recursive OS watch is cheap. + RecursiveOnRoot, + /// Each loaded directory is watched non-recursively, so the number of + /// watches scales with what the user expands rather than the whole subtree. + /// Used for lazy (non-git) roots on Linux, where per-directory inotify + /// watches are otherwise prohibitively expensive. + /// + /// Carries the set of directories currently watched under the root (the + /// root plus every expanded subdirectory) so they can all be unregistered + /// on teardown. + LazyNonRecursive { + watched_dirs: HashSet, + }, +} + /// Singleton model for managing local repository metadata. /// /// This model tracks repositories on the local filesystem, using file watchers @@ -201,12 +218,12 @@ pub struct LocalRepoMetadataModel { /// alias rather than using a one-to-one bidirectional map. #[cfg(feature = "local_fs")] symlink_targets: HashMap>, - /// For lazy-loaded (non-git) roots that are watched non-recursively (Linux - /// only), the set of directories currently registered with the watcher - /// under each root — the root plus every subdirectory expanded so far. - /// Used to unregister all per-directory watches when the root is torn down. + /// How each tracked repository's root is registered with the filesystem + /// watcher. [`RepoWatchMode::LazyNonRecursive`] additionally records the set + /// of directories watched under a lazy (non-git) root so they can all be + /// unregistered on teardown. #[cfg(feature = "local_fs")] - lazy_watched_dirs: HashMap>, + repo_watch_modes: HashMap, } #[derive(Debug, Clone, Default)] @@ -303,7 +320,7 @@ impl LocalRepoMetadataModel { #[cfg(feature = "local_fs")] symlink_targets: HashMap::new(), #[cfg(feature = "local_fs")] - lazy_watched_dirs: HashMap::new(), + repo_watch_modes: HashMap::new(), }; cfg_if::cfg_if! { if #[cfg(feature = "local_fs")] { @@ -572,10 +589,10 @@ impl LocalRepoMetadataModel { // For lazy (non-git) roots watched non-recursively, collect the // directories added by this batch so we can watch them after the // tree mutations are applied (keeps live-created dirs fresh). - let new_dirs: Vec = if model - .lazy_watched_dirs - .contains_key(&repo_path) - { + let new_dirs: Vec = if matches!( + model.repo_watch_modes.get(&repo_path), + Some(RepoWatchMode::LazyNonRecursive { .. }) + ) { mutations .iter() .filter_map(|mutation| { @@ -682,16 +699,17 @@ impl LocalRepoMetadataModel { /// Adds or updates a repository's file tree state. /// - /// `recursive` controls how the root is registered with the filesystem - /// watcher: git repos register recursively (and rely on gitignore pruning), - /// while lazy-loaded non-git roots on Linux register the root - /// non-recursively and watch expanded subdirectories individually. + /// `mode` controls how the root is registered with the filesystem watcher: + /// [`RepoWatchMode::RecursiveOnRoot`] registers a single recursive watch + /// (git repos, and lazy roots off Linux), while + /// [`RepoWatchMode::LazyNonRecursive`] registers the root non-recursively + /// so expanded subdirectories can be watched individually. #[cfg_attr(not(feature = "local_fs"), allow(unused_variables))] fn add_repository_internal( &mut self, repo_path: StandardizedPath, state: FileTreeState, - recursive: bool, + mode: RepoWatchMode, ctx: &mut ModelContext, ) -> Result<(), RepoMetadataError> { let local_path = repo_path @@ -709,9 +727,18 @@ impl LocalRepoMetadataModel { )); } - // Register this path with the watcher if we have one + // Record how this root is watched and register it with the watcher (if + // any). #[cfg(feature = "local_fs")] { + let recursive_mode = match &mode { + RepoWatchMode::RecursiveOnRoot => RecursiveMode::Recursive, + RepoWatchMode::LazyNonRecursive { .. } => RecursiveMode::NonRecursive, + }; + // Capture any prior registration so we can drop stale per-directory + // watches when replacing a lazy registration (e.g. a lazy root being + // upgraded to a git repo). + let previous_mode = self.repo_watch_modes.insert(repo_path.clone(), mode); if let Some(ref watcher) = self.watcher { let watch_path = local_path.clone(); // Build the gitignore set (root + global) and force-included @@ -720,12 +747,14 @@ impl LocalRepoMetadataModel { // skills). let gitignores = crate::gitignores_for_directory(&watch_path); let force_included_paths = self.force_included_paths.clone(); - let recursive_mode = if recursive { - RecursiveMode::Recursive - } else { - RecursiveMode::NonRecursive - }; watcher.update(ctx, |watcher, _ctx| { + if let Some(RepoWatchMode::LazyNonRecursive { watched_dirs }) = &previous_mode { + for dir in watched_dirs { + if let Some(dir_path) = dir.to_local_path() { + std::mem::drop(watcher.unregister_path(&dir_path)); + } + } + } std::mem::drop(watcher.register_path( &watch_path, repo_watch_filter(gitignores, force_included_paths), @@ -758,17 +787,22 @@ impl LocalRepoMetadataModel { self.standing_results.remove(repo_path); #[cfg(feature = "local_fs")] self.clear_symlink_targets_for_repo(repo_path, ctx); - // Unregister from watcher + // Drop the recorded watch mode and unregister from the watcher. #[cfg(feature = "local_fs")] { + let watch_mode = self.repo_watch_modes.remove(repo_path); if let Some(ref watcher) = self.watcher { - // Lazy (non-git) roots watched non-recursively track every - // per-directory watch they registered (root + expanded - // subdirs); unregister all of them. Otherwise just the root. - let watched_dirs = self.lazy_watched_dirs.remove(repo_path); - let paths_to_unregister: Vec = match watched_dirs { - Some(dirs) => dirs.iter().filter_map(|dir| dir.to_local_path()).collect(), - None => repo_path.to_local_path().into_iter().collect(), + let paths_to_unregister: Vec = match watch_mode { + // Per-directory watches: unregister every tracked dir + // (root + expanded subdirs). + Some(RepoWatchMode::LazyNonRecursive { watched_dirs }) => watched_dirs + .iter() + .filter_map(|dir| dir.to_local_path()) + .collect(), + // A single recursive watch lives on the root. + Some(RepoWatchMode::RecursiveOnRoot) | None => { + repo_path.to_local_path().into_iter().collect() + } }; watcher.update(ctx, |watcher, _ctx| { for path in &paths_to_unregister { @@ -887,15 +921,17 @@ impl LocalRepoMetadataModel { // Subdirectories get their own non-recursive watch as they are expanded // (see `load_directory`). macOS/Windows watch a whole tree with a single // OS handle, so recursive watching stays cheap there. - let recursive = !cfg!(target_os = "linux"); - self.add_repository_internal(path.clone(), state, recursive, ctx)?; + let mode = if cfg!(target_os = "linux") { + // Start by watching just the root; expanded subdirectories are added + // to this set as they load (see `load_directory`). + RepoWatchMode::LazyNonRecursive { + watched_dirs: HashSet::from([path.clone()]), + } + } else { + RepoWatchMode::RecursiveOnRoot + }; + self.add_repository_internal(path.clone(), state, mode, ctx)?; self.lazy_loaded_paths.insert(path.clone(), 1); - if !recursive { - // Track the root so teardown can unregister it along with any - // per-directory watches added as subdirectories are expanded. - self.lazy_watched_dirs - .insert(path.clone(), HashSet::from([path.clone()])); - } Ok(()) } @@ -960,10 +996,12 @@ impl LocalRepoMetadataModel { dir_path: &StandardizedPath, ctx: &mut ModelContext, ) { - let Some(watched) = self.lazy_watched_dirs.get_mut(repo_root) else { + let Some(RepoWatchMode::LazyNonRecursive { watched_dirs }) = + self.repo_watch_modes.get_mut(repo_root) + else { return; }; - if !watched.insert(dir_path.clone()) { + if !watched_dirs.insert(dir_path.clone()) { // Already watching this directory. return; } @@ -1417,9 +1455,12 @@ impl LocalRepoMetadataModel { let state = FileTreeState::new(root_entry, gitignores_for_build, Some(repository_handle)); - if let Err(e) = - model.add_repository_internal(std_repo_path.clone(), state, true, ctx) - { + if let Err(e) = model.add_repository_internal( + std_repo_path.clone(), + state, + RepoWatchMode::RecursiveOnRoot, + ctx, + ) { log::warn!("Failed to add repository {repo_path_str}: {e:?}"); // On failure, mark the repository as failed so waiters are notified. model.mark_repository_failed(std_repo_path, e, ctx); diff --git a/crates/repo_metadata/src/local_model_tests.rs b/crates/repo_metadata/src/local_model_tests.rs index 08d93515a5..5f0b0ae3f9 100644 --- a/crates/repo_metadata/src/local_model_tests.rs +++ b/crates/repo_metadata/src/local_model_tests.rs @@ -20,7 +20,8 @@ use watcher::BulkFilesystemWatcherEvent; use crate::entry::{DirectoryEntry, Entry, FileMetadata}; use crate::file_tree_store::{FileTreeEntry, FileTreeEntryState, FileTreeState}; use crate::local_model::{ - GetContentsArgs, IndexedRepoState, LocalRepoMetadataModel, RepoUpdate, RepositoryMetadataEvent, + GetContentsArgs, IndexedRepoState, LocalRepoMetadataModel, RepoUpdate, RepoWatchMode, + RepositoryMetadataEvent, }; use crate::repositories::DetectedRepositories; use crate::watcher::DirectoryWatcher; @@ -42,7 +43,7 @@ impl LocalRepoMetadataModel { #[cfg(feature = "local_fs")] symlink_targets: Default::default(), #[cfg(feature = "local_fs")] - lazy_watched_dirs: Default::default(), + repo_watch_modes: Default::default(), } } } @@ -116,7 +117,7 @@ fn repository_indexed_waits_for_pending_repo() { .add_repository_internal( repo_path.clone(), empty_repo_state(&repo_path), - true, + RepoWatchMode::RecursiveOnRoot, ctx, ) .expect("repository should index"); @@ -2043,7 +2044,7 @@ fn test_repository_operations_with_standardized_paths() { let result1 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&real_repo).unwrap(), state.clone(), - true, + RepoWatchMode::RecursiveOnRoot, ctx, ); assert!(result1.is_ok()); @@ -2052,7 +2053,7 @@ fn test_repository_operations_with_standardized_paths() { let result2 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&symlink_repo).unwrap(), state.clone(), - true, + RepoWatchMode::RecursiveOnRoot, ctx, ); assert!(result2.is_ok()); @@ -2061,7 +2062,7 @@ fn test_repository_operations_with_standardized_paths() { let result3 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&relative_repo).unwrap(), state.clone(), - true, + RepoWatchMode::RecursiveOnRoot, ctx, ); assert!(result3.is_ok()); @@ -2151,15 +2152,20 @@ fn index_lazy_loaded_path_tracks_only_root() { model_handle.read(&app, |model, _ctx| { assert!(model.is_lazy_loaded_path(&root)); + let mode = model + .repo_watch_modes + .get(&root) + .expect("watch mode should be recorded"); if cfg!(target_os = "linux") { - let watched = model - .lazy_watched_dirs - .get(&root) - .expect("lazy root should be tracked on linux"); - assert_eq!(watched.len(), 1); - assert!(watched.contains(&root)); + // Linux: only the root is watched initially. + let RepoWatchMode::LazyNonRecursive { watched_dirs } = mode else { + panic!("expected LazyNonRecursive on linux, got {mode:?}"); + }; + assert_eq!(watched_dirs.len(), 1); + assert!(watched_dirs.contains(&root)); } else { - assert!(model.lazy_watched_dirs.is_empty()); + // Other platforms: a single recursive watch on the root. + assert!(matches!(mode, RepoWatchMode::RecursiveOnRoot)); } }); }); @@ -2190,27 +2196,32 @@ fn load_directory_tracks_expanded_subdir_for_lazy_root() { }); model_handle.read(&app, |model, _ctx| { + let mode = model + .repo_watch_modes + .get(&root) + .expect("watch mode should be recorded"); if cfg!(target_os = "linux") { - let watched = model - .lazy_watched_dirs - .get(&root) - .expect("lazy root should be tracked on linux"); - assert!(watched.contains(&root)); - assert!(watched.contains(&sub)); + // Linux: the expanded subdir is now watched alongside the root. + let RepoWatchMode::LazyNonRecursive { watched_dirs } = mode else { + panic!("expected LazyNonRecursive on linux, got {mode:?}"); + }; + assert!(watched_dirs.contains(&root)); + assert!(watched_dirs.contains(&sub)); } else { - assert!(model.lazy_watched_dirs.is_empty()); + // Other platforms: a single recursive watch on the root. + assert!(matches!(mode, RepoWatchMode::RecursiveOnRoot)); } }); }); }); } -/// Indexing a git repo registers a recursive watch and must not populate the -/// lazy per-directory tracking map on any platform. +/// Indexing a git repo records a recursive watch mode (not a lazy one) and is +/// not tracked as a lazy-loaded path, on any platform. #[cfg(feature = "local_fs")] #[test] -fn recursive_repo_does_not_track_lazy_watched_dirs() { - VirtualFS::test("recursive_repo_no_lazy_tracking", |dirs, mut vfs| { +fn recursive_repo_uses_recursive_watch_mode() { + VirtualFS::test("recursive_repo_watch_mode", |dirs, mut vfs| { vfs.mkdir("repo/src"); let repo_path = StandardizedPath::from_local_canonicalized(&dirs.tests().join("repo")).unwrap(); @@ -2222,14 +2233,17 @@ fn recursive_repo_does_not_track_lazy_watched_dirs() { .add_repository_internal( repo_path.clone(), empty_repo_state(&repo_path), - /* recursive */ true, + RepoWatchMode::RecursiveOnRoot, ctx, ) .expect("repo should index"); }); model_handle.read(&app, |model, _ctx| { - assert!(model.lazy_watched_dirs.is_empty()); + assert!(matches!( + model.repo_watch_modes.get(&repo_path), + Some(RepoWatchMode::RecursiveOnRoot) + )); assert!(!model.is_lazy_loaded_path(&repo_path)); }); }); @@ -2261,7 +2275,7 @@ fn remove_lazy_loaded_path_clears_tracked_watches() { }); model_handle.read(&app, |model, _ctx| { - assert!(!model.lazy_watched_dirs.contains_key(&root)); + assert!(!model.repo_watch_modes.contains_key(&root)); assert!(!model.is_lazy_loaded_path(&root)); assert!(model.repository_state(&root).is_none()); }); From 8e2df5a774f1f0fecc6f95d350e77538324a571e Mon Sep 17 00:00:00 2001 From: Yunfan Yang Date: Wed, 3 Jun 2026 23:17:03 -0400 Subject: [PATCH 3/6] address comments --- crates/repo_metadata/src/entry.rs | 9 + crates/repo_metadata/src/entry_tests.rs | 40 +++ crates/repo_metadata/src/local_model.rs | 268 ++++++++++++------ crates/repo_metadata/src/local_model_tests.rs | 177 +++++++++--- 4 files changed, 367 insertions(+), 127 deletions(-) diff --git a/crates/repo_metadata/src/entry.rs b/crates/repo_metadata/src/entry.rs index c5fedbbaa8..ff5b7b3784 100644 --- a/crates/repo_metadata/src/entry.rs +++ b/crates/repo_metadata/src/entry.rs @@ -1027,6 +1027,15 @@ pub fn repo_watch_filter( ) -> WatchFilter { let should_watch = move |path: &Path| should_watch_repo_directory(path, &gitignores, &force_included_paths); + // INVARIANT: gitignore lives only in the descend predicate + // (`should_watch_repo_directory`), never in the emit predicate. The descend + // predicate is consulted only by the inotify backend (Linux) at recursive + // registration time, so pruning gitignored dirs is purely a Linux + // watch-cost control. The emit predicate must NOT filter gitignored paths: + // on FSEvents/ReadDirectoryChangesW (macOS/Windows) the descend predicate is + // ignored and the recursive root watch still delivers gitignored events, and + // on Linux on-demand non-recursive watches likewise rely on those events + // flowing through. It only suppresses non-allowlisted `.git/` internals. WatchFilter::with_filter( Arc::new(should_watch), Arc::new(|path: &Path| !should_ignore_git_path(path)), diff --git a/crates/repo_metadata/src/entry_tests.rs b/crates/repo_metadata/src/entry_tests.rs index b9849daef3..a9659470e0 100644 --- a/crates/repo_metadata/src/entry_tests.rs +++ b/crates/repo_metadata/src/entry_tests.rs @@ -839,6 +839,46 @@ fn should_watch_directory_in_git_path_prunes_non_allowlisted_subtrees() { "/repo/.git/config" ))); } + +/// Documents the watch-filter invariant: gitignore is applied only by the +/// descend predicate (`should_watch_repo_directory`), so gitignored paths are +/// pruned from recursive registration but their events are still emitted (the +/// emit predicate only suppresses non-allowlisted `.git/` internals). +#[test] +fn gitignore_affects_descend_predicate_but_not_emitted_events() { + use std::path::Path; + + use super::{gitignores_for_directory, should_ignore_git_path, should_watch_repo_directory}; + + let temp_dir = tempfile::tempdir().unwrap(); + let root_path = dunce::canonicalize(temp_dir.path()).unwrap(); + fs::write(root_path.join(".gitignore"), "node_modules/\n").unwrap(); + fs::create_dir(root_path.join("node_modules")).unwrap(); + fs::create_dir(root_path.join("src")).unwrap(); + + let gitignores = gitignores_for_directory(&root_path); + let node_modules = root_path.join("node_modules"); + let src = root_path.join("src"); + + // Descend predicate: the gitignored dir is pruned from recursive + // registration, while a tracked dir is still descended into. + assert!(!should_watch_repo_directory( + &node_modules, + &gitignores, + &[] + )); + assert!(should_watch_repo_directory(&src, &gitignores, &[])); + + // Emit predicate building block (`!should_ignore_git_path`): gitignored, + // non-`.git` paths are NOT suppressed, so their events still flow. Only + // non-allowlisted `.git/` internals are suppressed. + assert!(!should_ignore_git_path(&node_modules)); + assert!(!should_ignore_git_path(&node_modules.join("pkg/index.js"))); + assert!(should_ignore_git_path(Path::new( + "/repo/.git/objects/ab/blob" + ))); +} + #[test] fn test_is_shared_git_ref() { use std::path::Path; diff --git a/crates/repo_metadata/src/local_model.rs b/crates/repo_metadata/src/local_model.rs index 4b46208187..b99e416dd5 100644 --- a/crates/repo_metadata/src/local_model.rs +++ b/crates/repo_metadata/src/local_model.rs @@ -164,24 +164,45 @@ impl IndexedRepoState { } } -/// How a repository's root is registered with the filesystem watcher. -#[derive(Debug)] -enum RepoWatchMode { +/// How a repository's ROOT directory is registered with the filesystem watcher. +/// +/// This is orthogonal to the set of on-demand per-directory watches a repo may +/// also hold (see [`RepoWatch::extra_dirs`]): a recursive root can now carry +/// extra non-recursive watches too. +// +// Without `local_fs` there is no watcher, so `NonRecursive` is never +// constructed; the variant still exists to keep the watch-mode model intact. +#[cfg_attr(not(feature = "local_fs"), allow(dead_code))] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RootWatchMode { /// A single recursive watch on the root covers the whole subtree. Used for - /// git repos (which rely on gitignore pruning) and, on macOS/Windows, for - /// lazy non-git roots where a recursive OS watch is cheap. - RecursiveOnRoot, - /// Each loaded directory is watched non-recursively, so the number of - /// watches scales with what the user expands rather than the whole subtree. - /// Used for lazy (non-git) roots on Linux, where per-directory inotify - /// watches are otherwise prohibitively expensive. - /// - /// Carries the set of directories currently watched under the root (the - /// root plus every expanded subdirectory) so they can all be unregistered - /// on teardown. - LazyNonRecursive { - watched_dirs: HashSet, - }, + /// git repos (which rely on gitignore pruning in the watch descend filter) + /// and, on macOS/Windows, for lazy non-git roots where a recursive OS watch + /// is cheap. + Recursive, + /// The root is watched non-recursively; each loaded directory gets its own + /// non-recursive watch, so the number of watches scales with what the user + /// expands rather than the whole subtree. Used for lazy (non-git) roots on + /// Linux, where per-directory inotify watches are otherwise prohibitively + /// expensive. + NonRecursive, +} + +/// Tracks how a repository is registered with the filesystem watcher. +/// +/// `root_mode` describes how the ROOT directory is registered. `extra_dirs` +/// holds the on-demand NON-recursive watches we register on individual +/// subdirectories that the root watch does not cover: every loaded subdir under +/// a non-recursive root, or expanded gitignored dirs under a recursive root on +/// Linux. The root itself is never stored here — it is unregistered directly by +/// its repo path on teardown. In practice `extra_dirs` is only populated on +/// Linux, since other backends deliver gitignored events through the recursive +/// root watch. +#[cfg(feature = "local_fs")] +#[derive(Debug)] +struct RepoWatch { + root_mode: RootWatchMode, + extra_dirs: HashSet, } /// Singleton model for managing local repository metadata. @@ -218,12 +239,11 @@ pub struct LocalRepoMetadataModel { /// alias rather than using a one-to-one bidirectional map. #[cfg(feature = "local_fs")] symlink_targets: HashMap>, - /// How each tracked repository's root is registered with the filesystem - /// watcher. [`RepoWatchMode::LazyNonRecursive`] additionally records the set - /// of directories watched under a lazy (non-git) root so they can all be - /// unregistered on teardown. + /// How each tracked repository is registered with the filesystem watcher, + /// including any on-demand per-directory watches recorded for teardown. See + /// [`RepoWatch`]. #[cfg(feature = "local_fs")] - repo_watch_modes: HashMap, + repo_watches: HashMap, } #[derive(Debug, Clone, Default)] @@ -320,7 +340,7 @@ impl LocalRepoMetadataModel { #[cfg(feature = "local_fs")] symlink_targets: HashMap::new(), #[cfg(feature = "local_fs")] - repo_watch_modes: HashMap::new(), + repo_watches: HashMap::new(), }; cfg_if::cfg_if! { if #[cfg(feature = "local_fs")] { @@ -586,12 +606,20 @@ impl LocalRepoMetadataModel { |model, (mutations, discovered_results, removed_roots, repo_path, lazy_load), ctx| { - // For lazy (non-git) roots watched non-recursively, collect the - // directories added by this batch so we can watch them after the - // tree mutations are applied (keeps live-created dirs fresh). + // Only a non-recursive root (lazy non-git roots on + // Linux) needs to watch newly added directories: nothing + // under such a root is covered by a recursive watch, so + // each new dir needs its own watch. Collect them before + // the mutations are consumed. + // + // Recursive roots don't need this: non-ignored new dirs + // are auto-followed by the recursive backend, and + // gitignored new dirs are added as lazy placeholders that + // get watched only when the user expands them (see + // `load_directory`). let new_dirs: Vec = if matches!( - model.repo_watch_modes.get(&repo_path), - Some(RepoWatchMode::LazyNonRecursive { .. }) + model.repo_watches.get(&repo_path).map(|w| w.root_mode), + Some(RootWatchMode::NonRecursive) ) { mutations .iter() @@ -644,10 +672,11 @@ impl LocalRepoMetadataModel { } } - // Watch any newly added directories now loaded under this lazy - // root. No-ops for git repos / recursively-watched roots. + // Watch any newly added directories under a non-recursive + // root. `new_dirs` is empty for recursive roots, so this + // is a no-op there. for dir in new_dirs { - model.watch_lazy_subdir(&repo_path, &dir, ctx); + model.watch_subdir(&repo_path, &dir, ctx); } }, ); @@ -699,17 +728,17 @@ impl LocalRepoMetadataModel { /// Adds or updates a repository's file tree state. /// - /// `mode` controls how the root is registered with the filesystem watcher: - /// [`RepoWatchMode::RecursiveOnRoot`] registers a single recursive watch + /// `root_mode` controls how the root is registered with the filesystem + /// watcher: [`RootWatchMode::Recursive`] registers a single recursive watch /// (git repos, and lazy roots off Linux), while - /// [`RepoWatchMode::LazyNonRecursive`] registers the root non-recursively - /// so expanded subdirectories can be watched individually. + /// [`RootWatchMode::NonRecursive`] registers the root non-recursively so + /// expanded subdirectories can be watched individually. #[cfg_attr(not(feature = "local_fs"), allow(unused_variables))] fn add_repository_internal( &mut self, repo_path: StandardizedPath, state: FileTreeState, - mode: RepoWatchMode, + root_mode: RootWatchMode, ctx: &mut ModelContext, ) -> Result<(), RepoMetadataError> { let local_path = repo_path @@ -731,14 +760,21 @@ impl LocalRepoMetadataModel { // any). #[cfg(feature = "local_fs")] { - let recursive_mode = match &mode { - RepoWatchMode::RecursiveOnRoot => RecursiveMode::Recursive, - RepoWatchMode::LazyNonRecursive { .. } => RecursiveMode::NonRecursive, + let recursive_mode = match root_mode { + RootWatchMode::Recursive => RecursiveMode::Recursive, + RootWatchMode::NonRecursive => RecursiveMode::NonRecursive, }; - // Capture any prior registration so we can drop stale per-directory - // watches when replacing a lazy registration (e.g. a lazy root being - // upgraded to a git repo). - let previous_mode = self.repo_watch_modes.insert(repo_path.clone(), mode); + // Replace any prior registration, dropping its root watch and stale + // per-directory watches before re-registering (e.g. a lazy + // non-recursive root upgraded to a recursive git repo, whose old + // per-dir watches would otherwise duplicate the recursive coverage). + let previous = self.repo_watches.insert( + repo_path.clone(), + RepoWatch { + root_mode, + extra_dirs: HashSet::new(), + }, + ); if let Some(ref watcher) = self.watcher { let watch_path = local_path.clone(); // Build the gitignore set (root + global) and force-included @@ -747,13 +783,21 @@ impl LocalRepoMetadataModel { // skills). let gitignores = crate::gitignores_for_directory(&watch_path); let force_included_paths = self.force_included_paths.clone(); + let had_previous = previous.is_some(); + let previous_extra: Vec = previous + .map(|prev| { + prev.extra_dirs + .iter() + .filter_map(|dir| dir.to_local_path()) + .collect() + }) + .unwrap_or_default(); watcher.update(ctx, |watcher, _ctx| { - if let Some(RepoWatchMode::LazyNonRecursive { watched_dirs }) = &previous_mode { - for dir in watched_dirs { - if let Some(dir_path) = dir.to_local_path() { - std::mem::drop(watcher.unregister_path(&dir_path)); - } - } + if had_previous { + std::mem::drop(watcher.unregister_path(&watch_path)); + } + for dir in &previous_extra { + std::mem::drop(watcher.unregister_path(dir)); } std::mem::drop(watcher.register_path( &watch_path, @@ -787,23 +831,23 @@ impl LocalRepoMetadataModel { self.standing_results.remove(repo_path); #[cfg(feature = "local_fs")] self.clear_symlink_targets_for_repo(repo_path, ctx); - // Drop the recorded watch mode and unregister from the watcher. + // Drop the recorded watch entry and unregister from the watcher. #[cfg(feature = "local_fs")] { - let watch_mode = self.repo_watch_modes.remove(repo_path); + let removed = self.repo_watches.remove(repo_path); if let Some(ref watcher) = self.watcher { - let paths_to_unregister: Vec = match watch_mode { - // Per-directory watches: unregister every tracked dir - // (root + expanded subdirs). - Some(RepoWatchMode::LazyNonRecursive { watched_dirs }) => watched_dirs - .iter() - .filter_map(|dir| dir.to_local_path()) - .collect(), - // A single recursive watch lives on the root. - Some(RepoWatchMode::RecursiveOnRoot) | None => { - repo_path.to_local_path().into_iter().collect() - } - }; + // Uniform teardown: the root watch lives on the repo path, + // plus any on-demand per-directory watches in `extra_dirs`. + let mut paths_to_unregister: Vec = + repo_path.to_local_path().into_iter().collect(); + if let Some(removed) = removed { + paths_to_unregister.extend( + removed + .extra_dirs + .iter() + .filter_map(|dir| dir.to_local_path()), + ); + } watcher.update(ctx, |watcher, _ctx| { for path in &paths_to_unregister { std::mem::drop(watcher.unregister_path(path)); @@ -921,16 +965,12 @@ impl LocalRepoMetadataModel { // Subdirectories get their own non-recursive watch as they are expanded // (see `load_directory`). macOS/Windows watch a whole tree with a single // OS handle, so recursive watching stays cheap there. - let mode = if cfg!(target_os = "linux") { - // Start by watching just the root; expanded subdirectories are added - // to this set as they load (see `load_directory`). - RepoWatchMode::LazyNonRecursive { - watched_dirs: HashSet::from([path.clone()]), - } + let root_mode = if cfg!(target_os = "linux") { + RootWatchMode::NonRecursive } else { - RepoWatchMode::RecursiveOnRoot + RootWatchMode::Recursive }; - self.add_repository_internal(path.clone(), state, mode, ctx)?; + self.add_repository_internal(path.clone(), state, root_mode, ctx)?; self.lazy_loaded_paths.insert(path.clone(), 1); Ok(()) } @@ -973,10 +1013,11 @@ impl LocalRepoMetadataModel { .load_at_path(dir_path, &mut gitignores) .map_err(RepoMetadataError::BuildTree)?; - // For lazy (non-git) roots watched non-recursively on Linux, start - // watching the directory we just expanded so its direct children stay - // fresh. No-ops for git repos and recursively-watched roots. - self.watch_lazy_subdir(repo_root, dir_path, ctx); + // Start watching the directory we just expanded so its direct children + // stay fresh. For a non-recursive root this covers every expanded + // subdir; for a recursive root it covers gitignored dirs pruned from the + // root watch on Linux. No-op when the root watch already covers it. + self.watch_subdir(repo_root, dir_path, ctx); ctx.emit(RepositoryMetadataEvent::FileTreeEntryUpdated { path: repo_root.clone(), @@ -985,41 +1026,80 @@ impl LocalRepoMetadataModel { Ok(()) } - /// Registers a non-recursive watch on `dir_path` for a lazy (non-git) root - /// that is watched per-directory, recording it so it can be unregistered on - /// teardown. No-ops unless `repo_root` is such a tracked lazy root (only - /// populated on Linux) or the directory is already watched. + /// Registers an on-demand non-recursive watch on `dir_path` when the root + /// watch does not already cover it, recording it in `extra_dirs` so it can + /// be unregistered on teardown. No-ops if `repo_root` is not tracked, the + /// directory is already watched, or the root watch already covers it. + /// + /// Decision by root mode: + /// - [`RootWatchMode::NonRecursive`]: nothing under the root is covered, so + /// every loaded subdir gets its own watch. + /// - [`RootWatchMode::Recursive`]: the root watch already covers non-pruned + /// subtrees; only gitignored dirs are pruned, and only on Linux (other + /// backends ignore the descend filter and still deliver their events), so + /// we watch those to keep expanded gitignored folders fresh. #[cfg(feature = "local_fs")] - fn watch_lazy_subdir( + fn watch_subdir( &mut self, repo_root: &StandardizedPath, dir_path: &StandardizedPath, ctx: &mut ModelContext, ) { - let Some(RepoWatchMode::LazyNonRecursive { watched_dirs }) = - self.repo_watch_modes.get_mut(repo_root) - else { + let Some(repo_watch) = self.repo_watches.get(repo_root) else { return; }; - if !watched_dirs.insert(dir_path.clone()) { + if repo_watch.extra_dirs.contains(dir_path) { // Already watching this directory. return; } - let Some(ref watcher) = self.watcher else { - return; + let root_mode = repo_watch.root_mode; + + let should_watch = match root_mode { + RootWatchMode::NonRecursive => true, + RootWatchMode::Recursive => { + cfg!(target_os = "linux") && self.dir_pruned_from_root_watch(repo_root, dir_path) + } }; + if !should_watch { + return; + } + let Some(local_path) = dir_path.to_local_path() else { return; }; let gitignores = crate::gitignores_for_directory(&local_path); let force_included_paths = self.force_included_paths.clone(); - watcher.update(ctx, |watcher, _ctx| { - std::mem::drop(watcher.register_path( - &local_path, - repo_watch_filter(gitignores, force_included_paths), - RecursiveMode::NonRecursive, - )); - }); + if let Some(repo_watch) = self.repo_watches.get_mut(repo_root) { + repo_watch.extra_dirs.insert(dir_path.clone()); + } + if let Some(ref watcher) = self.watcher { + watcher.update(ctx, |watcher, _ctx| { + std::mem::drop(watcher.register_path( + &local_path, + repo_watch_filter(gitignores, force_included_paths), + RecursiveMode::NonRecursive, + )); + }); + } + } + + /// Returns whether `dir_path` would be pruned from `repo_root`'s recursive + /// watch by the gitignore descend filter (i.e. it is gitignored relative to + /// the repo root). Used to decide whether an expanded directory needs its + /// own watch. + #[cfg(feature = "local_fs")] + fn dir_pruned_from_root_watch( + &self, + repo_root: &StandardizedPath, + dir_path: &StandardizedPath, + ) -> bool { + let Some(IndexedRepoState::Indexed(state)) = self.repositories.get(repo_root) else { + return false; + }; + let Some(local) = dir_path.to_local_path() else { + return false; + }; + Self::path_is_ignored(&local, &state.gitignores) } /// Checks whether the parent directory of `path` is loaded in the given entry. @@ -1458,7 +1538,7 @@ impl LocalRepoMetadataModel { if let Err(e) = model.add_repository_internal( std_repo_path.clone(), state, - RepoWatchMode::RecursiveOnRoot, + RootWatchMode::Recursive, ctx, ) { log::warn!("Failed to add repository {repo_path_str}: {e:?}"); diff --git a/crates/repo_metadata/src/local_model_tests.rs b/crates/repo_metadata/src/local_model_tests.rs index 5f0b0ae3f9..3e77360eba 100644 --- a/crates/repo_metadata/src/local_model_tests.rs +++ b/crates/repo_metadata/src/local_model_tests.rs @@ -20,8 +20,8 @@ use watcher::BulkFilesystemWatcherEvent; use crate::entry::{DirectoryEntry, Entry, FileMetadata}; use crate::file_tree_store::{FileTreeEntry, FileTreeEntryState, FileTreeState}; use crate::local_model::{ - GetContentsArgs, IndexedRepoState, LocalRepoMetadataModel, RepoUpdate, RepoWatchMode, - RepositoryMetadataEvent, + GetContentsArgs, IndexedRepoState, LocalRepoMetadataModel, RepoUpdate, RepositoryMetadataEvent, + RootWatchMode, }; use crate::repositories::DetectedRepositories; use crate::watcher::DirectoryWatcher; @@ -43,7 +43,7 @@ impl LocalRepoMetadataModel { #[cfg(feature = "local_fs")] symlink_targets: Default::default(), #[cfg(feature = "local_fs")] - repo_watch_modes: Default::default(), + repo_watches: Default::default(), } } } @@ -117,7 +117,7 @@ fn repository_indexed_waits_for_pending_repo() { .add_repository_internal( repo_path.clone(), empty_repo_state(&repo_path), - RepoWatchMode::RecursiveOnRoot, + RootWatchMode::Recursive, ctx, ) .expect("repository should index"); @@ -2044,7 +2044,7 @@ fn test_repository_operations_with_standardized_paths() { let result1 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&real_repo).unwrap(), state.clone(), - RepoWatchMode::RecursiveOnRoot, + RootWatchMode::Recursive, ctx, ); assert!(result1.is_ok()); @@ -2053,7 +2053,7 @@ fn test_repository_operations_with_standardized_paths() { let result2 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&symlink_repo).unwrap(), state.clone(), - RepoWatchMode::RecursiveOnRoot, + RootWatchMode::Recursive, ctx, ); assert!(result2.is_ok()); @@ -2062,7 +2062,7 @@ fn test_repository_operations_with_standardized_paths() { let result3 = model.add_repository_internal( StandardizedPath::from_local_canonicalized(&relative_repo).unwrap(), state.clone(), - RepoWatchMode::RecursiveOnRoot, + RootWatchMode::Recursive, ctx, ); assert!(result3.is_ok()); @@ -2152,20 +2152,18 @@ fn index_lazy_loaded_path_tracks_only_root() { model_handle.read(&app, |model, _ctx| { assert!(model.is_lazy_loaded_path(&root)); - let mode = model - .repo_watch_modes + let repo_watch = model + .repo_watches .get(&root) - .expect("watch mode should be recorded"); + .expect("watch should be recorded"); if cfg!(target_os = "linux") { - // Linux: only the root is watched initially. - let RepoWatchMode::LazyNonRecursive { watched_dirs } = mode else { - panic!("expected LazyNonRecursive on linux, got {mode:?}"); - }; - assert_eq!(watched_dirs.len(), 1); - assert!(watched_dirs.contains(&root)); + // Linux: the root is watched non-recursively and no subdirs + // are tracked yet. + assert_eq!(repo_watch.root_mode, RootWatchMode::NonRecursive); + assert!(repo_watch.extra_dirs.is_empty()); } else { // Other platforms: a single recursive watch on the root. - assert!(matches!(mode, RepoWatchMode::RecursiveOnRoot)); + assert_eq!(repo_watch.root_mode, RootWatchMode::Recursive); } }); }); @@ -2196,20 +2194,19 @@ fn load_directory_tracks_expanded_subdir_for_lazy_root() { }); model_handle.read(&app, |model, _ctx| { - let mode = model - .repo_watch_modes + let repo_watch = model + .repo_watches .get(&root) - .expect("watch mode should be recorded"); + .expect("watch should be recorded"); if cfg!(target_os = "linux") { - // Linux: the expanded subdir is now watched alongside the root. - let RepoWatchMode::LazyNonRecursive { watched_dirs } = mode else { - panic!("expected LazyNonRecursive on linux, got {mode:?}"); - }; - assert!(watched_dirs.contains(&root)); - assert!(watched_dirs.contains(&sub)); + // Linux: the expanded subdir now has its own non-recursive + // watch; the root is never stored in `extra_dirs`. + assert_eq!(repo_watch.root_mode, RootWatchMode::NonRecursive); + assert!(repo_watch.extra_dirs.contains(&sub)); + assert!(!repo_watch.extra_dirs.contains(&root)); } else { // Other platforms: a single recursive watch on the root. - assert!(matches!(mode, RepoWatchMode::RecursiveOnRoot)); + assert_eq!(repo_watch.root_mode, RootWatchMode::Recursive); } }); }); @@ -2233,23 +2230,137 @@ fn recursive_repo_uses_recursive_watch_mode() { .add_repository_internal( repo_path.clone(), empty_repo_state(&repo_path), - RepoWatchMode::RecursiveOnRoot, + RootWatchMode::Recursive, ctx, ) .expect("repo should index"); }); model_handle.read(&app, |model, _ctx| { - assert!(matches!( - model.repo_watch_modes.get(&repo_path), - Some(RepoWatchMode::RecursiveOnRoot) - )); + let repo_watch = model + .repo_watches + .get(&repo_path) + .expect("watch should be recorded"); + assert_eq!(repo_watch.root_mode, RootWatchMode::Recursive); + assert!(repo_watch.extra_dirs.is_empty()); assert!(!model.is_lazy_loaded_path(&repo_path)); }); }); }); } +/// Expanding a gitignored directory inside a git repo registers an on-demand +/// non-recursive watch for it on Linux (where the recursive root watch prunes +/// gitignored dirs), while other platforms rely on the recursive root watch. +#[cfg(feature = "local_fs")] +#[test] +fn load_directory_watches_expanded_gitignored_dir_for_git_repo() { + VirtualFS::test("git_repo_gitignored_expand", |dirs, mut vfs| { + vfs.mkdir("repo/node_modules/pkg") + .with_files(vec![Stub::FileWithContent( + "repo/.gitignore", + "node_modules/\n", + )]); + let repo_path = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("repo")).unwrap(); + let node_modules = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("repo/node_modules")) + .unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + let (gitignore, _) = Gitignore::new(dirs.tests().join("repo/.gitignore")); + let root = Entry::Directory(DirectoryEntry { + path: repo_path.clone(), + children: Vec::new(), + ignored: false, + loaded: true, + }); + let state = FileTreeState::new(root, vec![gitignore], None); + + model_handle.update(&mut app, |model, ctx| { + model + .add_repository_internal( + repo_path.clone(), + state, + RootWatchMode::Recursive, + ctx, + ) + .expect("repo should index"); + model + .load_directory(&repo_path, &node_modules, ctx) + .expect("should load gitignored dir"); + }); + + model_handle.read(&app, |model, _ctx| { + let repo_watch = model + .repo_watches + .get(&repo_path) + .expect("watch should be recorded"); + assert_eq!(repo_watch.root_mode, RootWatchMode::Recursive); + if cfg!(target_os = "linux") { + // Linux prunes node_modules from the recursive root watch, so + // expanding it registers an on-demand non-recursive watch. + assert!(repo_watch.extra_dirs.contains(&node_modules)); + } else { + // Other backends still deliver gitignored events through the + // recursive root watch, so no extra watch is registered. + assert!(repo_watch.extra_dirs.is_empty()); + } + }); + }); + }); +} + +/// Removing a git repo clears its tracked watch entry (root plus any on-demand +/// per-directory watches for expanded gitignored dirs). +#[cfg(feature = "local_fs")] +#[test] +fn remove_repository_clears_extra_dir_watches() { + VirtualFS::test("git_repo_remove_clears_extra", |dirs, mut vfs| { + vfs.mkdir("repo/build/out") + .with_files(vec![Stub::FileWithContent("repo/.gitignore", "build/\n")]); + let repo_path = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("repo")).unwrap(); + let build = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("repo/build")).unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + let (gitignore, _) = Gitignore::new(dirs.tests().join("repo/.gitignore")); + let root = Entry::Directory(DirectoryEntry { + path: repo_path.clone(), + children: Vec::new(), + ignored: false, + loaded: true, + }); + let state = FileTreeState::new(root, vec![gitignore], None); + + model_handle.update(&mut app, |model, ctx| { + model + .add_repository_internal( + repo_path.clone(), + state, + RootWatchMode::Recursive, + ctx, + ) + .expect("repo should index"); + model + .load_directory(&repo_path, &build, ctx) + .expect("should load gitignored dir"); + model + .remove_repository(&repo_path, ctx) + .expect("repo should be removed"); + }); + + model_handle.read(&app, |model, _ctx| { + assert!(!model.repo_watches.contains_key(&repo_path)); + assert!(model.repository_state(&repo_path).is_none()); + }); + }); + }); +} + /// Tearing down a lazy root clears all of its tracked per-directory watches and /// removes the repository state. #[cfg(feature = "local_fs")] @@ -2275,7 +2386,7 @@ fn remove_lazy_loaded_path_clears_tracked_watches() { }); model_handle.read(&app, |model, _ctx| { - assert!(!model.repo_watch_modes.contains_key(&root)); + assert!(!model.repo_watches.contains_key(&root)); assert!(!model.is_lazy_loaded_path(&root)); assert!(model.repository_state(&root).is_none()); }); From 03ab7d283623c066d719790055e66252502218d2 Mon Sep 17 00:00:00 2001 From: Yunfan Yang Date: Thu, 4 Jun 2026 13:27:09 -0400 Subject: [PATCH 4/6] comment --- crates/repo_metadata/src/local_model.rs | 50 +++++++++++ crates/repo_metadata/src/local_model_tests.rs | 84 +++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/crates/repo_metadata/src/local_model.rs b/crates/repo_metadata/src/local_model.rs index b99e416dd5..668a89edf1 100644 --- a/crates/repo_metadata/src/local_model.rs +++ b/crates/repo_metadata/src/local_model.rs @@ -672,6 +672,15 @@ impl LocalRepoMetadataModel { } } + // Drop per-directory watches for any directory that was + // deleted or moved away (along with their tracked + // descendants). Without this their stale `extra_dirs` + // entries would make `watch_subdir` skip re-watching if a + // directory is later recreated at the same path. + for removed in &removed_roots { + model.unwatch_removed_subtree(&repo_path, removed, ctx); + } + // Watch any newly added directories under a non-recursive // root. `new_dirs` is empty for recursive roots, so this // is a no-op there. @@ -1083,6 +1092,47 @@ impl LocalRepoMetadataModel { } } + /// Drops any on-demand per-directory watches at or under `removed_path` when + /// a directory is deleted or moved away. Each stale path is unregistered from + /// the watcher and removed from `extra_dirs`. Without this, a directory + /// recreated at the same path would be skipped by [`watch_subdir`] (which + /// sees the stale `extra_dirs` entry) and never receive a fresh watch. + #[cfg(feature = "local_fs")] + fn unwatch_removed_subtree( + &mut self, + repo_root: &StandardizedPath, + removed_path: &StandardizedPath, + ctx: &mut ModelContext, + ) { + let Some(repo_watch) = self.repo_watches.get_mut(repo_root) else { + return; + }; + // The removed directory plus any tracked descendants are now stale. + // `starts_with` is component-aware and matches the path itself, so this + // covers both an exact removed dir and everything expanded beneath it. + let stale: Vec = repo_watch + .extra_dirs + .iter() + .filter(|dir| dir.starts_with(removed_path)) + .cloned() + .collect(); + if stale.is_empty() { + return; + } + for dir in &stale { + repo_watch.extra_dirs.remove(dir); + } + if let Some(ref watcher) = self.watcher { + let local_paths: Vec = + stale.iter().filter_map(|dir| dir.to_local_path()).collect(); + watcher.update(ctx, |watcher, _ctx| { + for path in &local_paths { + std::mem::drop(watcher.unregister_path(path)); + } + }); + } + } + /// Returns whether `dir_path` would be pruned from `repo_root`'s recursive /// watch by the gitignore descend filter (i.e. it is gitignored relative to /// the repo root). Used to decide whether an expanded directory needs its diff --git a/crates/repo_metadata/src/local_model_tests.rs b/crates/repo_metadata/src/local_model_tests.rs index 3e77360eba..184866ccb6 100644 --- a/crates/repo_metadata/src/local_model_tests.rs +++ b/crates/repo_metadata/src/local_model_tests.rs @@ -2393,3 +2393,87 @@ fn remove_lazy_loaded_path_clears_tracked_watches() { }); }); } + +/// Deleting an expanded subdirectory of a lazy non-recursive root drops its +/// per-directory watch (and any tracked descendants), so the entry no longer +/// lingers in `extra_dirs`. Otherwise a directory recreated at the same path +/// would be skipped by `watch_subdir` and never re-watched. +#[cfg(all(unix, feature = "local_fs"))] +#[test] +fn deleted_subdir_drops_its_tracked_watch() { + VirtualFS::test("lazy_delete_subdir_drops_watch", |dirs, mut vfs| { + vfs.mkdir("workspace/sub/inner"); + let root = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace")).unwrap(); + let sub = StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace/sub")) + .unwrap(); + let inner = + StandardizedPath::from_local_canonicalized(&dirs.tests().join("workspace/sub/inner")) + .unwrap(); + let sub_local = sub.to_local_path().unwrap(); + + App::test((), |mut app| async move { + let model_handle = app.add_model(|_| LocalRepoMetadataModel::new_for_test()); + model_handle.update(&mut app, |model, ctx| { + model + .index_lazy_loaded_path(&root, ctx) + .expect("should index lazy path"); + model + .load_directory(&root, &sub, ctx) + .expect("should load subdir"); + model + .load_directory(&root, &inner, ctx) + .expect("should load nested subdir"); + }); + + // Only a non-recursive (Linux) root tracks per-directory watches. + if !cfg!(target_os = "linux") { + return; + } + + model_handle.read(&app, |model, _ctx| { + let repo_watch = model.repo_watches.get(&root).expect("watch recorded"); + assert!(repo_watch.extra_dirs.contains(&sub)); + assert!(repo_watch.extra_dirs.contains(&inner)); + }); + + // Wait for the spawned watcher-event handling to finish by + // listening for the tree update it emits. + let (tx, rx) = oneshot::channel(); + let sender = Rc::new(RefCell::new(Some(tx))); + let root_for_event = root.clone(); + app.update(|ctx| { + ctx.subscribe_to_model(&model_handle, move |_, event, _ctx| { + if let RepositoryMetadataEvent::FileTreeEntryUpdated { path, .. } = event { + if path == &root_for_event { + if let Some(tx) = sender.borrow_mut().take() { + let _ = tx.send(()); + } + } + } + }); + }); + + model_handle.update(&mut app, |model, ctx| { + model.handle_watcher_event( + &BulkFilesystemWatcherEvent { + deleted: std::collections::HashSet::from([sub_local]), + ..Default::default() + }, + ctx, + ); + }); + rx.with_timeout(Duration::from_secs(5)) + .await + .expect("timed out waiting for tree update") + .expect("tree update sender dropped"); + + model_handle.read(&app, |model, _ctx| { + let repo_watch = model.repo_watches.get(&root).expect("watch recorded"); + // The deleted subdir and its tracked descendant are dropped. + assert!(!repo_watch.extra_dirs.contains(&sub)); + assert!(!repo_watch.extra_dirs.contains(&inner)); + }); + }); + }); +} From a0b03b2cce9b494d63e69c208103a79416884f2f Mon Sep 17 00:00:00 2001 From: Yunfan Yang Date: Thu, 4 Jun 2026 16:23:28 -0400 Subject: [PATCH 5/6] wasm --- crates/repo_metadata/src/local_model.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/repo_metadata/src/local_model.rs b/crates/repo_metadata/src/local_model.rs index 668a89edf1..f18ad539ed 100644 --- a/crates/repo_metadata/src/local_model.rs +++ b/crates/repo_metadata/src/local_model.rs @@ -4,7 +4,9 @@ //! This module provides a singleton model that manages repository metadata across //! all repositories tracked by Warp. -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; +#[cfg(feature = "local_fs")] +use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::Arc; From b614b4ea05b54f888c3c006a802bbe6f9be651d1 Mon Sep 17 00:00:00 2001 From: Yunfan Yang Date: Thu, 4 Jun 2026 23:22:08 -0400 Subject: [PATCH 6/6] comment --- crates/repo_metadata/src/entry.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/crates/repo_metadata/src/entry.rs b/crates/repo_metadata/src/entry.rs index ff5b7b3784..c5fedbbaa8 100644 --- a/crates/repo_metadata/src/entry.rs +++ b/crates/repo_metadata/src/entry.rs @@ -1027,15 +1027,6 @@ pub fn repo_watch_filter( ) -> WatchFilter { let should_watch = move |path: &Path| should_watch_repo_directory(path, &gitignores, &force_included_paths); - // INVARIANT: gitignore lives only in the descend predicate - // (`should_watch_repo_directory`), never in the emit predicate. The descend - // predicate is consulted only by the inotify backend (Linux) at recursive - // registration time, so pruning gitignored dirs is purely a Linux - // watch-cost control. The emit predicate must NOT filter gitignored paths: - // on FSEvents/ReadDirectoryChangesW (macOS/Windows) the descend predicate is - // ignored and the recursive root watch still delivers gitignored events, and - // on Linux on-demand non-recursive watches likewise rely on those events - // flowing through. It only suppresses non-allowlisted `.git/` internals. WatchFilter::with_filter( Arc::new(should_watch), Arc::new(|path: &Path| !should_ignore_git_path(path)),