From cf3ee43ac0dd0984691e740dfb78c26bd74895a9 Mon Sep 17 00:00:00 2001 From: Oz Date: Wed, 3 Jun 2026 13:45:28 +0000 Subject: [PATCH] fix: filter gitignored directories from inotify recursive watches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The repo_watch_filter() used by repository and metadata watchers only pruned .git/ internal directories but did not skip gitignored directories (node_modules/, target/, etc.). When watching a repository recursively, inotify registers watches on every subdirectory — for projects with large ignored trees this consumed 11+ GB of memory from HashMap growth and rehashing in the notify crate's EventLoop. Changes: - Add repo_watch_filter_with_gitignores() that combines git-internal path filtering with gitignore-based directory pruning in the descend predicate, following the pattern already used by the codebase indexer. - Update DirectoryWatcher::start_watching_directory() and LocalRepoMetadataModel::add_repository_internal() to load root-level gitignore rules and pass them to the new filter. - Fix FileModel::register_file_path() and fallback_to_individual_watchers() to use NonRecursive mode on the parent directory (matching open()'s existing correct behavior) instead of Recursive on the file path. Co-Authored-By: Oz --- crates/repo_metadata/src/entry.rs | 21 +++++++++++++++++++++ crates/repo_metadata/src/local_model.rs | 5 +++-- crates/repo_metadata/src/watcher.rs | 6 ++++-- crates/warp_files/src/lib.rs | 25 +++++++++++++++++++------ 4 files changed, 47 insertions(+), 10 deletions(-) diff --git a/crates/repo_metadata/src/entry.rs b/crates/repo_metadata/src/entry.rs index 614794796d..e0bf87483d 100644 --- a/crates/repo_metadata/src/entry.rs +++ b/crates/repo_metadata/src/entry.rs @@ -722,6 +722,27 @@ pub fn repo_watch_filter() -> WatchFilter { ) } +/// Returns a [`WatchFilter`] that combines git-internal path filtering with +/// gitignore-based directory pruning. +/// +/// The descend predicate prunes both `.git/` internals (same as [`repo_watch_filter`]) +/// and directories matched by the provided gitignore rules (e.g. `node_modules/`, +/// `target/`). This prevents the recursive walk from registering inotify watches on +/// large ignored directory trees that can consume gigabytes of memory. +/// +/// The emit predicate is unchanged: it forwards everything outside `.git/` plus +/// allowlisted files inside `.git/`. +#[cfg(feature = "local_fs")] +pub fn repo_watch_filter_with_gitignores(gitignores: Arc>) -> WatchFilter { + WatchFilter::with_filter( + Arc::new(move |path: &Path| { + should_watch_directory_in_git_path(path) + && !matches_gitignores(path, true, gitignores.as_slice(), true) + }), + Arc::new(|path: &Path| !should_ignore_git_path(path)), + ) +} + /// Determines whether a file should be parsed by a treesitter query. For now the main criteria is it shouldn't /// exceed the given file size limit. pub fn is_file_parsable(path: &Path) -> Result { diff --git a/crates/repo_metadata/src/local_model.rs b/crates/repo_metadata/src/local_model.rs index c929e99751..98e042739f 100644 --- a/crates/repo_metadata/src/local_model.rs +++ b/crates/repo_metadata/src/local_model.rs @@ -29,7 +29,7 @@ use crate::{gitignores_for_directory, matches_gitignores, RepoMetadataError}; cfg_if::cfg_if! { if #[cfg(feature = "local_fs")] { use notify_debouncer_full::notify::RecursiveMode; - use crate::entry::repo_watch_filter; + use crate::entry::repo_watch_filter_with_gitignores; use crate::repositories::{DetectedRepositories, DetectedRepositoriesEvent}; use watcher::{BulkFilesystemWatcher, BulkFilesystemWatcherEvent}; use warpui_core::SingletonEntity as _; @@ -431,10 +431,11 @@ impl LocalRepoMetadataModel { { if let Some(ref watcher) = self.watcher { let watch_path = local_path.clone(); + let gitignores = Arc::new(crate::entry::gitignores_for_directory(&watch_path)); watcher.update(ctx, |watcher, _ctx| { std::mem::drop(watcher.register_path( &watch_path, - repo_watch_filter(), + repo_watch_filter_with_gitignores(gitignores), RecursiveMode::Recursive, )); }); diff --git a/crates/repo_metadata/src/watcher.rs b/crates/repo_metadata/src/watcher.rs index 3790ff2e45..4fdfc9c81b 100644 --- a/crates/repo_metadata/src/watcher.rs +++ b/crates/repo_metadata/src/watcher.rs @@ -321,14 +321,16 @@ impl DirectoryWatcher { let local_path = directory_path.to_local_path(); let registration_future = if let Some(ref watcher) = self.watcher { if let Some(local_path) = local_path.clone() { + let gitignores = + std::sync::Arc::new(crate::entry::gitignores_for_directory(&local_path)); watcher.update(ctx, |watcher, _ctx| { use notify_debouncer_full::notify::RecursiveMode; - use crate::entry::repo_watch_filter; + use crate::entry::repo_watch_filter_with_gitignores; Some(watcher.register_path( &local_path, - repo_watch_filter(), + repo_watch_filter_with_gitignores(gitignores), RecursiveMode::Recursive, )) }) diff --git a/crates/warp_files/src/lib.rs b/crates/warp_files/src/lib.rs index 3e13c58628..0cecfd5df5 100644 --- a/crates/warp_files/src/lib.rs +++ b/crates/warp_files/src/lib.rs @@ -362,12 +362,19 @@ impl FileModel { .insert(file_path.to_path_buf(), repo_root); WatcherType::Repository } else { - // Fallback to individual file watcher + // Fallback to individual file watcher. + // Watch the parent directory (NonRecursive) instead of the file + // itself so the watch survives editors that use a + // delete+create/rename pattern (vim, sed -i, etc.). + let watch_path = file_path + .parent() + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| file_path.to_path_buf()); self.watcher.update(ctx, |watcher, _ctx| { std::mem::drop(watcher.register_path( - file_path, + &watch_path, WatchFilter::accept_all(), - RecursiveMode::Recursive, + RecursiveMode::NonRecursive, )); }); WatcherType::Individual @@ -1152,12 +1159,18 @@ impl FileModel { } } - // Register individual file watcher + // Register individual file watcher on the parent directory + // (NonRecursive) so the watch survives delete+create/rename + // patterns and avoids creating recursive watches on the path. + let watch_path = path + .parent() + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| path.clone()); self.watcher.update(ctx, |watcher, _ctx| { std::mem::drop(watcher.register_path( - &path, + &watch_path, WatchFilter::accept_all(), - RecursiveMode::Recursive, + RecursiveMode::NonRecursive, )); }); }