Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions crates/repo_metadata/src/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -644,13 +644,70 @@ pub fn should_ignore_git_path(path: &Path) -> bool {
&& !is_tracking_state_git_file(path)
}

/// Leaf directory names that are pruned from recursive watching.
///
/// These are well-known directory names that typically contain deeply nested
/// dependency or build trees and are almost universally gitignored. Pruning
/// these from the inotify recursive walk prevents registering hundreds of
/// thousands of watches — the dominant source of memory growth on Linux (see
/// Sentry issue 7259255054 where
/// `notify::inotify::EventLoop::add_single_watch` allocated 11 GB).
///
/// Entries are compared for exact equality against a path's final component
/// (see `is_heavy_directory`), so `node_modules_backup` does not match.
const HEAVY_DIRECTORY_NAMES: &[&str] = &[
// Conventionally: installed JavaScript/Node.js dependencies.
"node_modules",
// Conventionally: Python virtual environment.
".venv",
// Conventionally: Python bytecode cache.
"__pycache__",
// Conventionally: caches/environments of Python tooling (mypy, pytest, tox).
".mypy_cache",
".pytest_cache",
".tox",
// Conventionally: Gradle build state.
".gradle",
// Conventionally: generic tool cache directory.
".cache",
];

/// Reports whether the final component of `path` is one of the names listed
/// in [`HEAVY_DIRECTORY_NAMES`].
///
/// Only the leaf name is inspected: `repo/node_modules` matches, while
/// `repo/src/utils` does not. Paths without a final component, or whose final
/// component is not valid UTF-8, are never considered heavy.
fn is_heavy_directory(path: &Path) -> bool {
    // No leaf name (or a non-UTF-8 one) cannot equal any table entry.
    let Some(leaf) = path.file_name().and_then(|os_name| os_name.to_str()) else {
        return false;
    };
    HEAVY_DIRECTORY_NAMES.iter().any(|&heavy| heavy == leaf)
}

/// Returns `true` when the directory at `path` should be registered for
/// watching, taking only `.git/` subtree pruning into account.
///
/// Any path for which `is_git_internal_path` is `false` is always watched;
/// for the remaining paths the verdict comes from
/// `should_watch_directory_git_internal`.
///
/// Kept for backward compatibility. Unlike [`should_watch_directory`], this
/// function does *not* prune well-known heavy directories (e.g.
/// `node_modules`); callers that need that pruning should use
/// [`should_watch_directory`] instead.
pub fn should_watch_directory_in_git_path(path: &Path) -> bool {
    // Short-circuits: the allowlist is consulted only for git-internal paths.
    !is_git_internal_path(path) || should_watch_directory_git_internal(path)
}

/// Returns `true` when the directory at `path` should be registered for watching.
///
/// * For paths inside `.git/` (as reported by `is_git_internal_path`) the
///   decision is delegated entirely to the `.git/` allowlist logic in
///   `should_watch_directory_git_internal`.
/// * For every other path, returns `false` when the leaf name is a well-known
///   heavy directory (e.g. `node_modules`) that would cause an excessive
///   inotify watch count on Linux, and `true` otherwise.
pub fn should_watch_directory(path: &Path) -> bool {
    // The `.git` check must come first. Heavy names *can* occur inside
    // `.git/`: a branch called `node_modules/fix` stores its ref under
    // `.git/refs/heads/node_modules/`. Only the allowlist may decide what
    // happens to such directories, so the name-based pruning below is
    // restricted to paths outside `.git/`.
    if is_git_internal_path(path) {
        return should_watch_directory_git_internal(path);
    }

    // Outside `.git/`: watch everything except known-heavy directories.
    !is_heavy_directory(path)
}

/// Shared helper that implements the `.git/`-subtree pruning logic.
/// Both `should_watch_directory` and the backward-compatible
/// `should_watch_directory_in_git_path` delegate here.
fn should_watch_directory_git_internal(path: &Path) -> bool {
// Worktree paths: `.git/worktrees/<name>/...` only descends along the
// path needed to reach the allowlisted children (HEAD, index.lock,
// config.worktree, refs/heads/*, refs/remotes/<r>/*).
Expand Down Expand Up @@ -717,7 +774,7 @@ fn descend_allowlist_matches(suffix: &[Component<'_>]) -> bool {
/// Builds the [`WatchFilter`] used for repository watching.
///
/// Two predicates are supplied to `WatchFilter::with_filter`:
///
/// 1. [`should_watch_directory`], which rejects non-allowlisted `.git/`
///    subtrees and well-known heavy directories such as `node_modules`.
/// 2. A closure that accepts exactly the paths for which
///    [`should_ignore_git_path`] returns `false`.
///
/// NOTE(review): presumably the first predicate selects directories to
/// register and the second selects paths whose events are kept — confirm
/// against `WatchFilter::with_filter`.
#[cfg(feature = "local_fs")]
pub fn repo_watch_filter() -> WatchFilter {
    WatchFilter::with_filter(
        Arc::new(should_watch_directory),
        Arc::new(|path: &Path| !should_ignore_git_path(path)),
    )
}
Expand Down
32 changes: 32 additions & 0 deletions crates/repo_metadata/src/entry_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,38 @@ fn should_watch_directory_in_git_path_prunes_non_allowlisted_subtrees() {
"/repo/.git/config"
)));
}
/// Verifies that `should_watch_directory` keeps ordinary directories, prunes
/// every well-known heavy directory name (at any depth), matches names
/// exactly, and still applies the `.git/` subtree pruning.
#[test]
fn should_watch_directory_prunes_heavy_directories() {
    use std::path::Path;

    use super::should_watch_directory;

    // Regular source directories should be watched. The last two entries
    // merely resemble heavy names and guard against prefix/substring matching.
    for watched in [
        "/repo/src",
        "/repo/src/utils",
        "/repo/lib",
        "/repo/node_modules_backup",
        "/repo/cache",
    ] {
        assert!(
            should_watch_directory(Path::new(watched)),
            "expected {watched} to be watched"
        );
    }

    // Heavy directories should be pruned, both at the repository root and
    // when nested deeper in the tree.
    for pruned in [
        "/repo/node_modules",
        "/repo/.venv",
        "/repo/__pycache__",
        "/repo/.mypy_cache",
        "/repo/.pytest_cache",
        "/repo/.tox",
        "/repo/.gradle",
        "/repo/.cache",
        "/repo/packages/frontend/node_modules",
        "/repo/backend/.venv",
    ] {
        assert!(
            !should_watch_directory(Path::new(pruned)),
            "expected {pruned} to be pruned"
        );
    }

    // Git subtrees should still be pruned by the existing logic.
    assert!(!should_watch_directory(Path::new("/repo/.git/objects")));
    assert!(!should_watch_directory(Path::new("/repo/.git/hooks")));
    assert!(should_watch_directory(Path::new("/repo/.git")));
    assert!(should_watch_directory(Path::new("/repo/.git/refs/heads")));
}

#[test]
fn test_is_shared_git_ref() {
use std::path::Path;
Expand Down