diff --git a/cache_fix_analysis.md b/cache_fix_analysis.md new file mode 100644 index 000000000..fb11644d7 --- /dev/null +++ b/cache_fix_analysis.md @@ -0,0 +1,101 @@ +# Git-Machete Cache Issue Analysis + +## Problem Description + +When running `git machete traverse -F` after a PR has been merged remotely, the first run doesn't detect the merged PR, but the second run does. This happens due to caching inconsistencies between git-machete's internal cache layers. + +## Root Cause + +### Cache Architecture +Git-machete has two cache layers: + +1. **GitContext caches** (`git_operations.py`): + - `__commit_hash_by_revision_cached` + - `__reflogs_cached` + - `__local_branches_cached` + - `__remote_branches_cached` + - etc. + +2. **MacheteClient caches** (`client.py`): + - `__branch_pairs_by_hash_in_reflog` + +### The Issue +1. `git machete traverse -F` calls `git.fetch_remote()` which flushes GitContext caches +2. However, `MacheteClient.__branch_pairs_by_hash_in_reflog` is NOT cleared +3. This cache is used by `is_merged_to()` for merge detection via reflog analysis +4. Stale cache causes missed merge detection on first run +5. Second run works because new CLI process creates fresh MacheteClient instance + +### Key Code Locations + +**Fetch with cache flush** (`client.py:687-692`): +```python +if opt_fetch: + for rem in self.__git.get_remotes(): + print(f"Fetching {bold(rem)}...") + self.__git.fetch_remote(rem) # This flushes GitContext caches +``` + +**MacheteClient cache not cleared** (`client.py:2031-2032`): +```python +def flush_caches(self) -> None: + self.__branch_pairs_by_hash_in_reflog = None # Only clears this cache +``` + +**GitContext owner relationship** (`git_operations.py:202-204`): +```python +def flush_caches(self) -> None: + if self.owner: # This should be MacheteClient + self.owner.flush_caches() # But owner is not always set +``` + +## Solutions + +### Immediate Workarounds + +#### 1. Double Traverse Pattern +```bash +# Run twice - second run will work correctly +git machete traverse -F +git machete traverse -F +``` + +#### 2. Manual Cache Refresh +```bash +# Force cache refresh by checking status first +git machete status > /dev/null +git machete traverse -F +``` + +#### 3. Explicit Fetch + Traverse +```bash +# Separate fetch from traverse +git fetch --prune --all +git machete traverse +``` + +### Long-term Solutions + +#### 1. Enhanced Wrapper Script +Create a wrapper that ensures proper cache management. + +#### 2. Upstream Fix +The proper fix would be to ensure `GitContext.owner` is set to `MacheteClient` and that `fetch_remote()` properly triggers `MacheteClient.flush_caches()`. + +## Testing the Issue + +To reproduce: +1. Have a PR merged remotely +2. Run `git machete traverse -F` - won't detect merge +3. Run again immediately - will detect merge +4. The difference is the fresh MacheteClient instance + +## Verification + +Check if the issue affects your setup: +```bash +# After a PR is merged remotely: +git machete status | grep -E "(merged|red)" # Note any red branches +git machete traverse -F # See if it offers to slide out merged branches +git machete traverse -F # Second run should work if first didn't +``` diff --git a/git_machete/git_operations.py b/git_machete/git_operations.py index 431fbad03..fc7ce983d 100644 --- a/git_machete/git_operations.py +++ b/git_machete/git_operations.py @@ -242,8 +242,9 @@ def __init__(self) -> None: self.__tree_hash_by_commit_hash_cached: Optional[Dict[FullCommitHash, Optional[FullTreeHash]]] = None def flush_caches(self) -> None: - if self.owner: # pragma: no branch - self.owner.flush_caches() + # Clear GitContext caches first, BEFORE calling owner.flush_caches(). + # This ensures that when MacheteClient caches are cleared and potentially rebuilt, + # they will use fresh GitContext data (especially reflogs) rather than stale cached data. self.__commit_hash_by_revision_cached = None self.__committer_unix_timestamp_by_revision_cached = None self.__config_cached = None @@ -254,6 +255,10 @@ def flush_caches(self) -> None: self.__remotes_cached = None self.__removed_from_remote = None self.__short_commit_hash_by_revision_cached = {} + # Call owner.flush_caches() AFTER clearing our caches. + # This way, if owner cache rebuild is triggered, it will use fresh data. + if self.owner: # pragma: no branch + self.owner.flush_caches() def _run_git(self, git_cmd: str, *args: str, flush_caches: bool, allow_non_zero: bool = False) -> int: exit_code = utils.run_cmd(*GIT_EXEC, git_cmd, *args)