-
Notifications
You must be signed in to change notification settings - Fork 2
feat: sync from remote loop prevention for self-authored commits #22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
JohnRDOrazio
wants to merge
1
commit into
dev
Choose a base branch
from
feat/upstream-sync-loop-prevention
base: dev
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| """OntoKit application-wide constants.""" | ||
|
|
||
| # Committer identities used by OntoKit for automated commits. | ||
| # Any commit with one of these committer emails is considered OntoKit-authored | ||
| # and should be excluded from upstream sync processing to prevent feedback loops. | ||
| ONTOKIT_COMMITTER_NAME = "OntoKit" | ||
| ONTOKIT_COMMITTER_EMAIL = "noreply@ontokit.dev" | ||
|
|
||
| ONTOKIT_SYNC_COMMITTER_NAME = "OntoKit Sync" | ||
| ONTOKIT_SYNC_COMMITTER_EMAIL = "sync@ontokit.dev" | ||
|
|
||
| # Set of all emails used by OntoKit as committer identity. | ||
| # Used by the webhook handler to detect and skip self-authored commits. | ||
| ONTOKIT_COMMITTER_EMAILS: frozenset[str] = frozenset( | ||
| {ONTOKIT_COMMITTER_EMAIL, ONTOKIT_SYNC_COMMITTER_EMAIL} | ||
| ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| """Tests for upstream sync loop prevention logic.""" | ||
|
|
||
| from typing import Any | ||
|
|
||
| from ontokit.core.constants import ( | ||
| ONTOKIT_COMMITTER_EMAIL, | ||
| ONTOKIT_COMMITTER_EMAILS, | ||
| ONTOKIT_SYNC_COMMITTER_EMAIL, | ||
| ) | ||
|
|
||
|
|
||
| def _make_commit( | ||
| email: str, | ||
| added: list[str] | None = None, | ||
| modified: list[str] | None = None, | ||
| ) -> dict[str, Any]: | ||
| """Create a minimal GitHub webhook commit payload.""" | ||
| return { | ||
| "id": "abc123", | ||
| "committer": {"name": "Test", "email": email}, | ||
| "added": added or [], | ||
| "modified": modified or [], | ||
| } | ||
|
|
||
|
|
||
| def _filter_external(commits: list[dict[str, Any]]) -> list[dict[str, Any]]: | ||
| """Reproduce the filtering logic from the webhook handler.""" | ||
| return [ | ||
| c for c in commits if c.get("committer", {}).get("email") not in ONTOKIT_COMMITTER_EMAILS | ||
| ] | ||
|
|
||
|
|
||
| class TestCommitterEmailConstants: | ||
| """Verify identity constants are correctly defined.""" | ||
|
|
||
| def test_ontokit_emails_in_set(self) -> None: | ||
| assert ONTOKIT_COMMITTER_EMAIL in ONTOKIT_COMMITTER_EMAILS | ||
| assert ONTOKIT_SYNC_COMMITTER_EMAIL in ONTOKIT_COMMITTER_EMAILS | ||
|
|
||
| def test_set_is_frozen(self) -> None: | ||
| assert isinstance(ONTOKIT_COMMITTER_EMAILS, frozenset) | ||
|
|
||
|
|
||
| class TestWebhookCommitFiltering: | ||
| """Test the commit filtering logic used in the push webhook handler.""" | ||
|
|
||
| def test_all_ontokit_commits_filtered(self) -> None: | ||
| """Push with only OntoKit commits → no external commits → skip sync.""" | ||
| commits = [ | ||
| _make_commit(ONTOKIT_COMMITTER_EMAIL, modified=["ontology.ttl"]), | ||
| _make_commit(ONTOKIT_SYNC_COMMITTER_EMAIL, modified=["ontology.ttl"]), | ||
| ] | ||
| assert _filter_external(commits) == [] | ||
|
|
||
| def test_external_commits_pass_through(self) -> None: | ||
| """Push with external commits → they pass through the filter.""" | ||
| commits = [ | ||
| _make_commit("dev@example.com", modified=["ontology.ttl"]), | ||
| ] | ||
| result = _filter_external(commits) | ||
| assert len(result) == 1 | ||
| assert result[0]["committer"]["email"] == "dev@example.com" | ||
|
|
||
| def test_mixed_commits_only_keep_external(self) -> None: | ||
| """Push with both OntoKit and external commits → only external kept.""" | ||
| commits = [ | ||
| _make_commit(ONTOKIT_SYNC_COMMITTER_EMAIL, modified=["ontology.ttl"]), | ||
| _make_commit("ci-bot@corp.com", modified=["ontology.ttl"]), | ||
| _make_commit(ONTOKIT_COMMITTER_EMAIL, modified=["README.md"]), | ||
| ] | ||
| result = _filter_external(commits) | ||
| assert len(result) == 1 | ||
| assert result[0]["committer"]["email"] == "ci-bot@corp.com" | ||
|
|
||
| def test_empty_commits_list(self) -> None: | ||
| """Empty push payload → no external commits.""" | ||
| assert _filter_external([]) == [] | ||
|
|
||
| def test_commit_without_committer_field(self) -> None: | ||
| """Malformed commit without committer → treated as external (safe default).""" | ||
| commits = [{"id": "abc", "added": [], "modified": ["ontology.ttl"]}] | ||
| result = _filter_external(commits) | ||
| assert len(result) == 1 | ||
|
|
||
| def test_file_touch_detection_uses_only_external(self) -> None: | ||
| """Only external commits' file lists should matter for triggering sync.""" | ||
| ontokit_commit = _make_commit(ONTOKIT_SYNC_COMMITTER_EMAIL, modified=["ontology.ttl"]) | ||
| external_commit = _make_commit("dev@example.com", modified=["README.md"]) | ||
|
|
||
| external = _filter_external([ontokit_commit, external_commit]) | ||
|
|
||
| # Collect touched files from external commits only | ||
| touched: set[str] = set() | ||
| for c in external: | ||
| touched.update(c.get("added", [])) | ||
| touched.update(c.get("modified", [])) | ||
|
|
||
| # ontology.ttl was only touched by the OntoKit commit, not external | ||
| assert "ontology.ttl" not in touched | ||
| assert "README.md" in touched |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 175
🏁 Script executed:
# Check the file size first
wc -l ontokit/api/routes/pull_requests.py
Repository: CatholicOS/ontokit-api
Length of output: 105
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 1351
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 3264
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 48
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 16306
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 48
🌐 Web query:
GitHub webhook push event commit object structure "removed" field

💡 Result:
In a GitHub “push” webhook delivery (the `push` event sent to your webhook URL), each item in the top-level `commits` array is a “commit summary” object that can include these file lists:

```json
{
  "id": "…",
  "message": "…",
  "timestamp": "…",
  "url": "…",
  "author": { "name": "…", "email": "…" },
  "committer": { "name": "…", "email": "…" },
  "added": ["path/created.ext", "..."],
  "modified": ["path/changed.ext", "..."],
  "removed": ["path/deleted.ext", "..."]
}
```

What `removed` means: it’s an array of repository-relative file paths that were deleted by that commit (as represented in the push payload’s commit summaries). [1]

Important caveat (common source of confusion): if you’re looking at the push event payload inside GitHub Actions (`github.event` / `event.json`), GitHub explicitly does not include the `added`, `removed`, or `modified` arrays in the commit objects there; you must call the API to fetch file-level details. [2][3]

Sources:
[1] Push event payload example showing `removed` in commit objects: GitHub Events API docs mirror (PushEvent example payload) (docs2.lfe.io)
[2] GitHub Actions docs noting `added`/`removed`/`modified` are not present in Actions push payload: (docs.github.com)
[3] GitHub changelog describing removal of those fields from the Actions push payload: (github.blog)
Citations:
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 48
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 48
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 48
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 693
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 4149
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 48
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 8190
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 3765
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 442
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 2348
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 4855
🏁 Script executed:
Repository: CatholicOS/ontokit-api
Length of output: 587
Include deleted files when checking whether the tracked file changed.
`touched_files` only aggregates the `added` and `modified` file lists of each commit. If an external push deletes `sync_config.file_path`, the sync check is never triggered, causing webhook-based sync to miss legitimate upstream changes.

🔧 Suggested fix

```diff
 touched_files: set[str] = set()
 for commit in external_commits:
-    touched_files.update(commit.get("added", []))
-    touched_files.update(commit.get("modified", []))
+    touched_files.update(commit.get("added") or [])
+    touched_files.update(commit.get("modified") or [])
+    touched_files.update(commit.get("removed") or [])
```

🤖 Prompt for AI Agents