diff --git a/ontokit/api/routes/pull_requests.py b/ontokit/api/routes/pull_requests.py index b0cb15c..7133450 100644 --- a/ontokit/api/routes/pull_requests.py +++ b/ontokit/api/routes/pull_requests.py @@ -660,45 +660,67 @@ async def github_webhook( push_branch = ref[len("refs/heads/") :] commits = payload.get("commits", []) - from ontokit.models.remote_sync import RemoteSyncConfig - - sync_result = await service.db.execute( - select(RemoteSyncConfig).where( - RemoteSyncConfig.project_id == project_id, - RemoteSyncConfig.enabled.is_(True), - RemoteSyncConfig.frequency == "webhook", + # Loop prevention: skip remote sync check if all commits in this push + # were authored by OntoKit itself (prevents feedback loops when + # OntoKit pushes to the same repo it tracks as upstream). + from ontokit.core.constants import ONTOKIT_COMMITTER_EMAILS + + external_commits = [ + c + for c in commits + if c.get("committer", {}).get("email") not in ONTOKIT_COMMITTER_EMAILS + ] + + if not external_commits: + logger.info( + "Skipping remote sync check for project %s: " + "all %d commit(s) in push are OntoKit-authored", + project_id, + len(commits), + ) + else: + from ontokit.models.remote_sync import RemoteSyncConfig + + sync_result = await service.db.execute( + select(RemoteSyncConfig).where( + RemoteSyncConfig.project_id == project_id, + RemoteSyncConfig.enabled.is_(True), + RemoteSyncConfig.frequency == "webhook", + ) ) - ) - sync_config = sync_result.scalar_one_or_none() - - if sync_config and sync_config.branch == push_branch: - # Check if any commit touches the tracked file - touched_files: set[str] = set() - for commit in commits: - touched_files.update(commit.get("added", [])) - touched_files.update(commit.get("modified", [])) - - if sync_config.file_path in touched_files: - previous_status = sync_config.status - sync_config.status = "checking" - await service.db.commit() - - try: - pool = await get_arq_pool() - if pool is not None: - await pool.enqueue_job("run_remote_check_task", str(project_id)) - else: + sync_config = sync_result.scalar_one_or_none() + + if sync_config and sync_config.branch == push_branch: + # Check if any external commit touches the tracked file + touched_files: set[str] = set() + for commit in external_commits: + touched_files.update(commit.get("added", [])) + touched_files.update(commit.get("modified", [])) + + if sync_config.file_path in touched_files: + previous_status = sync_config.status + sync_config.status = "checking" + await service.db.commit() + + try: + pool = await get_arq_pool() + if pool is not None: + await pool.enqueue_job( + "run_remote_check_task", str(project_id) + ) + else: + logger.warning( + "ARQ pool is None; skipping remote sync check" + " after webhook push" + ) + sync_config.status = previous_status + await service.db.commit() + except Exception: logger.warning( - "ARQ pool is None; skipping remote sync check after webhook push" + "Failed to queue remote sync check after webhook push", + exc_info=True, ) sync_config.status = previous_status await service.db.commit() - except Exception: - logger.warning( - "Failed to queue remote sync check after webhook push", - exc_info=True, - ) - sync_config.status = previous_status - await service.db.commit() return {"status": "ok"} diff --git a/ontokit/core/constants.py b/ontokit/core/constants.py new file mode 100644 index 0000000..11e97e8 --- /dev/null +++ b/ontokit/core/constants.py @@ -0,0 +1,16 @@ +"""OntoKit application-wide constants.""" + +# Committer identities used by OntoKit for automated commits. +# Any commit with one of these committer emails is considered OntoKit-authored +# and should be excluded from upstream sync processing to prevent feedback loops. +ONTOKIT_COMMITTER_NAME = "OntoKit" +ONTOKIT_COMMITTER_EMAIL = "noreply@ontokit.dev" + +ONTOKIT_SYNC_COMMITTER_NAME = "OntoKit Sync" +ONTOKIT_SYNC_COMMITTER_EMAIL = "sync@ontokit.dev" + +# Set of all emails used by OntoKit as committer identity. +# Used by the webhook handler to detect and skip self-authored commits. +ONTOKIT_COMMITTER_EMAILS: frozenset[str] = frozenset( + {ONTOKIT_COMMITTER_EMAIL, ONTOKIT_SYNC_COMMITTER_EMAIL} +) diff --git a/ontokit/git/bare_repository.py b/ontokit/git/bare_repository.py index 8c5925a..58e619b 100644 --- a/ontokit/git/bare_repository.py +++ b/ontokit/git/bare_repository.py @@ -118,9 +118,11 @@ def is_initialized(self) -> bool: def _get_signature(self, name: str | None = None, email: str | None = None) -> pygit2.Signature: """Create a pygit2 signature for commits.""" + from ontokit.core.constants import ONTOKIT_COMMITTER_EMAIL, ONTOKIT_COMMITTER_NAME + return pygit2.Signature( - name=name or "OntoKit", - email=email or "noreply@ontokit.dev", + name=name or ONTOKIT_COMMITTER_NAME, + email=email or ONTOKIT_COMMITTER_EMAIL, ) def _resolve_ref(self, ref: str) -> pygit2.Commit: diff --git a/ontokit/services/github_sync.py b/ontokit/services/github_sync.py index 7148887..e90fe10 100644 --- a/ontokit/services/github_sync.py +++ b/ontokit/services/github_sync.py @@ -191,7 +191,12 @@ def _try_merge( # Create merge commit local_commit = cast(pygit2.Commit, repo.get(local_oid)) remote_commit = cast(pygit2.Commit, repo.get(remote_oid)) - sig = pygit2.Signature("OntoKit Sync", "sync@ontokit.dev") + from ontokit.core.constants import ( + ONTOKIT_SYNC_COMMITTER_EMAIL, + ONTOKIT_SYNC_COMMITTER_NAME, + ) + + sig = pygit2.Signature(ONTOKIT_SYNC_COMMITTER_NAME, ONTOKIT_SYNC_COMMITTER_EMAIL) repo.create_commit( f"refs/heads/{branch}", sig, diff --git a/tests/unit/test_upstream_loop_prevention.py b/tests/unit/test_upstream_loop_prevention.py new file mode 100644 index 0000000..0ba251f --- /dev/null +++ b/tests/unit/test_upstream_loop_prevention.py @@ -0,0 +1,100 @@ +"""Tests for upstream sync loop prevention logic.""" + +from typing import Any + +from ontokit.core.constants import ( + ONTOKIT_COMMITTER_EMAIL, + ONTOKIT_COMMITTER_EMAILS, + ONTOKIT_SYNC_COMMITTER_EMAIL, +) + + +def _make_commit( + email: str, + added: list[str] | None = None, + modified: list[str] | None = None, +) -> dict[str, Any]: + """Create a minimal GitHub webhook commit payload.""" + return { + "id": "abc123", + "committer": {"name": "Test", "email": email}, + "added": added or [], + "modified": modified or [], + } + + +def _filter_external(commits: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Reproduce the filtering logic from the webhook handler.""" + return [ + c for c in commits if c.get("committer", {}).get("email") not in ONTOKIT_COMMITTER_EMAILS + ] + + +class TestCommitterEmailConstants: + """Verify identity constants are correctly defined.""" + + def test_ontokit_emails_in_set(self) -> None: + assert ONTOKIT_COMMITTER_EMAIL in ONTOKIT_COMMITTER_EMAILS + assert ONTOKIT_SYNC_COMMITTER_EMAIL in ONTOKIT_COMMITTER_EMAILS + + def test_set_is_frozen(self) -> None: + assert isinstance(ONTOKIT_COMMITTER_EMAILS, frozenset) + + +class TestWebhookCommitFiltering: + """Test the commit filtering logic used in the push webhook handler.""" + + def test_all_ontokit_commits_filtered(self) -> None: + """Push with only OntoKit commits → no external commits → skip sync.""" + commits = [ + _make_commit(ONTOKIT_COMMITTER_EMAIL, modified=["ontology.ttl"]), + _make_commit(ONTOKIT_SYNC_COMMITTER_EMAIL, modified=["ontology.ttl"]), + ] + assert _filter_external(commits) == [] + + def test_external_commits_pass_through(self) -> None: + """Push with external commits → they pass through the filter.""" + commits = [ + _make_commit("dev@example.com", modified=["ontology.ttl"]), + ] + result = _filter_external(commits) + assert len(result) == 1 + assert result[0]["committer"]["email"] == "dev@example.com" + + def test_mixed_commits_only_keep_external(self) -> None: + """Push with both OntoKit and external commits → only external kept.""" + commits = [ + _make_commit(ONTOKIT_SYNC_COMMITTER_EMAIL, modified=["ontology.ttl"]), + _make_commit("ci-bot@corp.com", modified=["ontology.ttl"]), + _make_commit(ONTOKIT_COMMITTER_EMAIL, modified=["README.md"]), + ] + result = _filter_external(commits) + assert len(result) == 1 + assert result[0]["committer"]["email"] == "ci-bot@corp.com" + + def test_empty_commits_list(self) -> None: + """Empty push payload → no external commits.""" + assert _filter_external([]) == [] + + def test_commit_without_committer_field(self) -> None: + """Malformed commit without committer → treated as external (safe default).""" + commits = [{"id": "abc", "added": [], "modified": ["ontology.ttl"]}] + result = _filter_external(commits) + assert len(result) == 1 + + def test_file_touch_detection_uses_only_external(self) -> None: + """Only external commits' file lists should matter for triggering sync.""" + ontokit_commit = _make_commit(ONTOKIT_SYNC_COMMITTER_EMAIL, modified=["ontology.ttl"]) + external_commit = _make_commit("dev@example.com", modified=["README.md"]) + + external = _filter_external([ontokit_commit, external_commit]) + + # Collect touched files from external commits only + touched: set[str] = set() + for c in external: + touched.update(c.get("added", [])) + touched.update(c.get("modified", [])) + + # ontology.ttl was only touched by the OntoKit commit, not external + assert "ontology.ttl" not in touched + assert "README.md" in touched