diff --git a/gittensor/classes.py b/gittensor/classes.py index 134865c4..b8c6a8ca 100644 --- a/gittensor/classes.py +++ b/gittensor/classes.py @@ -134,6 +134,7 @@ class Issue: author_github_id: Optional[str] = None # Issue author's GitHub user ID (for miner matching) is_transferred: bool = False updated_at: Optional[datetime] = None + body_or_title_edited_at: Optional[datetime] = None discovery_base_score: float = 0.0 discovery_earned_score: float = 0.0 discovery_review_quality_multiplier: float = 1.0 @@ -246,6 +247,30 @@ def from_graphql_response(cls, pr_data: dict, uid: int, hotkey: str, github_id: continue issue_author = issue.get('author') or {} author_db_id = issue_author.get('databaseId') + + body_edit_history = (issue.get('userContentEdits') or {}).get('nodes') or [] + latest_body_edit_timestamp = next( + (edit.get('editedAt') for edit in body_edit_history if edit and edit.get('editedAt')), + None, + ) + latest_body_edit_at = ( + parse_github_timestamp_to_cst(latest_body_edit_timestamp) if latest_body_edit_timestamp else None + ) + + title_rename_events = (issue.get('timelineItems') or {}).get('nodes') or [] + latest_title_rename_timestamp = next( + (rename.get('createdAt') for rename in title_rename_events if rename and rename.get('createdAt')), + None, + ) + latest_title_rename_at = ( + parse_github_timestamp_to_cst(latest_title_rename_timestamp) if latest_title_rename_timestamp else None + ) + + if latest_body_edit_at and latest_title_rename_at: + body_or_title_edited_at = max(latest_body_edit_at, latest_title_rename_at) + else: + body_or_title_edited_at = latest_body_edit_at or latest_title_rename_at + issues.append( Issue( number=issue['number'], @@ -259,6 +284,7 @@ def from_graphql_response(cls, pr_data: dict, uid: int, hotkey: str, github_id: author_association=issue.get('authorAssociation'), author_github_id=str(author_db_id) if author_db_id else None, updated_at=parse_github_timestamp_to_cst(issue['updatedAt']) if issue.get('updatedAt') else None, + 
body_or_title_edited_at=body_or_title_edited_at, ) ) diff --git a/gittensor/utils/github_api_tools.py b/gittensor/utils/github_api_tools.py index 4b14aa18..b539bec0 100644 --- a/gittensor/utils/github_api_tools.py +++ b/gittensor/utils/github_api_tools.py @@ -94,6 +94,14 @@ ... on User { databaseId } } authorAssociation + userContentEdits(first: 1) { + nodes { editedAt } + } + timelineItems(itemTypes: [RENAMED_TITLE_EVENT], last: 1) { + nodes { + ... on RenamedTitleEvent { createdAt } + } + } } } reviews(first: 3, states: APPROVED) { diff --git a/gittensor/validator/issue_discovery/scoring.py b/gittensor/validator/issue_discovery/scoring.py index f2a6092a..8663702b 100644 --- a/gittensor/validator/issue_discovery/scoring.py +++ b/gittensor/validator/issue_discovery/scoring.py @@ -218,10 +218,11 @@ def _collect_issues_from_prs( data.closed_count += 1 continue # No score for unsolved issues - # Anti-gaming: post-merge edit detection - if issue.updated_at and pr.merged_at and issue.updated_at > pr.merged_at: + # Anti-gaming: post-merge body/title edit detection + # Not issue.updated_at: it fires on bot comments, labels, reactions. + if issue.body_or_title_edited_at and pr.merged_at and issue.body_or_title_edited_at > pr.merged_at: bt.logging.info( - f'Issue #{issue.number} edited after PR #{pr.number} merge — 0 score, counts as closed' + f'Issue #{issue.number} body/title edited after PR #{pr.number} merge — 0 score, counts as closed' ) data.solved_count -= 1 data.closed_count += 1 diff --git a/issue_discovery/issue-discovery-rewards.md b/issue_discovery/issue-discovery-rewards.md index 02ad0107..9469b831 100644 --- a/issue_discovery/issue-discovery-rewards.md +++ b/issue_discovery/issue-discovery-rewards.md @@ -159,9 +159,7 @@ If an issue is edited at any point after the solving PR's **`merged_at`** timest Anchored to `merged_at` (not `created_at`) so discoverers can add clarifying context while a PR is in review without being penalized. 
-**Edit detection (current):** Uses `updated_at` as a rough proxy. Acknowledged that `updated_at` fires on bot activity, comments, labels, etc. — accept false positives for now. - -**Edit detection (future):** Upgrade to timeline/events API for body-only edit detection in a later update. +**Edit detection:** Uses two GraphQL fields on the issue — `userContentEdits` (body edit history) and `timelineItems(itemTypes: [RENAMED_TITLE_EVENT])` (title renames) — fetched in the same query as `closingIssuesReferences`, so no additional request is needed. The later of the two timestamps (or whichever one exists) is stored as `body_or_title_edited_at` and compared against `pr.merged_at`; if the issue has no recorded body edit or title rename, the field is `None` and the penalty does not apply. Bot comments, label changes, reactions, and other non-edit activity do not trigger the penalty. ### Timing / Sniping Protection @@ -341,7 +339,8 @@ The existing `issues` table stores issue-to-PR relationships. Issue discovery ne |---|---|---|---| | `author_github_id` | `VARCHAR(255)` | NULL | Issue author's GitHub user ID (for miner matching) | | `is_transferred` | `BOOLEAN` | FALSE | Whether issue was transferred (timeline API `TransferredEvent`) | -| `updated_at` | `TIMESTAMP` | NULL | GitHub's `updated_at` — rough proxy for edit detection | +| `updated_at` | `TIMESTAMP` | NULL | GitHub's `updated_at` (retained for compatibility; not used for edit detection) | +| `body_or_title_edited_at` | `TIMESTAMP` | NULL | Transient: `max(last body edit, last title rename)` from timeline events — drives post-merge edit detection | | `discovery_base_score` | `DECIMAL(15,6)` | 0.0 | Base score inherited from solving PR | | `discovery_earned_score` | `DECIMAL(15,6)` | 0.0 | Final score after all multipliers | | `discovery_review_quality_multiplier` | `DECIMAL(15,6)` | 1.0 | Cliff model: `1.1` clean, then `1.0 - 0.15n` | @@ -360,7 +359,8 @@ author_github_id: Optional[str] = None # Edit/transfer detection is_transferred: bool = False -updated_at: Optional[datetime] = None +updated_at: Optional[datetime] = None # retained for compatibility; not used for edit 
detection +body_or_title_edited_at: Optional[datetime] = None # max(last body edit, last title rename) # Discovery scoring (populated during issue scoring pipeline) discovery_base_score: float = 0.0 @@ -406,6 +406,5 @@ The API budget estimates assume ~256 tracked repos. If the repo list stays at 1, ## Deferred Post-Launch -- **Edit detection upgrade** — current `updated_at` proxy false-positives on bot activity, comments, labels. Future: timeline/events API for body-only edits. - **Retroactive linking timing** — if a PR merges in cycle N and issue link appears in cycle N+3, what base score is used? - **Open issue spam threshold** — if deferred from v1 (option D), add once mirror ships and scoped counting is free. diff --git a/tests/validator/test_issue_discovery_post_merge_edit.py b/tests/validator/test_issue_discovery_post_merge_edit.py new file mode 100644 index 00000000..35cd79e7 --- /dev/null +++ b/tests/validator/test_issue_discovery_post_merge_edit.py @@ -0,0 +1,124 @@ +"""Post-merge body/title edit detection. + +Benign activity (bot comments, labels) must not demote solved issues; +real body/title edits after merge must. 
+""" + +from datetime import datetime, timedelta, timezone +from typing import Dict + +from gittensor.classes import Issue, MinerEvaluation, PRState, PullRequest +from gittensor.constants import MIN_TOKEN_SCORE_FOR_BASE_SCORE +from gittensor.validator.issue_discovery.scoring import _collect_issues_from_prs +from gittensor.validator.utils.load_weights import RepositoryConfig + +DISCOVERER_UID = 1 +SOLVER_UID = 2 +DISCOVERER_GH = '1001' +SOLVER_GH = '2002' +REPO = 'owner/repo' + + +def _merged_at() -> datetime: + return datetime(2025, 1, 1, 12, 0, tzinfo=timezone.utc) + + +def _make_issue( + *, + updated_at: datetime = None, + body_or_title_edited_at: datetime = None, +) -> Issue: + return Issue( + number=42, + pr_number=7, + repository_full_name=REPO, + title='bug', + created_at=_merged_at() - timedelta(days=5), + closed_at=_merged_at(), + author_login='alice', + state='CLOSED', + author_github_id=DISCOVERER_GH, + updated_at=updated_at, + body_or_title_edited_at=body_or_title_edited_at, + ) + + +def _make_pr(issue: Issue) -> PullRequest: + return PullRequest( + number=7, + repository_full_name=REPO, + uid=SOLVER_UID, + hotkey='hk', + github_id=SOLVER_GH, + title='fix', + author_login='bob', + merged_at=_merged_at(), + created_at=_merged_at() - timedelta(days=1), + pr_state=PRState.MERGED, + token_score=float(MIN_TOKEN_SCORE_FOR_BASE_SCORE) + 10.0, + base_score=10.0, + issues=[issue], + ) + + +def _evaluations(pr: PullRequest) -> Dict[int, MinerEvaluation]: + discoverer = MinerEvaluation(uid=DISCOVERER_UID, hotkey='hk1', github_id=DISCOVERER_GH) + solver = MinerEvaluation(uid=SOLVER_UID, hotkey='hk2', github_id=SOLVER_GH) + solver.merged_pull_requests = [pr] + return {DISCOVERER_UID: discoverer, SOLVER_UID: solver} + + +def _repos() -> Dict[str, RepositoryConfig]: + return {REPO: RepositoryConfig(weight=1.0)} + + +def _run(pr: PullRequest): + from collections import defaultdict + + from gittensor.validator.issue_discovery.scoring import _DiscovererData + + 
evaluations = _evaluations(pr) + gh_to_uid = {DISCOVERER_GH: DISCOVERER_UID, SOLVER_GH: SOLVER_UID} + discoverer_data = defaultdict(lambda: _DiscovererData()) + _collect_issues_from_prs(evaluations, gh_to_uid, discoverer_data, _repos()) + return discoverer_data[DISCOVERER_GH] + + +def test_benign_updated_at_after_merge_is_ignored(): + """Bot activity bumps updated_at but not body_or_title_edited_at → stays solved.""" + issue = _make_issue( + updated_at=_merged_at() + timedelta(hours=1), # noisy bot bump + body_or_title_edited_at=None, + ) + pr = _make_pr(issue) + data = _run(pr) + + assert data.solved_count == 1 + assert data.closed_count == 0 + assert len(data.scored_issues) == 1 + + +def test_real_body_edit_after_merge_demotes(): + """An actual body edit after merge demotes solved → closed.""" + issue = _make_issue( + updated_at=_merged_at() + timedelta(hours=1), + body_or_title_edited_at=_merged_at() + timedelta(hours=1), + ) + pr = _make_pr(issue) + data = _run(pr) + + assert data.solved_count == 0 + assert data.closed_count == 1 + assert data.scored_issues == [] + + +def test_edit_before_merge_is_ignored(): + """Body edits prior to merge are fine.""" + issue = _make_issue( + body_or_title_edited_at=_merged_at() - timedelta(hours=1), + ) + pr = _make_pr(issue) + data = _run(pr) + + assert data.solved_count == 1 + assert data.closed_count == 0