Photon101 · Photon101 · May 11, 2026 · May 11, 2026 · chatgpt-codex-connector · May 11, 2026
diff --git a/README.md b/README.md
@@ -44,6 +44,7 @@ gh auth login
 ```bash
 hubsignal --query 'is:issue is:open label:"help wanted" language:Python' --limit 20
 hubsignal --query 'is:issue is:open label:"good first issue" language:Go' --min-stars 100 --exclude-archived --exclude-forks --pushed-after 2026-01-01
+hubsignal --query 'is:issue is:open label:"good first issue" language:Python' --exclude-bounty-like --exclude-repo noisy/project
 hubsignal --query 'is:issue is:open "good first issue" "agent"' --format json
 ```
 

diff --git a/src/hubsignal/cli.py b/src/hubsignal/cli.py
@@ -4,6 +4,7 @@
 import json
 import math
 import os
+import re
 import subprocess
 import sys
 import urllib.error
@@ -16,6 +17,19 @@
 
 API_ROOT = "https://api.github.com"
 DEFAULT_QUERY = 'is:issue is:open label:"help wanted"'
+# Lowercase phrases that is_bounty_like() searches for in an issue's combined
+# title, body, and label text; a single hit marks the issue as bounty-like.
+BOUNTY_LIKE_PATTERNS = (
+    "bounty",
+    "earn",
+    "reward",
+    "token",
+    "airdrop",
+    "star +",
+    "stars",
+    "upvote",
+    "reaction",
+    "review an open pr",
+    "google search console",
+)
 
 
 @dataclass(frozen=True)
@@ -42,6 +56,12 @@ class RankedIssue:
     pushed_at: str | None
 
 
+@dataclass(frozen=True)
+class RankResult:
+    """Outcome of rank_issues(): the ranked issues plus per-filter skip counts."""
+    # Issues that were not filtered out, sorted by score with the highest first.
+    issues: list[RankedIssue]
+    # Number of issues dropped, keyed by reason: "repository", "title", "bounty_like".
+    skipped: dict[str, int]
+
+
 def gh_token() -> str | None:
     for name in ("GH_TOKEN", "GITHUB_TOKEN"):
         value = os.environ.get(name)
@@ -140,6 +160,25 @@ def score_issue(item: dict[str, Any], stars: int) -> float:
     return round(score, 2)
 
 
+def normalized_labels(item: dict[str, Any]) -> set[str]:
+    """Return the lowercased names of the labels attached to an issue.
+
+    Args:
+        item: Issue payload; its "labels" entry is read as a list of mappings
+            that each carry a "name".
+
+    Returns:
+        The set of lowercased label names. Empty when "labels" is missing,
+        null, or an empty list.
+    """
+    # `or []` also covers an explicit null, mirroring the `or ""` handling of
+    # title and body in is_bounty_like.
+    return {label["name"].lower() for label in item.get("labels") or []}
+
+
+def is_bounty_like(item: dict[str, Any]) -> bool:
+    """Report whether an issue looks like a bounty, reward, or promotion task.
+
+    An issue matches when any label name contains "bounty", or when its title,
+    body, or labels contain one of BOUNTY_LIKE_PATTERNS starting at a word
+    boundary. The boundary keeps short patterns from firing inside unrelated
+    words (plain substring search found "earn" in "learn"), while plural and
+    inflected forms such as "tokens" or "rewarded" still match.
+
+    Args:
+        item: Issue payload; "title" and "body" are read and may be missing
+            or null, and label names come from normalized_labels().
+
+    Returns:
+        True when the issue matches, False otherwise.
+    """
+    labels = normalized_labels(item)
+    if any("bounty" in label for label in labels):
+        return True
+
+    text = " ".join(
+        [
+            str(item.get("title") or ""),
+            str(item.get("body") or ""),
+            " ".join(labels),
+        ]
+    ).lower()
+    # One alternation over the escaped patterns ("star +" contains a regex
+    # metacharacter); the lookbehind rejects matches that begin mid-word.
+    alternatives = "|".join(re.escape(pattern) for pattern in BOUNTY_LIKE_PATTERNS)
+    return re.search(rf"(?<!\w)(?:{alternatives})", text) is not None
+
+
 def parse_date(value: str | None) -> datetime | None:
     if not value:
         return None
@@ -176,11 +215,32 @@ def rank_issues(
     exclude_archived: bool = False,
     exclude_forks: bool = False,
     pushed_after: str | None = None,
-) -> list[RankedIssue]:
+    exclude_repos: set[str] | None = None,
+    exclude_title_regex: str | None = None,
+    exclude_bounty_like: bool = False,
+) -> RankResult:
     repo_cache: dict[str, RepoDetails] = {}
     ranked = []
+    skipped = {
+        "repository": 0,
+        "title": 0,
+        "bounty_like": 0,
+    }
+    excluded = {repo.lower() for repo in exclude_repos or set()}
+    title_re = re.compile(exclude_title_regex, re.IGNORECASE) if exclude_title_regex else None
+
     for item in items:
         repo = repo_from_url(item["repository_url"])
+        if repo.lower() in excluded:
+            skipped["repository"] += 1
+            continue
+        if title_re and title_re.search(str(item.get("title") or "")):
+            skipped["title"] += 1
+            continue
+        if exclude_bounty_like and is_bounty_like(item):
+            skipped["bounty_like"] += 1
+            continue
+
         details = repo_details(repo, token, repo_cache)
         if not repo_passes_filters(
             details,
@@ -206,19 +266,33 @@ def rank_issues(
                 pushed_at=details.pushed_at,
             )
         )
-    return sorted(ranked, key=lambda issue: issue.score, reverse=True)
+    return RankResult(
+        issues=sorted(ranked, key=lambda issue: issue.score, reverse=True),
+        skipped=skipped,
+    )
 
 
-def emit_text(issues: list[RankedIssue]) -> None:
+def emit_text(result: RankResult) -> None:
+    """Print the ranked issues as an aligned text table.
+
+    Each issue takes two rows: score, stars, repository (cut to 32 characters)
+    and title, then the issue URL aligned under the title column. When any
+    skip counter is non-zero, a final "skipped:" line lists those counters as
+    JSON.
+    """
     print(f"{'score':>6}  {'stars':>7}  {'repo':<32}  issue")
-    for issue in issues:
+    for issue in result.issues:
         repo = issue.repo[:32]
         print(f"{issue.score:>6.1f}  {issue.stars:>7}  {repo:<32}  {issue.title}")
         print(f"{'':>6}  {'':>7}  {'':<32}  {issue.url}")
-
-
-def emit_json(issues: list[RankedIssue]) -> None:
-    print(json.dumps([issue.__dict__ for issue in issues], indent=2))
+    # Report only the filters that actually dropped something.
+    skipped = {key: value for key, value in result.skipped.items() if value}
+    if skipped:
+        print(f"\nskipped: {json.dumps(skipped, sort_keys=True)}")
+
+
+def emit_json(result: RankResult) -> None:
+    """Print the ranked issues and the skip counts as one indented JSON object."""
+    # vars() exposes each issue's attribute dict for serialization.
+    payload = {
+        "issues": [vars(issue) for issue in result.issues],
+        "skipped": result.skipped,
+    }
+    print(json.dumps(payload, indent=2))
 
 
 def build_parser() -> argparse.ArgumentParser:
@@ -250,6 +324,22 @@ def build_parser() -> argparse.ArgumentParser:
         metavar="YYYY-MM-DD",
         help="Drop issues from repositories with no push after this date.",
     )
+    parser.add_argument(
+        "--exclude-repo",
+        action="append",
+        default=[],
+        metavar="OWNER/NAME",
+        help="Drop issues from this repository. Can be repeated.",
+    )
+    parser.add_argument(
+        "--exclude-title-regex",
+        help="Drop issues whose title matches this Python regular expression.",
+    )
+    parser.add_argument(
+        "--exclude-bounty-like",
+        action="store_true",
+        help="Drop issues that look like bounties, token rewards, or promotion tasks.",
+    )
     parser.add_argument(
         "--format",
         choices=("text", "json"),
@@ -273,25 +363,34 @@ def main(argv: list[str] | None = None) -> int:
         except ValueError:
             print("--pushed-after must use YYYY-MM-DD", file=sys.stderr)
             return 2
+    if args.exclude_title_regex:
+        try:
+            re.compile(args.exclude_title_regex)
+        except re.error as exc:
+            print(f"--exclude-title-regex is invalid: {exc}", file=sys.stderr)
+            return 2
 
     token = gh_token()
     try:
-        issues = rank_issues(
+        result = rank_issues(
             search_issues(args.query, args.limit, token),
             token,
             min_stars=args.min_stars,
             exclude_archived=args.exclude_archived,
             exclude_forks=args.exclude_forks,
             pushed_after=args.pushed_after,
+            exclude_repos=set(args.exclude_repo),
+            exclude_title_regex=args.exclude_title_regex,
+            exclude_bounty_like=args.exclude_bounty_like,
         )
     except RuntimeError as exc:
         print(str(exc), file=sys.stderr)
         return 1
 
     if args.format == "json":
-        emit_json(issues)
+        emit_json(result)
     else:
-        emit_text(issues)
+        emit_text(result)
     return 0
 
 

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,6 +1,13 @@
 from datetime import datetime, timedelta, timezone
 
-from hubsignal.cli import RepoDetails, repo_passes_filters, repo_from_url, score_issue
+from hubsignal.cli import (
+    RepoDetails,
+    is_bounty_like,
+    rank_issues,
+    repo_passes_filters,
+    repo_from_url,
+    score_issue,
+)
 
 
 def test_repo_from_url_extracts_owner_and_name():
@@ -59,3 +66,67 @@ def test_repo_filters_can_exclude_archived_and_forked_repos():
     assert repo_passes_filters(details, 10, False, False, None)
     assert not repo_passes_filters(details, 10, True, False, None)
     assert not repo_passes_filters(details, 10, False, True, None)
+
+
+def test_bounty_like_detection_checks_labels_title_and_body():
+    """A bounty phrase in the body or a bounty label is flagged; plain docs work is not."""
+    body_hit = {
+        "title": "Find a typo",
+        "body": "Earn tokens for a quick task",
+        "labels": [{"name": "good first issue"}],
+    }
+    label_hit = {
+        "title": "Add a dashboard",
+        "body": "",
+        "labels": [{"name": "bounty"}],
+    }
+    clean = {
+        "title": "Document CLI behavior",
+        "body": "Small docs-only fix",
+        "labels": [{"name": "documentation"}],
+    }
+
+    assert is_bounty_like(body_hit)
+    assert is_bounty_like(label_hit)
+    assert not is_bounty_like(clean)
+
+
+def test_rank_issues_reports_skipped_noise_without_fetching_repo_details(monkeypatch):
+    """Bounty-like issues are counted as skipped before any repository lookup."""
+    items = [
+        {
+            "repository_url": "https://api.github.com/repos/noisy/project",
+            "title": "Bounty: star + review an open PR",
+            "body": "",
+            "labels": [{"name": "good first issue"}],
+            "comments": 0,
+            "updated_at": datetime.now(timezone.utc).isoformat(),
+            "html_url": "https://github.com/noisy/project/issues/1",
+        },
+        {
+            "repository_url": "https://api.github.com/repos/owner/project",
+            "title": "Document CLI behavior",
+            "body": "",
+            "labels": [{"name": "documentation"}],
+            "comments": 1,
+            "updated_at": datetime.now(timezone.utc).isoformat(),
+            "html_url": "https://github.com/owner/project/issues/2",
+        },
+    ]
+    fetched = []
+
+    def fake_repo_details(repo, token, cache):
+        # Record every lookup so the test can show skipped issues trigger none.
+        fetched.append(repo)
+        return RepoDetails(
+            name=repo,
+            stars=100,
+            archived=False,
+            fork=False,
+            pushed_at="2026-05-01T00:00:00Z",
+        )
+
+    monkeypatch.setattr("hubsignal.cli.repo_details", fake_repo_details)
+
+    result = rank_issues(items, token=None, exclude_bounty_like=True)
+
+    assert len(result.issues) == 1
+    assert result.issues[0].repo == "owner/project"
+    assert result.skipped["bounty_like"] == 1
+    # The property named in the test: no repository lookup for skipped issues.
+    assert "noisy/project" not in fetched