diff --git a/README.md b/README.md index c403ec7..c5d4adf 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ gh auth login ```bash hubsignal --query 'is:issue is:open label:"help wanted" language:Python' --limit 20 hubsignal --query 'is:issue is:open label:"good first issue" language:Go' --min-stars 100 --exclude-archived --exclude-forks --pushed-after 2026-01-01 +hubsignal --query 'is:issue is:open label:"good first issue" language:Python' --exclude-bounty-like --exclude-repo noisy/project hubsignal --query 'is:issue is:open "good first issue" "agent"' --format json ``` diff --git a/src/hubsignal/cli.py b/src/hubsignal/cli.py index 1781c04..4807f02 100644 --- a/src/hubsignal/cli.py +++ b/src/hubsignal/cli.py @@ -4,6 +4,7 @@ import json import math import os +import re import subprocess import sys import urllib.error @@ -16,6 +17,19 @@ API_ROOT = "https://api.github.com" DEFAULT_QUERY = 'is:issue is:open label:"help wanted"' +BOUNTY_LIKE_PATTERNS = ( + "bounty", + "earn", + "reward", + "token", + "airdrop", + "star +", + "stars", + "upvote", + "reaction", + "review an open pr", + "google search console", +) @dataclass(frozen=True) @@ -42,6 +56,12 @@ class RankedIssue: pushed_at: str | None +@dataclass(frozen=True) +class RankResult: + issues: list[RankedIssue] + skipped: dict[str, int] + + def gh_token() -> str | None: for name in ("GH_TOKEN", "GITHUB_TOKEN"): value = os.environ.get(name) @@ -140,6 +160,25 @@ def score_issue(item: dict[str, Any], stars: int) -> float: return round(score, 2) +def normalized_labels(item: dict[str, Any]) -> set[str]: + return {label["name"].lower() for label in item.get("labels", [])} + + +def is_bounty_like(item: dict[str, Any]) -> bool: + labels = normalized_labels(item) + if any("bounty" in label for label in labels): + return True + + text = " ".join( + [ + str(item.get("title") or ""), + str(item.get("body") or ""), + " ".join(labels), + ] + ).lower() + return any(pattern in text for pattern in BOUNTY_LIKE_PATTERNS) + + def parse_date(value: str | None) -> datetime | None: if not value: return None @@ -176,11 +215,32 @@ def rank_issues( exclude_archived: bool = False, exclude_forks: bool = False, pushed_after: str | None = None, -) -> list[RankedIssue]: + exclude_repos: set[str] | None = None, + exclude_title_regex: str | None = None, + exclude_bounty_like: bool = False, +) -> RankResult: repo_cache: dict[str, RepoDetails] = {} ranked = [] + skipped = { + "repository": 0, + "title": 0, + "bounty_like": 0, + } + excluded = {repo.lower() for repo in exclude_repos or set()} + title_re = re.compile(exclude_title_regex, re.IGNORECASE) if exclude_title_regex else None + for item in items: repo = repo_from_url(item["repository_url"]) + if repo.lower() in excluded: + skipped["repository"] += 1 + continue + if title_re and title_re.search(str(item.get("title") or "")): + skipped["title"] += 1 + continue + if exclude_bounty_like and is_bounty_like(item): + skipped["bounty_like"] += 1 + continue + details = repo_details(repo, token, repo_cache) if not repo_passes_filters( details, @@ -206,19 +266,33 @@ def rank_issues( pushed_at=details.pushed_at, ) ) - return sorted(ranked, key=lambda issue: issue.score, reverse=True) + return RankResult( + issues=sorted(ranked, key=lambda issue: issue.score, reverse=True), + skipped=skipped, + ) -def emit_text(issues: list[RankedIssue]) -> None: +def emit_text(result: RankResult) -> None: print(f"{'score':>6} {'stars':>7} {'repo':<32} issue") - for issue in issues: + for issue in result.issues: repo = issue.repo[:32] print(f"{issue.score:>6.1f} {issue.stars:>7} {repo:<32} {issue.title}") print(f"{'':>6} {'':>7} {'':<32} {issue.url}") - - -def emit_json(issues: list[RankedIssue]) -> None: - print(json.dumps([issue.__dict__ for issue in issues], indent=2)) + skipped = {key: value for key, value in result.skipped.items() if value} + if skipped: + print(f"\nskipped: {json.dumps(skipped, sort_keys=True)}") + + +def emit_json(result: RankResult) -> None: + print( + json.dumps( + { + "issues": [issue.__dict__ for issue in result.issues], + "skipped": result.skipped, + }, + indent=2, + ) + ) def build_parser() -> argparse.ArgumentParser: @@ -250,6 +324,22 @@ def build_parser() -> argparse.ArgumentParser: metavar="YYYY-MM-DD", help="Drop issues from repositories with no push after this date.", ) + parser.add_argument( + "--exclude-repo", + action="append", + default=[], + metavar="OWNER/NAME", + help="Drop issues from this repository. Can be repeated.", + ) + parser.add_argument( + "--exclude-title-regex", + help="Drop issues whose title matches this Python regular expression.", + ) + parser.add_argument( + "--exclude-bounty-like", + action="store_true", + help="Drop issues that look like bounties, token rewards, or promotion tasks.", + ) parser.add_argument( "--format", choices=("text", "json"), @@ -273,25 +363,34 @@ def main(argv: list[str] | None = None) -> int: except ValueError: print("--pushed-after must use YYYY-MM-DD", file=sys.stderr) return 2 + if args.exclude_title_regex: + try: + re.compile(args.exclude_title_regex) + except re.error as exc: + print(f"--exclude-title-regex is invalid: {exc}", file=sys.stderr) + return 2 token = gh_token() try: - issues = rank_issues( + result = rank_issues( search_issues(args.query, args.limit, token), token, min_stars=args.min_stars, exclude_archived=args.exclude_archived, exclude_forks=args.exclude_forks, pushed_after=args.pushed_after, + exclude_repos=set(args.exclude_repo), + exclude_title_regex=args.exclude_title_regex, + exclude_bounty_like=args.exclude_bounty_like, ) except RuntimeError as exc: print(str(exc), file=sys.stderr) return 1 if args.format == "json": - emit_json(issues) + emit_json(result) else: - emit_text(issues) + emit_text(result) return 0 diff --git a/tests/test_cli.py b/tests/test_cli.py index 42dcfb4..1dd626b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,13 @@ from datetime import datetime, timedelta, timezone -from hubsignal.cli import RepoDetails, repo_passes_filters, repo_from_url, score_issue +from hubsignal.cli import ( + RepoDetails, + is_bounty_like, + rank_issues, + repo_passes_filters, + repo_from_url, + score_issue, +) def test_repo_from_url_extracts_owner_and_name(): @@ -59,3 +66,67 @@ def test_repo_filters_can_exclude_archived_and_forked_repos(): assert repo_passes_filters(details, 10, False, False, None) assert not repo_passes_filters(details, 10, True, False, None) assert not repo_passes_filters(details, 10, False, True, None) + + +def test_bounty_like_detection_checks_labels_title_and_body(): + assert is_bounty_like( + { + "title": "Find a typo", + "body": "Earn tokens for a quick task", + "labels": [{"name": "good first issue"}], + } + ) + assert is_bounty_like( + { + "title": "Add a dashboard", + "body": "", + "labels": [{"name": "bounty"}], + } + ) + assert not is_bounty_like( + { + "title": "Document CLI behavior", + "body": "Small docs-only fix", + "labels": [{"name": "documentation"}], + } + ) + + +def test_rank_issues_reports_skipped_noise_without_fetching_repo_details(monkeypatch): + items = [ + { + "repository_url": "https://api.github.com/repos/noisy/project", + "title": "Bounty: star + review an open PR", + "body": "", + "labels": [{"name": "good first issue"}], + "comments": 0, + "updated_at": datetime.now(timezone.utc).isoformat(), + "html_url": "https://github.com/noisy/project/issues/1", + }, + { + "repository_url": "https://api.github.com/repos/owner/project", + "title": "Document CLI behavior", + "body": "", + "labels": [{"name": "documentation"}], + "comments": 1, + "updated_at": datetime.now(timezone.utc).isoformat(), + "html_url": "https://github.com/owner/project/issues/2", + }, + ] + + def fake_repo_details(repo, token, cache): + return RepoDetails( + name=repo, + stars=100, + archived=False, + fork=False, + pushed_at="2026-05-01T00:00:00Z", + ) + + monkeypatch.setattr("hubsignal.cli.repo_details", fake_repo_details) + + result = rank_issues(items, token=None, exclude_bounty_like=True) + + assert len(result.issues) == 1 + assert result.issues[0].repo == "owner/project" + assert result.skipped["bounty_like"] == 1