Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ gh auth login
```bash
hubsignal --query 'is:issue is:open label:"help wanted" language:Python' --limit 20
hubsignal --query 'is:issue is:open label:"good first issue" language:Go' --min-stars 100 --exclude-archived --exclude-forks --pushed-after 2026-01-01
hubsignal --query 'is:issue is:open label:"good first issue" language:Python' --exclude-bounty-like --exclude-repo noisy/project
hubsignal --query 'is:issue is:open "good first issue" "agent"' --format json
```

Expand Down
121 changes: 110 additions & 11 deletions src/hubsignal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import math
import os
import re
import subprocess
import sys
import urllib.error
Expand All @@ -16,6 +17,19 @@

API_ROOT = "https://api.github.com"
DEFAULT_QUERY = 'is:issue is:open label:"help wanted"'
BOUNTY_LIKE_PATTERNS = (
"bounty",
"earn",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 — Match `earn` as a whole word

Because `is_bounty_like` checks each pattern with plain substring matching, this `earn` entry also matches common non-bounty words such as “learn” and “learning”. When users run with `--exclude-bounty-like`, legitimate good-first/onboarding issues with titles or bodies like “Learn the plugin API” will be skipped as bounty-like, so this should use a word-boundary/phrase match instead of a raw substring.

Useful? React with 👍 / 👎.

"reward",
"token",
"airdrop",
"star +",
"stars",
"upvote",
"reaction",
"review an open pr",
"google search console",
)


@dataclass(frozen=True)
Expand All @@ -42,6 +56,12 @@ class RankedIssue:
pushed_at: str | None


@dataclass(frozen=True)
class RankResult:
    """Outcome of a ranking pass: the issues that were kept plus skip counters."""

    # Issues that survived filtering; rank_issues stores them sorted by score,
    # highest first.
    issues: list[RankedIssue]
    # How many items each pre-filter dropped. rank_issues populates the keys
    # "repository", "title" and "bounty_like".
    skipped: dict[str, int]


def gh_token() -> str | None:
for name in ("GH_TOKEN", "GITHUB_TOKEN"):
value = os.environ.get(name)
Expand Down Expand Up @@ -140,6 +160,25 @@ def score_issue(item: dict[str, Any], stars: int) -> float:
return round(score, 2)


def normalized_labels(item: dict[str, Any]) -> set[str]:
    """Return the lower-cased names of the labels attached to *item*.

    A missing or null ``labels`` entry yields an empty set.
    """
    return {label["name"].lower() for label in item.get("labels") or []}


def _contains_phrase(text: str, phrase: str) -> bool:
    """Report whether *phrase* occurs in *text* as whole words.

    The phrase must not start or end in the middle of a longer word, so
    "earn" no longer matches "learn" and "token" no longer matches
    "tokenizer". Simple inflections of the final word ("tokens",
    "rewarded", "earning") are still accepted. Phrases that begin or end
    with punctuation (for example "star +") are only anchored on the side
    that is a word character.
    """
    lead = r"(?<!\w)" if re.match(r"\w", phrase) else ""
    trail = r"(?:s|es|d|ed|ing)?(?!\w)" if re.search(r"\w\Z", phrase) else ""
    return re.search(lead + re.escape(phrase) + trail, text) is not None


def is_bounty_like(item: dict[str, Any]) -> bool:
    """Decide whether an issue looks like a bounty, token reward or promo task.

    An issue is flagged when any label contains "bounty", or when its title,
    body or labels contain one of ``BOUNTY_LIKE_PATTERNS`` as a whole word or
    phrase (case-insensitive).
    """
    labels = normalized_labels(item)
    # Labels such as "bounty-$50" or "has bounty" are an explicit signal, so a
    # plain substring test is intentional here.
    if any("bounty" in label for label in labels):
        return True

    text = " ".join(
        [
            str(item.get("title") or ""),
            str(item.get("body") or ""),
            " ".join(labels),
        ]
    ).lower()
    return any(_contains_phrase(text, pattern) for pattern in BOUNTY_LIKE_PATTERNS)


def parse_date(value: str | None) -> datetime | None:
if not value:
return None
Expand Down Expand Up @@ -176,11 +215,32 @@ def rank_issues(
exclude_archived: bool = False,
exclude_forks: bool = False,
pushed_after: str | None = None,
) -> list[RankedIssue]:
exclude_repos: set[str] | None = None,
exclude_title_regex: str | None = None,
exclude_bounty_like: bool = False,
) -> RankResult:
repo_cache: dict[str, RepoDetails] = {}
ranked = []
skipped = {
"repository": 0,
"title": 0,
"bounty_like": 0,
}
excluded = {repo.lower() for repo in exclude_repos or set()}
title_re = re.compile(exclude_title_regex, re.IGNORECASE) if exclude_title_regex else None

for item in items:
repo = repo_from_url(item["repository_url"])
if repo.lower() in excluded:
skipped["repository"] += 1
continue
if title_re and title_re.search(str(item.get("title") or "")):
skipped["title"] += 1
continue
if exclude_bounty_like and is_bounty_like(item):
skipped["bounty_like"] += 1
continue

details = repo_details(repo, token, repo_cache)
if not repo_passes_filters(
details,
Expand All @@ -206,19 +266,33 @@ def rank_issues(
pushed_at=details.pushed_at,
)
)
return sorted(ranked, key=lambda issue: issue.score, reverse=True)
return RankResult(
issues=sorted(ranked, key=lambda issue: issue.score, reverse=True),
skipped=skipped,
)


def emit_text(result: RankResult) -> None:
    """Print the ranked issues as a text table, followed by skip counts.

    Each issue takes two lines: score, stars, repository (truncated to 32
    characters) and title, then the issue URL aligned under the title
    column. A trailing ``skipped: {...}`` line is printed only when at least
    one counter in ``result.skipped`` is non-zero.
    """
    print(f"{'score':>6} {'stars':>7} {'repo':<32} issue")
    for issue in result.issues:
        repo = issue.repo[:32]
        print(f"{issue.score:>6.1f} {issue.stars:>7} {repo:<32} {issue.title}")
        print(f"{'':>6} {'':>7} {'':<32} {issue.url}")

    # Hide zero counters so the summary only mentions filters that fired.
    skipped = {key: value for key, value in result.skipped.items() if value}
    if skipped:
        print(f"\nskipped: {json.dumps(skipped, sort_keys=True)}")


def emit_json(result: RankResult) -> None:
    """Print the result as a JSON object with ``issues`` and ``skipped`` keys.

    Unlike the text output, every skip counter is included, zeros too.
    """
    print(
        json.dumps(
            {
                "issues": [issue.__dict__ for issue in result.issues],
                "skipped": result.skipped,
            },
            indent=2,
        )
    )


def build_parser() -> argparse.ArgumentParser:
Expand Down Expand Up @@ -250,6 +324,22 @@ def build_parser() -> argparse.ArgumentParser:
metavar="YYYY-MM-DD",
help="Drop issues from repositories with no push after this date.",
)
parser.add_argument(
"--exclude-repo",
action="append",
default=[],
metavar="OWNER/NAME",
help="Drop issues from this repository. Can be repeated.",
)
parser.add_argument(
"--exclude-title-regex",
help="Drop issues whose title matches this Python regular expression.",
)
parser.add_argument(
"--exclude-bounty-like",
action="store_true",
help="Drop issues that look like bounties, token rewards, or promotion tasks.",
)
parser.add_argument(
"--format",
choices=("text", "json"),
Expand All @@ -273,25 +363,34 @@ def main(argv: list[str] | None = None) -> int:
except ValueError:
print("--pushed-after must use YYYY-MM-DD", file=sys.stderr)
return 2
if args.exclude_title_regex:
try:
re.compile(args.exclude_title_regex)
except re.error as exc:
print(f"--exclude-title-regex is invalid: {exc}", file=sys.stderr)
return 2

token = gh_token()
try:
issues = rank_issues(
result = rank_issues(
search_issues(args.query, args.limit, token),
token,
min_stars=args.min_stars,
exclude_archived=args.exclude_archived,
exclude_forks=args.exclude_forks,
pushed_after=args.pushed_after,
exclude_repos=set(args.exclude_repo),
exclude_title_regex=args.exclude_title_regex,
exclude_bounty_like=args.exclude_bounty_like,
)
except RuntimeError as exc:
print(str(exc), file=sys.stderr)
return 1

if args.format == "json":
emit_json(issues)
emit_json(result)
else:
emit_text(issues)
emit_text(result)
return 0


Expand Down
73 changes: 72 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from datetime import datetime, timedelta, timezone

from hubsignal.cli import RepoDetails, repo_passes_filters, repo_from_url, score_issue
from hubsignal.cli import (
RepoDetails,
is_bounty_like,
rank_issues,
repo_passes_filters,
repo_from_url,
score_issue,
)


def test_repo_from_url_extracts_owner_and_name():
Expand Down Expand Up @@ -59,3 +66,67 @@ def test_repo_filters_can_exclude_archived_and_forked_repos():
assert repo_passes_filters(details, 10, False, False, None)
assert not repo_passes_filters(details, 10, True, False, None)
assert not repo_passes_filters(details, 10, False, True, None)


def test_bounty_like_detection_checks_labels_title_and_body():
    """Bounty wording in the body or a bounty label is flagged; plain issues are not."""
    flagged_by_body = {
        "title": "Find a typo",
        "body": "Earn tokens for a quick task",
        "labels": [{"name": "good first issue"}],
    }
    flagged_by_label = {
        "title": "Add a dashboard",
        "body": "",
        "labels": [{"name": "bounty"}],
    }
    ordinary_issue = {
        "title": "Document CLI behavior",
        "body": "Small docs-only fix",
        "labels": [{"name": "documentation"}],
    }

    assert is_bounty_like(flagged_by_body)
    assert is_bounty_like(flagged_by_label)
    assert not is_bounty_like(ordinary_issue)


def test_rank_issues_reports_skipped_noise_without_fetching_repo_details(monkeypatch):
    """Bounty-like items are counted as skipped and never trigger a repo lookup."""
    items = [
        {
            "repository_url": "https://api.github.com/repos/noisy/project",
            "title": "Bounty: star + review an open PR",
            "body": "",
            "labels": [{"name": "good first issue"}],
            "comments": 0,
            "updated_at": datetime.now(timezone.utc).isoformat(),
            "html_url": "https://github.com/noisy/project/issues/1",
        },
        {
            "repository_url": "https://api.github.com/repos/owner/project",
            "title": "Document CLI behavior",
            "body": "",
            "labels": [{"name": "documentation"}],
            "comments": 1,
            "updated_at": datetime.now(timezone.utc).isoformat(),
            "html_url": "https://github.com/owner/project/issues/2",
        },
    ]
    # Record every repository the ranking code asks about, so the
    # "without fetching" half of the test name is actually checked.
    fetched_repos: list[str] = []

    def fake_repo_details(repo, token, cache):
        fetched_repos.append(repo)
        return RepoDetails(
            name=repo,
            stars=100,
            archived=False,
            fork=False,
            pushed_at="2026-05-01T00:00:00Z",
        )

    monkeypatch.setattr("hubsignal.cli.repo_details", fake_repo_details)

    result = rank_issues(items, token=None, exclude_bounty_like=True)

    assert len(result.issues) == 1
    assert result.issues[0].repo == "owner/project"
    assert result.skipped["bounty_like"] == 1
    # The skipped issue must be dropped before any repository lookup happens.
    assert "noisy/project" not in fetched_repos