diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 95e1549..10ab094 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,9 +22,11 @@ repos:
         entry: uv run ruff check
         language: system
         types: [python]
+        exclude: ^skills/
         args: [--exit-non-zero-on-fix]
       - id: ruff-format
         name: ruff-format
         entry: uv run ruff format
         language: system
         types: [python]
+        exclude: ^skills/
diff --git a/contrib/README.md b/contrib/README.md
index 87f140b..392587b 100644
--- a/contrib/README.md
+++ b/contrib/README.md
@@ -1,8 +1,10 @@
 # Contrib
 
-This directory documents contrib-related examples for `bubseek`.
+This directory contains plugins for `bubseek`.
 
-`bubseek` does not install contrib from this directory. Contrib packages remain standard Python packages and should be added through normal dependency management in `pyproject.toml`.
+Contrib packages remain standard Python packages and should be added through normal dependency management in `pyproject.toml`.
+
+A typical plugin should also work with bub. For example:
diff --git a/skills/README.md b/skills/README.md
new file mode 100644
index 0000000..fad1472
--- /dev/null
+++ b/skills/README.md
@@ -0,0 +1,7 @@
+# Skills
+
+This directory contains a curated collection of skills for tasks across various fields.
+
+By default, these skills are not included in the bubseek release.
+
+You can install them in your project's `.agents/skills` directory using `npx skills` or a similar command.
diff --git a/src/skills/github-repo-cards/SKILL.md b/skills/github-repo-cards/SKILL.md
similarity index 92%
rename from src/skills/github-repo-cards/SKILL.md
rename to skills/github-repo-cards/SKILL.md
index 9c05aa6..77fb71a 100644
--- a/src/skills/github-repo-cards/SKILL.md
+++ b/skills/github-repo-cards/SKILL.md
@@ -29,7 +29,7 @@ uv run scripts/gh_repo_card.py <owner>/<repo> [--top-n 5] [--analysis "Your analys
 ```
 
 The script path is relative to this skill directory:
-`src/skills/github-repo-cards/scripts/gh_repo_card.py`
+`skills/github-repo-cards/scripts/gh_repo_card.py`
 
 This will:
 1. Call `gh` to fetch repo metadata, stargazer counts, commit activity, and top contributors.
@@ -42,8 +42,7 @@ This will:
 ```
 uv run scripts/gh_trending_card.py [--language python] [--since daily] [--limit 10] [--output trending.svg]
 ```
-The script path is relative to this skill directory:
-`src/skills/github-repo-cards/scripts/gh_trending_card.py`
+The script path is relative to this skill directory: `scripts/gh_trending_card.py`
 
 This will:
 1. Scrape GitHub trending page (or use `gh api` search with recent star sorting).
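The `exclude: ^skills/` lines added to both hooks above keep ruff away from the vendored skill scripts now that they live outside `src/`. A minimal sketch of the matching semantics, assuming pre-commit's documented behavior of treating `exclude` as a Python regex searched against each candidate file path (the paths below are illustrative, not part of this patch):

```python
# Sketch only: how an `exclude: ^skills/` pattern filters candidate files.
import re

exclude = re.compile(r"^skills/")

paths = [
    "skills/github-repo-cards/scripts/gh_repo_card.py",  # skipped by the hooks
    "src/bubseek/app.py",                                # still linted and formatted
]
for path in paths:
    print(path, "->", "excluded" if exclude.search(path) else "checked")
```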
diff --git a/src/skills/github-repo-cards/scripts/gh_repo_card.py b/skills/github-repo-cards/scripts/gh_repo_card.py
similarity index 79%
rename from src/skills/github-repo-cards/scripts/gh_repo_card.py
rename to skills/github-repo-cards/scripts/gh_repo_card.py
index c71b1cc..fe9c886 100755
--- a/src/skills/github-repo-cards/scripts/gh_repo_card.py
+++ b/skills/github-repo-cards/scripts/gh_repo_card.py
@@ -15,16 +15,63 @@
 import base64
 import html
 import json
+import os
 import shutil
 import subprocess
 import sys
 import textwrap
 import time
+import urllib.error
 import urllib.request
+from datetime import datetime
+from http import HTTPStatus
 from pathlib import Path
 from typing import cast
 
-# ── Data fetching via gh CLI ─────────────────────────────────────────────────
+# ── Data fetching via gh CLI / GitHub API ────────────────────────────────────
+
+_GITHUB_API = "https://api.github.com"
+
+
+def _github_headers(*, accept: str = "application/vnd.github+json") -> dict[str, str]:
+    headers = {
+        "Accept": accept,
+        "User-Agent": "bubseek-github-repo-cards",
+        "X-GitHub-Api-Version": "2022-11-28",
+    }
+    token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN")
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    return headers
+
+
+def _http_get(url: str, *, accept: str) -> tuple[bytes, str]:
+    curl = shutil.which("curl")
+    headers = _github_headers(accept=accept)
+    if curl:
+        command = [curl, "-fsSL", "--compressed", "--retry", "2", "--connect-timeout", "20"]
+        for name, value in headers.items():
+            command.extend(["-H", f"{name}: {value}"])
+        command.append(url)
+        response = subprocess.run(command, capture_output=True, check=True)
+        return response.stdout, "application/octet-stream"
+
+    request = urllib.request.Request(url, headers=headers)
+    with urllib.request.urlopen(request, timeout=20) as response:
+        return response.read(), response.headers.get("Content-Type", "application/octet-stream")
+
+
+def _api_json(url: str, *, accept: str = "application/vnd.github+json") -> dict | list:
+    payload, _ = _http_get(url, accept=accept)
+    return cast(dict | list, json.loads(payload.decode("utf-8")))
+
+
+def _api_bytes(url: str, *, accept: str = "application/octet-stream") -> tuple[bytes, str]:
+    return _http_get(url, accept=accept)
+
+
+def _gh_available() -> bool:
+    return shutil.which("gh") is not None
 
 
 def _gh(*args: str) -> str:
@@ -47,18 +94,47 @@ def _gh_stats_json(endpoint: str, retries: int = 4) -> dict | list:
     """
     GitHub stats APIs return ``{}`` while computing data on the first call.
     We retry with exponential back-off until an array is returned.
""" + raw: dict | list = {} for attempt in range(retries): - raw = _gh_json("api", endpoint, "--cache", "0s") + if _gh_available(): + raw = _gh_json("api", endpoint, "--cache", "0s") + else: + try: + raw = _api_json(f"{_GITHUB_API}/{endpoint}") + except urllib.error.HTTPError as exc: + if exc.code != HTTPStatus.ACCEPTED: + raise + raw = {} + if isinstance(raw, list): return raw + delay = 2**attempt print(f" ⏳ stats computing, retry in {delay}s …", file=sys.stderr) time.sleep(delay) + return raw def fetch_repo_info(nwo: str) -> dict: """Fetch basic repo metadata.""" + if not _gh_available(): + raw = _api_json(f"{_GITHUB_API}/repos/{nwo}") + if not isinstance(raw, dict): + raise TypeError(f"Unexpected response for repository {nwo!r}") + return { + "name": raw.get("name"), + "owner": {"login": raw.get("owner", {}).get("login", "")}, + "description": raw.get("description"), + "stargazerCount": raw.get("stargazers_count", 0), + "forkCount": raw.get("forks_count", 0), + "primaryLanguage": {"name": raw.get("language") or ""}, + "licenseInfo": {"name": (raw.get("license") or {}).get("name", "")}, + "updatedAt": raw.get("updated_at"), + "url": raw.get("html_url"), + "homepageUrl": raw.get("homepage"), + } + return cast( dict, _gh_json( @@ -86,21 +162,28 @@ def fetch_stargazer_counts(nwo: str) -> list[int]: rough weekly bucketed curve. """ try: - raw = _gh( - "api", - f"repos/{nwo}/stargazers?per_page=100", - "-H", - "Accept: application/vnd.github.star+json", - "--cache", - "1h", - ) - if not raw: - return [] - items = json.loads(raw) + if _gh_available(): + raw = _gh( + "api", + f"repos/{nwo}/stargazers?per_page=100", + "-H", + "Accept: application/vnd.github.star+json", + "--cache", + "1h", + ) + if not raw: + return [] + items = json.loads(raw) + else: + items = _api_json( + f"{_GITHUB_API}/repos/{nwo}/stargazers?per_page=100", + accept="application/vnd.github.star+json", + ) + if not isinstance(items, list): return [] + from collections import Counter - from datetime import datetime weeks: Counter[str] = Counter() for item in items: @@ -125,17 +208,18 @@ def _download_avatar_b64(url: str, size: int = 64) -> str: """ fetch_url = f"{url}&s={size}" if "?" 
in url else f"{url}?s={size}" try: - with urllib.request.urlopen(fetch_url, timeout=10) as resp: # noqa: S310 - data = resp.read() - ct = resp.headers.get("Content-Type", "image/png") - return f"data:{ct};base64,{base64.b64encode(data).decode()}" + data, content_type = _api_bytes(fetch_url) + return f"data:{content_type};base64,{base64.b64encode(data).decode()}" except Exception: return "" def fetch_top_contributors(nwo: str, n: int = 5) -> list[dict]: """Return top-N contributors by commit count (with embedded avatar data).""" - raw = _gh_json("api", f"repos/{nwo}/contributors?per_page={n}", "--cache", "1h") + if _gh_available(): + raw = _gh_json("api", f"repos/{nwo}/contributors?per_page={n}", "--cache", "1h") + else: + raw = _api_json(f"{_GITHUB_API}/repos/{nwo}/contributors?per_page={n}") if not isinstance(raw, list): return [] results = [] @@ -149,6 +233,31 @@ def fetch_top_contributors(nwo: str, n: int = 5) -> list[dict]: return results +def build_default_analysis(info: dict) -> str: + """Generate a concise analysis paragraph from repository metadata.""" + updated_at = info.get("updatedAt") + updated_text = "" + if isinstance(updated_at, str): + try: + updated_dt = datetime.fromisoformat(updated_at.replace("Z", "+00:00")) + updated_text = updated_dt.strftime("%Y-%m-%d") + except ValueError: + updated_text = updated_at + + license_name = (info.get("licenseInfo") or {}).get("name", "") or "No license metadata" + lang = (info.get("primaryLanguage") or {}).get("name", "") or "Unknown language" + homepage = info.get("homepageUrl") or "No homepage" + + fragments = [ + f"Primary language: {lang}.", + f"License: {license_name}.", + f"Homepage: {homepage}.", + ] + if updated_text: + fragments.append(f"Last updated: {updated_text}.") + return " ".join(fragments) + + # ── SVG rendering ──────────────────────────────────────────────────────────── _LANG_COLORS: dict[str, str] = { @@ -512,7 +621,8 @@ def main() -> None: contributors = fetch_top_contributors(nwo, args.top_n) print("🎨 Rendering SVG …") - svg = render_repo_svg(info, commits, stars, contributors, analysis=args.analysis, top_n=args.top_n) + analysis = args.analysis or build_default_analysis(info) + svg = render_repo_svg(info, commits, stars, contributors, analysis=analysis, top_n=args.top_n) out.write_text(svg, encoding="utf-8") print(f" → {out}") diff --git a/src/skills/github-repo-cards/scripts/gh_trending_card.py b/skills/github-repo-cards/scripts/gh_trending_card.py similarity index 72% rename from src/skills/github-repo-cards/scripts/gh_trending_card.py rename to skills/github-repo-cards/scripts/gh_trending_card.py index 2bd8a47..2bfb7c2 100755 --- a/src/skills/github-repo-cards/scripts/gh_trending_card.py +++ b/skills/github-repo-cards/scripts/gh_trending_card.py @@ -14,15 +14,66 @@ import argparse import html import json +import os +import re import shutil import subprocess import sys import time -from datetime import UTC +import urllib.error +import urllib.parse +import urllib.request +from datetime import UTC, datetime, timedelta +from http import HTTPStatus from pathlib import Path # ── Data fetching ──────────────────────────────────────────────────────────── +_GITHUB_API = "https://api.github.com" +_TRENDING_URL = "https://github.com/trending" + + +def _github_headers(*, accept: str = "application/vnd.github+json") -> dict[str, str]: + headers = { + "Accept": accept, + "User-Agent": "bubseek-github-repo-cards", + "X-GitHub-Api-Version": "2022-11-28", + } + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") 
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    return headers
+
+
+def _http_get(url: str, *, accept: str) -> tuple[bytes, str]:
+    curl = shutil.which("curl")
+    headers = _github_headers(accept=accept)
+    if curl:
+        command = [curl, "-fsSL", "--compressed", "--retry", "2", "--connect-timeout", "20"]
+        for name, value in headers.items():
+            command.extend(["-H", f"{name}: {value}"])
+        command.append(url)
+        response = subprocess.run(command, capture_output=True, check=True)
+        return response.stdout, "application/octet-stream"
+
+    request = urllib.request.Request(url, headers=headers)
+    with urllib.request.urlopen(request, timeout=20) as response:
+        return response.read(), response.headers.get("Content-Type", "application/octet-stream")
+
+
+def _api_json(url: str, *, accept: str = "application/vnd.github+json") -> dict | list:
+    payload, _ = _http_get(url, accept=accept)
+    return json.loads(payload.decode("utf-8"))
+
+
+def _api_text(url: str, *, accept: str = "text/html") -> str:
+    payload, _ = _http_get(url, accept=accept)
+    return payload.decode("utf-8")
+
+
+def _gh_available() -> bool:
+    return shutil.which("gh") is not None
+
 
 def _gh_json(*args: str) -> dict | list:
     result = subprocess.run(
@@ -36,8 +87,17 @@ def _gh_json(*args: str) -> dict | list:
 
 def _gh_stats_json(endpoint: str, retries: int = 4) -> dict | list:
     """Fetch a GitHub stats endpoint with retry for 202 (computing) responses."""
+    raw: dict | list = {}
     for attempt in range(retries):
-        raw = _gh_json("api", endpoint, "--cache", "0s")
+        if _gh_available():
+            raw = _gh_json("api", endpoint, "--cache", "0s")
+        else:
+            try:
+                raw = _api_json(f"{_GITHUB_API}/{endpoint}")
+            except urllib.error.HTTPError as exc:
+                if exc.code != HTTPStatus.ACCEPTED:
+                    raise
+                raw = {}
         if isinstance(raw, list):
             return raw
         delay = 2**attempt
@@ -47,42 +107,90 @@ def _gh_stats_json(endpoint: str, retries: int = 4) -> dict | list:
 
 
 def fetch_trending(language: str = "", since: str = "daily", limit: int = 10) -> list[dict]:
-    """Approximate trending repos using GitHub search API sorted by recent stars.
+    """Fetch trending repositories from the public trending page.
 
-    GitHub has no public trending API, so we search for repos created/updated
-    recently, sorted by stars.
+    If parsing fails or GitHub changes the page shape, fall back to the search API.
     """
-    from datetime import datetime, timedelta
+    repos = _fetch_trending_page(language=language, since=since, limit=limit)
+    if repos:
+        return repos
+    return _fetch_trending_via_search_api(language=language, since=since, limit=limit)
+
+
+def _fetch_trending_page(language: str = "", since: str = "daily", limit: int = 10) -> list[dict]:
+    params = {"since": since}
+    if language:
+        params["l"] = language
+
+    html_text = _api_text(f"{_TRENDING_URL}?{urllib.parse.urlencode(params)}")
+    repo_matches = re.findall(
+        r'<article class="Box-row">\s*.*?\s*<a href="/([^"]+)">(.*?)</article>',
+        html_text,
+        flags=re.DOTALL,
+    )
+
+    results = []
+    for full_name, article_body in repo_matches[:limit]:
+        normalized_name = "/".join(part.strip() for part in full_name.split("/"))
+        description_match = re.search(
+            r'<p class="col-9 color-fg-muted my-1">\s*(.*?)\s*</p>',
+            article_body,
+            flags=re.DOTALL,
+        )
+        language_match = re.search(
+            r'<span itemprop="programmingLanguage">\s*(.*?)\s*</span>',
+            article_body,
+            flags=re.DOTALL,
+        )
+        stars_and_forks = re.findall(r'href="/[^"]+/(stargazers|forks)">\s*([\d,]+)\s*</a>', article_body)
+        counts = {kind: int(count.replace(",", "")) for kind, count in stars_and_forks}
+        results.append({
+            "full_name": normalized_name,
+            "description": html.unescape(_strip_tags(description_match.group(1)))[:120] if description_match else "",
+            "language": html.unescape(language_match.group(1).strip()) if language_match else "",
+            "stars": counts.get("stargazers", 0),
+            "forks": counts.get("forks", 0),
+            "commits_week": _fetch_weekly_commits(normalized_name),
+        })
+    return results
+
+
+def _fetch_trending_via_search_api(language: str = "", since: str = "daily", limit: int = 10) -> list[dict]:
     window = {"daily": 1, "weekly": 7, "monthly": 30}.get(since, 1)
     cutoff = (datetime.now(UTC) - timedelta(days=window)).strftime("%Y-%m-%d")
 
     q_parts = [f"pushed:>={cutoff}", "stars:>=10"]
     if language:
         q_parts.append(f"language:{language}")
-    query = "+".join(q_parts)
+    query = urllib.parse.quote_plus(" ".join(q_parts))
 
-    raw = _gh_json(
-        "api",
-        f"search/repositories?q={query}&sort=stars&order=desc&per_page={limit}",
-        "--cache",
-        "1h",
+    url = f"{_GITHUB_API}/search/repositories?q={query}&sort=stars&order=desc&per_page={limit}"
+    raw = (
+        _gh_json(
+            "api", f"search/repositories?q={'+'.join(q_parts)}&sort=stars&order=desc&per_page={limit}", "--cache", "1h"
+        )
+        if _gh_available()
+        else _api_json(url)
     )
     items = raw.get("items", []) if isinstance(raw, dict) else []
 
     results = []
-    for r in items[:limit]:
-        commits_week = _fetch_weekly_commits(r["full_name"])
+    for repo in items[:limit]:
         results.append({
-            "full_name": r["full_name"],
-            "description": (r.get("description") or "")[:120],
-            "language": r.get("language") or "",
-            "stars": r["stargazers_count"],
-            "forks": r["forks_count"],
-            "commits_week": commits_week,
+            "full_name": repo["full_name"],
+            "description": (repo.get("description") or "")[:120],
+            "language": repo.get("language") or "",
+            "stars": repo["stargazers_count"],
+            "forks": repo["forks_count"],
+            "commits_week": _fetch_weekly_commits(repo["full_name"]),
         })
     return results
 
 
+def _strip_tags(text: str) -> str:
+    return re.sub(r"<[^>]+>", "", " ".join(text.split()))
+
+
 def _fetch_weekly_commits(nwo: str) -> list[int]:
     """Fetch last 8 weeks of commit counts with retry for stats computation."""
     try:
diff --git a/src/skills/README.md b/src/skills/README.md
new file mode 100644
index 0000000..94598fc
--- /dev/null
+++ b/src/skills/README.md
@@ -0,0 +1,5 @@
+# Built-in Skills Directory
+
+This directory contains built-in skills for Bubseek.
+
+These skills provide essential functionality and can be easily integrated into your workspace.
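Both scripts now share the same retry shape for GitHub's statistics endpoints, which answer 202 (or an empty object) while the data is still being computed. A condensed sketch of that back-off pattern as used by `_gh_stats_json`; the names here are illustrative, not part of the patch:

```python
# Sketch of the retry-with-exponential-back-off used by _gh_stats_json:
# keep polling until the stats endpoint returns a list, sleeping 1s, 2s, 4s, 8s.
import time
from typing import Callable

def poll_stats(fetch: Callable[[], dict | list], retries: int = 4) -> dict | list:
    raw: dict | list = {}
    for attempt in range(retries):
        raw = fetch()
        if isinstance(raw, list):  # stats are ready
            return raw
        time.sleep(2**attempt)
    return raw  # may still be {} if GitHub never finished computing
```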
diff --git a/tests/test_bubseek.py b/tests/test_bubseek.py
index fef078d..64f1ff2 100644
--- a/tests/test_bubseek.py
+++ b/tests/test_bubseek.py
@@ -10,7 +10,6 @@
 from typing import cast
 
 import pytest
-from bub.skills import _read_skill
 from pydantic import ValidationError
 
 REPO_ROOT = Path(__file__).resolve().parents[1]
@@ -49,31 +48,6 @@ def test_distribution_metadata_exposes_sqlalchemy_dialect_without_console_script
     }
 
 
-def test_pyproject_includes_package_and_builtin_skills_in_wheel() -> None:
-    data = _load_pyproject()
-
-    tool = _as_dict(data["tool"])
-    pdm = _as_dict(tool["pdm"])
-    build = _as_dict(pdm["build"])
-    assert build["includes"] == [
-        "src/bubseek",
-        "src/skills",
-    ]
-    assert build["skills"]
-
-
-def test_bundled_skills_have_valid_frontmatter() -> None:
-    skill_root = REPO_ROOT / "src" / "skills"
-    skill_names = []
-
-    for skill_dir in sorted(path for path in skill_root.iterdir() if path.is_dir()):
-        metadata = _read_skill(skill_dir, source="builtin")
-        assert metadata is not None
-        skill_names.append(metadata.name)
-
-    assert "github-repo-cards" in skill_names
-
-
 def test_mysql_connection_params_extract_mysql_values(monkeypatch) -> None:
     with imported_bubseek_modules("bubseek.oceanbase") as [oceanbase_mod]:
         monkeypatch.setenv(
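The net effect of the fallback paths added in both scripts: when the `gh` CLI is not on `PATH`, requests go straight to the REST API (via `curl` or `urllib`), picking up credentials from `GITHUB_TOKEN` or `GH_TOKEN` when present. A hypothetical smoke run under those assumptions; `octocat/Hello-World` is just an example repository, and the `PATH` value is illustrative:

```python
# Hypothetical smoke test of the gh-less path; not part of the patch.
import os
import subprocess

env = dict(os.environ)
env["PATH"] = "/usr/bin:/bin"              # an illustrative PATH without the gh binary
env.setdefault("GITHUB_TOKEN", "<token>")  # placeholder; raises the API rate limit

subprocess.run(
    ["python", "skills/github-repo-cards/scripts/gh_repo_card.py", "octocat/Hello-World"],
    env=env,
    check=True,
)
```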