From 4b7d44d4008ffbfd7b0a8de87316693bf59542e6 Mon Sep 17 00:00:00 2001 From: jorenham Date: Mon, 22 Jun 2026 12:20:45 +0200 Subject: [PATCH] despaghettified logging --- .../src/typestats_site/__main__.py | 8 +- .../src/typestats_site/_logging.py | 54 ++++++++++++ .../src/typestats_site/collect.py | 83 +++++++++---------- src/typestats/subprocess.py | 2 +- 4 files changed, 96 insertions(+), 51 deletions(-) create mode 100644 packages/typestats-site/src/typestats_site/_logging.py diff --git a/packages/typestats-site/src/typestats_site/__main__.py b/packages/typestats-site/src/typestats_site/__main__.py index 73e27006c..52f865526 100644 --- a/packages/typestats-site/src/typestats_site/__main__.py +++ b/packages/typestats-site/src/typestats_site/__main__.py @@ -3,7 +3,6 @@ import contextlib import dataclasses import datetime as dt -import logging from pathlib import Path from typing import Annotated @@ -11,6 +10,7 @@ import tyro from typestats_site import PROJECTS_PATH +from typestats_site._logging import setup_logging def _relative_default(p: str) -> str: @@ -118,11 +118,7 @@ async def _run(cmd: Collect | Dashboard | Preview) -> None: def app() -> None: - logging.basicConfig( - format="%(asctime)s :: %(name)s :: %(levelname)s :: %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - level=logging.INFO, - ) + setup_logging() prog = "typestats-site" desc = "Dashboard site generation and PyPI collection for typestats." diff --git a/packages/typestats-site/src/typestats_site/_logging.py b/packages/typestats-site/src/typestats_site/_logging.py new file mode 100644 index 000000000..74250be02 --- /dev/null +++ b/packages/typestats-site/src/typestats_site/_logging.py @@ -0,0 +1,54 @@ +"""Logging setup with per-task project/version prefixes for concurrent collection.""" + +import logging +import os +from collections.abc import Generator +from contextlib import contextmanager +from contextvars import ContextVar +from typing import Final + +__all__ = "log_context", "setup_logging" + +_FORMAT: Final = "%(asctime)s %(levelname)-7s %(scope)s%(message)s" +_DATE_FORMAT: Final = "%H:%M:%S" +_LEVEL_ENV: Final = "TYPESTATS_LOG_LEVEL" + +# per-task project/version label, surfaced to the formatter as `%(scope)s` +_log_scope: Final[ContextVar[str]] = ContextVar("_log_scope", default="") + + +@contextmanager +def log_context(label: str, /) -> Generator[None]: + """Prefix every log record emitted within this scope with `[label]`.""" + token = _log_scope.set(label) + try: + yield + finally: + _log_scope.reset(token) + + +def _inject_scope(record: logging.LogRecord) -> bool: + label = _log_scope.get() + record.scope = f"[{label}] " if label else "" + return True + + +def setup_logging() -> None: + """Configure root logging; level set by `TYPESTATS_LOG_LEVEL` (default INFO).""" + level = logging.getLevelNamesMapping().get( + os.environ.get(_LEVEL_ENV, "INFO").upper(), + logging.INFO, + ) + + handler = logging.StreamHandler() + handler.addFilter(_inject_scope) + logging.basicConfig( + format=_FORMAT, + datefmt=_DATE_FORMAT, + level=level, + handlers=[handler], + ) + + # keep HTTP internals out of our output, even at DEBUG + for noisy in ("httpx", "httpcore"): + logging.getLogger(noisy).setLevel(logging.WARNING) diff --git a/packages/typestats-site/src/typestats_site/collect.py b/packages/typestats-site/src/typestats_site/collect.py index a39cce6c9..16d176057 100644 --- a/packages/typestats-site/src/typestats_site/collect.py +++ b/packages/typestats-site/src/typestats_site/collect.py @@ -23,6 +23,7 @@ from typestats.stubs import find_stubs_dir, stubs_base_name from ._http import retry_client +from ._logging import log_context from ._pypi import ( FileDetail, available_versions, @@ -50,7 +51,7 @@ async def clean_data(data_dir: anyio.Path, /) -> int: async for json_file in data_dir.rglob("*.json"): await json_file.unlink() removed += 1 - _logger.debug(" removed %s", json_file) + _logger.debug("removed %s", json_file) async for child in data_dir.iterdir(): if await child.is_dir(): @@ -89,16 +90,13 @@ class _ProjectCollector: base_available: dict[Version, FileDetail] | None base_install_cache: dict[str, anyio.Path] = dataclasses.field(default_factory=dict) - def log(self, level: int, version: Version, msg: str, /, *args: object) -> None: - _logger.log(level, f" %s %s - {msg}", self.project.name, version, *args) # noqa: G004 - async def collect_version(self, version: Version, out: Path) -> bool: project = self.project try: sp = await install_to_venv(self.work_dir, project.name, str(version)) except subprocess.CalledProcessError: - self.log(logging.WARNING, version, "install failed, skipping") + _logger.warning("install failed, skipping") return False # detect *-stubs/ dirs not derivable from the project name @@ -122,12 +120,7 @@ async def collect_version(self, version: Version, out: Path) -> bool: sp, ) if base_ver is None: - self.log( - logging.WARNING, - version, - "no matching %s version, skipping", - base_name, - ) + _logger.warning("no matching %s version, skipping", base_name) return False base_ver_str = str(base_ver) @@ -163,7 +156,7 @@ async def collect_version(self, version: Version, out: Path) -> bool: json_bytes = report.model_dump_json(indent=2).encode() out.write_bytes(json_bytes) # noqa: ASYNC240 - self.log(logging.INFO, version, "wrote %s", out) + _logger.info("wrote %s", out) return True @@ -207,21 +200,22 @@ async def collect_project( # noqa: PLR0913 written: list[Path] = [] for version in sorted(eligible): - out = project_data_dir / f"{version}.json" - if out.is_file(): - if _is_current_schema(out): - collector.log(logging.INFO, version, "already collected, skipping") - continue - collector.log(logging.INFO, version, "outdated schema, re-collecting") - out.unlink() - - collector.log(logging.INFO, version, "analyzing...") - - try: - if await collector.collect_version(version, out): - written.append(out) - finally: - await remove_venv(work_dir, project.name, str(version)) + with log_context(f"{project.name} {version}"): + out = project_data_dir / f"{version}.json" + if out.is_file(): + if _is_current_schema(out): + _logger.debug("already collected, skipping") + continue + _logger.debug("outdated schema, re-collecting") + out.unlink() + + _logger.debug("analyzing") + + try: + if await collector.collect_version(version, out): + written.append(out) + finally: + await remove_venv(work_dir, project.name, str(version)) return written @@ -264,23 +258,24 @@ async def _collect(project: "Project") -> None: # Per-project subdir: no cross-project venv sharing, safe to reap. project_work_dir = work_dir / project.name await project_work_dir.mkdir() - try: - written.extend( - await collect_project( - project, - client, - data_dir, - project_work_dir, - backfill_since=backfill_since, - backfill_limit=backfill_limit, - ), - ) - except Exception: - _logger.exception(" %s - failed, skipping", project.name) - finally: - # sync because an `await` here can be cancelled mid-cleanup. - clear_venv_locks(project_work_dir) - shutil.rmtree(project_work_dir, ignore_errors=True) + with log_context(project.name): + try: + written.extend( + await collect_project( + project, + client, + data_dir, + project_work_dir, + backfill_since=backfill_since, + backfill_limit=backfill_limit, + ), + ) + except Exception: + _logger.exception("failed, skipping") + finally: + # sync because an `await` here can be cancelled mid-cleanup. + clear_venv_locks(project_work_dir) + shutil.rmtree(project_work_dir, ignore_errors=True) async with anyio.create_task_group() as tg: for project in projects: diff --git a/src/typestats/subprocess.py b/src/typestats/subprocess.py index 1758d3237..cbdd75eef 100644 --- a/src/typestats/subprocess.py +++ b/src/typestats/subprocess.py @@ -12,7 +12,7 @@ async def run(*args: str, cwd: str | None = None) -> subprocess.CompletedProcess[bytes]: """Run a subprocess, log the command, and check the return code.""" - _logger.info("Running subprocess: %s", " ".join(args)) + _logger.debug("Running subprocess: %s", " ".join(args)) result = await anyio.run_process(list(args), check=False, cwd=cwd) if result.returncode != 0: stderr = result.stderr.decode(errors="replace").strip()