Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions packages/typestats-site/src/typestats_site/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import contextlib
import dataclasses
import datetime as dt
import logging
from pathlib import Path
from typing import Annotated

import anyio
import tyro

from typestats_site import PROJECTS_PATH
from typestats_site._logging import setup_logging


def _relative_default(p: str) -> str:
Expand Down Expand Up @@ -118,11 +118,7 @@ async def _run(cmd: Collect | Dashboard | Preview) -> None:


def app() -> None:
logging.basicConfig(
format="%(asctime)s :: %(name)s :: %(levelname)s :: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO,
)
setup_logging()

prog = "typestats-site"
desc = "Dashboard site generation and PyPI collection for typestats."
Expand Down
54 changes: 54 additions & 0 deletions packages/typestats-site/src/typestats_site/_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Logging setup with per-task project/version prefixes for concurrent collection."""

import logging
import os
from collections.abc import Generator
from contextlib import contextmanager
from contextvars import ContextVar
from typing import Final

__all__ = "log_context", "setup_logging"

_FORMAT: Final = "%(asctime)s %(levelname)-7s %(scope)s%(message)s"
_DATE_FORMAT: Final = "%H:%M:%S"
_LEVEL_ENV: Final = "TYPESTATS_LOG_LEVEL"

# per-task project/version label, surfaced to the formatter as `%(scope)s`
_log_scope: Final[ContextVar[str]] = ContextVar("_log_scope", default="")


@contextmanager
def log_context(label: str, /) -> Generator[None]:
"""Prefix every log record emitted within this scope with `[label]`."""
token = _log_scope.set(label)
try:
yield
finally:
_log_scope.reset(token)


def _inject_scope(record: logging.LogRecord) -> bool:
label = _log_scope.get()
record.scope = f"[{label}] " if label else ""
return True


def setup_logging() -> None:
"""Configure root logging; level set by `TYPESTATS_LOG_LEVEL` (default INFO)."""
level = logging.getLevelNamesMapping().get(
os.environ.get(_LEVEL_ENV, "INFO").upper(),
logging.INFO,
)

handler = logging.StreamHandler()
handler.addFilter(_inject_scope)
logging.basicConfig(
format=_FORMAT,
datefmt=_DATE_FORMAT,
level=level,
handlers=[handler],
)

# keep HTTP internals out of our output, even at DEBUG
for noisy in ("httpx", "httpcore"):
logging.getLogger(noisy).setLevel(logging.WARNING)
83 changes: 39 additions & 44 deletions packages/typestats-site/src/typestats_site/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from typestats.stubs import find_stubs_dir, stubs_base_name

from ._http import retry_client
from ._logging import log_context
from ._pypi import (
FileDetail,
available_versions,
Expand Down Expand Up @@ -50,7 +51,7 @@ async def clean_data(data_dir: anyio.Path, /) -> int:
async for json_file in data_dir.rglob("*.json"):
await json_file.unlink()
removed += 1
_logger.debug(" removed %s", json_file)
_logger.debug("removed %s", json_file)

async for child in data_dir.iterdir():
if await child.is_dir():
Expand Down Expand Up @@ -89,16 +90,13 @@ class _ProjectCollector:
base_available: dict[Version, FileDetail] | None
base_install_cache: dict[str, anyio.Path] = dataclasses.field(default_factory=dict)

def log(self, level: int, version: Version, msg: str, /, *args: object) -> None:
_logger.log(level, f" %s %s - {msg}", self.project.name, version, *args) # noqa: G004

async def collect_version(self, version: Version, out: Path) -> bool:
project = self.project

try:
sp = await install_to_venv(self.work_dir, project.name, str(version))
except subprocess.CalledProcessError:
self.log(logging.WARNING, version, "install failed, skipping")
_logger.warning("install failed, skipping")
return False

# detect *-stubs/ dirs not derivable from the project name
Expand All @@ -122,12 +120,7 @@ async def collect_version(self, version: Version, out: Path) -> bool:
sp,
)
if base_ver is None:
self.log(
logging.WARNING,
version,
"no matching %s version, skipping",
base_name,
)
_logger.warning("no matching %s version, skipping", base_name)
return False

base_ver_str = str(base_ver)
Expand Down Expand Up @@ -163,7 +156,7 @@ async def collect_version(self, version: Version, out: Path) -> bool:

json_bytes = report.model_dump_json(indent=2).encode()
out.write_bytes(json_bytes) # noqa: ASYNC240
self.log(logging.INFO, version, "wrote %s", out)
_logger.info("wrote %s", out)

return True

Expand Down Expand Up @@ -207,21 +200,22 @@ async def collect_project( # noqa: PLR0913

written: list[Path] = []
for version in sorted(eligible):
out = project_data_dir / f"{version}.json"
if out.is_file():
if _is_current_schema(out):
collector.log(logging.INFO, version, "already collected, skipping")
continue
collector.log(logging.INFO, version, "outdated schema, re-collecting")
out.unlink()

collector.log(logging.INFO, version, "analyzing...")

try:
if await collector.collect_version(version, out):
written.append(out)
finally:
await remove_venv(work_dir, project.name, str(version))
with log_context(f"{project.name} {version}"):
out = project_data_dir / f"{version}.json"
if out.is_file():
if _is_current_schema(out):
_logger.debug("already collected, skipping")
continue
_logger.debug("outdated schema, re-collecting")
out.unlink()

_logger.debug("analyzing")

try:
if await collector.collect_version(version, out):
written.append(out)
finally:
await remove_venv(work_dir, project.name, str(version))

return written

Expand Down Expand Up @@ -264,23 +258,24 @@ async def _collect(project: "Project") -> None:
# Per-project subdir: no cross-project venv sharing, safe to reap.
project_work_dir = work_dir / project.name
await project_work_dir.mkdir()
try:
written.extend(
await collect_project(
project,
client,
data_dir,
project_work_dir,
backfill_since=backfill_since,
backfill_limit=backfill_limit,
),
)
except Exception:
_logger.exception(" %s - failed, skipping", project.name)
finally:
# sync because an `await` here can be cancelled mid-cleanup.
clear_venv_locks(project_work_dir)
shutil.rmtree(project_work_dir, ignore_errors=True)
with log_context(project.name):
try:
written.extend(
await collect_project(
project,
client,
data_dir,
project_work_dir,
backfill_since=backfill_since,
backfill_limit=backfill_limit,
),
)
except Exception:
_logger.exception("failed, skipping")
finally:
# sync because an `await` here can be cancelled mid-cleanup.
clear_venv_locks(project_work_dir)
shutil.rmtree(project_work_dir, ignore_errors=True)

async with anyio.create_task_group() as tg:
for project in projects:
Expand Down
2 changes: 1 addition & 1 deletion src/typestats/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

async def run(*args: str, cwd: str | None = None) -> subprocess.CompletedProcess[bytes]:
"""Run a subprocess, log the command, and check the return code."""
_logger.info("Running subprocess: %s", " ".join(args))
_logger.debug("Running subprocess: %s", " ".join(args))
result = await anyio.run_process(list(args), check=False, cwd=cwd)
if result.returncode != 0:
stderr = result.stderr.decode(errors="replace").strip()
Expand Down