Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 128 additions & 4 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import shutil
import subprocess
import sys
import tempfile
import time
from dataclasses import dataclass
from pathlib import Path
Expand Down Expand Up @@ -440,6 +441,129 @@ def run_cmd(cmd: list[str], **kwargs) -> tuple[bool, str]:
return False, str(e)


def _redaction_tokens() -> list[str]:
    """Return local-only values that must not be written to diagnostic metadata.

    The tokens are the repository root, the home directory, the temp
    directory (including any ``TMP``/``TEMP``/``TMPDIR`` override), each in
    native and ``/``-separated form both before and after resolution, plus
    the user name and host name.

    Collection is best-effort: a lookup that fails on this machine is skipped
    instead of aborting diagnostics generation.

    Returns:
        The distinct non-empty tokens, longest first so that a long path is
        replaced before any shorter token contained in it. Tokens of equal
        length are ordered alphabetically to keep the result deterministic.
    """
    values: set[str] = set()

    path_values = [ROOT, Path(tempfile.gettempdir())]
    try:
        path_values.append(Path.home())
    except RuntimeError:
        # Path.home() raises when the home directory cannot be determined;
        # there is then no home path to redact.
        pass
    for env_key in ("TMP", "TEMP", "TMPDIR"):
        env_value = os.environ.get(env_key)
        if env_value:
            path_values.append(Path(env_value))

    for path in path_values:
        try:
            resolved = path.resolve()
        except (OSError, RuntimeError):
            # RuntimeError covers symlink loops on Python versions before 3.13.
            resolved = path
        for candidate in {str(path), path.as_posix(), str(resolved), resolved.as_posix()}:
            # A path with no final component is a filesystem root or ".";
            # using it as a token would redact nearly every path and sentence.
            if Path(candidate).name:
                values.add(candidate)

    for env_key in ("USER", "USERNAME", "LOGNAME"):
        env_value = os.environ.get(env_key)
        if env_value:
            values.add(env_value)

    try:
        user_name = getpass.getuser()
    except (OSError, KeyError, ImportError):
        # No environment variable and no password-database entry for this uid
        # (common in containers); the exception type depends on the Python version.
        user_name = ""
    for value in (user_name, platform.node()):
        if value:
            values.add(value)

    return sorted(values, key=lambda token: (-len(token), token))


def redact_diagnostic_text(value: str) -> str:
    """Return *value* with every local-only token replaced by ``<redacted>``.

    The tokens (local paths, user names, host name) come from
    `_redaction_tokens()` and are applied in the order that function
    returns them.
    """
    scrubbed = value
    for secret in _redaction_tokens():
        if secret in scrubbed:
            scrubbed = scrubbed.replace(secret, "<redacted>")
    return scrubbed


def repo_relative_metadata_path(path: Optional[str]) -> Optional[str]:
    """Return a repository-relative `/` path when possible, otherwise redacted text.

    Args:
        path: An artifact path, or ``None``/``""`` when there is no artifact.

    Returns:
        ``None`` or ``""`` unchanged; the path relative to ``ROOT`` with ``/``
        separators when the resolved path lies under ``ROOT``; otherwise the
        original text passed through `redact_diagnostic_text`.
    """
    if not path:
        # "" must stay "no artifact": Path("") resolves to the current working
        # directory and would otherwise be reported as "." or a redacted cwd.
        return path

    try:
        relpath = Path(path).resolve().relative_to(ROOT)
    except (OSError, RuntimeError, ValueError):
        # ValueError: the path is outside ROOT. OSError/RuntimeError: the path
        # could not be resolved (RuntimeError is a symlink loop before 3.13).
        return redact_diagnostic_text(str(path))
    return relpath.as_posix()


def _iter_metadata_strings(value):
if isinstance(value, str):
yield value
elif isinstance(value, dict):
for nested in value.values():
yield from _iter_metadata_strings(nested)
elif isinstance(value, list):
for nested in value:
yield from _iter_metadata_strings(nested)


def validate_diagnostic_metadata(metadata_path: Path, root: Path = ROOT) -> list[str]:
    """Validate diagnostic JSON redaction and `.logd` pairing.

    Args:
        metadata_path: The diagnostic metadata JSON file to check.
        root: Directory that ``diagnostic_logd`` entries are resolved against.

    Returns:
        A list of human-readable validation errors. An empty list means the
        metadata is safe to submit with its paired encrypted diagnostic
        artifact. A file that is missing, unreadable, not valid JSON, or not a
        JSON object yields a single error because nothing else can be checked.
    """
    if not metadata_path.exists():
        return [f"diagnostic metadata is missing: {metadata_path}"]

    try:
        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        return [f"diagnostic metadata is not valid JSON: {exc}"]
    except (OSError, UnicodeDecodeError) as exc:
        # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
        return [f"diagnostic metadata could not be read: {exc}"]
    if not isinstance(metadata, dict):
        # Valid JSON such as a list or a number has no fields to inspect.
        return ["diagnostic metadata must be a JSON object"]

    errors: list[str] = []

    logd_value = metadata.get("diagnostic_logd")
    if isinstance(logd_value, str):
        logd_paths = [logd_value]
    elif isinstance(logd_value, list) and all(isinstance(item, str) for item in logd_value):
        logd_paths = logd_value
    else:
        logd_paths = []
        errors.append("diagnostic_logd must be a relative .logd path or a list of relative .logd paths")

    for relpath in logd_paths:
        if "\\" in relpath:
            errors.append(f"diagnostic_logd uses backslashes instead of `/`: {relpath}")
        if Path(relpath).is_absolute():
            errors.append(f"diagnostic_logd must be repository-relative: {relpath}")
        if ".." in Path(relpath).parts:
            errors.append(f"diagnostic_logd must not traverse outside the repository: {relpath}")
        if not relpath.endswith(".logd"):
            errors.append(f"diagnostic_logd must point to a .logd artifact: {relpath}")
        artifact_path = root / relpath
        if not artifact_path.exists():
            errors.append(f"diagnostic_logd artifact is missing: {relpath}")

    # NOTE(review): the string form of `root` is exempted from the leak check;
    # the reason is not visible here - confirm this is intentional.
    sensitive_values = [value for value in _redaction_tokens() if value and value != str(root)]
    reported: set[str] = set()
    for text_value in _iter_metadata_strings(metadata):
        for token in sensitive_values:
            # Report each leaked token once, however many strings contain it.
            if token in text_value and token not in reported:
                reported.add(token)
                errors.append(f"diagnostic metadata leaks local value `{token}`")

    modules = metadata.get("modules", [])
    if not isinstance(modules, list):
        errors.append("modules must be a list")
        modules = []
    for module in modules:
        if not isinstance(module, dict):
            continue
        artifact = module.get("artifact")
        if isinstance(artifact, str):
            if "\\" in artifact:
                errors.append(f"module artifact uses backslashes instead of `/`: {artifact}")
            if Path(artifact).is_absolute():
                errors.append(f"module artifact must be repository-relative: {artifact}")

    return errors


def collect_system_info() -> str:
lines = [
"Tent of Trials - System Diagnostic Snapshot",
Expand Down Expand Up @@ -523,8 +647,8 @@ def build_diagnostic_report(
"name": name,
"status": "PASS" if success else "FAIL",
"elapsed_seconds": round(elapsed, 3),
"artifact": binary,
"output": output,
"artifact": repo_relative_metadata_path(binary),
"output": redact_diagnostic_text(output),
}
for name, success, elapsed, output, binary in results
],
Expand Down Expand Up @@ -631,7 +755,7 @@ def generate_logd(
safe_dir.mkdir(parents=True, exist_ok=True)

(safe_dir / "system-info.txt").write_text(
collect_system_info(), encoding="utf-8"
redact_diagnostic_text(collect_system_info()), encoding="utf-8"
)

summary_lines = [
Expand Down Expand Up @@ -662,7 +786,7 @@ def generate_logd(
if binary:
log_lines.append(f"artifact: {binary}")
if output:
log_lines.append(output)
log_lines.append(redact_diagnostic_text(output))
(safe_dir / "build.log").write_text("\n".join(log_lines), encoding="utf-8")

sr = subprocess.run(
Expand Down
23 changes: 0 additions & 23 deletions diagnostic/build-00000000.json

This file was deleted.

24 changes: 24 additions & 0 deletions diagnostic/build-d8514bf7.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"generated_at": "2026-06-21T13:25:40.784594+00:00",
"commit": "d8514bf7",
"diagnostic_logd": "diagnostic/build-d8514bf7.logd",
"diagnostic_logd_error": null,
"message_blocker": null,
"chunked": false,
"chunk_size_bytes": null,
"password": "19800841b2952655813e",
"decrypt_command": "encryptly unpack diagnostic/build-d8514bf7.logd <outdir> --password 19800841b2952655813e",
"total_modules": 1,
"passed": 1,
"failed": 0,
"modules": [
{
"name": "frailbox",
"status": "PASS",
"elapsed_seconds": 0.011,
"artifact": "frailbox/frailbox",
"output": "make: Nothing to be done for 'all'."
}
],
"pr_note": "Include the encrypted diagnostic logd artifact(s): diagnostic/build-d8514bf7.logd. The encrypted .logd is the required diagnostic content for PR review; this JSON file is metadata. Maintainers may ask you to remove these diagnostic artifacts before merging."
}
Binary file added diagnostic/build-d8514bf7.logd
Binary file not shown.
84 changes: 84 additions & 0 deletions tests/test_diagnostic_redaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import getpass
import json
import platform
import tempfile
import unittest
from pathlib import Path

import build


class DiagnosticRedactionTests(unittest.TestCase):
    """Checks for diagnostic report redaction and metadata validation in `build`."""

    @staticmethod
    def _write_metadata(root: Path, payload: dict) -> Path:
        """Create `root/diagnostic/build-12345678.json` holding *payload*."""
        diagnostic_dir = root / "diagnostic"
        diagnostic_dir.mkdir()
        metadata_path = diagnostic_dir / "build-12345678.json"
        metadata_path.write_text(json.dumps(payload), encoding="utf-8")
        return metadata_path

    def test_report_uses_relative_artifact_paths_and_redacts_local_output(self):
        home = Path.home()
        candidates = (
            str(build.ROOT),
            build.ROOT.as_posix(),
            str(home),
            home.as_posix(),
            tempfile.gettempdir(),
            getpass.getuser(),
            platform.node(),
        )
        # Empty values (e.g. an unset host name) cannot leak, so drop them.
        local_values = [value for value in candidates if value]
        artifact = build.ROOT / "backend" / "target" / "debug" / "backend"
        results = [("backend", True, 1.234, "\n".join(local_values), str(artifact))]

        report = build.build_diagnostic_report(
            results,
            "12345678",
            logd_relpaths=["diagnostic/build-12345678.logd"],
            password="test-password",
        )

        module = report["modules"][0]
        self.assertEqual(module["artifact"], "backend/target/debug/backend")
        for value in local_values:
            self.assertNotIn(value, module["output"])

    def test_metadata_validator_accepts_relative_json_logd_pair(self):
        payload = {
            "diagnostic_logd": "diagnostic/build-12345678.logd",
            "modules": [
                {
                    "name": "backend",
                    "status": "PASS",
                    "artifact": "backend/target/debug/backend",
                    "output": "redacted output",
                }
            ],
        }
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            metadata_path = self._write_metadata(root, payload)
            logd_path = metadata_path.parent / "build-12345678.logd"
            logd_path.write_bytes(b"encrypted diagnostic placeholder")

            errors = build.validate_diagnostic_metadata(metadata_path, root=root)
            self.assertEqual(errors, [])

    def test_metadata_validator_reports_missing_json_and_mismatched_logd(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)

            # The JSON file itself is absent.
            missing_json = root / "diagnostic" / "missing.json"
            missing_errors = build.validate_diagnostic_metadata(missing_json, root=root)
            self.assertIn("diagnostic metadata is missing", missing_errors[0])

            # The JSON exists but names a .logd file that was never written.
            metadata_path = self._write_metadata(
                root,
                {"diagnostic_logd": "diagnostic/build-deadbeef.logd", "modules": []},
            )
            errors = build.validate_diagnostic_metadata(metadata_path, root=root)
            self.assertTrue(any("artifact is missing" in error for error in errors))


if __name__ == "__main__":
    unittest.main()