diff --git a/build.py b/build.py
index 07b97c41..1d609193 100644
--- a/build.py
+++ b/build.py
@@ -9,6 +9,7 @@
 import shutil
 import subprocess
 import sys
+import tempfile
 import time
 from dataclasses import dataclass
 from pathlib import Path
@@ -440,6 +441,175 @@ def run_cmd(cmd: list[str], **kwargs) -> tuple[bool, str]:
         return False, str(e)
 
 
+def _redaction_tokens() -> list[str]:
+    """Return local-only values that must not be written to diagnostic metadata.
+
+    Collects the raw and resolved forms (native and `/`-separated) of the
+    repository root, the home directory and the temp directories, plus the
+    user and host names. The result is ordered longest first so that
+    replacing the tokens in order never leaves part of a longer one behind.
+    """
+    values: set[str] = set()
+
+    path_values = [ROOT, Path(tempfile.gettempdir())]
+    try:
+        path_values.append(Path.home())
+    except RuntimeError:
+        # Path.home() raises when the home directory cannot be resolved.
+        pass
+    for env_key in ("TMP", "TEMP", "TMPDIR"):
+        env_value = os.environ.get(env_key)
+        if env_value:
+            path_values.append(Path(env_value))
+
+    for path in path_values:
+        try:
+            resolved = path.resolve()
+        except OSError:
+            resolved = path
+        for variant in (path, resolved):
+            # A bare root or "." (e.g. HOME=/ in a container) identifies
+            # nothing, and redacting it would strip every separator or dot.
+            if variant == variant.parent:
+                continue
+            values.update((str(variant), variant.as_posix()))
+
+    for env_key in ("USER", "USERNAME", "LOGNAME"):
+        env_value = os.environ.get(env_key)
+        if env_value:
+            values.add(env_value)
+
+    try:
+        values.add(getpass.getuser())
+    except (ImportError, KeyError, OSError):
+        # getpass raises when neither the environment nor the password
+        # database names the current user (e.g. an unmapped container UID).
+        pass
+    values.add(platform.node())
+
+    # The secondary sort key keeps equal-length tokens in a stable order.
+    return sorted((value for value in values if value), key=lambda v: (-len(v), v))
+
+
+def redact_diagnostic_text(value: str) -> str:
+    """Redact local paths, usernames, and hostnames from diagnostic metadata text.
+
+    Every token from `_redaction_tokens()` is removed from `value`.
+    """
+    redacted = value
+    for token in _redaction_tokens():
+        redacted = redacted.replace(token, "")
+    return redacted
+
+
+def repo_relative_metadata_path(path: Optional[str]) -> Optional[str]:
+    """Return a repository-relative `/` path when possible, otherwise redacted text.
+
+    `None` is passed through unchanged.
+    """
+    if path is None:
+        return None
+
+    try:
+        resolved = Path(path).resolve()
+        bases = (ROOT, ROOT.resolve())
+    except OSError:
+        return redact_diagnostic_text(str(path))
+
+    # ROOT may itself be unresolved (e.g. reached through a symlink), so the
+    # resolved path is compared against it both as given and resolved.
+    for base in bases:
+        try:
+            return resolved.relative_to(base).as_posix()
+        except ValueError:
+            continue
+    return redact_diagnostic_text(str(path))
+
+
+def _iter_metadata_strings(value):
+    """Yield every string value nested in dicts and lists under `value`.
+
+    Dictionary keys and values of any other type are not yielded.
+    """
+    if isinstance(value, str):
+        yield value
+    elif isinstance(value, dict):
+        for nested in value.values():
+            yield from _iter_metadata_strings(nested)
+    elif isinstance(value, list):
+        for nested in value:
+            yield from _iter_metadata_strings(nested)
+
+
+def validate_diagnostic_metadata(metadata_path: Path, root: Path = ROOT) -> list[str]:
+    """Validate diagnostic JSON redaction and `.logd` pairing.
+
+    Returns a list of human-readable validation errors. An empty list means the
+    metadata is safe to submit with its paired encrypted diagnostic artifact.
+    A file that is missing, unreadable, not valid JSON, or not a JSON object
+    is reported as a single error.
+    """
+    errors: list[str] = []
+    if not metadata_path.exists():
+        return [f"diagnostic metadata is missing: {metadata_path}"]
+
+    try:
+        metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        return [f"diagnostic metadata is not valid JSON: {exc}"]
+    except (OSError, UnicodeDecodeError) as exc:
+        return [f"diagnostic metadata could not be read: {exc}"]
+    if not isinstance(metadata, dict):
+        # Every check below looks up keys, so any other top-level JSON type
+        # is rejected here instead of failing on `.get`.
+        return ["diagnostic metadata must be a JSON object"]
+
+    logd_value = metadata.get("diagnostic_logd")
+    if isinstance(logd_value, str):
+        logd_paths = [logd_value]
+    elif isinstance(logd_value, list) and all(isinstance(item, str) for item in logd_value):
+        logd_paths = logd_value
+    else:
+        logd_paths = []
+        errors.append("diagnostic_logd must be a relative .logd path or a list of relative .logd paths")
+
+    for relpath in logd_paths:
+        if "\\" in relpath:
+            errors.append(f"diagnostic_logd uses backslashes instead of `/`: {relpath}")
+        if Path(relpath).is_absolute():
+            errors.append(f"diagnostic_logd must be repository-relative: {relpath}")
+        if ".." in Path(relpath).parts:
+            errors.append(f"diagnostic_logd must not traverse outside the repository: {relpath}")
+        if not relpath.endswith(".logd"):
+            errors.append(f"diagnostic_logd must point to a .logd artifact: {relpath}")
+        artifact_path = root / relpath
+        if not artifact_path.exists():
+            errors.append(f"diagnostic_logd artifact is missing: {relpath}")
+
+    # NOTE(review): the string form of `root` is exempt from the leak check;
+    # the reason is not visible here -- confirm it is intended.
+    sensitive_values = [value for value in _redaction_tokens() if value != str(root)]
+    text_values = list(_iter_metadata_strings(metadata))
+    for token in sensitive_values:
+        # One error per leaked token, however many strings contain it.
+        if any(token in text_value for text_value in text_values):
+            errors.append(f"diagnostic metadata leaks local value `{token}`")
+
+    modules = metadata.get("modules", [])
+    # A non-list "modules" value (such as null) has no entries to check.
+    for module in modules if isinstance(modules, list) else []:
+        if not isinstance(module, dict):
+            continue
+        artifact = module.get("artifact")
+        if isinstance(artifact, str):
+            if "\\" in artifact:
+                errors.append(f"module artifact uses backslashes instead of `/`: {artifact}")
+            if Path(artifact).is_absolute():
+                errors.append(f"module artifact must be repository-relative: {artifact}")
+
+    return errors
+
+
 def collect_system_info() -> str:
     lines = [
         "Tent of Trials - System Diagnostic Snapshot",
@@ -523,8 +693,8 @@ def build_diagnostic_report(
                 "name": name,
                 "status": "PASS" if success else "FAIL",
                 "elapsed_seconds": round(elapsed, 3),
-                "artifact": binary,
-                "output": output,
+                "artifact": repo_relative_metadata_path(binary),
+                "output": redact_diagnostic_text(output),
             }
             for name, success, elapsed, output, binary in results
         ],
@@ -631,7 +801,7 @@ def generate_logd(
     safe_dir.mkdir(parents=True, exist_ok=True)
 
     (safe_dir / "system-info.txt").write_text(
-        collect_system_info(), encoding="utf-8"
+        redact_diagnostic_text(collect_system_info()), encoding="utf-8"
     )
 
     summary_lines = [
@@ -662,7 +832,7 @@ def generate_logd(
     if binary:
         log_lines.append(f"artifact: {binary}")
     if output:
-        log_lines.append(output)
+        log_lines.append(redact_diagnostic_text(output))
     (safe_dir / "build.log").write_text("\n".join(log_lines), encoding="utf-8")
 
     sr = subprocess.run(
diff --git a/diagnostic/build-00000000.json b/diagnostic/build-00000000.json
deleted file mode 100644
index 33e2ca62..00000000
--- a/diagnostic/build-00000000.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "generated_at": "2026-06-16T15:23:47.496569+00:00",
-  "commit": "00000000",
-  "diagnostic_logd": "diagnostic/build-00000000.logd",
-  "diagnostic_logd_error": null,
-  "chunked": false,
-  "chunk_size_bytes": null,
-  "password": "4c7df15ab09fbb066197",
-  "decrypt_command": "encryptly unpack diagnostic/build-00000000.logd --password 4c7df15ab09fbb066197",
-  "total_modules": 1,
-  "passed": 0,
-  "failed": 1,
-  "modules": [
-    {
-      "name": "frailbox",
-      "status": "FAIL",
-      "elapsed_seconds": 0,
-      "artifact": null,
-      "output": "Command not found: [Errno 2] No such file or directory: 'make'"
-    }
-  ],
-  "pr_note": "Include this JSON diagnostic report and diagnostic/build-00000000.logd in your PR. Maintainers may ask you to remove these diagnostic artifacts before merging."
-}
diff --git a/diagnostic/build-d8514bf7.json b/diagnostic/build-d8514bf7.json
new file mode 100644
index 00000000..d87add4f
--- /dev/null
+++ b/diagnostic/build-d8514bf7.json
@@ -0,0 +1,24 @@
+{
+  "generated_at": "2026-06-21T13:25:40.784594+00:00",
+  "commit": "d8514bf7",
+  "diagnostic_logd": "diagnostic/build-d8514bf7.logd",
+  "diagnostic_logd_error": null,
+  "message_blocker": null,
+  "chunked": false,
+  "chunk_size_bytes": null,
+  "password": "19800841b2952655813e",
+  "decrypt_command": "encryptly unpack diagnostic/build-d8514bf7.logd --password 19800841b2952655813e",
+  "total_modules": 1,
+  "passed": 1,
+  "failed": 0,
+  "modules": [
+    {
+      "name": "frailbox",
+      "status": "PASS",
+      "elapsed_seconds": 0.011,
+      "artifact": "frailbox/frailbox",
+      "output": "make: Nothing to be done for 'all'."
+    }
+  ],
+  "pr_note": "Include the encrypted diagnostic logd artifact(s): diagnostic/build-d8514bf7.logd. The encrypted .logd is the required diagnostic content for PR review; this JSON file is metadata. Maintainers may ask you to remove these diagnostic artifacts before merging."
+}
diff --git a/diagnostic/build-d8514bf7.logd b/diagnostic/build-d8514bf7.logd
new file mode 100644
index 00000000..d93dec70
Binary files /dev/null and b/diagnostic/build-d8514bf7.logd differ
diff --git a/tests/test_diagnostic_redaction.py b/tests/test_diagnostic_redaction.py
new file mode 100644
index 00000000..a0875a1c
--- /dev/null
+++ b/tests/test_diagnostic_redaction.py
@@ -0,0 +1,93 @@
+import getpass
+import json
+import platform
+import tempfile
+import unittest
+from pathlib import Path
+
+import build
+
+
+class DiagnosticRedactionTests(unittest.TestCase):
+    def test_report_uses_relative_artifact_paths_and_redacts_local_output(self):
+        local_values = [
+            str(build.ROOT),
+            build.ROOT.as_posix(),
+            str(Path.home()),
+            Path.home().as_posix(),
+            tempfile.gettempdir(),
+            getpass.getuser(),
+            platform.node(),
+        ]
+        output = "\n".join(value for value in local_values if value)
+        artifact = build.ROOT / "backend" / "target" / "debug" / "backend"
+
+        report = build.build_diagnostic_report(
+            [("backend", True, 1.234, output, str(artifact))],
+            "12345678",
+            logd_relpaths=["diagnostic/build-12345678.logd"],
+            password="test-password",
+        )
+
+        module = report["modules"][0]
+        self.assertEqual(module["artifact"], "backend/target/debug/backend")
+        for value in local_values:
+            if value:
+                self.assertNotIn(value, module["output"])
+
+    def test_metadata_validator_accepts_relative_json_logd_pair(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            root = Path(tmpdir)
+            diagnostic_dir = root / "diagnostic"
+            diagnostic_dir.mkdir()
+            logd_path = diagnostic_dir / "build-12345678.logd"
+            logd_path.write_bytes(b"encrypted diagnostic placeholder")
+            metadata_path = diagnostic_dir / "build-12345678.json"
+            metadata_path.write_text(
+                json.dumps(
+                    {
+                        "diagnostic_logd": "diagnostic/build-12345678.logd",
+                        "modules": [
+                            {
+                                "name": "backend",
+                                "status": "PASS",
+                                "artifact": "backend/target/debug/backend",
+                                "output": "redacted output",
+                            }
+                        ],
+                    }
+                ),
+                encoding="utf-8",
+            )
+
+            self.assertEqual(build.validate_diagnostic_metadata(metadata_path, root=root), [])
+
+    def test_metadata_validator_reports_missing_json_and_mismatched_logd(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            root = Path(tmpdir)
+            missing_json = root / "diagnostic" / "missing.json"
+            self.assertIn("diagnostic metadata is missing", build.validate_diagnostic_metadata(missing_json, root=root)[0])
+
+            diagnostic_dir = root / "diagnostic"
+            diagnostic_dir.mkdir()
+            metadata_path = diagnostic_dir / "build-12345678.json"
+            metadata_path.write_text(
+                json.dumps({"diagnostic_logd": "diagnostic/build-deadbeef.logd", "modules": []}),
+                encoding="utf-8",
+            )
+
+            errors = build.validate_diagnostic_metadata(metadata_path, root=root)
+            self.assertTrue(any("artifact is missing" in error for error in errors))
+
+    def test_metadata_validator_rejects_non_object_json(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            root = Path(tmpdir)
+            metadata_path = root / "build-12345678.json"
+            metadata_path.write_text("[]", encoding="utf-8")
+
+            errors = build.validate_diagnostic_metadata(metadata_path, root=root)
+            self.assertEqual(errors, ["diagnostic metadata must be a JSON object"])
+
+
+if __name__ == "__main__":
+    unittest.main()