Skip to content
This repository was archived by the owner on May 12, 2026. It is now read-only.

Commit c66d495

Browse files
toraidlcodex
andcommitted
perf: optimize artifact state collection and remove redundant APK hashing
Refactor artifact state collection into a single filesystem traversal to reduce repeated IO and metadata work during diff report generation. Key changes: - Replace three independent scans (files/build.props/apks) with one _collect_state_entries pass. - Reuse per-file size/sha256 for APK metadata to avoid hashing APK files twice. - Cache aapt tool resolution with lru_cache so tool probing occurs once per process. - Keep diff report schema unchanged while improving runtime efficiency for large target trees. Tests: - Add regression test to ensure each file is hashed exactly once during collect_artifact_state. - Existing diff-report behavior test remains green. Verification: - .venv/bin/python -m ruff check src/app/diff_report.py tests/test_diff_report.py - .venv/bin/python -m mypy --config-file mypy-curated.ini - .venv/bin/python -m pytest -q tests/test_diff_report.py - .venv/bin/python -m pytest -q Co-authored-by: OpenAI Codex <noreply@openai.com>
1 parent 183c963 commit c66d495

2 files changed

Lines changed: 81 additions & 56 deletions

File tree

src/app/diff_report.py

Lines changed: 59 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
import json
77
import logging
88
import re
9+
import shutil
910
import subprocess
11+
from functools import lru_cache
1012
from pathlib import Path
1113
from typing import Any
1214

@@ -43,19 +45,6 @@ def _sha256(path: Path) -> str:
4345
return digest.hexdigest()
4446

4547

46-
def _collect_files(root: Path) -> dict[str, dict[str, Any]]:
47-
files: dict[str, dict[str, Any]] = {}
48-
for path in root.rglob("*"):
49-
if not path.is_file():
50-
continue
51-
rel = str(path.relative_to(root))
52-
files[rel] = {
53-
"size": path.stat().st_size,
54-
"sha256": _sha256(path),
55-
}
56-
return files
57-
58-
5948
def _parse_prop_file(path: Path) -> dict[str, str]:
6049
props: dict[str, str] = {}
6150
with open(path, "r", encoding="utf-8", errors="ignore") as handle:
@@ -68,60 +57,73 @@ def _parse_prop_file(path: Path) -> dict[str, str]:
6857
return props
6958

7059

71-
def _collect_build_props(root: Path) -> dict[str, dict[str, str]]:
72-
result: dict[str, dict[str, str]] = {}
73-
for prop_file in root.rglob("build.prop"):
74-
if not prop_file.is_file():
75-
continue
76-
rel = str(prop_file.relative_to(root))
77-
result[rel] = _parse_prop_file(prop_file)
78-
return result
60+
@lru_cache(maxsize=1)
61+
def _resolve_aapt_tool() -> str | None:
62+
for tool in ("aapt2", "aapt"):
63+
if shutil.which(tool):
64+
return tool
65+
return None
7966

8067

81-
def _extract_apk_metadata(apk_path: Path) -> dict[str, Any]:
68+
def _extract_apk_metadata(apk_path: Path, file_meta: dict[str, Any] | None = None) -> dict[str, Any]:
69+
if file_meta is None:
70+
file_meta = {"size": apk_path.stat().st_size, "sha256": _sha256(apk_path)}
71+
8272
metadata: dict[str, Any] = {
83-
"size": apk_path.stat().st_size,
84-
"sha256": _sha256(apk_path),
73+
"size": file_meta["size"],
74+
"sha256": file_meta["sha256"],
8575
"package": None,
8676
"version_name": None,
8777
"version_code": None,
8878
}
8979

90-
for tool in ("aapt2", "aapt"):
91-
try:
92-
proc = subprocess.run(
93-
[tool, "dump", "badging", str(apk_path)],
94-
check=False,
95-
capture_output=True,
96-
text=True,
97-
)
98-
except FileNotFoundError:
99-
continue
80+
tool = _resolve_aapt_tool()
81+
if tool is None:
82+
return metadata
10083

101-
if proc.returncode != 0 or not proc.stdout:
102-
continue
103-
104-
match = re.search(
105-
r"package: name='([^']+)'.*versionCode='([^']+)'.*versionName='([^']*)'",
106-
proc.stdout,
107-
)
108-
if match:
109-
metadata["package"] = match.group(1)
110-
metadata["version_code"] = match.group(2)
111-
metadata["version_name"] = match.group(3)
112-
break
84+
proc = subprocess.run(
85+
[tool, "dump", "badging", str(apk_path)],
86+
check=False,
87+
capture_output=True,
88+
text=True,
89+
)
90+
if proc.returncode != 0 or not proc.stdout:
91+
return metadata
11392

93+
match = re.search(
94+
r"package: name='([^']+)'.*versionCode='([^']+)'.*versionName='([^']*)'",
95+
proc.stdout,
96+
)
97+
if match:
98+
metadata["package"] = match.group(1)
99+
metadata["version_code"] = match.group(2)
100+
metadata["version_name"] = match.group(3)
114101
return metadata
115102

116103

117-
def _collect_apks(root: Path) -> dict[str, dict[str, Any]]:
118-
result: dict[str, dict[str, Any]] = {}
119-
for apk in root.rglob("*.apk"):
120-
if not apk.is_file():
104+
def _collect_state_entries(
105+
root: Path,
106+
) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, str]], dict[str, dict[str, Any]]]:
107+
files: dict[str, dict[str, Any]] = {}
108+
build_props: dict[str, dict[str, str]] = {}
109+
apks: dict[str, dict[str, Any]] = {}
110+
111+
for path in root.rglob("*"):
112+
if not path.is_file():
121113
continue
122-
rel = str(apk.relative_to(root))
123-
result[rel] = _extract_apk_metadata(apk)
124-
return result
114+
rel = str(path.relative_to(root))
115+
file_meta = {
116+
"size": path.stat().st_size,
117+
"sha256": _sha256(path),
118+
}
119+
files[rel] = file_meta
120+
121+
if path.name == "build.prop":
122+
build_props[rel] = _parse_prop_file(path)
123+
if path.suffix == ".apk":
124+
apks[rel] = _extract_apk_metadata(path, file_meta=file_meta)
125+
126+
return files, build_props, apks
125127

126128

127129
def collect_artifact_state(root: str | Path, logger: logging.Logger) -> dict[str, Any]:
@@ -132,11 +134,12 @@ def collect_artifact_state(root: str | Path, logger: logging.Logger) -> dict[str
132134
return {"root": str(target), "files": {}, "build_props": {}, "apks": {}}
133135

134136
logger.info("Collecting artifact state from: %s", target)
137+
files, build_props, apks = _collect_state_entries(target)
135138
return {
136139
"root": str(target),
137-
"files": _collect_files(target),
138-
"build_props": _collect_build_props(target),
139-
"apks": _collect_apks(target),
140+
"files": files,
141+
"build_props": build_props,
142+
"apks": apks,
140143
}
141144

142145

tests/test_diff_report.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
from pathlib import Path
33

4+
import src.app.diff_report as diff_report
45
from src.app.diff_report import collect_artifact_state, generate_diff_report
56

67

@@ -35,3 +36,24 @@ def test_generate_diff_report_tracks_file_and_prop_changes(tmp_path: Path):
3536
flag["code"] == "IDENTITY_PROP_CHANGED"
3637
for flag in report["highlights"]["risk_flags"]
3738
)
39+
40+
41+
def test_collect_artifact_state_hashes_each_file_once(tmp_path: Path, monkeypatch):
42+
target = tmp_path / "target"
43+
(target / "system").mkdir(parents=True)
44+
(target / "system" / "build.prop").write_text("ro.product.name=device_a\n", encoding="utf-8")
45+
(target / "system" / "app.apk").write_bytes(b"apk-bytes")
46+
(target / "system" / "plain.txt").write_text("hello", encoding="utf-8")
47+
48+
hash_calls: list[str] = []
49+
original_sha = diff_report._sha256
50+
51+
def counting_sha256(path: Path) -> str:
52+
hash_calls.append(str(path.relative_to(target)))
53+
return original_sha(path)
54+
55+
monkeypatch.setattr(diff_report, "_sha256", counting_sha256)
56+
state = collect_artifact_state(target, logger=logging.getLogger("test"))
57+
58+
assert sorted(hash_calls) == ["system/app.apk", "system/build.prop", "system/plain.txt"]
59+
assert "system/app.apk" in state["apks"]

0 commit comments

Comments
 (0)