diff --git a/.github/workflows/build-attest.yml b/.github/workflows/build-attest.yml new file mode 100644 index 0000000..bf4d992 --- /dev/null +++ b/.github/workflows/build-attest.yml @@ -0,0 +1,53 @@ +name: Build + Attest + +on: + workflow_dispatch: + release: + types: [published] + +permissions: + contents: read + id-token: write + attestations: write + +jobs: + build-and-attest: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + + - name: Install build tooling + run: python -m pip install --upgrade pip build + + - name: Build distributions + run: python -m build + + - name: Upload built distributions + uses: actions/upload-artifact@v4 + with: + name: python-dist + path: dist/* + if-no-files-found: error + + - name: Generate build provenance attestation + uses: actions/attest@v4 + with: + subject-path: "dist/*" + + - name: Add verification hint + run: | + { + echo "## Verification" + echo + echo "Verify a downloaded artifact with:" + echo + echo '`gh attestation verify dist/<artifact-file> -R joy7758/agent-evidence`' + } >> "$GITHUB_STEP_SUMMARY" diff --git a/README.md b/README.md index 0e0984a..0f68f26 100644 --- a/README.md +++ b/README.md @@ -1,704 +1,169 @@ - -[English](./README.md) | [中文](./README.zh-CN.md) - +# agent-evidence -# Agent Evidence +把 AI Agent / service operation 转换为可验证、可审计、可复核的 evidence object。
+Turn AI agent operations into auditable and verifiable evidence objects. -Concrete execution-evidence entry for verifiable AI agent runs with offline -verification. +## Why this matters -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.19334062.svg)](https://doi.org/10.5281/zenodo.19334062) -[![CI](https://github.com/joy7758/agent-evidence/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/joy7758/agent-evidence/actions/workflows/ci.yml) -![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue) -![Semantic Events](https://img.shields.io/badge/semantic%20events-v2.0.0-1f6feb) -![Status](https://img.shields.io/badge/status-experimental-orange) +普通 AI workflow 通常只留下聊天记录、trace 页面或零散日志。它们能帮助开发者排查问题,但很难直接交给审查者、客户、治理团队或后续系统复核。 -Agent Evidence is the concrete execution-evidence entry point for the Digital -Biosphere Architecture. +`agent-evidence` 关注的是一次 Agent / service operation 结束之后,能不能留下结构化证据:input / output hashes、operation type、policy reference、provenance chain、verification result,以及可以被 validator 检查的 evidence object。 -It packages agent/runtime execution into verifiable evidence bundles for offline -verification. It is not the full architecture hub, not the audit control plane, -and not just tracing or logging. For system context, start with -[digital-biosphere-architecture](https://github.com/joy7758/digital-biosphere-architecture); -for the shortest walkthrough, see -[verifiable-agent-demo](https://github.com/joy7758/verifiable-agent-demo); for -post-execution review, see -[aro-audit](https://github.com/joy7758/aro-audit). +这个仓库的目标不是再做一个通用 Agent 平台,而是提供一个最小、可运行、可验证的 operation evidence 路径。 -## Role +## What it provides -`agent-evidence` is the concrete execution-evidence entry point for packaging -agent/runtime execution into portable bundles that another party can verify -offline. 
+- `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` +- JSON Schema +- profile-aware validator +- minimal valid / invalid examples +- registration pack +- FDO Testbed registration draft +- outreach draft for FDO discussion +- LangChain / LangGraph 优先的 evidence handoff 思路 -## Not this repo +## Quick Start -- not the full architecture hub -- not the audit control plane -- not just tracing or logging -- not the walkthrough demo -- not the execution-integrity kernel - -## Start here - -- architecture context -> [digital-biosphere-architecture](https://github.com/joy7758/digital-biosphere-architecture) -- current primary package -> `spec/execution-evidence-operation-accountability-profile-v0.1.md`, `schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` -- current runnable surfaces -> [examples/README.md](examples/README.md), [demo/README.md](demo/README.md), `agent-evidence validate-profile ` -- historical lineage -> [docs/lineage.md](docs/lineage.md) -- walkthrough -> [verifiable-agent-demo](https://github.com/joy7758/verifiable-agent-demo) -- post-execution review -> [aro-audit](https://github.com/joy7758/aro-audit) - -## Current v0.1 package - -The current primary package surface is -`Execution Evidence and Operation Accountability Profile v0.1`. - -It is frozen in GitHub Release `v0.2.0`. - -Current package DOI: https://doi.org/10.5281/zenodo.19334062 - -The package version inside that release remains `v0.1`. 
- -Start here for the current v0.1 path: - -- Spec: `spec/execution-evidence-operation-accountability-profile-v0.1.md` -- Schema: `schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` -- Validator CLI: `agent-evidence validate-profile ` -- Examples: [examples/README.md](examples/README.md) -- Demo: [demo/README.md](demo/README.md) -- Status and acceptance: `docs/STATUS.md`, `docs/ACCEPTANCE-CHECKLIST.md` -- Submission handoff: `submission/package-manifest.md`, `submission/final-handoff.md` - -Implementation note: JSONL, SQLite, and PostgreSQL backends remain available, -but they are subordinate to the evidence-entry role of this repository. - -![Storage](https://img.shields.io/badge/storage-JSONL%20%7C%20SQLite%20%7C%20Postgres-0a7b83) - -### Minimal v0.1 walkthrough +Install from source: ```bash python3 -m venv .venv source .venv/bin/activate -pip install -e ".[dev]" +pip install -e ".[dev,langchain,signing]" ``` -Validate the minimal valid and invalid examples: +Validate a minimal valid evidence profile: ```bash agent-evidence validate-profile examples/minimal-valid-evidence.json -agent-evidence validate-profile examples/invalid-missing-required.json -agent-evidence validate-profile examples/invalid-unclosed-reference.json -agent-evidence validate-profile examples/invalid-policy-link-broken.json -``` - -Run the minimal demo: - -```bash -python3 demo/run_operation_accountability_demo.py -``` - -Expected result: - -- the valid example returns JSON with `"ok": true` -- each invalid example returns JSON with `"ok": false` and one primary error code -- the demo writes artifacts under `demo/artifacts/` and ends with one `PASS` summary line - -Known environment note: - -- the repository `.venv` may show one `langchain_core` warning under Python 3.14 during broader test runs; it does not affect the minimal profile, validator, or demo path - -## Historical lineage - -Historical `Execution Evidence Object`, older `Agent Evidence Profile` wording, 
-legacy FDO mapping language, and conference-specimen notes -remain in this repository, but they are no longer the primary entry surface. -Use [docs/lineage.md](docs/lineage.md) for the historical map and retained -paths. - -The historical specimen track still keeps its original DOI: -https://doi.org/10.5281/zenodo.19055948 - -## Fastest proof - -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -e ".[dev,langchain,sql]" -python integrations/langchain/export_evidence.py -agent-evidence verify-bundle --bundle-dir integrations/langchain/langchain-evidence-bundle -``` - -This runs the documented LangChain exporter and verifies the emitted bundle -offline. - -For a smaller callback/export recipe aimed at external readers, see -`docs/cookbooks/langchain_minimal_evidence.md`. - -## Why this is not just tracing - -Tracing and logs help operators inspect a run. Agent Evidence packages runtime -events into portable artifacts that another party can verify later, including -offline. - -Evidence path: - -`runtime events -> evidence bundle -> signed manifest -> detached anchor (when present) -> offline verify` - -This repository implements the bundle, manifest, signatures, and offline -verification steps. External anchoring is out of scope for AEP v0.1 and is not -enabled by default. 
- -The toolkit now supports two storage modes: - -- append-only local JSONL files -- SQLAlchemy-backed SQLite/PostgreSQL databases - -The current model treats each record as a semantic event envelope: - -- `event.event_type` is framework-neutral, such as `chain.start` or `tool.end` -- `event.context.source_event_type` preserves the raw framework event name -- `hashes.previous_event_hash` links to the prior event -- `hashes.chain_hash` provides a cumulative chain tip for integrity checks - -### Secure Serialization - -The evidence serialization layer implements: - -- default redaction of sensitive fields -- maximum recursion depth -- circular reference protection -- object size limits - -These protections prevent evidence bundles from leaking secrets or causing -serialization-based denial-of-service conditions. - -## Why this shape - -The project is organized so evidence capture stays modular: - -- `agent_evidence`: core models and recorder logic -- `agent_evidence/crypto`: canonical hashing and chain helpers -- `agent_evidence/storage`: append-only local storage backends -- `agent_evidence/integrations`: adapters for external agent frameworks -- `agent_evidence/cli`: command-line entrypoints -- `agent_evidence/schema`: JSON schema for persisted envelopes -- `examples`: executable usage examples -- `tests`: baseline regression coverage - -## Quick start - -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -e ".[dev,langchain,sql]" -agent-evidence schema -``` - -## Agent Evidence Profile v0.1 MVP - -The current MVP path is an integrity-verifiable evidence bundle with offline -verification. It is implemented as an Agent Evidence Profile that keeps one -LangChain-first integration path and leaves room for later OpenInference / -OpenTelemetry compatibility mappings. - -AEP v0.1 is an integrity-verifiable evidence profile, not a non-repudiation -system. 
- -AEP is an integrity-verifiable evidence profile for autonomous agent runs, with -offline verification and runtime provenance capture. - -Generate the first bundle: - -```bash -python integrations/langchain/export_evidence.py -agent-evidence verify-bundle --bundle-dir integrations/langchain/langchain-evidence-bundle -``` - -Run the gate against one valid and one tampered fixture: - -```bash -python scripts/run_profile_gate.py -``` - -## Automaton Sidecar Exporter - -The next read-only path is a Conway-neutral Automaton sidecar/exporter. It -reads `state.db`, git history, and persisted on-chain references, then emits an -AEP bundle plus `fdo-stub.json` and `erc8004-validation-stub.json`. - -```bash -agent-evidence export automaton \ - --state-db /path/to/state.db \ - --repo /path/to/state/repo \ - --runtime-root /path/to/automaton-checkout \ - --out ./automaton-aep-bundle -``` - -`agent-evidence export automaton` has been validated against a live isolated-home -Automaton run and remains marked experimental while the live data contract is -still settling. - -When `--runtime-root` is provided, the exporter attempts to resolve -`source_runtime_version`, `source_runtime_commit`, and `source_runtime_dirty` -from the Automaton checkout without changing the export path. - -## Controlled Release Surface - -The controlled specimen release at [v0.1-live-chain](/Users/zhangbin/GitHub/agent-evidence/release/v0.1-live-chain/README.md) -is a historical lineage surface, not the current primary entry. - -The historical specimen archive for that track remains on Zenodo with DOI: -https://doi.org/10.5281/zenodo.19055948 - -It freezes: - -- AEP schema -- verify CLI -- LangChain exporter -- Automaton exporter -- live runbook -- public live/tampered fixtures -- AEP boundary statement - -See [docs/lineage.md](docs/lineage.md) for how this historical surface relates -to the current Agent Evidence / AEP v0.1 package path. 
- -The formal specimen release note is [RELEASE_NOTE.md](/Users/zhangbin/GitHub/agent-evidence/release/v0.1-live-chain/RELEASE_NOTE.md). - -## CLI examples - -```bash -agent-evidence record \ - --store ./data/evidence.jsonl \ - --actor planner \ - --event-type tool.call \ - --input '{"task":"summarize"}' \ - --output '{"status":"ok"}' \ - --context '{"source":"cli","component":"tool"}' - -agent-evidence list --store ./data/evidence.jsonl -agent-evidence show --store ./data/evidence.jsonl --index 0 -agent-evidence verify --store ./data/evidence.jsonl ``` -SQL stores use a SQLAlchemy URL instead of a file path: +Check that an intentionally invalid example fails: ```bash -agent-evidence record \ - --store sqlite+pysqlite:///./data/evidence.db \ - --actor planner \ - --event-type tool.call \ - --context '{"source":"cli","component":"tool"}' - -agent-evidence query \ - --store sqlite+pysqlite:///./data/evidence.db \ - --event-type tool.call \ - --source cli - -agent-evidence query \ - --store sqlite+pysqlite:///./data/evidence.db \ - --span-id tool-1 \ - --parent-span-id root \ - --offset 0 \ - --limit 50 - -agent-evidence query \ - --store sqlite+pysqlite:///./data/evidence.db \ - --previous-event-hash \ - --event-hash-from \ - --event-hash-to - -agent-evidence export \ - --store ./data/evidence.jsonl \ - --format json \ - --output ./exports/evidence.bundle.json - -agent-evidence export \ - --store ./data/evidence.jsonl \ - --format csv \ - --output ./exports/evidence.csv \ - --manifest-output ./exports/evidence.csv.manifest.json \ - --private-key ./keys/manifest-private.pem \ - --key-id evidence-demo - -agent-evidence export \ - --store ./data/evidence.jsonl \ - --format xml \ - --output ./exports/evidence.xml \ - --manifest-output ./exports/evidence.xml.manifest.json \ - --private-key ./keys/manifest-private.pem \ - --key-id evidence-demo - -agent-evidence export \ - --store ./data/evidence.jsonl \ - --format json \ - --archive-format tar.gz \ - --output 
./exports/evidence-package.tgz \ - --private-key ./keys/manifest-private.pem \ - --key-id evidence-demo - -agent-evidence export \ - --store ./data/evidence.jsonl \ - --format json \ - --output ./exports/evidence.multisig.json \ - --required-signatures 2 \ - --required-signature-role approver=1 \ - --required-signature-role attestor=1 \ - --signer-config ./keys/operations-q2.signer.json \ - --signer-config ./keys/compliance-q1.signer.json - -agent-evidence verify-export \ - --bundle ./exports/evidence.bundle.json \ - --public-key ./keys/manifest-public.pem - -agent-evidence verify-export \ - --bundle ./exports/evidence.multisig.json \ - --keyring ./keys/manifest-keyring.json - -agent-evidence verify-export \ - --bundle ./exports/evidence.multisig.json \ - --keyring ./keys/manifest-keyring.json \ - --required-signature-role approver=1 - -agent-evidence verify-export \ - --xml ./exports/evidence.xml \ - --manifest ./exports/evidence.xml.manifest.json \ - --public-key ./keys/manifest-public.pem - -agent-evidence verify-export \ - --archive ./exports/evidence-package.tgz \ - --public-key ./keys/manifest-public.pem -``` - -## Development - -```bash -make install -make test -make lint -make hooks -``` - -The repository includes a `.pre-commit-config.yaml` with baseline whitespace, -JSON, and Ruff checks. 
- -For PostgreSQL support, install the extra driver dependencies: - -```bash -pip install -e ".[dev,postgres]" -``` - -## Semantic event model - -Each persisted record follows this shape: - -```json -{ - "schema_version": "2.0.0", - "event": { - "event_id": "...", - "timestamp": "2026-03-16T00:00:00+00:00", - "event_type": "tool.end", - "actor": "search-tool", - "inputs": {}, - "outputs": {}, - "context": { - "source": "langchain", - "component": "tool", - "source_event_type": "on_tool_end", - "span_id": "...", - "parent_span_id": null, - "ancestor_span_ids": [], - "name": "search-tool", - "tags": ["langchain", "tool"], - "attributes": {} - }, - "metadata": {} - }, - "hashes": { - "event_hash": "...", - "previous_event_hash": "...", - "chain_hash": "..." - } -} -``` - -`event_type` is the stable semantic layer. `source_event_type` keeps the -original callback or trace event for lossless debugging. - -## LangChain integration - -Agent Evidence supports two integration paths for current LangChain runtimes: - -- callback handlers for live capture during execution -- stream event adapters for `Runnable.astream_events(..., version="v2")` - -Example callback usage: - -```python -from agent_evidence import EvidenceRecorder, LocalEvidenceStore -from agent_evidence.integrations import EvidenceCallbackHandler -from langchain_core.runnables import RunnableLambda - -store = LocalEvidenceStore("data/evidence.jsonl") -recorder = EvidenceRecorder(store) -handler = EvidenceCallbackHandler(recorder) - -chain = RunnableLambda(lambda text: text.upper()).with_config({"run_name": "uppercase"}) -result = chain.invoke( - "hello", - config={"callbacks": [handler], "metadata": {"session_id": "demo"}}, -) -``` - -Example stream event capture: - -```python -import asyncio - -from agent_evidence import EvidenceRecorder, LocalEvidenceStore -from agent_evidence.integrations import record_langchain_event -from langchain_core.runnables import RunnableLambda - -async def main() -> None: - store = 
LocalEvidenceStore("data/evidence.jsonl") - recorder = EvidenceRecorder(store) - chain = RunnableLambda(lambda text: text[::-1]).with_config({"run_name": "reverse"}) - - async for event in chain.astream_events("hello", version="v2"): - record_langchain_event(recorder, event) - -asyncio.run(main()) -``` - -Both integration paths normalize LangChain callback names such as -`on_chain_start` and `on_tool_end` into semantic event types such as -`chain.start` and `tool.end`. - -## OpenAI Agents SDK integration - -OpenAI Agents SDK already exposes tracing extension points through custom trace -processors, so Agent Evidence can mirror trace and span lifecycle events into -the same semantic evidence model without patching the runtime. - -Install the optional dependency: - -```bash -pip install -e ".[openai-agents]" -``` - -Example trace processor usage: - -```python -from agents import trace -from agents.tracing import custom_span - -from agent_evidence import EvidenceRecorder, LocalEvidenceStore, export_json_bundle -from agent_evidence.integrations import install_openai_agents_processor - -store = LocalEvidenceStore("data/openai-agents.evidence.jsonl") -recorder = EvidenceRecorder(store) -install_openai_agents_processor(recorder) - -with trace( - "support-workflow", - group_id="session-001", - metadata={"session_id": "session-001"}, -): - with custom_span("collect_context", {"channel": "chat"}): - pass - -export_json_bundle( - store.query(source="openai_agents"), - "exports/openai-agents.bundle.json", -) +agent-evidence validate-profile examples/invalid-missing-required.json ``` -By default `install_openai_agents_processor()` adds Agent Evidence alongside -the SDK's active processors. Pass `replace=True` if you want the SDK to emit -only into Agent Evidence for that process. +The console command is provided by the package entry point after `pip install -e ...`. 
-See [`examples/openai_agents/basic_export.py`](examples/openai_agents/basic_export.py) -for a complete local example. +## Example Evidence Fields -## Verification +- `operation_id` — one operation or run that needs to be reviewed +- `operation_type` — what kind of operation was performed +- `input_hashes` — hashes for inputs or source artifacts +- `output_hashes` — hashes for generated outputs +- `policy_reference` — policy, rule, or governance checkpoint used during the run +- `provenance_chain` — links between inputs, actions, outputs, and evidence +- `verification_result` — machine-readable result from profile validation or bundle verification -Use the CLI to validate the chain after capture: +## Demo Screenshots -```bash -agent-evidence verify --store ./data/evidence.jsonl -``` +Real screenshots are intentionally not generated in this README. Add them under: -This recomputes each `event_hash`, checks `previous_event_hash`, and validates -the cumulative `chain_hash`. +- `assets/profile-validator.png` +- `assets/evidence-object.png` +- `assets/fdo-testbed-registration.png` -## SQL storage +See [assets/README.md](./assets/README.md) for the capture checklist. -`SqlEvidenceStore` persists the semantic event envelope into a relational table -while keeping indexed columns for efficient filtering: +## Relation to FDO -- `event_type` -- `actor` -- `timestamp` -- `source` -- `component` -- `span_id` -- `parent_span_id` -- `previous_event_hash` -- `event_hash` -- `chain_hash` +`agent-evidence` is an experimental, minimal, discussion-oriented operation evidence profile. It explores how AI / Agent operation evidence can be expressed with an FDO-style profile, schema, examples, validator, and registration pack. -The query interface supports: +It is not an official FDO standard. The current public claim is narrower: this repository provides a working profile and validator surface that can support FDO-facing discussion and a minimal FDO Testbed registration draft. 
-- semantic filters such as `event_type`, `actor`, `source`, and `component` -- chain traversal via `previous_event_hash` -- span-scoped inspection with `span_id` and `parent_span_id` -- time windows via `since` and `until` -- lexicographic hash windows via `event_hash_from/to` and `chain_hash_from/to` -- pagination via `offset` and `limit` +Start here for the FDO-facing pack: -Hash window filters operate on fixed-width lowercase SHA-256 hex digests, so -lexicographic ranges map cleanly to digest ordering for indexed lookups. +- [FDO Operation Evidence Profile Registration Pack](./docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md) +- [FDO Testbed registration draft](./submission/fdo-testbed-registration-draft.md) +- [FDO outreach draft](./submission/peter-sven-outreach-draft.md) +- [LDT4SSC / DS4SSCC module pitch](./submission/ldt4ssc-ds4sscc-module-pitch.md) -The store accepts standard SQLAlchemy URLs, for example: +Current external naming relationship: -- `sqlite+pysqlite:///./data/evidence.db` -- `postgresql+psycopg://user:password@localhost:5432/agent_evidence` +- `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` = operation-level evidence profile +- `ARO_AUDIT_PROFILE_V1` = audit-facing sibling profile -## Migration +## For hiring managers -You can migrate existing JSONL evidence into SQLite or PostgreSQL: +This repository shows that I can: -```bash -agent-evidence migrate \ - --source ./data/evidence.jsonl \ - --target sqlite+pysqlite:///./data/evidence.db -``` +- turn LangChain / Agent workflow thinking into a concrete evidence boundary +- design JSON Schema and validator logic for high-responsibility AI workflows +- model audit trail, provenance, hashes, and verification results as deliverable artifacts +- connect trustworthy AI governance ideas to runnable examples +- package technical work as open-source documentation, examples, and CLI validation -The `query` command works across both local and SQL stores, although SQL stores -are preferable once 
record volume grows beyond simple local inspection. +## Minimal Run Path -## Bundle export +The first-run flow in this repository is organized around: -Agent Evidence supports three export shapes: +1. install +2. run or inspect one minimal example +3. validate the profile +4. export or verify an evidence bundle when needed +5. review the generated `bundle`, `receipt`, and `summary` -- JSON bundles containing `records`, `manifest`, and one or more detached signatures -- CSV artifacts plus a JSON sidecar manifest -- XML artifacts plus a JSON sidecar manifest +Current entry surfaces: -Exports can also be packaged as a single `.zip` or `.tar.gz` archive via -`--archive-format`. Packaged exports include: +- [demo/README.md](./demo/README.md) +- [examples/README.md](./examples/README.md) +- [LangChain minimal evidence cookbook](./docs/cookbooks/langchain_minimal_evidence.md) +- [OpenAI-compatible minimal cookbook](./docs/cookbooks/openai_compatible_minimal.md) +- [Review pack minimal cookbook](./docs/cookbooks/review_pack_minimal.md) +- [GitHub build attestation note](./docs/ci/github-build-attestation-minimal.md) -- the exported artifact -- the sidecar manifest -- a small `package-manifest.json` used to locate those files during verification +## What You Get -Both formats include a manifest with: +After one run, the primary outputs are intentionally narrow: -- `artifact_digest` for the exported bytes -- ordered event-hash and chain-hash list digests -- first/last event hashes and latest chain hash -- export filters used to produce the artifact +- `bundle` — exported evidence package that can be handed off, verified, and retained outside the original runtime +- `receipt` — machine-readable verification result returned by `agent-evidence validate-profile`, `agent-evidence verify-bundle`, or `agent-evidence verify-export` +- `summary` — reviewer-facing summary output produced by the current demo and example surfaces -Each signature can also carry: +## Integration 
Priorities -- `key_id` and `key_version` for key rotation -- `signer` and `role` for audit attribution -- `signed_at` and arbitrary JSON metadata +Current priority order: -Manifests can also carry threshold policies: +1. LangChain / LangGraph +2. OpenAI-compatible runtimes -- `signature_policy.minimum_valid_signatures` for `N-of-M` -- `signature_policy.minimum_valid_signatures_by_role` for role thresholds such - as `{"approver": 1, "attestor": 1}` +The goal is one narrow evidence handoff surface, not many adapters at once. -If neither is present, verification defaults to requiring every signature in -the artifact to validate. If only role thresholds are present, the effective -total threshold defaults to the sum of those role requirements. +## Related Surfaces -If a bundle carries signatures, verification is fail-closed: you must provide -`--public-key` or `--keyring`, otherwise verification returns `ok=false`. +- Architecture: [digital-biosphere-architecture](https://github.com/joy7758/digital-biosphere-architecture) +- Demo: [verifiable-agent-demo](https://github.com/joy7758/verifiable-agent-demo) +- Audit: [aro-audit](https://github.com/joy7758/aro-audit) +- EDC spike: [frozen/reference asset](https://github.com/joy7758/agent-evidence/blob/main/docs/edc-java-spike/README.md) -Manifest signing uses Ed25519 PEM keys. To enable signing outside the dev -environment: +## Scope Boundaries -```bash -pip install -e ".[signing]" -``` +`agent-evidence` is the active code surface here for `bundle`, `receipt`, and `summary`. 
-Example key generation with OpenSSL: +It is not: -```bash -openssl genpkey -algorithm Ed25519 -out ./keys/manifest-private.pem -openssl pkey -in ./keys/manifest-private.pem -pubout -out ./keys/manifest-public.pem -``` +- the full Digital Biosphere Architecture stack +- the audit control plane +- the current EDC implementation line +- an exporter proliferation project +- a generic agent governance platform -Signer config files let you attach multiple signatures during export. Example -`operations-q2.signer.json`: - -```json -{ - "private_key": "./operations-q2-private.pem", - "key_id": "operations", - "key_version": "2026-q2", - "signer": "Operations Bot", - "role": "approver", - "metadata": { - "environment": "prod" - } -} -``` +Historical proposal, submission, poster, roadmap, and lineage materials remain in this repository for context, not as the primary developer entry path. -To embed signature policy in the exported manifest, pass: - -- `--required-signatures N` for a global `N-of-M` rule -- `--required-signature-role =` one or more times for role rules - -`verify-export` will honor the manifest policy by default, or you can override -the global threshold and role thresholds at verification time with the same -flags. - -Keyrings let `verify-export` resolve rotated keys by `key_id` and -`key_version`. Example `manifest-keyring.json`: - -```json -{ - "keys": [ - { - "key_id": "operations", - "key_version": "2026-q1", - "public_key": "./operations-q1-public.pem" - }, - { - "key_id": "operations", - "key_version": "2026-q2", - "public_key": "./operations-q2-public.pem" - } - ] -} -``` +## Status -When you export CSV, Agent Evidence writes the CSV artifact and a manifest -sidecar such as `evidence.csv.manifest.json`. Spreadsheet-facing CSV exports -sanitize cells that begin with formula prefixes such as `=`, `+`, `-`, or `@` -to reduce formula injection risk during human review. 
`verify-export` -validates the manifest summary, exported artifact digest, and every signature -from a provided public key or keyring. +The current primary package line in this repository is `Execution Evidence and Operation Accountability Profile v0.1`. -Archive verification also enforces unpacking limits for member count, per-file -size, and total unpacked size so untrusted `.zip` and `.tar.gz` bundles fail -closed before full extraction. +This repository is being tightened into a developer-product entry page: -## PostgreSQL integration validation +- installable from source +- runnable on a minimal path +- able to produce `bundle`, `receipt`, and `summary` +- explicit about active versus frozen/reference surfaces -For a repeatable real-database validation path, use the bundled Docker-backed -integration script: +For the current repo boundary, see [docs/reports/repo-map-audit.md](./docs/reports/repo-map-audit.md). -```bash -make install-postgres -make test-postgres -``` +## English Summary -This starts a temporary PostgreSQL container, exports -`AGENT_EVIDENCE_POSTGRES_URL`, and runs `tests/test_postgres_integration.py` -against the live database. +`agent-evidence` turns AI agent operations into structured evidence objects that can be validated, reviewed, and retained outside the original runtime. The project focuses on a minimal operation evidence profile, JSON Schema, validator, examples, and FDO-facing discussion material. It is experimental and discussion-oriented, not an official FDO standard. 
diff --git a/README.zh-CN.md b/README.zh-CN.md index 9c2d3b1..86b5861 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -16,85 +16,131 @@ Agent Evidence 是 Digital Biosphere Architecture 的具体 execution-evidence 它把 agent/runtime 执行打包成可离线验证的证据 bundle。它不是完整的架构总仓,不是 audit control plane,也不只是 tracing 或 logging。要看系统上下文,请先去 [digital-biosphere-architecture](https://github.com/joy7758/digital-biosphere-architecture);要走最短演练路径,请看 [verifiable-agent-demo](https://github.com/joy7758/verifiable-agent-demo);要做执行后审阅,请看 [aro-audit](https://github.com/joy7758/aro-audit)。 -## 角色 +## 研究定位 -`agent-evidence` 是具体的 execution-evidence 入口,用来把 agent/runtime 的执行过程打包成可移植、可离线验证的证据包。 +Agent Evidence 只做一个收敛主张:执行证据与操作问责,是可治理 AI +系统的一等验证边界。 -## 不是这个仓库 +本仓库是 Digital Biosphere Architecture 中的具体 execution-evidence +入口。它把一次 operation 打包为可移植、可独立复核、可离线验证的对象化证据;它是方法入口,不是架构总仓,不是 audit control plane,也不是泛化的 +agent governance 平台。 -- 不是完整的架构总仓 -- 不是 audit control plane -- 不只是 tracing 或 logging -- 不是 walkthrough demo -- 不是 execution-integrity kernel +## 当前规范包 -## 从这里开始 - -- architecture context -> [digital-biosphere-architecture](https://github.com/joy7758/digital-biosphere-architecture) -- 当前主 package -> `spec/execution-evidence-operation-accountability-profile-v0.1.md`、`schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` -- 当前可运行入口 -> [examples/README.md](examples/README.md)、[demo/README.md](demo/README.md)、`agent-evidence validate-profile ` -- 历史脉络 -> [docs/lineage.md](docs/lineage.md) -- walkthrough -> [verifiable-agent-demo](https://github.com/joy7758/verifiable-agent-demo) -- post-execution review -> [aro-audit](https://github.com/joy7758/aro-audit) - -## 当前 v0.1 package - -当前主 package surface 是 +当前 canonical package 是 `Execution Evidence and Operation Accountability Profile v0.1`。 -它冻结在 GitHub Release `v0.2.0` 中。 - -当前 package DOI:https://doi.org/10.5281/zenodo.19334062 - -该 release 内部冻结的 package 版本仍是 `v0.1`。 - -当前 v0.1 路径从这里开始: +冻结信息: +- GitHub Release:`v0.2.0` +- 
DOI:[10.5281/zenodo.19334062](https://doi.org/10.5281/zenodo.19334062) +- 该 release 内部冻结的 package 版本仍是 `v0.1` +核心入口: - 规范:`spec/execution-evidence-operation-accountability-profile-v0.1.md` - Schema:`schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` - Validator CLI:`agent-evidence validate-profile ` -- Examples:[examples/README.md](examples/README.md) -- Demo:[demo/README.md](demo/README.md) +- 样例:`examples/README.md` +- Demo:`demo/README.md` +- reviewer-facing 高风险入口:`docs/high-risk-scenario-entry.md` - 状态与验收:`docs/STATUS.md`、`docs/ACCEPTANCE-CHECKLIST.md` -- 提交交付:`submission/package-manifest.md`、`submission/final-handoff.md` +- 投稿交付:`submission/package-manifest.md`、`submission/final-handoff.md` -实现说明:JSONL、SQLite 和 PostgreSQL 后端仍然可用,但它们从属于本仓库作为 evidence-entry 的定位。 +## FDO 对外文稿入口 -![存储](https://img.shields.io/badge/storage-JSONL%20%7C%20SQLite%20%7C%20Postgres-0a7b83) +如果当前目标是 FDO Testbed 注册、外联或项目插入申请,先看: -### Minimal v0.1 walkthrough +- `docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md` +- `submission/fdo-testbed-registration-draft.md` +- `submission/peter-sven-outreach-draft.md` +- `submission/ldt4ssc-ds4sscc-module-pitch.md` -```bash -python3 -m venv .venv -source .venv/bin/activate -pip install -e ".[dev]" -``` +当前对外命名关系: -验证最小 valid / invalid 样例: +- `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` = operation-level evidence profile +- `ARO_AUDIT_PROFILE_V1` = audit-facing sibling profile -```bash -agent-evidence validate-profile examples/minimal-valid-evidence.json -agent-evidence validate-profile examples/invalid-missing-required.json -agent-evidence validate-profile examples/invalid-unclosed-reference.json -agent-evidence validate-profile examples/invalid-policy-link-broken.json -``` +## Minimal v0.1 walkthrough -运行最小 demo: + python3 -m venv .venv + source .venv/bin/activate + pip install -e ".[dev]" -```bash -python3 demo/run_operation_accountability_demo.py -``` + agent-evidence validate-profile 
examples/minimal-valid-evidence.json + agent-evidence validate-profile examples/invalid-missing-required.json + agent-evidence validate-profile examples/invalid-unclosed-reference.json + agent-evidence validate-profile examples/invalid-policy-link-broken.json -预期结果: + agent-evidence validate-profile examples/valid-high-risk-payment-review-evidence.json + agent-evidence validate-profile examples/invalid-high-risk-unclosed-reference.json + agent-evidence validate-profile examples/invalid-high-risk-policy-link-broken.json + python3 demo/run_operation_accountability_demo.py + +预期结果: - valid 样例返回 JSON,其中 `"ok": true` - 每个 invalid 样例返回 JSON,其中 `"ok": false`,并给出一个主错误码 - demo 会把工件写到 `demo/artifacts/`,最后输出一行 `PASS` 摘要 已知环境说明: - - 仓库 `.venv` 在 Python 3.14 下跑更大范围测试时,可能出现一条 `langchain_core` warning;它不影响最小 profile、validator 或 demo 路径 +## 论文台账 + +下面这些文件就是当前项目与论文状态的统一台账入口: + +- 项目状态与里程碑台账:`docs/STATUS.md` +- 旗舰论文工作日志:`paper/flagship/WORKLOG.md` +- 稿件基线说明:`submission/manuscript-baselines.md` +- claims-to-evidence 映射表:`paper/flagship/13_claims_to_evidence_map.md` +- validation 结果总表:`paper/flagship/18_validation_results_table.md` + +## 稿件口径纪律 + +不要混写不同稿件表面。 + +- `B1-minimal-frozen`:`Execution Evidence and Operation Accountability Profile v0.1`;主张 = 最小验证边界 +- `B4-high-risk-current-main`:reviewer-facing 的高风险入口;更适合未来 high-risk / compliance-interface 稿件 +- `B2-extended-middle`:除非整篇重写,否则继续停放 +- `B3-aep-live-chain`:保留为历史 AEP runtime-evidence 表面 + +## 本仓库已经建立了什么 + +本仓库当前已经建立了: +- execution evidence 与 operation accountability 的最小 profile +- 带显式 error code 的 profile-aware validator +- 单链路 demo +- reviewer-facing 的场景切片 +- 能把论文主张压到具体仓库资产上的证据表面 + +## 范围纪律 + +这个仓库是: +- 具体 execution-evidence 入口 +- 最小验证边界规范包 +- validator / specimen / demo 的落地表面 + +这个仓库不是: +- 架构总仓 +- audit control plane +- walkthrough demo +- execution-integrity kernel +- 泛化 agent governance 平台 +- manifesto 仓库 + +## 架构导航 + +- 架构总入口 -> `digital-biosphere-architecture` +- 最短演练路径 -> `verifiable-agent-demo` +- 执行后审阅入口 -> `aro-audit` +- 
EDC Java spike 入口 -> `docs/edc/edc_extension_minimal_structure.md` +- 历史脉络图 -> `docs/lineage.md` + +## 下一步重点 + +- external-context evidence +- third-party checker +- introduction / discussion / conclusion 的连续 manuscript assembly + ## 历史脉络 历史上的 `Execution Evidence Object`、较早的 `Agent Evidence Profile` 命名、旧版 FDO mapping 表述以及会议样品说明仍然保留在仓库里,但它们已经不是当前主入口。历史脉络请统一看 [docs/lineage.md](docs/lineage.md)。 @@ -129,6 +175,10 @@ Tracing 和 logs 主要帮助操作者检查一次运行。Agent Evidence 把运 这个仓库当前实现了 bundle、manifest、signatures 和 offline verification 这些步骤。外部 anchoring 不在 AEP v0.1 的范围内,默认也不会启用。 +可选的 trust binding 和 manifest 签名不是一回事。当前 profile 中它表现为 +`validation.trust_bindings[]`,只用于指向外部验证来源,不替代本地签名。 +当前 validator 只检查本地 target 和 digest 是否一致,不校验外部系统本身。 + 该工具包现在支持两种存储模式: - 仅附加本地 JSONL 文件 diff --git a/agent_evidence/integrations/__init__.py b/agent_evidence/integrations/__init__.py index 71f5c8d..593dec2 100644 --- a/agent_evidence/integrations/__init__.py +++ b/agent_evidence/integrations/__init__.py @@ -5,6 +5,8 @@ ) from .langchain import ( EvidenceCallbackHandler, + LangChainAdapter, + LangChainArtifacts, evidence_from_langchain_event, record_langchain_event, ) @@ -16,12 +18,20 @@ exported_trace_summary, install_openai_agents_processor, ) +from .openai_compatible import ( + OpenAICompatibleAdapter, + OpenAICompatibleArtifacts, +) __all__ = [ "AgentEvidenceTracingProcessor", "build_erc8004_validation_stub", "build_fdo_stub", "EvidenceCallbackHandler", + "LangChainAdapter", + "LangChainArtifacts", + "OpenAICompatibleAdapter", + "OpenAICompatibleArtifacts", "evidence_from_langchain_event", "evidence_from_openai_agents_span", "evidence_from_openai_agents_trace", diff --git a/agent_evidence/integrations/langchain.py b/agent_evidence/integrations/langchain.py index b2e27ad..5315b8a 100644 --- a/agent_evidence/integrations/langchain.py +++ b/agent_evidence/integrations/langchain.py @@ -1,13 +1,19 @@ from __future__ import annotations +import json +import shutil +from dataclasses import dataclass +from 
pathlib import Path from typing import Any, Mapping from uuid import UUID from agent_evidence.aep import EvidenceBundleBuilder from agent_evidence.aep.hash_chain import sha256_digest +from agent_evidence.export import export_json_bundle, verify_json_bundle from agent_evidence.models import EvidenceContext, EvidenceEvent, utc_now from agent_evidence.recorder import EvidenceRecorder from agent_evidence.serialization import ensure_json_object, to_jsonable +from agent_evidence.storage import LocalEvidenceStore try: from langchain_core.callbacks import BaseCallbackHandler as LangChainBaseCallbackHandler @@ -44,6 +50,65 @@ class LangChainBaseCallbackHandler: # type: ignore[no-redef] } +@dataclass(frozen=True) +class LangChainArtifacts: + """Normalized LangChain integration outputs plus supporting files.""" + + bundle_path: Path + receipt: dict[str, Any] + receipt_path: Path + summary: dict[str, Any] + summary_path: Path + supporting_files: dict[str, Path] + + +def _load_ed25519_runtime(): + try: + from cryptography.hazmat.primitives import serialization + from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey + except ModuleNotFoundError as exc: # pragma: no cover - depends on extras + raise ModuleNotFoundError( + "LangChainAdapter signing requires cryptography. Install agent-evidence with " + "the [signing] or [dev] extra." 
def _write_adapter_keypair(
    output_dir: Path,
    *,
    private_key_pem: bytes | None = None,
) -> tuple[Path, Path, bytes, bytes]:
    """Write the Ed25519 manifest keypair into ``output_dir`` as PEM files.

    When ``private_key_pem`` is ``None`` a fresh key is generated; otherwise the
    supplied (unencrypted) PEM is loaded and must hold an Ed25519 private key.

    Args:
        output_dir: Existing directory that receives ``manifest-private.pem``
            and ``manifest-public.pem``.
        private_key_pem: Optional PEM-encoded private key to reuse.

    Returns:
        ``(private_key_path, public_key_path, private_pem, public_pem)``.

    Raises:
        TypeError: If the supplied PEM is not an Ed25519 private key.
    """
    serialization, Ed25519PrivateKey = _load_ed25519_runtime()
    if private_key_pem is None:
        private_key = Ed25519PrivateKey.generate()
    else:
        loaded = serialization.load_pem_private_key(private_key_pem, password=None)
        if not isinstance(loaded, Ed25519PrivateKey):
            raise TypeError("LangChainAdapter requires an Ed25519 private key in PEM format.")
        private_key = loaded

    resolved_private_pem = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_pem = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    private_key_path = output_dir / "manifest-private.pem"
    public_key_path = output_dir / "manifest-public.pem"

    # The private key is serialized without encryption, so make the file
    # owner-only *before* any key material is written. touch() covers the
    # fresh-file case, chmod() covers a pre-existing file with looser bits.
    private_key_path.touch(mode=0o600, exist_ok=True)
    private_key_path.chmod(0o600)
    private_key_path.write_bytes(resolved_private_pem)
    public_key_path.write_bytes(public_pem)
    return private_key_path, public_key_path, resolved_private_pem, public_pem
| None) -> list[str]: merged: list[str] = [] for part in parts: @@ -756,3 +821,147 @@ def on_custom_event( name=name, extra=kwargs, ) + + +class LangChainAdapter: + """Recommended LangChain wrapper for the current quickstart artifact path.""" + + def __init__( + self, + *, + output_dir: Path, + store: LocalEvidenceStore, + recorder: EvidenceRecorder, + handler: EvidenceCallbackHandler, + private_key_pem: bytes | None = None, + key_id: str = "langchain-cookbook-demo", + key_version: str | None = None, + signer: str = "local-demo", + role: str = "attestor", + ) -> None: + self.output_dir = output_dir + self.store = store + self.recorder = recorder + self._handler = handler + self._private_key_pem = private_key_pem + self._key_id = key_id + self._key_version = key_version + self._signer = signer + self._role = role + self._artifacts: LangChainArtifacts | None = None + + @classmethod + def for_output_dir( + cls, + output_dir: str | Path, + *, + digest_only: bool = True, + omit_request: bool = False, + omit_response: bool = False, + capture_stream_tokens: bool = False, + base_tags: list[str] | None = None, + private_key_pem: bytes | None = None, + key_id: str = "langchain-cookbook-demo", + key_version: str | None = None, + signer: str = "local-demo", + role: str = "attestor", + ) -> "LangChainAdapter": + resolved_output_dir = Path(output_dir) + if resolved_output_dir.exists(): + if resolved_output_dir.is_dir(): + shutil.rmtree(resolved_output_dir) + else: + resolved_output_dir.unlink() + resolved_output_dir.mkdir(parents=True, exist_ok=True) + + store = LocalEvidenceStore(resolved_output_dir / "runtime-events.jsonl") + recorder = EvidenceRecorder(store) + handler = EvidenceCallbackHandler( + recorder=recorder, + base_tags=base_tags, + capture_stream_tokens=capture_stream_tokens, + digest_only=digest_only, + omit_request=omit_request, + omit_response=omit_response, + ) + return cls( + output_dir=resolved_output_dir, + store=store, + recorder=recorder, + handler=handler, 
+ private_key_pem=private_key_pem, + key_id=key_id, + key_version=key_version, + signer=signer, + role=role, + ) + + def callback_handler(self) -> EvidenceCallbackHandler: + return self._handler + + def finalize(self) -> LangChainArtifacts: + if self._artifacts is not None: + return self._artifacts + + records = self.store.list() + bundle_path = self.output_dir / "langchain-evidence.bundle.json" + manifest_path = self.output_dir / "langchain-evidence.manifest.json" + receipt_path = self.output_dir / "receipt.json" + summary_path = self.output_dir / "summary.json" + + private_key_path, public_key_path, private_pem, public_pem = _write_adapter_keypair( + self.output_dir, + private_key_pem=self._private_key_pem, + ) + bundle = export_json_bundle( + records, + bundle_path, + filters={"source": "langchain", "limit": len(records)}, + private_key_pem=private_pem, + key_id=self._key_id, + key_version=self._key_version, + signer=self._signer, + role=self._role, + manifest_output_path=manifest_path, + ) + receipt = verify_json_bundle(bundle_path, public_key_pem=public_pem) + _write_json(receipt_path, receipt) + + verify_command = ( + f"agent-evidence verify-export --bundle {bundle_path} --public-key {public_key_path}" + ) + summary = { + "ok": receipt["ok"], + "output_dir": str(self.output_dir), + "store_path": str(self.store.path), + "bundle_path": str(bundle_path), + "receipt_path": str(receipt_path), + "manifest_path": str(manifest_path), + "private_key_path": str(private_key_path), + "public_key_path": str(public_key_path), + "record_count": len(records), + "signature_count": len(bundle.signatures), + "verify_command": verify_command, + "verify_result": receipt, + "anchor_note": ( + "Detached anchoring is not implemented in this repository. Use the exported " + "bundle digest and signed manifest as the handoff point if you want to anchor " + "it in an external timestamp or registry system." 
+ ), + } + _write_json(summary_path, summary) + + self._artifacts = LangChainArtifacts( + bundle_path=bundle_path, + receipt=receipt, + receipt_path=receipt_path, + summary=summary, + summary_path=summary_path, + supporting_files={ + "manifest": manifest_path, + "private_key": private_key_path, + "public_key": public_key_path, + "runtime_events": self.store.path, + }, + ) + return self._artifacts diff --git a/agent_evidence/integrations/openai_compatible/__init__.py b/agent_evidence/integrations/openai_compatible/__init__.py new file mode 100644 index 0000000..e7c7df3 --- /dev/null +++ b/agent_evidence/integrations/openai_compatible/__init__.py @@ -0,0 +1,6 @@ +from .adapter import OpenAICompatibleAdapter, OpenAICompatibleArtifacts + +__all__ = [ + "OpenAICompatibleAdapter", + "OpenAICompatibleArtifacts", +] diff --git a/agent_evidence/integrations/openai_compatible/adapter.py b/agent_evidence/integrations/openai_compatible/adapter.py new file mode 100644 index 0000000..f64f385 --- /dev/null +++ b/agent_evidence/integrations/openai_compatible/adapter.py @@ -0,0 +1,419 @@ +from __future__ import annotations + +import json +import shutil +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable, Mapping, TypeVar +from uuid import uuid4 + +from agent_evidence.aep.hash_chain import sha256_digest +from agent_evidence.export import export_json_bundle, verify_json_bundle +from agent_evidence.models import EvidenceContext +from agent_evidence.recorder import EvidenceRecorder +from agent_evidence.serialization import ensure_json_object, to_jsonable +from agent_evidence.storage import LocalEvidenceStore + +T = TypeVar("T") + + +@dataclass(frozen=True) +class OpenAICompatibleArtifacts: + """Normalized OpenAI-compatible integration outputs plus supporting files.""" + + bundle_path: Path + receipt: dict[str, Any] + receipt_path: Path + summary: dict[str, Any] + summary_path: Path + supporting_files: dict[str, Path] + + +def 
def _write_adapter_keypair(
    output_dir: Path,
    *,
    private_key_pem: bytes | None = None,
) -> tuple[Path, Path, bytes, bytes]:
    """Write the Ed25519 manifest keypair into ``output_dir`` as PEM files.

    When ``private_key_pem`` is ``None`` a fresh key is generated; otherwise the
    supplied (unencrypted) PEM is loaded and must hold an Ed25519 private key.

    Args:
        output_dir: Existing directory that receives ``manifest-private.pem``
            and ``manifest-public.pem``.
        private_key_pem: Optional PEM-encoded private key to reuse.

    Returns:
        ``(private_key_path, public_key_path, private_pem, public_pem)``.

    Raises:
        TypeError: If the supplied PEM is not an Ed25519 private key.
    """
    serialization, Ed25519PrivateKey = _load_ed25519_runtime()
    if private_key_pem is None:
        private_key = Ed25519PrivateKey.generate()
    else:
        loaded = serialization.load_pem_private_key(private_key_pem, password=None)
        if not isinstance(loaded, Ed25519PrivateKey):
            raise TypeError(
                "OpenAICompatibleAdapter requires an Ed25519 private key in PEM format."
            )
        private_key = loaded

    resolved_private_pem = private_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_pem = private_key.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    private_key_path = output_dir / "manifest-private.pem"
    public_key_path = output_dir / "manifest-public.pem"

    # The private key is serialized without encryption, so make the file
    # owner-only *before* any key material is written. touch() covers the
    # fresh-file case, chmod() covers a pre-existing file with looser bits.
    private_key_path.touch(mode=0o600, exist_ok=True)
    private_key_path.chmod(0o600)
    private_key_path.write_bytes(resolved_private_pem)
    public_key_path.write_bytes(public_pem)
    return private_key_path, public_key_path, resolved_private_pem, public_pem
def _payload_slot(value: Any, *, digest_only: bool, omit: bool) -> dict[str, Any]:
    """Describe a request/response payload as a recordable slot.

    The slot always carries a ``mode``:

    * ``"omitted"`` when ``omit`` is set (the value is not inspected at all),
    * ``"absent"`` when the normalized value is ``None``, ``{}`` or ``[]``,
    * ``"digest_only"`` with a ``digest`` when ``digest_only`` is set,
    * ``"inline"`` with both ``digest`` and ``content`` otherwise.
    """
    if omit:
        return {"mode": "omitted"}

    payload = to_jsonable(value)
    is_empty = payload is None or payload == {} or payload == []
    if is_empty:
        return {"mode": "absent"}

    digest = sha256_digest(payload)
    if digest_only:
        return {"mode": "digest_only", "digest": digest}
    return {"mode": "inline", "digest": digest, "content": payload}
digest_only: bool = True, + omit_request: bool = False, + omit_response: bool = False, + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + tool_choice: str | None = None, + parallel_tool_calls: bool | None = None, + timeout: float | None = None, + base_tags: list[str] | None = None, + private_key_pem: bytes | None = None, + key_id: str = "openai-compatible-demo", + key_version: str | None = None, + signer: str = "local-demo", + role: str = "attestor", + ) -> "OpenAICompatibleAdapter": + if not provider_label: + raise ValueError("provider_label is required.") + if not model: + raise ValueError("model is required.") + if not api_key: + raise ValueError("api_key is required.") + + resolved_output_dir = Path(output_dir) + if resolved_output_dir.exists(): + if resolved_output_dir.is_dir(): + shutil.rmtree(resolved_output_dir) + else: + resolved_output_dir.unlink() + resolved_output_dir.mkdir(parents=True, exist_ok=True) + + request_settings = { + key: value + for key, value in { + "temperature": temperature, + "top_p": top_p, + "max_output_tokens": max_output_tokens, + "tool_choice": tool_choice, + "parallel_tool_calls": parallel_tool_calls, + "timeout": timeout, + }.items() + if value is not None + } + + store = LocalEvidenceStore(resolved_output_dir / "runtime-events.jsonl") + recorder = EvidenceRecorder(store) + return cls( + output_dir=resolved_output_dir, + store=store, + recorder=recorder, + provider_label=provider_label, + model=model, + api_key=api_key, + base_url=base_url, + digest_only=digest_only, + omit_request=omit_request, + omit_response=omit_response, + request_settings=request_settings, + base_tags=base_tags or [], + private_key_pem=private_key_pem, + key_id=key_id, + key_version=key_version, + signer=signer, + role=role, + ) + + def _event_tags(self, tags: list[str] | None) -> list[str]: + return _merge_tags( + ["openai-compatible", self.provider_label, "provider-call"], + self.base_tags, + tags, 
+ ) + + def _call_metadata( + self, + *, + operation: str, + metadata: Mapping[str, Any] | None, + ) -> dict[str, Any]: + merged: dict[str, Any] = { + "operation": operation, + "provider_label": self.provider_label, + "model": self.model, + "request_settings": ensure_json_object(self.request_settings), + } + if self.base_url is not None: + merged["base_url"] = self.base_url + if metadata is not None: + merged.update(ensure_json_object(metadata)) + return merged + + def _build_context( + self, + *, + source_event_type: str, + operation: str, + call_id: str, + tags: list[str], + status: str, + ) -> EvidenceContext: + attributes = { + "provider_label": self.provider_label, + "model": self.model, + "call_id": call_id, + "status": status, + "request_settings": ensure_json_object(self.request_settings), + } + if self.base_url is not None: + attributes["base_url"] = self.base_url + return EvidenceContext( + source="openai_compatible", + component="provider_call", + source_event_type=source_event_type, + span_id=call_id, + name=operation, + tags=tags, + attributes=attributes, + ) + + def record_call( + self, + *, + operation: str, + request: Any, + invoke: Callable[[], T], + metadata: Mapping[str, Any] | None = None, + tags: list[str] | None = None, + ) -> T: + call_id = str(uuid4()) + event_tags = self._event_tags(tags) + event_metadata = self._call_metadata(operation=operation, metadata=metadata) + + self.recorder.record( + actor=self.provider_label, + event_type="provider.call.start", + inputs=_payload_slot( + request, + digest_only=self.digest_only, + omit=self.omit_request, + ), + context=self._build_context( + source_event_type="on_provider_call_start", + operation=operation, + call_id=call_id, + tags=event_tags, + status="started", + ), + metadata=event_metadata, + ) + + try: + response = invoke() + except BaseException as exc: + self._call_count += 1 + self.recorder.record( + actor=self.provider_label, + event_type="provider.call.error", + outputs={"error": 
to_jsonable(exc)}, + context=self._build_context( + source_event_type="on_provider_call_error", + operation=operation, + call_id=call_id, + tags=event_tags, + status="failed", + ), + metadata=event_metadata, + ) + raise + + self._call_count += 1 + self.recorder.record( + actor=self.provider_label, + event_type="provider.call.end", + outputs=_payload_slot( + response, + digest_only=self.digest_only, + omit=self.omit_response, + ), + context=self._build_context( + source_event_type="on_provider_call_end", + operation=operation, + call_id=call_id, + tags=event_tags, + status="succeeded", + ), + metadata=event_metadata, + ) + return response + + def finalize(self) -> OpenAICompatibleArtifacts: + if self._artifacts is not None: + return self._artifacts + + records = self.store.list() + bundle_path = self.output_dir / "openai-compatible.bundle.json" + manifest_path = self.output_dir / "openai-compatible.manifest.json" + receipt_path = self.output_dir / "receipt.json" + summary_path = self.output_dir / "summary.json" + + private_key_path, public_key_path, private_pem, public_pem = _write_adapter_keypair( + self.output_dir, + private_key_pem=self._private_key_pem, + ) + bundle = export_json_bundle( + records, + bundle_path, + filters={ + "source": "openai_compatible", + "provider_label": self.provider_label, + "model": self.model, + "limit": len(records), + }, + private_key_pem=private_pem, + key_id=self._key_id, + key_version=self._key_version, + signer=self._signer, + role=self._role, + manifest_output_path=manifest_path, + ) + receipt = verify_json_bundle(bundle_path, public_key_pem=public_pem) + _write_json(receipt_path, receipt) + + verify_command = ( + f"agent-evidence verify-export --bundle {bundle_path} --public-key {public_key_path}" + ) + summary = { + "ok": receipt["ok"], + "provider_label": self.provider_label, + "model": self.model, + "base_url": self.base_url, + "output_dir": str(self.output_dir), + "store_path": str(self.store.path), + "bundle_path": 
str(bundle_path), + "receipt_path": str(receipt_path), + "manifest_path": str(manifest_path), + "private_key_path": str(private_key_path), + "public_key_path": str(public_key_path), + "call_count": self._call_count, + "record_count": len(records), + "signature_count": len(bundle.signatures), + "verify_command": verify_command, + "verify_result": receipt, + } + _write_json(summary_path, summary) + + self._artifacts = OpenAICompatibleArtifacts( + bundle_path=bundle_path, + receipt=receipt, + receipt_path=receipt_path, + summary=summary, + summary_path=summary_path, + supporting_files={ + "manifest": manifest_path, + "private_key": private_key_path, + "public_key": public_key_path, + "runtime_events": self.store.path, + }, + ) + return self._artifacts diff --git a/agent_evidence/oap.py b/agent_evidence/oap.py index 1fcd10e..6a334b0 100644 --- a/agent_evidence/oap.py +++ b/agent_evidence/oap.py @@ -377,6 +377,53 @@ def _validate_integrity(profile: dict[str, Any]) -> list[dict[str, str]]: return issues +def _trust_binding_targets(profile: dict[str, Any]) -> dict[str, str]: + targets = { + profile["statement_id"]: recompute_integrity(profile)["statement_digest"], + } + for artifact in profile["evidence"]["artifacts"]: + targets[artifact["artifact_id"]] = artifact["digest"] + return targets + + +def _validate_trust_bindings(profile: dict[str, Any]) -> list[dict[str, str]]: + bindings = profile["validation"].get("trust_bindings") or [] + if not bindings: + return [] + + issues = _duplicate_ids( + [binding["binding_id"] for binding in bindings], + "trust", + "duplicate_trust_binding_id", + "validation.trust_bindings", + ) + targets = _trust_binding_targets(profile) + + for index, binding in enumerate(bindings): + target_ref = binding["target_ref"] + if target_ref not in targets: + issues.append( + _issue( + "trust", + "unresolved_trust_binding_target_ref", + f"validation.trust_bindings[{index}].target_ref", + "trust binding target_ref must resolve to statement_id or " + 
"evidence.artifacts[].artifact_id.", + ) + ) + continue + if binding["target_digest"] != targets[target_ref]: + issues.append( + _issue( + "trust", + "trust_binding_target_digest_mismatch", + f"validation.trust_bindings[{index}].target_digest", + "trust binding target_digest does not match the resolved local target.", + ) + ) + return issues + + def build_validation_report( profile: dict[str, Any], *, @@ -388,6 +435,7 @@ def build_validation_report( reference_issues: list[dict[str, str]] = [] consistency_issues: list[dict[str, str]] = [] integrity_issues: list[dict[str, str]] = [] + trust_issues: list[dict[str, str]] = [] if not schema_issues: reference_issues = _validate_reference_closure(profile) @@ -395,12 +443,20 @@ def build_validation_report( consistency_issues = _validate_link_consistency(profile) if not schema_issues and not reference_issues and not consistency_issues: integrity_issues = _validate_integrity(profile) + if ( + not schema_issues + and not reference_issues + and not consistency_issues + and not integrity_issues + ): + trust_issues = _validate_trust_bindings(profile) stages = [ {"name": "schema", "ok": not schema_issues, "issues": schema_issues}, {"name": "references", "ok": not reference_issues, "issues": reference_issues}, {"name": "consistency", "ok": not consistency_issues, "issues": consistency_issues}, {"name": "integrity", "ok": not integrity_issues, "issues": integrity_issues}, + {"name": "trust", "ok": not trust_issues, "issues": trust_issues}, ] issues = [issue for stage in stages for issue in stage["issues"]] report = { diff --git a/agent_evidence/review_pack/__init__.py b/agent_evidence/review_pack/__init__.py new file mode 100644 index 0000000..0aba7c4 --- /dev/null +++ b/agent_evidence/review_pack/__init__.py @@ -0,0 +1,9 @@ +from .assembler import ReviewPackAssembler, ReviewPackResult +from .renderer import RenderedReviewReport, ReviewPackRenderer + +__all__ = [ + "ReviewPackAssembler", + "ReviewPackResult", + 
"RenderedReviewReport", + "ReviewPackRenderer", +] diff --git a/agent_evidence/review_pack/assembler.py b/agent_evidence/review_pack/assembler.py new file mode 100644 index 0000000..aedb56f --- /dev/null +++ b/agent_evidence/review_pack/assembler.py @@ -0,0 +1,206 @@ +from __future__ import annotations + +import json +import shutil +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Mapping + +from .pdf_export import write_review_report_pdf +from .renderer import ReviewPackRenderer + +PRIMARY_FILENAMES = { + "bundle": "bundle.json", + "receipt": "receipt.json", + "summary": "summary.json", +} +SUPPORTING_FILENAMES = { + "manifest": "manifest.json", + "public_key": "manifest-public.pem", + "runtime_events": "runtime-events.jsonl", + "private_key": "manifest-private.pem", +} +PACK_INDEX_FILENAME = "index.json" +REPORT_FILENAME = "report.md" +REPORT_PDF_FILENAME = "report.pdf" +PRIMARY_DIRNAME = "primary" +REVIEW_DIRNAME = "review" +SUPPORTING_DIRNAME = "supporting" + + +@dataclass(frozen=True) +class ReviewPackResult: + """Normalized Review Pack output paths.""" + + pack_dir: Path + primary_files: dict[str, Path] + supporting_files: dict[str, Path] + index_path: Path + report_path: Path + report_pdf_path: Path + + +def _load_json(path: Path) -> dict[str, Any]: + payload = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(payload, dict): + raise ValueError(f"Expected a JSON object at {path}") + return payload + + +def _copy_file(source: Path, destination: Path) -> Path: + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, destination) + return destination + + +def _receipt_facts(receipt: Mapping[str, Any]) -> dict[str, Any]: + issues = receipt.get("issues", []) + if not isinstance(issues, list): + issues = [] + facts: dict[str, Any] = { + "ok": receipt.get("ok"), + "issues": list(issues), + } + for key in ( + "format", + "profile", + "source", + "record_count", + "issue_count", + 
"signature_present", + "signature_count", + "required_signature_count", + "signature_verified", + "latest_chain_hash", + ): + if key in receipt: + facts[key] = receipt[key] + return facts + + +def _summary_orientation(summary: Mapping[str, Any]) -> dict[str, Any]: + orientation: dict[str, Any] = {} + for key in ( + "ok", + "record_count", + "signature_count", + "call_count", + "provider_label", + "model", + "base_url", + "verify_command", + ): + orientation[key] = summary.get(key) + return orientation + + +class ReviewPackAssembler: + """Package current primary artifacts into one review-oriented pack.""" + + def __init__(self, *, output_dir: Path) -> None: + self.output_dir = output_dir + + @classmethod + def for_output_dir(cls, output_dir: str | Path) -> "ReviewPackAssembler": + resolved_output_dir = Path(output_dir) + if resolved_output_dir.exists(): + if resolved_output_dir.is_dir(): + shutil.rmtree(resolved_output_dir) + else: + resolved_output_dir.unlink() + resolved_output_dir.mkdir(parents=True, exist_ok=True) + return cls(output_dir=resolved_output_dir) + + def assemble( + self, + *, + bundle_path: str | Path, + receipt_path: str | Path, + summary_path: str | Path, + supporting_files: Mapping[str, str | Path] | None = None, + include_private_key: bool = False, + ) -> ReviewPackResult: + primary_sources = { + "bundle": Path(bundle_path), + "receipt": Path(receipt_path), + "summary": Path(summary_path), + } + for name, path in primary_sources.items(): + if not path.exists(): + raise FileNotFoundError(f"{name} source file was not found: {path}") + + primary_dir = self.output_dir / PRIMARY_DIRNAME + review_dir = self.output_dir / REVIEW_DIRNAME + supporting_dir = self.output_dir / SUPPORTING_DIRNAME + primary_dir.mkdir(parents=True, exist_ok=True) + review_dir.mkdir(parents=True, exist_ok=True) + + copied_primary = { + name: _copy_file(path, primary_dir / PRIMARY_FILENAMES[name]) + for name, path in primary_sources.items() + } + + bundle = 
_load_json(copied_primary["bundle"]) + receipt = _load_json(copied_primary["receipt"]) + summary = _load_json(copied_primary["summary"]) + + copied_supporting: dict[str, Path] = {} + missing_supporting: list[str] = [] + for name, raw_path in (supporting_files or {}).items(): + if name == "private_key" and not include_private_key: + continue + source = Path(raw_path) + if not source.exists(): + missing_supporting.append(name) + continue + destination_name = SUPPORTING_FILENAMES.get(name, source.name) + copied_supporting[name] = _copy_file(source, supporting_dir / destination_name) + + primary_refs = { + name: str(path.relative_to(self.output_dir)) for name, path in copied_primary.items() + } + supporting_refs = { + name: str(path.relative_to(self.output_dir)) for name, path in copied_supporting.items() + } + report_path = review_dir / REPORT_FILENAME + report_pdf_path = review_dir / REPORT_PDF_FILENAME + rendered_report = ReviewPackRenderer().render( + bundle=bundle, + receipt=receipt, + summary=summary, + primary_files=primary_refs, + supporting_files=supporting_refs, + missing_supporting=missing_supporting, + ) + report_path.write_text(rendered_report.markdown, encoding="utf-8") + write_review_report_pdf(rendered_report.markdown, report_pdf_path) + + index_path = self.output_dir / PACK_INDEX_FILENAME + index_payload = { + "primary_files": primary_refs, + "supporting_files": supporting_refs, + "excluded_supporting_files": ( + ["private_key"] + if supporting_files + and "private_key" in supporting_files + and not include_private_key + else [] + ), + "missing_supporting_files": missing_supporting, + "report_path": str(report_path.relative_to(self.output_dir)), + "receipt_facts": _receipt_facts(receipt), + "summary_orientation": _summary_orientation(summary), + } + index_path.write_text( + json.dumps(index_payload, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + + return ReviewPackResult( + pack_dir=self.output_dir, + primary_files=copied_primary, + 
def write_review_report_pdf(markdown: str, destination: str | Path) -> Path:
    """Render the review report Markdown into a plain, paginated A4 PDF.

    Only three line shapes are recognised: ``# `` headings (20pt), ``## ``
    headings (14pt) and everything else as 11pt body text. Blank lines add a
    small vertical gap. Inline Markdown markup (list dashes, backticks) is
    drawn verbatim. Lines wider than the usable page width are wrapped by
    ``_wrap_text``.

    Args:
        markdown: Report text, one logical paragraph per line.
        destination: Output PDF path; missing parent directories are created.

    Returns:
        The destination as a ``Path``.
    """
    font_name = _ensure_font_registered()
    target = Path(destination)
    target.parent.mkdir(parents=True, exist_ok=True)

    page_width, page_height = A4
    margin = 52
    usable_width = page_width - (margin * 2)
    bottom_margin = 48
    # Vertical room available to text on a freshly started page.
    page_capacity = page_height - margin - bottom_margin

    pdf = canvas.Canvas(str(target), pagesize=A4)
    pdf.setTitle("审阅报告")

    y = page_height - margin

    def ensure_space(required_height: float) -> bool:
        """Start a new page when *required_height* no longer fits; report if it did."""
        nonlocal y
        if y - required_height < bottom_margin:
            pdf.showPage()
            y = page_height - margin
            return True
        return False

    def draw_line(text: str, *, font_size: int, leading: float) -> None:
        """Draw one logical line, wrapped, advancing the cursor by *leading* per row."""
        nonlocal y
        wrapped = _wrap_text(
            text,
            font_name=font_name,
            font_size=font_size,
            max_width=usable_width,
        )
        block_height = leading * len(wrapped)
        # Paragraphs that fit on one page are kept together, as before. A
        # paragraph taller than a whole page can never fit, so forcing a page
        # break up front would only emit a blank page and then overflow the
        # bottom margin; such paragraphs are paginated row by row instead.
        keep_together = block_height <= page_capacity
        if keep_together:
            ensure_space(block_height)
        pdf.setFont(font_name, font_size)
        for line in wrapped:
            if not keep_together and ensure_space(leading):
                # A new page starts with default graphics state; reselect the font.
                pdf.setFont(font_name, font_size)
            pdf.drawString(margin, y, line)
            y -= leading

    for raw_line in markdown.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # Blank line: small gap, and roll over if the gap crossed the margin.
            y -= 8
            ensure_space(0)
            continue

        if stripped.startswith("# "):
            draw_line(stripped[2:], font_size=20, leading=28)
            y -= 4
            continue

        if stripped.startswith("## "):
            draw_line(stripped[3:], font_size=14, leading=20)
            y -= 2
            continue

        draw_line(stripped, font_size=11, leading=16)

    pdf.save()
    return target
"公钥文件(public key)", + "runtime_events": "运行事件文件(runtime events)", + "private_key": "私钥文件(private key)", +} +_SUMMARY_ORIENTATION_LABELS = { + "provider_label": "服务提供方", + "model": "模型", + "base_url": "接口地址", + "record_count": "记录数", + "signature_count": "签名数", + "call_count": "调用次数", + "verify_command": "建议校验命令", +} +_RECEIPT_FACT_LABELS = { + "ok": "回执结论", + "format": "回执格式", + "profile": "适用规范", + "source": "回执来源", + "record_count": "记录数", + "issue_count": "问题数", + "signature_present": "是否带签名", + "signature_count": "签名数量", + "required_signature_count": "必需签名数量", + "signature_verified": "签名校验结果", + "latest_chain_hash": "最新链哈希", +} +_MANIFEST_FACT_LABELS = { + "export_format": "导出格式", + "record_count": "记录数", + "artifact_digest": "交付摘要指纹", + "latest_chain_hash": "最新链哈希", +} + + +@dataclass(frozen=True) +class RenderedReviewReport: + """Deterministic reviewer-facing report output.""" + + markdown: str + taxonomy_labels: tuple[str, ...] + + +def _receipt_issues(receipt: Mapping[str, Any]) -> tuple[str, ...]: + issues = receipt.get("issues", []) + if not isinstance(issues, list): + return () + return tuple(str(issue) for issue in issues) + + +def _append_label(labels: list[str], label: str) -> None: + if label not in labels: + labels.append(label) + + +def _taxonomy_labels( + receipt: Mapping[str, Any], + *, + missing_supporting: list[str], +) -> tuple[str, ...]: + labels: list[str] = [] + issues = _receipt_issues(receipt) + issue_text = "\n".join(issues).lower() + + if receipt.get("ok") is True: + _append_label(labels, "Verification passed") + else: + _append_label(labels, "Verification issues present") + + if "chain" in issue_text: + _append_label(labels, "Chain continuity failed") + if "signature" in issue_text or receipt.get("signature_verified") is False: + _append_label(labels, "Signature failure") + if any(word in issue_text for word in ("integrity", "manifest", "artifact digest")): + _append_label(labels, "Integrity failure") + if receipt.get("profile") or 
"validation" in issue_text: + _append_label(labels, "Profile validation failure") + if missing_supporting: + _append_label(labels, "Optional support material missing") + return tuple(labels) + + +def _overall_explanation( + receipt: Mapping[str, Any], + *, + missing_supporting: list[str], +) -> str: + if receipt.get("ok") is True and not missing_supporting: + return "校验通过,主交付物已经齐备,可直接进入审阅。" + if receipt.get("ok") is True and missing_supporting: + return "校验通过,但本次审阅包未包含部分可选附属文件。" + return "校验发现需人工复核的问题,请先查看问题摘要和证据引用。" + + +def _path_text(value: str | Path) -> str: + return str(value) + + +def _taxonomy_label_zh(label: str) -> str: + return _TAXONOMY_LABELS_ZH.get(label, label) + + +def _yes_no(value: bool) -> str: + return "是" if value else "否" + + +def _field_display_name(field: str) -> str: + return _PRIMARY_DISPLAY_NAMES.get( + field, + _SUPPORTING_DISPLAY_NAMES.get(field, f"附属文件({field})"), + ) + + +class ReviewPackRenderer: + """Render a deterministic reviewer-facing report from current pack inputs.""" + + def render( + self, + *, + bundle: Mapping[str, Any], + receipt: Mapping[str, Any], + summary: Mapping[str, Any], + primary_files: Mapping[str, str | Path], + supporting_files: Mapping[str, str | Path], + missing_supporting: list[str], + ) -> RenderedReviewReport: + taxonomy_labels = _taxonomy_labels(receipt, missing_supporting=missing_supporting) + taxonomy_labels_zh = tuple(_taxonomy_label_zh(label) for label in taxonomy_labels) + explanation = _overall_explanation(receipt, missing_supporting=missing_supporting) + verdict = "校验通过" if receipt.get("ok") else "校验未通过" + + lines = [ + "# 审阅报告", + "", + "## 总体状态", + f"- 结果:`{verdict}`", + f"- 回执结论(receipt.ok):`{receipt.get('ok')}`", + f"- 是否需人工复核:`{_yes_no(receipt.get('ok') is not True)}`", + ( + f"- 重点标签:{', '.join(f'`{label}`' for label in taxonomy_labels_zh)}" + if taxonomy_labels_zh + else "- 重点标签:`无`" + ), + f"- 说明:{explanation}", + "", + "## 交付物清单", + ] + lines.extend( + self._artifact_inventory_lines( + 
summary, + primary_files, + supporting_files, + missing_supporting, + ) + ) + lines.extend(["", "## 校验结果"]) + lines.extend(self._verification_fact_lines(receipt)) + lines.extend(["", "## 问题摘要"]) + lines.extend(self._issue_summary_lines(receipt, taxonomy_labels, missing_supporting)) + lines.extend(["", "## 证据引用"]) + lines.extend(self._evidence_reference_lines(bundle, primary_files)) + lines.extend( + [ + "", + "## 审阅备注", + "- 占位:请在此补充审阅备注。", + ( + "- 建议顺序:先查看 `primary/receipt.json`,再查看 " + "`primary/bundle.json`,最后查看 `primary/summary.json`。" + ), + "", + ] + ) + return RenderedReviewReport(markdown="\n".join(lines), taxonomy_labels=taxonomy_labels) + + def _artifact_inventory_lines( + self, + summary: Mapping[str, Any], + primary_files: Mapping[str, str | Path], + supporting_files: Mapping[str, str | Path], + missing_supporting: list[str], + ) -> list[str]: + lines: list[str] = [] + for name in _PRIMARY_ORDER: + if name in primary_files: + lines.append(f"- {_field_display_name(name)}:`{_path_text(primary_files[name])}`") + for name in _SUPPORTING_ORDER: + if name in supporting_files: + lines.append( + f"- {_field_display_name(name)}:`{_path_text(supporting_files[name])}`" + ) + for name in sorted(name for name in supporting_files if name not in _SUPPORTING_ORDER): + lines.append(f"- {_field_display_name(name)}:`{_path_text(supporting_files[name])}`") + for name in sorted(missing_supporting): + lines.append(f"- 缺少可选附属文件:`{_field_display_name(name)}`") + for key in _SUMMARY_ORIENTATION_KEYS: + value = summary.get(key) + if value is not None: + label = _SUMMARY_ORIENTATION_LABELS.get(key, key) + lines.append(f"- {label}(summary.{key}):`{value}`") + if not lines: + lines.append("- 未提供交付物清单明细。") + return lines + + def _verification_fact_lines(self, receipt: Mapping[str, Any]) -> list[str]: + lines: list[str] = [] + for key in _RECEIPT_FACT_KEYS: + if key in receipt: + label = _RECEIPT_FACT_LABELS.get(key, key) + lines.append(f"- {label}(receipt.{key}):`{receipt[key]}`") 
+ if not lines: + lines.append("- 未提供可用的回执校验事实。") + return lines + + def _issue_summary_lines( + self, + receipt: Mapping[str, Any], + taxonomy_labels: tuple[str, ...], + missing_supporting: list[str], + ) -> list[str]: + labels_zh = tuple(_taxonomy_label_zh(label) for label in taxonomy_labels) + lines = [f"- 重点标签:{', '.join(f'`{label}`' for label in labels_zh)}"] + issues = _receipt_issues(receipt) + if issues: + lines.extend(f"- 回执问题:`{issue}`" for issue in issues) + else: + lines.append("- 回执未报告问题。") + for name in sorted(missing_supporting): + lines.append(f"- 缺少可选附属文件:`{_field_display_name(name)}`") + return lines + + def _evidence_reference_lines( + self, + bundle: Mapping[str, Any], + primary_files: Mapping[str, str | Path], + ) -> list[str]: + manifest = bundle.get("manifest") + if not isinstance(manifest, Mapping): + manifest = {} + records = bundle.get("records") + if not isinstance(records, list): + records = [] + signatures = bundle.get("signatures") + if not isinstance(signatures, list): + signatures = [] + + lines: list[str] = [] + if "bundle" in primary_files: + lines.append(f"- 证据包文件(bundle):`{_path_text(primary_files['bundle'])}`") + for key in ("export_format", "record_count", "artifact_digest", "latest_chain_hash"): + if key in manifest: + label = _MANIFEST_FACT_LABELS.get(key, key) + lines.append(f"- {label}(bundle.manifest.{key}):`{manifest[key]}`") + lines.append(f"- 事件记录数(bundle.records):`{len(records)}`") + lines.append(f"- 签名记录数(bundle.signatures):`{len(signatures)}`") + for index, record in enumerate(records[:3]): + if not isinstance(record, Mapping): + continue + event = record.get("event") + if not isinstance(event, Mapping): + continue + event_id = event.get("event_id") + event_type = event.get("event_type") + if event_id is None and event_type is None: + continue + lines.append( + ( + f"- 记录 {index}(bundle.record[{index}]):" + f"`event_id={event_id}` `event_type={event_type}`" + ) + ) + if len(records) > 3: + lines.append(f"- 
其余未展开记录:`{len(records) - 3}`") + return lines diff --git a/assets/README.md b/assets/README.md new file mode 100644 index 0000000..c67ed97 --- /dev/null +++ b/assets/README.md @@ -0,0 +1,11 @@ +# README Screenshot Placeholders + +This directory is reserved for real README screenshots. Do not add generated placeholder images. + +Capture these files after the demos are run manually: + +- `profile-validator.png` — terminal or UI view of `agent-evidence validate-profile examples/minimal-valid-evidence.json` +- `evidence-object.png` — readable view of a minimal evidence object with operation, policy, provenance, and verification fields +- `fdo-testbed-registration.png` — screenshot of the minimal FDO Testbed registration draft or registration step + +Keep screenshots simple and factual. The goal is to show input, execution, evidence, validation, and receipt, not to create marketing artwork. diff --git a/demo/README.md b/demo/README.md index b9d0911..4e87008 100644 --- a/demo/README.md +++ b/demo/README.md @@ -23,6 +23,10 @@ python3 demo/run_operation_accountability_demo.py Artifacts are written under `demo/artifacts/`. +The demo also includes one optional `validation.trust_bindings[]` example to +show how a local statement can point to an external trust source without making +that source mandatory for local validation. + For system context, start with [digital-biosphere-architecture](https://github.com/joy7758/digital-biosphere-architecture). For historical naming surfaces, see [docs/lineage.md](../docs/lineage.md). 
diff --git a/demo/expected-output.md b/demo/expected-output.md index 5731933..a1920c8 100644 --- a/demo/expected-output.md +++ b/demo/expected-output.md @@ -6,6 +6,7 @@ Command: ```bash agent-evidence validate-profile examples/minimal-valid-evidence.json +agent-evidence validate-profile examples/valid-trust-binding-evidence.json ``` Expected summary: @@ -58,6 +59,20 @@ Expected summary: - primary error code: `unresolved_evidence_policy_ref` - failure reason: `evidence.policy_ref` does not resolve to `policy.id` +### Trust binding digest mismatch + +Command: + +```bash +agent-evidence validate-profile examples/invalid-trust-binding-digest-mismatch.json +``` + +Expected summary: + +- JSON output includes `"ok": false` +- primary error code: `trust_binding_target_digest_mismatch` +- failure reason: `validation.trust_bindings[0].target_digest` does not match the resolved local target + ## Demo Script Command: @@ -86,3 +101,4 @@ Expected end state: - `schema_violation`: required field or field shape does not satisfy the schema - `unresolved_output_ref`: an operation output ref does not resolve to `evidence.references[].ref_id` - `unresolved_evidence_policy_ref`: `evidence.policy_ref` does not resolve to `policy.id` +- `trust_binding_target_digest_mismatch`: a trust binding points to the right local target but carries the wrong digest diff --git a/demo/run_operation_accountability_demo.py b/demo/run_operation_accountability_demo.py index a600c04..1a26458 100644 --- a/demo/run_operation_accountability_demo.py +++ b/demo/run_operation_accountability_demo.py @@ -167,11 +167,25 @@ def build_statement( "provenance_ref": "prov:metadata-enrich-001", "policy_ref": "policy:approved-metadata-v1", "validator": "agent-evidence validate-profile", - "method": "schema+reference+consistency", + "method": "schema+reference+consistency+trust-binding", "status": "verifiable", }, } - return with_recomputed_integrity(statement) + statement = with_recomputed_integrity(statement) + 
statement["validation"]["trust_bindings"] = [ + { + "binding_id": "trust:sigstore-demo-001", + "mechanism": "sigstore", + "proof_type": "transparency-log-entry", + "target_ref": statement["statement_id"], + "target_digest": statement["evidence"]["integrity"]["statement_digest"], + "locator": "https://rekor.example.com/api/v1/log/entries/demo-001", + "verifier_hint": ( + "Recompute evidence.integrity.statement_digest before external verification." + ), + } + ] + return statement def main() -> int: diff --git a/docs/STATUS.md b/docs/STATUS.md index 41a63aa..a64bc13 100644 --- a/docs/STATUS.md +++ b/docs/STATUS.md @@ -14,6 +14,10 @@ - M3 样例集:已完成 - M4 validator 与 CLI:已完成 - M5 demo 与文稿:已完成 +- M11 FDO-facing registration / outreach / proposal skeleton:已完成 +- M8 可选 trust binding 扩展:已完成 +- M9 EDC augmentation 边界与最小接入文档:已完成 +- M10 EDC control-plane event extension 草图:已完成 - M7 旗舰论文规划包:已完成 ## 当前落地产物 @@ -23,6 +27,11 @@ - validator:`agent_evidence/oap.py` 与 CLI 命令 `agent-evidence validate-profile ` - demo:`demo/run_operation_accountability_demo.py` - 文稿:`docs/research-brief-zh.md`、`docs/abstract-en.md` +- FDO-facing registration pack:`docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md` +- FDO Testbed registration draft:`submission/fdo-testbed-registration-draft.md` +- Peter / Sven outreach draft:`submission/peter-sven-outreach-draft.md` +- LDT4SSC / DS4SSCC module pitch:`submission/ldt4ssc-ds4sscc-module-pitch.md` +- 可选 trust-binding 扩展:`validation.trust_bindings[]`、对应样例与说明文档 ## 已验证结果 - `./.venv/bin/ruff check agent_evidence/oap.py agent_evidence/cli/main.py demo/run_operation_accountability_demo.py tests/test_operation_accountability_profile.py` 通过 @@ -30,6 +39,8 @@ - `python3 demo/run_operation_accountability_demo.py` 通过 - `validate-profile` 对 valid 样例返回 `ok: true` - `validate-profile` 对 invalid 样例返回 `ok: false` 且带明确 error code +- `validate-profile` 对 trust-binding valid 样例返回 `ok: true` +- `validate-profile` 对 trust-binding invalid 样例返回 `ok: false` 且主错误码为 
`trust_binding_target_digest_mismatch` ## 术语与命名统一结果 - profile 正式名称统一为 `Execution Evidence and Operation Accountability Profile v0.1` @@ -49,6 +60,89 @@ - 优先沿用现有 Python 包、CLI、tests、docs 结构。 - 先交付最小闭环,再考虑更广映射。 +## M11 FDO-facing registration / outreach / proposal skeleton +- 状态:已完成 +- 定位结论: + - 用户给出的外部动作目标可以拆成两部分:仓库内可准备的 registration / outreach / proposal pack,以及仓库外必须人工完成的登录、提交、审批和邮件发送。 + - 当前仓库已经具备 spec / schema / examples / validator / demo,缺口不在实现本身,而在对外口径和复用包。 + - 本轮不新建第二套仓库内实现,不重命名当前 canonical profile,只补 FDO-facing 别名、映射说明和外联骨架。 +- 本轮新增产物: + - `docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md` + - `submission/fdo-testbed-registration-draft.md` + - `submission/peter-sven-outreach-draft.md` + - `submission/ldt4ssc-ds4sscc-module-pitch.md` + - `README.md`、`README.zh-CN.md` 的 FDO-facing 导航入口 + - `submission/package-manifest.md` 的新增清单项 +- 本轮命名收敛: + - 仓库 canonical package 名称继续使用 `Execution Evidence and Operation Accountability Profile v0.1` + - machine-readable profile id 继续使用 `execution-evidence-operation-accountability-profile@0.1` + - FDO-facing 对外对象名采用 `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` + - `ARO_AUDIT_PROFILE_V1` 保持为审计导向 sibling object,不被本轮新对象替换 + - 该 FDO-facing 名称只作为外部注册标签,不触发仓库内部 schema / validator / demo 重命名 +- 本轮完成与未完成边界: + - 已完成:注册字段草稿、链接位设计、flat field 到 canonical field 的映射、Peter/Sven 邮件草稿、LDT4SSC/DS4SSCC 模块提案骨架 + - 未完成:GitHub 新仓库创建、FDO Testbed 登录与提交、审批结果获取、邮件真实发送 + - 未完成原因:这些步骤依赖外部账户、外部审批或人工沟通,不应在本地仓库内虚构为已完成 +- 本轮核验: + - `git diff --check`:通过 + - `./.venv/bin/agent-evidence validate-profile examples/minimal-valid-evidence.json`:通过,`ok: true` + - `./.venv/bin/agent-evidence validate-profile examples/invalid-missing-required.json`:通过,主错误码 `schema_violation` + - `python3 demo/run_operation_accountability_demo.py`:通过,末尾输出 `PASS execution-evidence-operation-accountability-profile@0.1 ...` + - 本轮仅新增文档与 README 导航,不涉及 schema、validator、demo 行为修改 + +## M9 EDC augmentation 边界与最小接入文档 +- 状态:已完成 +- 定位结论: + - EDC 不是本仓库的新研究主线,不是替代品,也不是竞争关系。 + - 在 
dataspace / policy-governed data exchange 场景中,EDC 被定位为 `agent-evidence` 的 execution-evidence augmentation layer。 + - EDC 负责 catalog、contract、transfer governance;`agent-evidence` 负责把执行过程打包成可独立验证的 evidence。 +- 本轮新增产物: + - `docs/edc/EDC_AUGMENTATION_BOUNDARY.md` + - `docs/edc/edc_minimal_evidence_profile_draft.md` + - `docs/edc/edc_demo_minimal_path.md` + - `README.md` 中新增最小导航入口 `EDC / Dataspace augmentation` + - `plans/implementation-plan.md` 同步新增 EDC augmentation 文档里程碑 +- 本轮边界收敛: + - 只做最小接入 demo 路径、最小 profile 草案、最小独立验证说明 + - 首个推荐接入面是 control-plane event extension,不先改 persistence,也不先碰 data plane + - 不扩张成完整 EDC 平台、connector 产品、dataspace 全栈、通用 usage control 系统 +- 官方依据: + - EDC Control Plane 文档明确控制面负责 catalog、contract agreement、transfer governance,且 transfer 不直接发送数据 + - EDC Extensions 文档明确运行时扩展入口为 `ServiceExtension` + - EDC Service Layers 文档明确 `EventRouter`、in-process events、callbacks 是官方事件接入面 + - DSP 官方规范明确范围是 publish data、negotiate agreements、access data +- 本轮核验: + - `git diff --check`:通过 + - 文档引用来源:仅使用 EDC / DSP 官方公开文档链接 + - 本轮未改动 Python 代码、schema、examples、tests,因此未额外运行代码路径测试 + +## M10 EDC control-plane event extension 草图 +- 状态:已完成 +- 定位结论: + - 第二轮只收敛 control-plane event extension 草图,不写 Java 可运行代码,不写 schema JSON,不碰 persistence / data plane。 + - 首个推荐切口继续是 `ServiceExtension` + `EventRouter`,并按五个控制面事件族组织映射:asset、policy definition、contract definition、contract negotiation、transfer process。 + - `EventEnvelope` 中的 `id` 和 `at` 是第一轮 evidence 去重与时间锚的基础,不能和 payload 语义字段混用。 +- 本轮新增产物: + - `docs/edc/edc_control_plane_event_extension_sketch.md` + - `docs/edc/edc_event_to_evidence_mapping.md` + - `docs/edc/edc_extension_minimal_structure.md` + - `docs/edc/edc_demo_minimal_path.md` 增补 control-plane event 观察视角、最小事件范围与 bundle grouping key + - `README.md` 增补第二轮 EDC 文档导航 + - `plans/implementation-plan.md` 同步新增 M10 里程碑 +- 本轮推荐结果: + - 最小 demo 
事件范围:`asset.created`、`policy.definition.created`、`contract.definition.created`、`contract.negotiation.requested|finalized|terminated`、`transfer.process.requested|started|completed|terminated` + - 最终 bundle grouping key:`transfer_process_id` + - `contract_agreement_id` 作为 transfer 出现前的 staging correlation key,而不是最终 bundle key +- 官方依据: + - EDC contributor 文档明确区分 `Event` 与 `EventEnvelope` + - EDC contributor 文档明确 `EventRouter` 支持 in-process subscriber 与 callbacks + - 官方源码明确 `EventRouter` 具备 sync / async 两种注册方式,`EventRouterImpl` 先分发 sync,再分发 async + - 官方源码明确控制面存在五个直接可订阅的事件族,以及 negotiation / transfer 的具体事件名 +- 本轮核验: + - `git diff --check`:通过 + - 文档引用来源:仅使用 EDC / DSP 官方文档与官方仓库源码链接 + - 本轮未改动 Python 代码、schema、examples、tests,因此未额外运行代码路径测试 + ## 本轮最小验证记录 - 命令:`./.venv/bin/ruff check agent_evidence/oap.py agent_evidence/cli/main.py demo/run_operation_accountability_demo.py tests/test_operation_accountability_profile.py` - 结果:`All checks passed!` @@ -76,6 +170,33 @@ - `.venv` 的 Python 3.14 环境会带出一条 `langchain_core` warning。 - 仓库内仍保留历史 `Execution Evidence Object` / `Agent Evidence Profile` 资料;本轮没有重写这些既有表面,只通过 README 和状态文档把 v0.1 最小路径与其分开说明。 +## M8 可选 trust binding 扩展 +- 状态:已完成 +- 本轮新增或更新: + - spec:为 `validation.trust_bindings[]` 增加可选外部验证挂接位 + - schema:新增 trust binding 结构定义 + - validator:新增本地 target closure 与 digest 一致性校验 + - examples:新增 `valid-trust-binding-evidence.json` 与 `invalid-trust-binding-digest-mismatch.json` + - demo:最小 demo 增加 1 个 trust binding 占位示例 + - docs:README、中文 README、cookbook、demo 说明补齐“trust binding 不是本地签名系统”边界 +- 本轮边界结论: + - trust binding 是可选 external verification hook,不是 mandatory signing system + - 当前 validator 只验证本地 target / digest 一致性,不验证外部透明日志、registry 或第三方信任服务 + - `export` / `verify-export` 继续以本仓库内建 manifest signing 为主,不依赖 trust binding +- 本轮核验: + - 命令:`./.venv/bin/python -m pytest tests/test_operation_accountability_profile.py` + - 结果:`10 passed, 1 warning in 0.61s` + - 是否通过:通过 + - 命令:`./.venv/bin/agent-evidence validate-profile 
examples/valid-trust-binding-evidence.json` + - 结果:`ok: true` + - 是否通过:通过 + - 命令:`./.venv/bin/agent-evidence validate-profile examples/invalid-trust-binding-digest-mismatch.json` + - 结果:`ok: false`,primary error code `trust_binding_target_digest_mismatch` + - 是否通过:通过 + - 命令:`python3 demo/run_operation_accountability_demo.py` + - 结果:demo 闭环执行完成,末尾输出 `PASS execution-evidence-operation-accountability-profile@0.1 ...` + - 是否通过:通过 + ## 第三轮发布前复验 - 命令:`./.venv/bin/ruff check agent_evidence/oap.py agent_evidence/cli/main.py demo/run_operation_accountability_demo.py tests/test_operation_accountability_profile.py` - 结果:`All checks passed!` diff --git a/docs/artifacts/artifact-contract-draft.md b/docs/artifacts/artifact-contract-draft.md new file mode 100644 index 0000000..7d4acd1 --- /dev/null +++ b/docs/artifacts/artifact-contract-draft.md @@ -0,0 +1,292 @@ +# Artifact Contract Draft + +Scope: `agent-evidence` only. + +Grounding surfaces used for this draft: + +- `docs/reports/repo-map-audit.md` +- `README.md` +- `docs/quickstart.md` +- `examples/langchain_minimal_evidence.py` +- `spec/execution-evidence-operation-accountability-profile-v0.1.md` +- `schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` +- `agent_evidence/oap.py` +- one current generated output set from the LangChain quickstart path: + - `bundle`: `/tmp/agent-evidence-quickstart/langchain-evidence.bundle.json` + - `receipt`: `/tmp/agent-evidence-quickstart/receipt.json` + - `summary`: `/tmp/agent-evidence-quickstart/summary.json` + +This draft is product-facing. It does not define a new schema. It records the minimum artifact contract already visible in current repo surfaces. + +## 1. 
Primary Outputs vs Supporting Files + +| Type | Current normalized name | Current examples | Contract status | +| --- | --- | --- | --- | +| Primary output | `bundle` | `langchain-evidence.bundle.json` | Primary product output | +| Primary output | `receipt` | `receipt.json`, `validate-profile` JSON, `validation-report.json` | Primary product output | +| Primary output | `summary` | `summary.json`, demo PASS/FAIL review lines | Primary product output | +| Supporting file | manifest sidecar | `langchain-evidence.manifest.json` | Supporting export material, not a primary output name | +| Supporting file | verification key | `manifest-public.pem` | Supporting verification material | +| Supporting file | signing key | `manifest-private.pem` | Supporting local demo material only | +| Supporting file | runtime capture | `runtime-events.jsonl` | Supporting runtime/export material | + +Working rule: + +- product-facing language should normalize to `bundle`, `receipt`, and `summary` +- supporting files may exist around those outputs, but they are not additional primary artifact types + +## 2. 
Bundle Minimum Contract + +Current bundle carrier in the quickstart path is one JSON document with three top-level fields: + +- `manifest` +- `records` +- `signatures` + +### Minimum required fields + +At minimum, a current JSON `bundle` should preserve: + +- `manifest.export_format` +- `manifest.generated_at` +- `manifest.record_count` +- `manifest.artifact_digest` +- `manifest.event_hash_list_digest` +- `manifest.chain_hash_list_digest` +- `manifest.signature_policy` +- `records[]` +- for each record: + - `schema_version` + - `event` + - `hashes` +- for each `event`: + - `event_id` + - `timestamp` + - `event_type` + - `actor` + - `context` + - `inputs` + - `outputs` + - `metadata` +- for each `hashes` object: + - `event_hash` + - `chain_hash` + - `previous_event_hash` +- `signatures[]` + - when signatures are present, each signature must preserve: + - `algorithm` + - `signature` + - `signed_at` + - any available signer metadata such as `key_id`, `key_version`, `signer`, `role`, `metadata` + +### What makes a bundle portable/reviewable + +A current `bundle` is portable and reviewable when it keeps all of the following together: + +- the exported evidence records themselves +- the manifest digests and counts needed to re-check integrity +- the chain-hash progression needed to reason about event order and continuity +- any signatures that let another party verify the manifest later + +The sidecar manifest file is useful support material, but the bundle itself already carries the manifest and signatures. Portability should not depend on a second naming layer. + +### Which fields are canonical evidence fields + +In the current bundle surface, the canonical evidence-bearing fields are: + +- `records[].event.*` +- `records[].hashes.*` +- `manifest.*` +- `signatures[]` when present + +Local paths, temp directories, shell commands, and review notes are not bundle fields and must not be treated as canonical evidence. + +## 3. 
Receipt Minimum Contract + +Current receipts are command-specific JSON verification results. The repo currently emits them from at least these surfaces: + +- `agent-evidence verify-export` +- `agent-evidence verify-bundle` +- `agent-evidence validate-profile` + +They do not yet share one explicit schema, so the contract here is the minimum common product-facing surface. + +### Minimum required fields + +Every `receipt` should preserve at least: + +- `ok` +- machine-readable issue or failure material + - `issues[]` when the verifier emits flat issues + - or staged issue lists when the verifier emits `stages[]` +- verification scope + - `format` for export verification receipts + - or `profile` plus `source` for profile-validation receipts +- at least one scale/count field + - `record_count` + - or `issue_count` + +When signature verification is in scope, the receipt should also preserve: + +- `signature_present` +- `signature_count` +- `required_signature_count` +- `signature_verified` +- `signature_results[]` + +When chain integrity is in scope, the receipt should preserve: + +- `latest_chain_hash` + +### What must be preserved from verification results + +The receipt is the machine-readable verification result. 
It must preserve: + +- pass/fail state +- exact machine-readable issue data as emitted by the verifier +- enough context to know what was verified +- counts and signature results needed to interpret the outcome + +### Which fields are canonical versus presentation-only + +Canonical receipt facts: + +- `ok` +- `issues[]` or stage issue payloads +- `profile`, `source`, or `format` +- `record_count` / `issue_count` +- `latest_chain_hash` when present +- signature policy and signature verification results when present + +Presentation-only receipt fields: + +- any human summary lines such as `summary[]` +- CLI stderr wording +- local output filenames chosen by one example or shell command + +Important note: + +- the current OAP validator includes `summary[]` inside its JSON report +- that convenience rendering is useful, but it should still be treated as presentation-layer material rather than a canonical schema obligation for every receipt type + +## 4. Summary Minimum Contract + +Current `summary` is reviewer-facing output. In the LangChain quickstart path it is a JSON file that combines high-level run context with an embedded verification result. + +### Minimum required fields + +To function as a reviewer-facing `summary`, the current surface should preserve at least: + +- overall outcome: `ok` +- pointer to the produced `bundle` + - for example `bundle_path` +- enough run context to orient a reviewer + - for example `record_count` + - and `signature_count` +- a way to re-run or inspect verification + - `verify_command` + - and/or an embedded or linked receipt such as `verify_result` + +### What makes a summary reviewer-facing rather than evidence-canonical + +A `summary` is for orientation and review, not for canonical evidence closure. Its job is to answer: + +- what was produced +- whether the current run passed +- where the reviewer should look next + +### Distinguish summary content from verification facts + +Verification facts belong in the `receipt`. 
+ +Summary-only or review-only content includes: + +- `output_dir` +- `bundle_path` +- `manifest_path` +- `store_path` +- `private_key_path` +- `public_key_path` +- `verify_command` +- `anchor_note` + +The current `summary` may embed `verify_result`, but that embedded copy is convenience content. The authoritative verification facts still belong to the `receipt` surface. + +## 5. Evidence-Origin Fields vs Review/Presentation Fields + +### Evidence-origin fields + +Fields that come from core evidence or verification logic: + +- bundle record payloads: `records[].event.*` +- bundle hash-chain payloads: `records[].hashes.*` +- manifest digests, counts, filters, timestamps, signature policy +- manifest signatures and signature metadata +- OAP statement fields defined by the current spec/schema: + - `actor` + - `subject` + - `operation` + - `policy` + - `constraints` + - `provenance` + - `evidence` + - `validation` +- machine-readable verification facts emitted by validators and verifiers + +### Review/presentation fields + +Fields that belong only to review, rendering, or local operator convenience: + +- `summary[]` text lines +- `verify_command` +- `output_dir` +- local file paths +- shell snippets +- explanatory notes such as `anchor_note` +- smoke-check wording +- any future reviewer annotations or pack-level commentary + +### What must NOT back-propagate into canonical schema + +The following should not be pushed back into the canonical profile schema just because they are helpful in product docs or summary files: + +- `receipt.json` and `summary.json` filenames +- local path fields such as `bundle_path` or `output_dir` +- `verify_command` +- reviewer notes +- review-pack layout fields +- presentation-only summary lines + +## 6. 
Implementation-Specific Filenames vs Canonical Artifact Names + +| Canonical product name | Current implementation-level names | Contract stance | +| --- | --- | --- | +| `bundle` | `langchain-evidence.bundle.json`, bundle directories used by `verify-bundle` | Normalize product language to `bundle`; do not promote path-specific filenames to canonical names | +| `receipt` | `receipt.json`, JSON from `validate-profile`, demo `validation-report.json` | Normalize product language to `receipt`; `validation-report.json` is an implementation filename, not a fourth artifact type | +| `summary` | `summary.json`, demo PASS/FAIL summary lines | Normalize product language to `summary`; keep filename choices implementation-level | + +Supporting filenames that remain supporting only: + +- `langchain-evidence.manifest.json` +- `manifest-public.pem` +- `manifest-private.pem` +- `runtime-events.jsonl` + +## 7. Open Questions Deferred to Later Review Pack Work + +- Should `summary` always embed the full `receipt`, or should it only point to it? +- Should the repo later define one normalized receipt schema across `validate-profile`, `verify-export`, and `verify-bundle`? +- Should reviewer-facing summaries have one stable minimal template across quickstart, demo, and future review-pack flows? +- Should implementation filenames become more uniform across examples, or remain path-specific while the product language stays normalized? +- Should the sidecar manifest remain a visible supporting file in developer paths, or become an internal implementation detail later? + +These are deferred questions. They should not be solved here by inventing a new schema or adding a new artifact type. + +## 8. 
Non-Goals + +- no schema change +- no exporter expansion +- no cross-repo changes +- no EDC expansion +- no audit-plane expansion +- no new primary artifact type beyond `bundle`, `receipt`, and `summary` diff --git a/docs/ci/github-build-attestation-minimal.md b/docs/ci/github-build-attestation-minimal.md new file mode 100644 index 0000000..2acc65a --- /dev/null +++ b/docs/ci/github-build-attestation-minimal.md @@ -0,0 +1,42 @@ +# GitHub build attestation (minimal) + +## What this workflow does + +The `Build + Attest` workflow builds Python distribution artifacts under `dist/*` with `python -m build`, uploads those files as the GitHub Actions workflow artifact named `python-dist`, and asks GitHub to issue artifact attestations for those exact built files. + +This is upstream build provenance only. It gives this repository one minimal GitHub-native proof path for built distribution artifacts. + +## What this workflow does not do + +- It does not change the AEP schema. +- It does not change `bundle`, `receipt`, or `summary` semantics. +- It does not change CLI behavior, runtime behavior, export logic, or offline verifier semantics. +- It does not add Cosign, OPA, Conftest, container image signing, or a release asset redesign. +- It does not attach built files to GitHub Releases. + +## When it runs + +- Manually through `workflow_dispatch` +- Automatically when a GitHub release is published + +## How to verify a downloaded artifact + +Download one built file first, then run: + +```bash +gh attestation verify dist/<artifact-file> -R joy7758/agent-evidence +``` + +`<artifact-file>` should be the local file you want to check, such as a wheel or source distribution downloaded from the `python-dist` workflow artifact or another byte-identical distribution source. This workflow does not publish release assets, so the example assumes you already have the exact built file locally. 
+ +## Why this repo uses `actions/attest@v4` + +This repository uses `actions/attest@v4` directly because it is the current low-level action for GitHub artifact attestations, and `actions/attest-build-provenance` is now only a thin wrapper around it in v4. Using `actions/attest@v4` keeps the workflow smaller and makes the attested subject path explicit with `subject-path: "dist/*"`. + +## GitHub plan limitation + +GitHub artifact attestations are supported for public repositories on current GitHub plans. For private or internal repositories, GitHub Enterprise Cloud is required. + +## Scope boundary + +This workflow is intentionally narrow. It adds upstream build provenance for Python distribution artifacts only. It does not redefine the repository as a broader supply-chain system, and it does not alter the existing AEP, `bundle`, `receipt`, `summary`, CLI, or runtime/export surfaces. diff --git a/docs/cookbooks/langchain_minimal_evidence.md b/docs/cookbooks/langchain_minimal_evidence.md index 6eaedb3..c90c374 100644 --- a/docs/cookbooks/langchain_minimal_evidence.md +++ b/docs/cookbooks/langchain_minimal_evidence.md @@ -6,33 +6,57 @@ This cookbook shows the smallest local-first LangChain path in this repository: - capture LangChain runtime events through an external callback - persist those events to a local JSONL store -- export a signed JSON evidence bundle -- verify the exported bundle offline with a public key +- export a signed JSON `bundle` +- verify the exported `bundle` offline to produce a `receipt` +- write a reviewer-facing `summary` The example stays outside LangGraph persistence and checkpointer internals. ## 2) Why callback/export-first -This repository already has a thin LangChain callback surface and a separate -signed export surface. Reusing those two pieces keeps the integration external, -reviewable, and easy to adapt into another LangChain app. +This repository now has one recommended LangChain wrapper: +`LangChainAdapter`. 
+ +It keeps the integration external, reviewable, and easy to adapt into another +LangChain app while still reusing the existing callback and export primitives. The callback records runtime facts. The export step packages those records into a portable artifact with a signed manifest summary. That is the smallest honest surface here. +Recommended public API: + +```python +from agent_evidence.integrations.langchain import LangChainAdapter + +adapter = LangChainAdapter.for_output_dir( + "./artifacts/langchain-run", + digest_only=True, + omit_request=False, + omit_response=False, +) + +callbacks = [adapter.callback_handler()] +artifacts = adapter.finalize() +``` + ## 3) Minimal flow ```text LangChain callback events -> local JSONL evidence store -> signed JSON bundle --> offline verify +-> receipt +-> summary ``` If you need detached anchoring, treat the signed bundle and manifest as an external handoff point. That step is not verified by this repo today. +If you later add profile-level `validation.trust_bindings[]`, treat them as +optional pointers to an external trust source. They are not the same thing as +the manifest signatures verified by `verify-export`. + ## 4) Prerequisites ```bash @@ -41,6 +65,9 @@ source .venv/bin/activate pip install -e ".[langchain,signing]" ``` +The commands below assume `agent-evidence` is available on `PATH`. If you are +using the repository virtualenv directly, run `.venv/bin/agent-evidence ...`. + No model API key is required. The example uses deterministic local runnables and a mocked model callback event. @@ -62,30 +89,35 @@ Or choose an explicit output directory: python examples/langchain_minimal_evidence.py --output-dir ./tmp/langchain-minimal-evidence ``` -The script generates the run artifacts, signs the exported bundle with a local -Ed25519 demo key, runs an API-level verification pass, and writes a summary. 
+The script uses `LangChainAdapter` to capture the run, signs the exported +bundle with a local Ed25519 demo key, writes a machine-readable `receipt`, and +writes a reviewer-facing `summary`. ## 6) Output artifacts -By default the script writes: +Primary outputs: + +- `bundle`: `examples/artifacts/langchain-minimal-evidence/langchain-evidence.bundle.json` +- `receipt`: `examples/artifacts/langchain-minimal-evidence/receipt.json` +- `summary`: `examples/artifacts/langchain-minimal-evidence/summary.json` + +Supporting files written by the same run: - `examples/artifacts/langchain-minimal-evidence/runtime-events.jsonl` -- `examples/artifacts/langchain-minimal-evidence/langchain-evidence.bundle.json` - `examples/artifacts/langchain-minimal-evidence/langchain-evidence.manifest.json` - `examples/artifacts/langchain-minimal-evidence/manifest-private.pem` - `examples/artifacts/langchain-minimal-evidence/manifest-public.pem` -- `examples/artifacts/langchain-minimal-evidence/summary.json` Notes: - `runtime-events.jsonl` is the local append-only callback capture. -- `langchain-evidence.bundle.json` is the portable export artifact. - `langchain-evidence.manifest.json` is a readable sidecar copy of the signed manifest. - The generated private key is only for local demo use. ## 7) Verify -Run the offline verification command from the summary: +Run the offline verification command from the summary if you want to regenerate +or inspect the `receipt` directly: ```bash agent-evidence verify-export \ @@ -95,6 +127,9 @@ agent-evidence verify-export \ You should get `ok: true` plus signature verification details. +That verification step covers the local signed export. It does not verify any +external trust anchor or transparency log entry. + ## 8) Boundaries / what this is not - This is not a LangGraph persistence or checkpointer integration. 
diff --git a/docs/cookbooks/openai_compatible_minimal.md b/docs/cookbooks/openai_compatible_minimal.md new file mode 100644 index 0000000..47c1549 --- /dev/null +++ b/docs/cookbooks/openai_compatible_minimal.md @@ -0,0 +1,127 @@ +# OpenAI-Compatible Minimal + +## 1) What these examples show + +This cookbook shows two thin configuration examples built on the real +`OpenAICompatibleAdapter` wrapper: + +- default OpenAI-compatible configuration +- alternate `base_url` configuration + +Both examples keep the provider client outside core evidence logic and preserve +the same primary outputs used elsewhere in this repository: + +- `bundle` +- `receipt` +- `summary` + +Manifest sidecars, keys, and runtime JSONL remain supporting files only. + +## 2) Prerequisites + +From the repository root: + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -e ".[signing]" +pip install openai +``` + +These examples are local example surfaces, not tests. They make live provider +calls when you run them. + +## 3) Recommended wrapper + +Both examples use the same C1 wrapper: + +```python +from agent_evidence.integrations.openai_compatible import OpenAICompatibleAdapter + +adapter = OpenAICompatibleAdapter.for_output_dir( + output_dir="./artifacts/openai-compatible-run", + provider_label="openai", + model="gpt-4.1-mini", + api_key=os.environ["OPENAI_API_KEY"], + base_url="https://api.openai.com/v1", +) + +response = adapter.record_call( + operation="chat.completions.create", + request={"model": "gpt-4.1-mini", "messages": [...]}, + invoke=lambda: client.chat.completions.create(...), +) + +artifacts = adapter.finalize() +``` + +The examples change configuration only. They do not introduce a second export +path or provider-specific branching into core. + +## 4) Default configuration example + +Environment: + +```bash +export OPENAI_API_KEY=... 
+export OPENAI_MODEL=${OPENAI_MODEL:-gpt-4.1-mini} +``` + +Run: + +```bash +python examples/openai_compatible/basic_export.py +``` + +This example: + +- uses `provider_label="openai"` +- relies on the SDK default base URL +- records one `chat.completions.create` call +- writes `bundle`, `receipt`, and `summary` + +## 5) Alternate base_url example + +Environment: + +```bash +export OPENAI_API_KEY=... +export OPENAI_COMPAT_BASE_URL=... +export OPENAI_COMPAT_PROVIDER_LABEL=... +export OPENAI_MODEL=${OPENAI_MODEL:-gpt-4.1-mini} +``` + +Run: + +```bash +python examples/openai_compatible/alternate_base_url.py +``` + +This example: + +- uses the same `OpenAICompatibleAdapter` +- changes only `base_url` and `provider_label` +- keeps the same `bundle` / `receipt` / `summary` output contract + +## 6) Output artifacts + +Primary outputs: + +- `bundle` +- `receipt` +- `summary` + +Supporting files written by the same run: + +- manifest sidecar +- verification key +- local signing key +- runtime JSONL capture + +## 7) Boundaries + +- no CLI changes +- no schema changes +- no `README.md` or `docs/quickstart.md` changes +- no `openai_agents` path merge or rename +- no provider-specific business logic in core diff --git a/docs/cookbooks/review_pack_minimal.md b/docs/cookbooks/review_pack_minimal.md new file mode 100644 index 0000000..62b157b --- /dev/null +++ b/docs/cookbooks/review_pack_minimal.md @@ -0,0 +1,127 @@ +# Review Pack Minimal + +## 1) What this path does + +This cookbook exposes the current Review Pack as one thin packaging layer above +an existing: + +- `bundle` +- `receipt` +- `summary` + +It does not create a new canonical artifact type. It packages those existing +artifacts into one stable review directory and renders `review/report.md`. 
+ +## 2) What stays primary vs supporting + +Primary pack contents remain: + +- `bundle` +- `receipt` +- `summary` + +Supporting files remain optional: + +- manifest sidecar +- verification public key +- runtime JSONL capture +- local private key + +The private key is excluded by default. + +## 3) Required inputs + +The builder takes existing artifact paths as inputs: + +- `bundle_path` +- `receipt_path` +- `summary_path` + +Optional supporting inputs: + +- `manifest_path` +- `public_key_path` +- `runtime_events_path` +- `private_key_path` + +Supporting files are copied only when you pass them in. Failure taxonomy and +reviewer-facing labels stay in `review/report.md` only. + +## 4) Example script + +The thin developer-facing entry path is: + +```python +from agent_evidence.review_pack import ReviewPackAssembler + +assembler = ReviewPackAssembler.for_output_dir("./artifacts/review-pack") +pack = assembler.assemble( + bundle_path="./artifacts/run/langchain-evidence.bundle.json", + receipt_path="./artifacts/run/receipt.json", + summary_path="./artifacts/run/summary.json", + supporting_files={ + "manifest": "./artifacts/run/langchain-evidence.manifest.json", + "public_key": "./artifacts/run/manifest-public.pem", + "runtime_events": "./artifacts/run/runtime-events.jsonl", + }, +) +``` + +The example wrapper script lives at: + +```bash +python examples/review_pack/build_review_pack.py ... 
+``` + +## 5) Run it on an existing artifact set + +From the repository root: + +```bash +python examples/review_pack/build_review_pack.py \ + --bundle-path ./examples/artifacts/langchain-minimal-evidence/langchain-evidence.bundle.json \ + --receipt-path ./examples/artifacts/langchain-minimal-evidence/receipt.json \ + --summary-path ./examples/artifacts/langchain-minimal-evidence/summary.json \ + --manifest-path ./examples/artifacts/langchain-minimal-evidence/langchain-evidence.manifest.json \ + --public-key-path ./examples/artifacts/langchain-minimal-evidence/manifest-public.pem \ + --runtime-events-path ./examples/artifacts/langchain-minimal-evidence/runtime-events.jsonl \ + --output-dir ./examples/artifacts/review-pack +``` + +If you want to include the private key for a local-only workflow, you must pass +both of these: + +```bash + --private-key-path ./examples/artifacts/langchain-minimal-evidence/manifest-private.pem \ + --include-private-key +``` + +Without `--include-private-key`, the pack keeps that file out by default. + +## 6) Output layout + +The assembled pack keeps a stable layout: + +```text +review-pack/ +├── index.json +├── primary/ +│ ├── bundle.json +│ ├── receipt.json +│ └── summary.json +├── review/ +│ └── report.md +└── supporting/ + └── optional supporting files +``` + +`index.json` is only a pack index. It is not a fourth canonical artifact. 
+ +## 7) Boundaries + +- no CLI changes +- no schema changes +- no `README.md` or `docs/quickstart.md` changes +- no hosted delivery work +- no cross-repo work +- no back-propagation of renderer labels into canonical schema diff --git a/docs/edc/EDC_AUGMENTATION_BOUNDARY.md b/docs/edc/EDC_AUGMENTATION_BOUNDARY.md new file mode 100644 index 0000000..df4d286 --- /dev/null +++ b/docs/edc/EDC_AUGMENTATION_BOUNDARY.md @@ -0,0 +1,170 @@ +# EDC Augmentation Boundary + +## 结论先行 + +EDC 在这里不是新研究主线,不是替代品,也不是竞争关系。 + +在 `agent-evidence` 的语境里,EDC 更合适的定位是一个 +execution-evidence augmentation layer 所依附的高价值场景:EDC 负责 +dataspace 中的数据交换、合同协商和传输治理,`agent-evidence` 负责把 +执行过程整理成可独立验证的 evidence。 + +这轮只收敛三件事: + +- 最小接入 demo 的边界 +- 最小 EDC 场景 evidence profile 草案 +- 最小独立验证说明 + +## 1. 做什么 + +这轮要做的是把 `agent-evidence` 明确挂到 EDC 的控制面附近,而不是重做 +EDC 本体。 + +最小工作范围是: + +- 把 EDC 明确写成 `agent-evidence` 的 execution-evidence augmentation layer +- 只围绕 dataspace / policy-governed data exchange 场景收敛最小接入面 +- 优先使用 control-plane event extension 捕获与导出 evidence +- 为一个最小 transfer 链路定义最少字段、最少产物、最少验证动作 +- 保持 evidence 可以被第三方独立验证,而不是只能回看 EDC 内部日志 + +换句话说,这里不是“做一个 EDC”,而是“在 EDC 已经负责 catalog / +contract / transfer governance 的前提下,给执行过程补一个可验证证据层”。 + +## 2. 不做什么 + +这轮明确不做下面这些事: + +- 不把 EDC 变成新的研究主线 +- 不做 EDC 替代实现 +- 不做完整 EDC 平台 +- 不做 connector 产品 +- 不做 dataspace 全栈 +- 不做通用 usage control 系统 +- 不先改 persistence 层 +- 不先碰 data plane 传输实现 +- 不追求一次解决所有 dataspace flavor 的证据映射 + +如果一项工作会把范围推向“平台化”或“产品化”,这一轮就先不做。 + +## 3. 最小可交付是什么 + +本轮最小可交付只有三块: + +1. 一份边界文档 + 明确 EDC 和 `agent-evidence` 各自负责什么,以及为什么首个切口是 + control-plane event extension。 + +2. 一份最小 profile 草案 + 只保留独立验证一个最小 transfer 过程所需字段,不带 secrets、 + privateProperties 或内部实现细节。 + +3. 
一条最小 demo 路径说明 + 从 asset 到 policy / contract definition,再到 contract agreement、 + transfer process、evidence bundle、independent verify,画出闭环。 + +这三块加起来的意义,是先把“怎么接、接哪里、交付什么”讲清楚,而不是 +先跳进 Java 代码。 + +## 为什么 EDC 是高价值场景,而不是新方向 + +EDC 值得接,不是因为我们要换方向,而是因为它天然具备三件事: + +- 它已经有真实的 policy-governed data exchange 语境,不是抽象 demo +- 它已经把 catalog、contract、transfer governance 这些关键控制面对象定义清楚 +- 它已经给了扩展口,允许在不重做控制面的前提下接入额外能力 + +这对 `agent-evidence` 很重要。`agent-evidence` 想证明的不是“自己也能做一个 +dataspace 控制面”,而是“当现有 dataspace 控制面已经处理交换与治理时, +我们可以把执行过程补成可验证证据”。 + +所以,EDC 在这里是高价值落地场景,不是新的产品线,也不是新的研究中心。 + +## 为什么首个接入面是 control-plane event extension + +推荐先做 control-plane event extension,而不是先改 persistence 或 data +plane,原因很直接: + +- 官方已经把 `ServiceExtension` 作为运行时扩展入口 +- 官方已经把 `EventRouter` 作为 in-process 事件订阅入口 +- 官方已经提供 callbacks 作为外部接收状态变化的标准方式 +- 控制面事件天然覆盖 contract negotiation 和 transfer process 的关键状态变化 +- 这些状态变化正好是 evidence 最需要的“谁、何时、对哪条交换链路做了什么” + +反过来看,为什么不先动别的面: + +- 先改 persistence,会把方案绑到具体表结构、事务实现和版本细节 +- 先碰 data plane,会过早卷入真实数据传输协议和连接器能力差异 +- 两者都会把“增强层”拉成“平台内部改造” + +最小接入 demo 更稳的做法是: + +- 在 control plane 订阅关键事件 +- 把这些事件归一成最小 evidence bundle +- 让 bundle 脱离运行时后仍可独立验证 + +如果后面需要更强一致性,再考虑把 exporter 从 async 订阅升级到更可靠的 +transactional dispatch 或持久化出口,而不是一开始就侵入底层存储。 + +## 角色边界 + +### EDC 负责什么 + +- 发布和检索 catalog +- 管理 policy definition 和 contract definition +- 生成和协商 contract agreement +- 启动和治理 transfer process +- 协调 control plane 与 data plane 的交互 + +### `agent-evidence` 负责什么 + +- 把一次执行过程相关的关键状态变化整理成 evidence bundle +- 记录最小但闭合的 participant / asset / contract / transfer 关联 +- 生成可对外传递的 digest、signature、anchor 等验证材料 +- 提供独立验证入口,不要求验证者接入 EDC 内部数据库或日志系统 + +## 当前推荐的最小接入形态 + +第一轮建议把接入形态收敛成: + +- 一个 control-plane event subscriber / exporter 草图 +- 一个最小 evidence profile +- 一个独立验证说明 + +不先承诺: + +- 完整 Java 扩展实现 +- 完整 management API 自动化 +- 全状态机覆盖 +- 跨 connector 产品化部署 + +## 官方依据 + +以下链接为 2026-04-12 检索时使用的官方入口: + +- EDC Control Plane + 
[https://eclipse-edc.github.io/documentation/for-adopters/control-plane/](https://eclipse-edc.github.io/documentation/for-adopters/control-plane/) + 说明控制面负责 catalog、contract agreement、transfer governance,并明确 + transfer 只控制数据流,不直接发送数据。 + +- EDC Extensions / `ServiceExtension` + [https://eclipse-edc.github.io/documentation/for-adopters/extensions/](https://eclipse-edc.github.io/documentation/for-adopters/extensions/) + 说明运行时扩展的官方入口是 `ServiceExtension`。 + +- EDC Events and Callbacks / `EventRouter` + [https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/](https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/) + 说明控制面状态变化通过事件传播,支持 in-process 订阅和 webhook callbacks。 + +- EDC Control-plane entities and transfer callbacks + [https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/](https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/) + 说明 transfer process 的回调事件类型,以及 callbackAddresses 的使用方式。 + +- Eclipse Dataspace Protocol scope + [https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/](https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/) + 说明 DSP 的范围是 publish data、negotiate agreements、access data。 + +## 下一步路线图 + +1. 先画清楚 control-plane event -> evidence field mapping,只覆盖最小成功链路。 +2. 再把 mapping 收敛成 EDC minimal evidence schema / JSON 草案。 +3. 
最后补一个最小 exporter demo 和独立 verify 说明,不提前产品化。 diff --git a/docs/edc/edc_control_plane_event_extension_sketch.md b/docs/edc/edc_control_plane_event_extension_sketch.md new file mode 100644 index 0000000..49bddd8 --- /dev/null +++ b/docs/edc/edc_control_plane_event_extension_sketch.md @@ -0,0 +1,279 @@ +# EDC Control-Plane Event Extension Sketch + +## 结论先行 + +第一轮真正值得做的接入面,就是 EDC control-plane event extension。 + +原因不是它“最炫”,而是它最稳: + +- 当前只讨论 control plane +- 当前不碰 persistence store 改造 +- 当前不碰 data plane / provisioner / connector 产品化 +- 官方已经把 `ServiceExtension` 和 `EventRouter` 定义成运行时扩展与事件订阅入口 +- control plane 已经覆盖 catalog、contract agreement、transfer governance 这些最关键的治理对象 +- `EventEnvelope` 已经把 event id、timestamp 这类去重和审计最需要的元数据独立出来 + +所以,这一轮最合理的目标不是“写一个 Java 扩展跑起来”,而是先把: + +- 订阅哪些事件 +- 哪些事件该进 evidence +- 这些事件怎样归一成 `agent-evidence` 语义事件 +- 最小 extension 结构应该怎么拆 +- 第三方独立验证怎么接上 + +这些问题先钉住。 + +## 这次草图的边界 + +这份文档只覆盖: + +- EDC control plane 里的事件接入 +- control-plane event -> semantic evidence 的最小映射 +- 一个最小 extension 的职责切分 +- 独立验证如何挂在导出物之后 + +这份文档明确不覆盖: + +- persistence store 改造 +- SQL / JPA / JDBC 表设计 +- data plane / provisioner 行为 +- connector 产品化 +- 多 runtime 聚合平台 + +## EDC control plane 负责什么 + +按官方 adopter 文档,control plane 负责三件大事: + +- 处理 catalog / dataset / offer 暴露 +- 管理 contract negotiation 和 contract agreement +- 启动并治理 transfer process + +官方文档还明确写了另一件很关键的事: + +- data transfer “controls the flow of data, but it does not send it” + +也就是说,control plane 负责治理链路,不负责真实数据发送。真实发送由独立的 +data planes 完成。 + +这正好解释了为什么 `agent-evidence` 在这里应该补“执行证据层”,而不是去做 +新的传输面。 + +## 为什么 `ServiceExtension` + `EventRouter` 是最稳切口 + +### `ServiceExtension` + +官方把 `ServiceExtension` 定义为 runtime service 的扩展入口,并且提供了 +`initialize`、`prepare`、`start`、`shutdown`、`cleanup` 这些标准生命周期。 + +对于这轮目标来说,最小接入只需要做两件事: + +- 在 `initialize` 阶段注册 subscriber +- 把收到的 control-plane events 送进 mapper / exporter + +这比改 store、改 state machine、改 data plane 都更稳,因为它不要求侵入 EDC +核心实体持久化逻辑。 + +### `EventRouter` + +官方 contributor 文档和官方源码都把 
`EventRouter` 定义成事件分发中心。 + +它允许: + +- `registerSync(Class, EventSubscriber)` 注册同步 subscriber +- `register(Class, EventSubscriber)` 注册异步 subscriber + +这两个模式的意义很直接: + +- sync 适合“至少要有一次”的本地持久化、事务内导出或 outbox 风格动作 +- async 适合通知、外发、send-and-forget + +对于 `agent-evidence` 来说,这意味着可以先画清楚两种部署姿态: + +- 最小草图:async exporter,低侵入,先跑通语义映射 +- 稍强一致性版本:sync subscriber + 本地 staging / outbox,再异步写 bundle + +## `Event` 和 `EventEnvelope` 的区别为什么重要 + +这点对 evidence 尤其重要。 + +官方 contributor 文档明确说明: + +- `Event` payload 应该带领域信息,比如 asset id、transfer process id +- event metadata 不应该塞在 payload 里 +- event id、timestamp 这类元数据应该放在 `EventEnvelope` + +这对 evidence 设计有三个直接好处: + +1. 语义和运输元数据分离 + evidence mapper 可以把 payload 当成“发生了什么”,把 envelope 当成 + “这条记录何时、以什么事件 id 被看到”。 + +2. 去重更自然 + `EventEnvelope.id` 很适合做一次投递级别去重键。 + +3. 时间锚更清楚 + `EventEnvelope.at` 可以直接作为事件观测时间,而不用再从 payload 猜。 + +如果把这两层混在一起,后面的 idempotency、重放、重组 bundle 都会变脆。 + +## 这个 extension 应该订阅哪些事件 + +第一轮建议订阅五个 control-plane 事件族,而不是只盯着 transfer: + +- `AssetEvent` +- `PolicyDefinitionEvent` +- `ContractDefinitionEvent` +- `ContractNegotiationEvent` +- `TransferProcessEvent` + +原因是最小 demo 虽然最后落在 transfer 上,但一个可解释的 evidence bundle +不能只知道“传输了”,还要知道: + +- 交换对象是谁 +- 它受哪条 policy / contract definition 约束 +- agreement 是怎么来的 +- transfer 是哪条 agreement 驱动的 + +这五个事件族正好覆盖这条最小闭环。 + +## 这些事件如何整理成 `agent-evidence` 语义事件 + +第一轮不要照搬所有 EDC 原始事件名到最终 bundle。 + +更稳的做法是分两层: + +1. raw control-plane event layer + 保留 `payload.name()`、`EventEnvelope.id`、`EventEnvelope.at` 和最小领域 id。 + +2. 
semantic evidence layer + 只把对独立验证有意义的状态,归一成较稳定的语义事件。 + +建议的最小语义事件类型可以是: + +- `dataspace.asset.registered` +- `dataspace.policy.definition.registered` +- `dataspace.contract.definition.bound` +- `dataspace.contract.agreement.established` +- `dataspace.contract.negotiation.terminated` +- `dataspace.transfer.started` +- `dataspace.transfer.completed` +- `dataspace.transfer.terminated` + +这里有两个刻意的收敛: + +- 不把所有中间态都提升成 first-class evidence +- 不把语义事件名字绑死在 EDC 某个内部类名上 + +这样后续即使换 connector flavor,语义层也更容易保持稳定。 + +## 推荐的最小 extension 结构 + +第一轮建议只画下面这条线: + +1. `ServiceExtension` + 在 runtime 启动时注册 subscriber 和 mapper 组件。 + +2. `EventRouter` subscriber registration + 按事件族注册,不先细分到每个具体事件类。 + +3. `ControlPlaneEventSubscriber` + 收到 `EventEnvelope` 后先做 envelope 级去重和最小字段抽取。 + +4. `EventToEvidenceMapper` + 把 EDC raw event 归一成 `agent-evidence` semantic evidence fragment。 + +5. `EvidenceGroupingService` + 根据 grouping key 把碎片归组到同一条最小 demo 链路。 + +6. `BundleWriter` / `Exporter` + 把分组后的 evidence 写成外部 bundle 或中间导出物。 + +7. 独立 validator + 这一步不放在 EDC runtime 内,而是交给 `agent-evidence` 仓库侧的 bundle / + verify 路径。 + +## 第三方独立验证怎么接上 + +这里的核心原则是: + +EDC runtime 负责“观察并导出”,`agent-evidence` 负责“脱离 runtime 后还能验证”。 + +最小接法是: + +- extension 产出一个独立 evidence bundle +- bundle 至少带上 participant、asset、policy、contract、transfer、manifest 信息 +- Python 侧 validator 读取 bundle,做字段闭合、状态一致性、digest 校验 + +第三方不需要: + +- 连进 EDC 内部数据库 +- 读取 connector 本地日志 +- 理解 EDC 内部线程或表结构 + +第三方只需要: + +- bundle 本身 +- 最小 manifest / digest / signature 材料 +- 可选外部 anchor 信息 + +这也是为什么第一轮先做 event sketch,而不是先做 Java 可运行代码:真正需要先固定的, +不是“怎么写类”,而是“导出后独立验证到底看什么”。 + +## 为什么这一层是 augmentation layer,而不是替代 EDC + +因为它不替代 EDC 已有职责。 + +EDC 仍然负责: + +- catalog +- contract negotiation +- contract agreement +- transfer governance + +这层 extension 只负责: + +- 监听控制面事件 +- 把关键状态变化整理成 semantic evidence +- 把这些证据导出成外部可验证 bundle + +换句话说,它不改变 EDC 的治理逻辑,只给 EDC 场景增加一个 execution +evidence layer。 + +## 推荐的第一轮实现姿态 + +如果下一轮真要开始动代码,建议顺序是: + +1. 
先做 family-level subscriber,不先做复杂 per-event wiring +2. 先做 raw event journal + semantic fragment mapper,不先做完整 schema +3. 先做 transfer-centered grouping,不先做跨 runtime 合并 +4. 先把 bundle 导出去给 Python validator,不在 Java 里重写验证器 + +## 官方参考 + +以下链接为 2026-04-12 检索时使用的官方入口: + +- Control Plane + [https://eclipse-edc.github.io/documentation/for-adopters/control-plane/](https://eclipse-edc.github.io/documentation/for-adopters/control-plane/) + +- Extensions + [https://eclipse-edc.github.io/documentation/for-adopters/extensions/](https://eclipse-edc.github.io/documentation/for-adopters/extensions/) + +- Service Layers / events and callbacks + [https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/](https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/) + +- Control-plane entities + [https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/](https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/) + +- DSP scope + [https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/](https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/) + +- Official `ServiceExtension` source + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/boot-spi/src/main/java/org/eclipse/edc/spi/system/ServiceExtension.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/boot-spi/src/main/java/org/eclipse/edc/spi/system/ServiceExtension.java) + +- Official `EventRouter`, `EventSubscriber`, `EventEnvelope` sources + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventRouter.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventRouter.java) + 
[https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventSubscriber.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventSubscriber.java) + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventEnvelope.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventEnvelope.java) + +- Official `EventRouterImpl` source + [https://github.com/eclipse-edc/Connector/blob/main/core/common/runtime-core/src/main/java/org/eclipse/edc/runtime/core/event/EventRouterImpl.java](https://github.com/eclipse-edc/Connector/blob/main/core/common/runtime-core/src/main/java/org/eclipse/edc/runtime/core/event/EventRouterImpl.java) diff --git a/docs/edc/edc_demo_minimal_path.md b/docs/edc/edc_demo_minimal_path.md new file mode 100644 index 0000000..9fd8367 --- /dev/null +++ b/docs/edc/edc_demo_minimal_path.md @@ -0,0 +1,155 @@ +# EDC Demo Minimal Path + +## 结论先行 + +这个 demo 路径不是 EDC 替代实现。 + +它只是说明:当 EDC 已经负责 catalog、contract、transfer governance 时, +`agent-evidence` 如何作为增强层,补出一条最小可验证的 execution-evidence +闭环。 + +## 最小路径 + +| 输入 | 系统动作 | 生成的 evidence | 第三方如何验证 | +| --- | --- | --- | --- | +| `asset` | 提供方在 EDC control plane 中登记可交换资产,并让它出现在 catalog / dataset / distribution 语境里 | 记录 `asset_id`、provider participant、最小对象引用 | 验证 `asset_id` 存在且后续 contract / transfer 都引用同一资产 | +| `policy / contract definition` | 提供方配置 policy definition 与 contract definition,把治理规则绑定到资产选择条件 | 记录 `policy_definition_id`、`contract_definition_id` 以及它们与 `asset_id` 的关联 | 验证 policy / contract 定义与资产选择逻辑闭合,不要求读取内部库表 | +| `contract agreement` | 消费方发起 negotiation,控制面生成 contract agreement | 记录 `contract_agreement_id`、consumer participant、provider participant | 验证 agreement 与前面的 participant、asset、policy / contract 定义能串起来 | +| `transfer process` | 消费方基于 agreement 发起 
transfer,控制面推进状态机;事件订阅器捕获关键状态变化 | 记录 `transfer_process_id`、`flow_type`、`state_transitions`、`started_at`、`completed_at` 或 `terminated_at` | 验证状态序列和时间戳合理,且 transfer 绑定到同一 agreement | +| `evidence bundle` | augmentation exporter 把最小字段打包成 evidence bundle,计算 manifest digest,并附带签名或外部锚定摘要 | 记录 `manifest_digest`、`signature_count`、可选 `anchor_type` / `anchor_id` | 重算或比对 manifest digest,继续检查签名数量和可选锚定信息 | +| `independent verify` | 独立验证器读取 bundle,不需要接入 EDC 内部日志或数据库 | 输出机器可读结果和人可读结论 | 检查字段完整性、引用闭合、状态一致性、digest 一致性、可选锚定可追溯性 | + +## 这个 demo 为什么是增强层 + +因为 EDC 仍然负责: + +- catalog 暴露 +- policy / contract 定义 +- contract negotiation +- transfer governance + +而 `agent-evidence` 只负责: + +- 订阅关键控制面事件 +- 导出最小 evidence bundle +- 让 bundle 在 EDC 外部也能被验证 + +也就是说,这个 demo 的目标不是“换掉 EDC”,而是“让 EDC 场景里的执行过程 +有一份可单独带走、可单独核验的证据”。 + +## 推荐的最小实现顺序 + +建议把实现顺序收敛成下面这一条线: + +1. 先用 control-plane event subscriber 捕获最小事件集 + 先覆盖 agreement finalized、transfer started、transfer completed / + terminated。 + +2. 再把事件归一成最小 evidence bundle + 先只保留 participant、asset、policy、contract、transfer、manifest 相关字段。 + +3. 
最后接独立 verify + 先验证字段闭合、状态一致性和 manifest digest,不急着做复杂外部信任基础设施。 + +## 非目标 + +这个最小 demo 当前不包括: + +- 自定义 data plane +- 新的 connector 产品封装 +- 对 EDC persistence 的侵入式改造 +- 完整 usage control 执行系统 +- 全状态机、全协议、全后端一次打通 + +## 这条 demo 路径在 control-plane event extension 视角下如何被观察 + +| Demo step | EDC object / process | Observed control-plane event family | Generated evidence fragment | Final evidence bundle grouping key | +| --- | --- | --- | --- | --- | +| asset prepared | `Asset` | `AssetEvent`,最小看 `asset.created` | `dataspace.asset.registered`,带 `asset_id` | 先不单独成 bundle;作为后续 transfer bundle 的背景片段,靠 `asset_id` 关联 | +| policy prepared | `PolicyDefinition` | `PolicyDefinitionEvent`,最小看 `policy.definition.created` | `dataspace.policy.definition.registered` | 先不单独成 bundle;靠 `policy_definition_id` 关联 | +| contract definition prepared | `ContractDefinition` | `ContractDefinitionEvent`,最小看 `contract.definition.created` | `dataspace.contract.definition.bound` | 先不单独成 bundle;靠 `contract_definition_id` 关联 | +| contract agreement established | `ContractNegotiation` -> `ContractAgreement` | `ContractNegotiationEvent`,最小看 `contract.negotiation.finalized` 或 `terminated` | `dataspace.contract.agreement.established` 或 `dataspace.contract.negotiation.terminated` | transfer 尚未出现前,可临时靠 `contract_agreement_id` 关联 | +| transfer requested / started | `TransferProcess` | `TransferProcessEvent`,最小看 `transfer.process.requested`、`transfer.process.started` | `dataspace.transfer.requested`、`dataspace.transfer.started` | 从这里开始用 `transfer_process_id` 作为最终 bundle key | +| transfer completed / terminated | `TransferProcess` terminal state | `TransferProcessEvent`,最小看 `transfer.process.completed` 或 `terminated` | `dataspace.transfer.completed` 或 `dataspace.transfer.terminated` | `transfer_process_id` | +| bundle exported | augmentation exporter | 不是新 EDC 事件,而是 extension 内导出动作 | manifest、digest、signature count、anchor fields | `transfer_process_id` | + +## 推荐的 bundle grouping key + 
+如果只看“什么时候最容易先串起来”,`contract_agreement_id` 很诱人,因为 +agreement 比 transfer 更早出现。 + +但如果看“最终 evidence bundle 应该描述什么”,我更推荐: + +`transfer_process_id` + +原因有三点: + +1. 一个 bundle 最终描述的是一条具体 execution path + transfer 才是最接近执行实例的对象。 + +2. 一条 agreement 未来可能对应多条 transfer + 如果直接用 `contract_agreement_id` 当最终 bundle key,后续容易把多次执行混在一起。 + +3. transfer 事件天然提供 success / terminated 终点 + 用它做最终 key,更容易直接落到 `started_at`、`completed_at`、`terminated_at`。 + +### 更稳的实际做法 + +推荐采用两段式关联: + +- transfer 出现前:允许临时用 `contract_agreement_id` 做 staging correlation +- transfer 出现后:把最终 bundle 固定到 `transfer_process_id` + +这意味着: + +- `contract_agreement_id` 是重要关联字段 +- `transfer_process_id` 才是最终 evidence bundle grouping key + +## 推荐的最小 demo 事件范围 + +第一轮最小 demo 我建议只钉住下面这些事件: + +- `asset.created` +- `policy.definition.created` +- `contract.definition.created` +- `contract.negotiation.requested` +- `contract.negotiation.finalized` +- `contract.negotiation.terminated` +- `transfer.process.requested` +- `transfer.process.started` +- `transfer.process.completed` +- `transfer.process.terminated` + +如果还想再缩一刀,最先可以删掉的是: + +- `contract.negotiation.requested` + +但我不建议第一轮就删,因为它能把“协商确实开始过”这一步补齐。 + +## 非常小的后续实现建议 + +下一步最值得先做的是: + +`control-plane event subscriber / exporter` + +原因是它最贴近官方扩展面,最不容易把范围拉向 persistence 或 data plane, +也最适合先把 `event -> evidence mapping` 画清楚。 + +## 官方参考 + +以下链接为 2026-04-12 检索时使用的官方入口: + +- Control Plane + [https://eclipse-edc.github.io/documentation/for-adopters/control-plane/](https://eclipse-edc.github.io/documentation/for-adopters/control-plane/) + +- Extensions / `ServiceExtension` + [https://eclipse-edc.github.io/documentation/for-adopters/extensions/](https://eclipse-edc.github.io/documentation/for-adopters/extensions/) + +- Events / `EventRouter` / callbacks + [https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/](https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/) + +- Transfer callbacks and event names + 
[https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/](https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/) + +- DSP scope + [https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/](https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/) diff --git a/docs/edc/edc_event_to_evidence_mapping.md b/docs/edc/edc_event_to_evidence_mapping.md new file mode 100644 index 0000000..f7c5587 --- /dev/null +++ b/docs/edc/edc_event_to_evidence_mapping.md @@ -0,0 +1,213 @@ +# EDC Event To Evidence Mapping + +## 结论先行 + +第一轮不要把所有 control-plane events 原样倒进 evidence。 + +更稳的做法是: + +- 先订阅五个控制面事件族 +- 先保留 raw event + envelope metadata +- 再只把独立验证真正需要的状态,提升成 semantic evidence + +这意味着: + +- Asset / PolicyDefinition / ContractDefinition 事件主要负责补治理背景 +- ContractNegotiation / TransferProcess 事件才是 execution evidence 主干 +- `EventEnvelope.id` 和 `EventEnvelope.at` 应该直接进入去重和时间锚逻辑 + +## 总体映射表 + +| EDC event family | Typical trigger / state | Why it matters for evidence | Proposed agent-evidence semantic event type | Required fields to capture | Nice-to-have fields | Whether it should be part of the minimal demo | +| --- | --- | --- | --- | --- | --- | --- | +| `AssetEvent` | `asset.created` | 证明 demo 中被交换对象何时进入 control plane | `dataspace.asset.registered` | `envelope.id`, `envelope.at`, `payload.name()`, `assetId`, `participantContextId` | asset public properties digest、asset descriptor snapshot digest | Yes, if the demo creates the asset in the same run | +| `AssetEvent` | `asset.updated` | 可解释资产治理背景变化,但不是最小闭环必需 | `dataspace.asset.updated` | `envelope.id`, `envelope.at`, `assetId`, `participantContextId` | changed property names、before/after snapshot digest | No | +| `AssetEvent` | `asset.deleted` | 适合做生命周期审计,不是首个 transfer demo 的主干 | `dataspace.asset.deleted` | `envelope.id`, `envelope.at`, `assetId`, `participantContextId` | deletion reason、operator context | No | +| `PolicyDefinitionEvent` | 
`policy.definition.created` | 证明治理规则对象已存在,可被 contract / transfer 引用 | `dataspace.policy.definition.registered` | `envelope.id`, `envelope.at`, `policyDefinitionId`, `participantContextId` | policy digest、policy scope summary | Yes, if the demo creates the policy in the same run | +| `PolicyDefinitionEvent` | `policy.definition.updated` | 有助于审计规则演变,但不是首个最小链路必需 | `dataspace.policy.definition.updated` | `envelope.id`, `envelope.at`, `policyDefinitionId`, `participantContextId` | old/new policy digest | No | +| `PolicyDefinitionEvent` | `policy.definition.deleted` | 适合治理审计,不是 transfer evidence 主干 | `dataspace.policy.definition.deleted` | `envelope.id`, `envelope.at`, `policyDefinitionId`, `participantContextId` | deletion reason | No | +| `ContractDefinitionEvent` | `contract.definition.created` | 证明某条 policy 与 asset selector 已被绑定成可协商合同入口 | `dataspace.contract.definition.bound` | `envelope.id`, `envelope.at`, `contractDefinitionId`, `participantContextId` | referenced policy ids、selector digest | Yes, if the demo creates the contract definition in the same run | +| `ContractDefinitionEvent` | `contract.definition.updated` | 有助于理解规则变更,但不是首个最小闭环必需 | `dataspace.contract.definition.updated` | `envelope.id`, `envelope.at`, `contractDefinitionId`, `participantContextId` | old/new selector digest | No | +| `ContractDefinitionEvent` | `contract.definition.deleted` | 生命周期补充,不是最小 transfer demo 主干 | `dataspace.contract.definition.deleted` | `envelope.id`, `envelope.at`, `contractDefinitionId`, `participantContextId` | deletion reason | No | +| `ContractNegotiationEvent` | `contract.negotiation.requested` | 证明交换请求正式进入 negotiation state machine | `dataspace.contract.negotiation.requested` | `envelope.id`, `envelope.at`, `contractNegotiationId`, `counterPartyId`, `counterPartyAddress`, `protocol`, `participantContextId` | last contract offer digest、callback presence | Yes | +| `ContractNegotiationEvent` | `contract.negotiation.offered` / `accepted` / `agreed` / `verified` | 
提供协商中间态,可帮助诊断,但对最小独立验证不是必须 | `dataspace.contract.negotiation.progressed` | `envelope.id`, `envelope.at`, `payload.name()`, `contractNegotiationId`, `counterPartyId`, `protocol` | offer digest、transition reason | No | +| `ContractNegotiationEvent` | `contract.negotiation.finalized` | 这是 governance 闭环的关键点,带出 contract agreement | `dataspace.contract.agreement.established` | `envelope.id`, `envelope.at`, `contractNegotiationId`, `contractAgreement.id`, `contractAgreement.assetId`, `contractAgreement.providerId`, `contractAgreement.consumerId`, `contractAgreement.contractSigningDate`, `participantContextId`, `protocol` | contract policy digest、claims digest | Yes | +| `ContractNegotiationEvent` | `contract.negotiation.terminated` | 证明协商失败或中止,是 fail path 的终点 | `dataspace.contract.negotiation.terminated` | `envelope.id`, `envelope.at`, `contractNegotiationId`, `counterPartyId`, `participantContextId`, `protocol` | termination reason、last offer digest | Yes, for failure demo | +| `TransferProcessEvent` | `transfer.process.requested` | 证明 agreement 已被用于发起具体 transfer | `dataspace.transfer.requested` | `envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `type`, `participantContextId`, `protocol` | callback presence | Yes | +| `TransferProcessEvent` | `transfer.process.initiated` | 证明 transfer state machine 已启动 | `dataspace.transfer.initiated` | `envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `type`, `participantContextId`, `protocol` | participant role hint | Optional | +| `TransferProcessEvent` | `transfer.process.started` | execution evidence 真正开始变强的节点,表示 transfer 已进入 started state | `dataspace.transfer.started` | `envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `type`, `participantContextId`, `protocol` | `dataAddress` digest only、flow endpoint class | Yes | +| `TransferProcessEvent` | `transfer.process.completed` | success path 的终点,支撑 `completed_at` | `dataspace.transfer.completed` | 
`envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `participantContextId` | completion summary | Yes | +| `TransferProcessEvent` | `transfer.process.terminated` | fail path / stop path 的终点,支撑 `terminated_at` | `dataspace.transfer.terminated` | `envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `participantContextId` | termination reason | Yes | +| `TransferProcessEvent` | `transfer.process.suspended` | 对长生命周期 transfer 很有用,但会让首个 demo 的状态链变复杂 | `dataspace.transfer.suspended` | `envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `participantContextId` | suspension reason | No | +| `TransferProcessEvent` | `transfer.process.preparationRequested` / `prepared` | 与 preparation phase 相关,适合扩展版链路,不是最小成功闭环必需 | `dataspace.transfer.preparation.progressed` | `envelope.id`, `envelope.at`, `transferProcessId`, `assetId`, `contractId`, `participantContextId` | preparation handler name | No | +| `TransferProcessEvent` | `transfer.process.provisioned` / `deprovisioningRequested` / `deprovisioned` | 这些事件在官方源码里已标记 deprecated,不适合作为首个最小 demo 基石 | `dataspace.transfer.legacy.lifecycle` | `envelope.id`, `envelope.at`, `transferProcessId` | none | No | + +## ContractNegotiation 推荐的最小状态链 + +### 最小成功链 + +| Stage | Recommended EDC event | Why keep it | +| --- | --- | --- | +| negotiation entered | `contract.negotiation.requested` | 证明协商正式开始 | +| agreement established | `contract.negotiation.finalized` | 这是最关键的治理闭环节点,能带出 contract agreement | + +### 最小失败链 + +| Stage | Recommended EDC event | Why keep it | +| --- | --- | --- | +| negotiation entered | `contract.negotiation.requested` | 证明请求确实发生过 | +| negotiation failed / stopped | `contract.negotiation.terminated` | 给出失败或终止终点 | + +### 为什么不把所有中间态都放进最小 demo + +`offered`、`accepted`、`agreed`、`verified` 当然有价值,但它们更像协商轨迹细节。 + +第一轮最小 demo 先需要回答的是: + +- agreement 有没有建立 +- agreement 建立后 transfer 有没有开始和结束 + +所以第一轮推荐只把: + +- `requested` +- `finalized` +- `terminated` + +视作 negotiation 
的最小核心事件。 + +## TransferProcess 推荐的最小状态链 + +### 最小成功链 + +| Stage | Recommended EDC event | Why keep it | +| --- | --- | --- | +| transfer requested | `transfer.process.requested` | 证明 agreement 已被用来发起具体 transfer | +| transfer started | `transfer.process.started` | execution evidence 的核心起点 | +| transfer completed | `transfer.process.completed` | success path 终点 | + +### 最小失败链 + +| Stage | Recommended EDC event | Why keep it | +| --- | --- | --- | +| transfer requested | `transfer.process.requested` | 证明 transfer 被发起 | +| transfer started | `transfer.process.started` | 证明执行已经进入运行态 | +| transfer terminated | `transfer.process.terminated` | fail / stop path 终点 | + +### 为什么不把 preparation / suspend 先放进最小 demo + +因为那会立刻把第一轮扩成更完整的长生命周期状态机说明。 + +第一轮先证明下面这件事就够了: + +- agreement 建立 +- transfer 发起 +- transfer 开始 +- transfer 成功完成或失败终止 + +`suspended`、`preparationRequested`、`prepared` 更适合作为第三轮之后的扩展。 + +## 哪些事件暂时不进最小 demo + +第一轮建议先不进最小 demo 的主要是四类: + +1. update / delete 类管理事件 + 这些事件对治理审计有用,但不是最小 transfer 闭环的骨架。 + +2. contract negotiation 中间态 + `offered`、`accepted`、`agreed`、`verified` 容易把读者拉进更细的协商协议过程。 + +3. transfer 的 suspension / preparation 类事件 + 它们对长生命周期 transfer 很重要,但不是首个最小成功链路的门槛。 + +4. 
官方已标记 deprecated 的 transfer lifecycle 事件 + 例如 `provisioned`、`deprovisioningRequested`、`deprovisioned`。 + +## 如何做 idempotency / de-duplication + +第一轮建议做两级去重,而不是只做一级。 + +### 一级:envelope 级去重 + +目标:去掉完全重复投递。 + +建议键: + +- `(participantContextId, envelope.id)` + +理由: + +- `EventEnvelope.id` 是官方定义的事件唯一标识 +- `participantContextId` 能避免多参与者上下文混写时的误判 + +### 二级:语义状态级折叠 + +目标:去掉“同一个领域对象、同一个状态”被重复记证。 + +建议键: + +- Asset: `(participantContextId, assetId, payload.name())` +- PolicyDefinition: `(participantContextId, policyDefinitionId, payload.name())` +- ContractDefinition: `(participantContextId, contractDefinitionId, payload.name())` +- ContractNegotiation: `(participantContextId, contractNegotiationId, payload.name())` +- TransferProcess: `(participantContextId, transferProcessId, payload.name())` + +理由: + +- envelope 去重只能挡住“同一个 envelope 重放” +- 语义折叠才能挡住“不同 envelope、同一状态重复发布或重试” + +### `EventEnvelope` metadata 和 domain IDs 应该怎么一起用 + +建议把两类 id 分工用: + +- `EventEnvelope.id` 用于“这条消息我见过没有” +- domain IDs 用于“这条消息对应哪条治理链路” + +最小链路里最关键的 domain IDs 是: + +- `contractNegotiationId` +- `contractAgreement.id` 或 `contractId` +- `transferProcessId` + +推荐做法: + +1. 先按 `EventEnvelope.id` 去掉精确重复 +2. 再按 domain ID + `payload.name()` 折叠同一状态 +3. 
最后再按 bundle grouping key 把 evidence fragment 归组 + +## 对 bundle grouping key 的含义提醒 + +第一轮 mapping 里,最关键的不是“所有事件都立刻有最终 bundle key”,而是: + +- 事件先被可靠识别 +- 事件能被关联到同一治理链路 +- bundle 在 transfer 开始后有明确归宿 + +因此更稳的思路是: + +- negotiation finalized 后、transfer started 前:允许先用 `contractAgreement.id` 做临时关联 +- transfer started 后:再把最终 bundle 归到 `transferProcessId` + +这能避免把多个 transfer 混进同一个 agreement-level bundle。 + +## 官方参考 + +以下链接为 2026-04-12 检索时使用的官方入口: + +- Control Plane + [https://eclipse-edc.github.io/documentation/for-adopters/control-plane/](https://eclipse-edc.github.io/documentation/for-adopters/control-plane/) + +- Service Layers / events and callbacks + [https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/](https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/) + +- Control-plane entities + [https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/](https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/) + +- Official event family sources + [https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/asset-spi/src/main/java/org/eclipse/edc/connector/controlplane/asset/spi/event](https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/asset-spi/src/main/java/org/eclipse/edc/connector/controlplane/asset/spi/event) + [https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/policy-spi/src/main/java/org/eclipse/edc/connector/controlplane/policy/spi/event](https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/policy-spi/src/main/java/org/eclipse/edc/connector/controlplane/policy/spi/event) + [https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/contract-spi/src/main/java/org/eclipse/edc/connector/controlplane/contract/spi/event](https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/contract-spi/src/main/java/org/eclipse/edc/connector/controlplane/contract/spi/event) +
[https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/transfer-spi/src/main/java/org/eclipse/edc/connector/controlplane/transfer/spi/event](https://github.com/eclipse-edc/Connector/tree/main/spi/control-plane/transfer-spi/src/main/java/org/eclipse/edc/connector/controlplane/transfer/spi/event) + +- Official `EventEnvelope` source + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventEnvelope.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventEnvelope.java) diff --git a/docs/edc/edc_extension_minimal_structure.md b/docs/edc/edc_extension_minimal_structure.md new file mode 100644 index 0000000..4b1eb1b --- /dev/null +++ b/docs/edc/edc_extension_minimal_structure.md @@ -0,0 +1,244 @@ +# EDC Extension Minimal Structure + +## 结论先行 + +第一轮需要的是 extension 结构草图,不是正式实现。 + +最小 Java extension 只要把下面五件事拆清楚,就够支持下一轮 skeleton spike: + +- `ServiceExtension` 负责装配 +- `EventRouter` 负责订阅 control-plane events +- subscriber 负责收包、去重入口和字段抽取 +- mapper / exporter 负责把事件变成 evidence fragment +- bundle / verify 继续留在 `agent-evidence` 仓库侧,不在 Java 里重写 + +## 这份草图的边界 + +这不是正式实现,也不要求可编译。 + +它只回答: + +- 一个最小 Java extension 应该有哪些部分 +- 哪些职责应该留在 EDC runtime 内 +- 哪些职责应该继续留在 Python `agent-evidence` 侧 + +它不回答: + +- 持久化表怎么设计 +- bundle 文件格式最终定稿长什么样 +- runtime 间怎样汇总 +- data plane / provisioner 怎么接 + +## 一个最小 Java extension 的组成 + +### 1. `ServiceExtension` + +职责: + +- 在 `initialize` 中注册 subscriber +- 注入 monitor、router、可选 transaction 组件 +- 装配 mapper、grouping、writer + +### 2. `EventRouter` subscriber registration + +职责: + +- 订阅 `AssetEvent` +- 订阅 `PolicyDefinitionEvent` +- 订阅 `ContractDefinitionEvent` +- 订阅 `ContractNegotiationEvent` +- 订阅 `TransferProcessEvent` + +第一轮建议按 family 注册,而不是每个 concrete event 单独注册。 + +### 3. `Monitor` + +职责: + +- 记录扩展初始化、订阅成功、过滤丢弃、导出失败等运行时诊断信息 + +这不是 evidence 本身,但对 extension 可运维性是必要的。 + +### 4. 
optional `TransactionContext` + +职责: + +- 如果未来要做 sync subscriber 下的本地 staging / outbox,可作为事务边界工具 + +第一轮只是草图,所以它是 optional。 + +是否真的要接它,应该由“你要不要保证至少一次导出”决定,而不是先入为主地把 +扩展做重。 + +### 5. exporter / mapper / bundle writer interface + +职责最好拆成三层: + +- `EventToEvidenceMapper` + - 输入 `EventEnvelope` + - 输出 semantic evidence fragment + +- `EvidenceGroupingService` + - 决定 fragment 归到哪个 staging bucket / bundle + +- `EvidenceBundleWriter` + - 负责把分组后的内容导出成 JSON、JSONL、bundle 目录或其他中间物 + +这样后面无论你要写本地文件、HTTP exporter 还是 outbox,都不用重写 mapping。 + +## 极简伪代码级结构 + +```java +@Extension("Agent Evidence Control Plane Extension") +public class AgentEvidenceControlPlaneExtension implements ServiceExtension { + + @Inject private EventRouter eventRouter; + @Inject private Monitor monitor; + @Inject(required = false) private TransactionContext transactionContext; + + @Override + public void initialize(ServiceExtensionContext context) { + var deduplicator = new EnvelopeDeduplicator(); + var mapper = new EventToEvidenceMapper(monitor); + var grouper = new EvidenceGroupingService(monitor); + var writer = new EvidenceBundleWriter(monitor); + + var subscriber = new ControlPlaneEvidenceSubscriber( + deduplicator, mapper, grouper, writer, transactionContext, monitor + ); + + eventRouter.register(AssetEvent.class, subscriber); + eventRouter.register(PolicyDefinitionEvent.class, subscriber); + eventRouter.register(ContractDefinitionEvent.class, subscriber); + eventRouter.register(ContractNegotiationEvent.class, subscriber); + eventRouter.register(TransferProcessEvent.class, subscriber); + } +} + +public class ControlPlaneEvidenceSubscriber implements EventSubscriber { + @Override + public void on(EventEnvelope envelope) { + if (deduplicator.seen(envelope)) { + return; + } + + var fragment = mapper.map(envelope); + if (fragment == null) { + return; + } + + var groupKey = grouper.resolve(fragment); + writer.append(groupKey, fragment); + } +} +``` + +## EDC runtime 内负责什么 + +建议只把下面这些责任留在 Java / EDC runtime 内: + 
+- 订阅 control-plane events +- 抽取 envelope metadata 和最小领域 id +- 做第一层去重 +- 做最小 semantic mapping +- 产出可带出 runtime 的 evidence fragment / bundle draft + +这些事情都属于“观察和导出”。 + +## `agent-evidence` 仓库侧负责什么 + +建议继续把下面这些责任留在 Python `agent-evidence` 仓库侧: + +- bundle 结构定义的主线演进 +- manifest 生成与 digest 规则 +- 独立 validator +- human-readable failure summary +- machine-readable JSON validation report +- CLI / demo / examples + +这些事情都属于“脱离 EDC runtime 之后如何验证和复用”。 + +## 为什么不要在 Java 侧重写全部验证 + +因为那会很快把“扩展接入”变成“复制一套 agent-evidence 核心逻辑”。 + +第一轮更合理的边界是: + +- Java 侧负责采集和导出 +- Python 侧负责验证和演示 + +这样仓库主线不会被 EDC 绑死。 + +## 哪些部分建议保留在 Python `agent-evidence` 仓库侧 + +- semantic evidence profile 的正式定义 +- schema / validator 逻辑 +- manifest canonicalization 与 digest 规则 +- bundle 验证 CLI +- examples 和 invalid corpus +- 对外 demo 和 release artifact + +## 哪些部分未来如果真做 EDC extension,需要落在 Java 侧 + +- `ServiceExtension` 本体 +- `EventRouter` family-level subscriber 注册 +- envelope 级去重入口 +- raw event -> semantic evidence fragment mapper +- runtime 内的轻量 staging / outbox +- 最小 exporter + +## sync 还是 async,第一轮怎么选 + +第一轮如果只是做 skeleton spike,我建议: + +- 默认先画 async subscriber 路线 +- 在文档里预留 sync + `TransactionContext` 的升级口 + +原因很简单: + +- async 更贴近“先证明能观察并导出” +- sync 会立刻把你带进事务性和失败处理设计 + +但要明确一点: + +- 如果后续目标变成“至少一次写入本地 staging”,那就该认真评估 sync subscriber + 和事务边界了 + +## 推荐的接口切分 + +为了避免后面改一处牵一片,建议最少切出这四个接口: + +- `EnvelopeDeduplicator` +- `EventToEvidenceMapper` +- `EvidenceGroupingService` +- `EvidenceBundleWriter` + +这样下一轮做 Java skeleton spike 时,就能先把骨架立起来,而不必先决定: + +- 写文件 +- 发 HTTP +- 走消息队列 +- 还是落本地 outbox + +## 官方参考 + +以下链接为 2026-04-12 检索时使用的官方入口: + +- Extensions + [https://eclipse-edc.github.io/documentation/for-adopters/extensions/](https://eclipse-edc.github.io/documentation/for-adopters/extensions/) + +- Service Layers / events and callbacks + [https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/](https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/) + +- Official 
`ServiceExtension` source + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/boot-spi/src/main/java/org/eclipse/edc/spi/system/ServiceExtension.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/boot-spi/src/main/java/org/eclipse/edc/spi/system/ServiceExtension.java) + +- Official `EventRouter` / `EventSubscriber` sources + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventRouter.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventRouter.java) + [https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventSubscriber.java](https://github.com/eclipse-edc/Connector/blob/main/spi/common/core-spi/src/main/java/org/eclipse/edc/spi/event/EventSubscriber.java) + +- Official callback dispatcher extension example + [https://github.com/eclipse-edc/Connector/blob/main/extensions/control-plane/callback/callback-event-dispatcher/src/main/java/org/eclipse/edc/connector/controlplane/callback/dispatcher/CallbackEventDispatcherExtension.java](https://github.com/eclipse-edc/Connector/blob/main/extensions/control-plane/callback/callback-event-dispatcher/src/main/java/org/eclipse/edc/connector/controlplane/callback/dispatcher/CallbackEventDispatcherExtension.java) + +- Official CloudEvents extension example + [https://github.com/eclipse-edc/Connector/blob/main/extensions/common/events/events-cloud-http/src/main/java/org/eclipse/edc/event/cloud/http/CloudEventsHttpExtension.java](https://github.com/eclipse-edc/Connector/blob/main/extensions/common/events/events-cloud-http/src/main/java/org/eclipse/edc/event/cloud/http/CloudEventsHttpExtension.java) diff --git a/docs/edc/edc_minimal_evidence_profile_draft.md b/docs/edc/edc_minimal_evidence_profile_draft.md new file mode 100644 index 0000000..31a69cf --- /dev/null +++ b/docs/edc/edc_minimal_evidence_profile_draft.md @@ 
-0,0 +1,135 @@ +# EDC Minimal Evidence Profile Draft + +## 结论先行 + +这不是一个大而全的 EDC profile。 + +这只是一个最小字段草案,用来支持一个最小 dataspace 交换链路的独立验证: +谁和谁交换、基于哪条 policy / contract、走了哪条 transfer process、 +最后导出了什么 evidence。 + +## 适用边界 + +这个草案默认挂在 EDC control-plane event extension / exporter 上。 + +它不试图表达: + +- EDC 的全部实体模型 +- data plane 的所有传输细节 +- connector 的全部内部状态 +- vault、database、secret 管理细节 + +## 最小字段表 + +| 字段名 | 含义 | 来源 | 是否必填 | 为什么需要 | +| --- | --- | --- | --- | --- | +| `provider_participant_id` | 提供方参与者标识 | 控制面 participant 配置、catalog / contract 相关对象 | 是 | 让第三方知道证据指向哪一方提供数据 | +| `consumer_participant_id` | 消费方参与者标识 | contract negotiation / agreement / transfer 相关对象 | 是 | 让第三方知道合同和传输绑定到哪一方消费者 | +| `asset_id` | 被交换资产标识 | asset / dataset / catalog offer 关联对象 | 是 | 把 evidence 绑定到具体被治理对象 | +| `policy_definition_id` | 生效的 policy definition 标识 | policy definition | 是 | 让验证者知道治理约束来自哪条 policy | +| `contract_definition_id` | 触发该交换的 contract definition 标识 | contract definition | 是 | 衔接 asset 选择和合同生成入口 | +| `contract_agreement_id` | 已达成合同标识 | contract agreement | 是 | 这是 transfer process 的核心治理绑定点 | +| `transfer_process_id` | 传输过程标识 | transfer process | 是 | 把 evidence 绑定到单条控制面交换链路 | +| `flow_type` | 传输流型,如 `consumer-pull` / `provider-push` | transfer request、distribution、transfer metadata | 是 | 让验证者知道链路语义,不必反推 data plane 内部实现 | +| `state_transitions` | 控制面观测到的关键状态序列 | control-plane events / callbacks | 是 | 这是最小执行证据主体,证明链路如何推进 | +| `started_at` | 链路开始进入运行态的时间 | `transfer.process.started` 或等价事件时间戳 | 是 | 为第三方提供最小时间锚点 | +| `completed_at` | 成功完成时间 | `transfer.process.completed` 事件时间戳 | 否 | 只在成功完成路径需要 | +| `terminated_at` | 终止时间 | `transfer.process.terminated` 事件时间戳 | 否 | 只在失败或被终止路径需要 | +| `manifest_digest` | evidence bundle manifest 的摘要 | `agent-evidence` exporter 生成的 manifest | 是 | 让第三方验证 bundle 没被篡改 | +| `signature_count` | bundle 上可见签名数量 | manifest / signature material | 是 | 让验证者知道当前证据带不带签名以及有几个 | +| `anchor_type` | 外部锚定机制类型,如 transparency log / registry | augmentation exporter 生成的可选 binding 
| 否 | 用来声明是否有外部可核验锚点 | +| `anchor_id` | 外部锚定标识 | 外部 anchor / receipt / transparency entry | 否 | 让第三方能追到外部验证对象 | + +## 字段解释上的几个收敛原则 + +- `state_transitions` 应只保留对外可解释的最小状态,不照搬内部所有中间态 +- `flow_type` 只保留验证所需的链路语义,不展开 data plane 实现细节 +- `manifest_digest` 是证据包完整性的最小抓手,不等于 EDC 内部对象哈希总表 +- `signature_count` 可以为 `0`,但字段本身保留,避免验证者猜测有没有签名层 + +## 哪些字段故意不带 + +下面这些内容这版故意不带: + +- secrets +- `privateProperties` +- vault key / secret alias +- callback `authKey` / `authCodeId` +- EDR token、临时访问令牌、连接凭证 +- `DataAddress` 的内部实现细节 +- JDBC 表主键、行版本号、重试计数器、调度器元数据 +- connector 内部线程、队列、事务日志 +- 任意会把验证者绑定到某个具体部署实现的内部字段 + +原因很简单: + +- 这些字段不是独立验证最小闭环所必需 +- 它们容易泄露安全信息 +- 它们会把 profile 从“可移植证据”拉成“实现内省导出” + +## 如何支持第三方 independent verification,而不依赖 EDC 内部日志 + +目标不是让第三方接进 EDC 数据库或日志系统,而是给第三方一个足够小、但能 +闭环验证的 evidence bundle。 + +最小做法是: + +- 由 control-plane event extension / exporter 导出一个 bundle +- bundle 内至少包含上表中的最小字段 +- `state_transitions` 使用事件时间戳和状态名表达链路推进 +- `manifest_digest` 保护导出物本身 +- 如有签名或外部锚定,再通过 `signature_count`、`anchor_type`、`anchor_id` + 暴露出来 + +第三方验证时至少做四件事: + +1. 检查字段完整性和必填项 + 看 participant、asset、policy、contract、transfer 是否闭合。 + +2. 检查关联闭合 + 看 `asset_id`、`policy_definition_id`、`contract_definition_id`、 + `contract_agreement_id`、`transfer_process_id` 是否属于同一条交换链路。 + +3. 检查时间与状态一致性 + 看 `state_transitions` 是否合理,`started_at`、`completed_at`、 + `terminated_at` 是否与状态路径相符。 + +4. 
检查证据完整性 + 重算或核对 `manifest_digest`,并在存在签名或锚定时继续核对外部材料。 + +这意味着验证者拿到 bundle 后,不需要访问 EDC 内部日志,只需要: + +- 证据包本身 +- 公开或可共享的 agreement / transfer 标识 +- 可选的外部签名或锚定材料 + +## 当前建议的最小成功链路 + +最先支持的不是所有状态,而是一条最小成功链路: + +- contract agreement 已生成 +- transfer process 已启动 +- transfer process 已完成 +- evidence bundle 已导出 +- 第三方可独立核对 manifest 和关联字段 + +终止态、暂停态、恢复态可以作为下一轮扩展,而不是这轮的门槛。 + +## 官方参考 + +以下链接为 2026-04-12 检索时使用的官方入口: + +- Control Plane + [https://eclipse-edc.github.io/documentation/for-adopters/control-plane/](https://eclipse-edc.github.io/documentation/for-adopters/control-plane/) + +- Extensions / `ServiceExtension` + [https://eclipse-edc.github.io/documentation/for-adopters/extensions/](https://eclipse-edc.github.io/documentation/for-adopters/extensions/) + +- Events / `EventRouter` / callbacks + [https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/](https://eclipse-edc.github.io/documentation/for-contributors/runtime/service-layers/) + +- Transfer callbacks and event names + [https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/](https://eclipse-edc.github.io/documentation/for-contributors/control-plane/entities/) + +- DSP scope + [https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/](https://eclipse-dataspace-protocol-base.github.io/DataspaceProtocol/HEAD/) diff --git a/docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md b/docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md new file mode 100644 index 0000000..a312988 --- /dev/null +++ b/docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md @@ -0,0 +1,111 @@ +# FDO Operation Evidence Profile Registration Pack + +This note turns the current repository package into one FDO-facing registration +pack without creating a second implementation surface. + +## 1. 
Naming Boundary + +Use the following names consistently: + +| Purpose | Value | +| --- | --- | +| Repository canonical package name | `Execution Evidence and Operation Accountability Profile v0.1` | +| Repository machine-readable profile id | `execution-evidence-operation-accountability-profile@0.1` | +| Proposed FDO-facing object name | `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` | +| Proposed FDO-facing object type | `Profile` | + +The FDO-facing name is an external registration label. It should point to the +current canonical package already implemented in this repository. It should not +trigger a rename of the existing schema, validator, examples, or demo. + +## 1.1 Relationship To `ARO_AUDIT_PROFILE_V1` + +Use the following external naming relationship: + +| Object | Role | +| --- | --- | +| `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` | operation-level evidence and validation profile for one agent or service operation | +| `ARO_AUDIT_PROFILE_V1` | audit-facing sibling profile used to declare audit-ready object support and audit pointers | + +They are related but not interchangeable: + +- `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` focuses on one bounded operation + accountability statement. +- `ARO_AUDIT_PROFILE_V1` focuses on audit-ready object declaration and audit + trace support. +- The new FDO-facing object should be presented as complementary to + `ARO_AUDIT_PROFILE_V1`, not as a rename or replacement. + +## 2. 
Current Repository Assets To Link + +The current repository already contains the minimum package needed for an +FDO-facing registration: + +| Purpose | File | +| --- | --- | +| Spec | `spec/execution-evidence-operation-accountability-profile-v0.1.md` | +| Schema | `schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` | +| Valid example | `examples/minimal-valid-evidence.json` | +| Invalid examples | `examples/invalid-missing-required.json`, `examples/invalid-unclosed-reference.json`, `examples/invalid-policy-link-broken.json` | +| Validator implementation | `agent_evidence/oap.py` | +| Validator CLI entry | `agent_evidence/cli/main.py` | +| Demo | `demo/run_operation_accountability_demo.py` | +| Demo guide | `demo/README.md` | +| Repo overview | `README.md` | + +If a dedicated GitHub repository is created later, these files are the minimal +subset to copy first. + +## 3. Mapping From Flat Outreach Terms To Canonical Fields + +The flat example often used in outreach is narrower than the canonical +statement shape. The mapping below keeps the external explanation short while +preserving the actual implemented structure. + +| Flat outreach term | Canonical field path | +| --- | --- | +| `operation_id` | `operation.id` | +| `agent_id` | `actor.id` | +| `input_hash` | `evidence.references[]` entry with `role = "input"` -> `digest` | +| `output_hash` | `evidence.references[]` entry with `role = "output"` -> `digest` | +| `operation_type` | `operation.type` | +| `policy_reference` | `policy.id` and `operation.policy_ref` | +| `provenance_chain` | `provenance.*` links plus `evidence.artifacts[]` | +| `signature` | not a core v0.1 field; use local integrity digests and optional `validation.trust_bindings[]` | +| `verification_result` | `validation.status` plus validator report `ok` | + +## 4. 
Suggested FDO Testbed Registration Text + +Recommended object name: + +`FDO_OPERATION_EVIDENCE_PROFILE_V0_1` + +Recommended description: + +`A minimal profile for recording and validating one policy-constrained agent operation with explicit policy, provenance, evidence, and validation links.` + +Recommended type: + +`Profile` + +Recommended GitHub landing surface: + +- repository root `README.md` +- spec document +- schema file +- valid example +- validator entry +- demo guide + +## 5. Manual External Steps Still Required + +The repository can prepare the registration pack, but these actions still +require manual completion outside the local workspace: + +1. Create or choose the GitHub repository that will host the public package. +2. Log in to the FDO Testbed Type Registry. +3. Submit the new object entry and paste the public GitHub links. +4. Share the resulting registry URL in the outreach email and proposal note. + +Until those actions are completed, treat this pack as submission-ready draft +material rather than an already registered object. diff --git a/docs/high-risk-scenario-entry.md b/docs/high-risk-scenario-entry.md new file mode 100644 index 0000000..62a76de --- /dev/null +++ b/docs/high-risk-scenario-entry.md @@ -0,0 +1,33 @@ +# High-Risk Scenario Entry + +This note adds one discoverable, reviewer-facing high-risk scenario to the +current AEP v0.1 path without changing the core package boundary. + +## Scenario + +The scenario is one flagged payment review: + +- one payment case subject +- two input references +- one review decision output +- one policy-constrained operation accountability statement + +## Files + +- `examples/valid-high-risk-payment-review-evidence.json` +- `examples/invalid-high-risk-unclosed-reference.json` +- `examples/invalid-high-risk-policy-link-broken.json` + +## Boundary + +This is not a payment engine, settlement system, fraud model, or compliance +control plane. 
It is a minimal operation-accountability surface for a +high-risk, reviewer-facing setting. + +## Validate + +```bash +agent-evidence validate-profile examples/valid-high-risk-payment-review-evidence.json +agent-evidence validate-profile examples/invalid-high-risk-unclosed-reference.json +agent-evidence validate-profile examples/invalid-high-risk-policy-link-broken.json +``` diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..caf35e8 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,94 @@ +# Quickstart + +This quickstart uses one existing runnable path only: `examples/langchain_minimal_evidence.py`. + +It does not stitch together the demo path and the profile-validator path. The goal is a first run that produces the normalized outputs used in this repository today: + +- `bundle` +- `receipt` +- `summary` + +## 1. Prerequisites + +- Python `3.11+` +- No model API key is required for this path +- On Python `3.14`, `langchain_core` may emit a non-blocking warning during the run + +## 2. Install + +From the repository root: + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -e ".[langchain,signing]" +``` + +## 3. Run One Runnable Example + +Choose an output directory and run the existing minimal LangChain example: + +```bash +OUTPUT_DIR=/tmp/agent-evidence-quickstart +python examples/langchain_minimal_evidence.py --output-dir "$OUTPUT_DIR" +``` + +What this step already does: + +- captures local LangChain runtime events +- exports a signed `bundle` +- verifies the exported bundle in-process +- writes a `summary` + +## 4. Export Step + +There is no separate export command in this quickstart path. The example above already performs the export step and writes the `bundle`. + +Confirm that the `bundle` exists: + +```bash +test -f "$OUTPUT_DIR/langchain-evidence.bundle.json" +``` + +## 5. 
Verify Step + +Generate a standalone `receipt` from the exported `bundle`: + +```bash +agent-evidence verify-export \ + --bundle "$OUTPUT_DIR/langchain-evidence.bundle.json" \ + --public-key "$OUTPUT_DIR/manifest-public.pem" \ + > "$OUTPUT_DIR/receipt.json" +``` + +## 6. Summary / Review Step + +Review the generated `summary` and `receipt`: + +```bash +sed -n '1,120p' "$OUTPUT_DIR/summary.json" +sed -n '1,120p' "$OUTPUT_DIR/receipt.json" +``` + +## 7. Expected Output Locations + +Primary outputs: + +- `bundle`: `$OUTPUT_DIR/langchain-evidence.bundle.json` +- `receipt`: `$OUTPUT_DIR/receipt.json` +- `summary`: `$OUTPUT_DIR/summary.json` + +Supporting files produced by the same run: + +- manifest: `$OUTPUT_DIR/langchain-evidence.manifest.json` +- verification key: `$OUTPUT_DIR/manifest-public.pem` +- local runtime capture: `$OUTPUT_DIR/runtime-events.jsonl` + +## 8. Smoke Checklist + +- `pip install -e ".[langchain,signing]"` completes successfully +- `python examples/langchain_minimal_evidence.py --output-dir "$OUTPUT_DIR"` exits with `ok: true` +- `$OUTPUT_DIR/langchain-evidence.bundle.json` exists +- `agent-evidence verify-export ... > "$OUTPUT_DIR/receipt.json"` exits successfully +- `$OUTPUT_DIR/receipt.json` contains `"ok": true` +- `$OUTPUT_DIR/summary.json` exists and contains the generated review summary diff --git a/docs/release/v0.1-rc-release-notes.md b/docs/release/v0.1-rc-release-notes.md new file mode 100644 index 0000000..df452e9 --- /dev/null +++ b/docs/release/v0.1-rc-release-notes.md @@ -0,0 +1,190 @@ +# agent-evidence v0.1 RC + +Scope: bounded release candidate for `agent-evidence` only. + +## 1. Release Title And Scope + +`agent-evidence v0.1 RC` is the first bounded release candidate for the current +single-repo product surface. + +This release candidate is intentionally narrow: + +- local-first +- adapter-first +- review-pack-enabled +- no hosted control plane +- no new canonical artifact type beyond `bundle`, `receipt`, and `summary` + +## 2. 
What Is New In v0.1 RC + +This release candidate brings the current repository into one coherent product +line: + +- a developer-product README and bounded quickstart +- one recommended LangChain integration entry point: + `LangChainAdapter` +- one recommended OpenAI-compatible integration entry point: + `OpenAICompatibleAdapter` +- one Review Pack path with: + - assembler + - deterministic renderer + - developer-facing example + - smoke gate + +The result is one bounded workflow that can capture a run, export it, verify +it, and package it for review without expanding into a hosted platform or a +larger governance system. + +## 3. Primary Supported Surfaces + +### Quickstart + +Current bounded first-run path: + +- install from source +- run `examples/langchain_minimal_evidence.py` +- produce `bundle` +- verify to produce `receipt` +- review `summary` + +Reference: + +- `docs/quickstart.md` + +### LangChainAdapter + +Recommended entry point: + +- `agent_evidence.integrations.langchain.LangChainAdapter` + +Current role: + +- capture LangChain runtime events +- export a signed JSON `bundle` +- verify and write `receipt` +- write reviewer-facing `summary` + +### OpenAICompatibleAdapter + +Recommended entry point: + +- `agent_evidence.integrations.openai_compatible.OpenAICompatibleAdapter` + +Current role: + +- wrap provider calls without moving provider logic into core evidence logic +- keep config propagation bounded +- preserve the same `bundle` / `receipt` / `summary` contract + +### Review Pack Path + +Current path: + +- `agent_evidence.review_pack.ReviewPackAssembler` +- `agent_evidence.review_pack.ReviewPackRenderer` +- `examples/review_pack/build_review_pack.py` + +Current role: + +- assemble a stable review pack from existing artifacts +- render deterministic `review/report.md` +- keep supporting files optional +- exclude private keys by default + +## 4. 
Artifact Contract Boundary + +The primary artifact contract remains: + +- `bundle` +- `receipt` +- `summary` + +Supporting files remain supporting only: + +- manifest sidecar +- verification public key +- runtime JSONL capture +- local signing private key + +Source-of-truth boundaries remain intact: + +- evidence payload lives in `bundle` +- machine-readable verification facts live in `receipt` +- reviewer-facing orientation lives in `summary` +- Review Pack layout and renderer labels stay presentation-only + +## 5. Validation And Test Status + +Current bounded release-path gates are in place: + +- `tests/test_quickstart_smoke.py` +- `tests/test_langchain_adapter.py` +- `tests/test_langchain_integration.py` +- `tests/test_openai_compatible_adapter.py` +- `tests/test_review_pack_assembler.py` +- `tests/test_review_pack_renderer.py` +- `tests/test_review_pack_example_smoke.py` + +Current release-gate result: + +- `15 passed` + +That gate covers: + +- quickstart path +- LangChain wrapper path +- OpenAI-compatible wrapper path +- Review Pack assembler +- Review Pack renderer +- Review Pack example-level smoke path + +## 6. Known Limitations + +- no Review Pack CLI +- no OpenAI-compatible CLI +- no hosted delivery +- no live-provider tests +- no cross-repo integration +- no LangGraph-specific implementation surface +- no claim of full platform coverage + +Current warning note: + +- Python 3.14 currently surfaces non-blocking `langchain_core` compatibility and + deprecation warnings in the bounded test path + +## 7. Explicit Non-Goals + +- no schema changes +- no exporter expansion +- no new canonical artifact type +- no hosted audit plane +- no provider-specific business logic in core +- no Review Pack promotion to the main quickstart or README entry at this stage + +## 8. 
Upgrade / Adoption Notes + +This is the first bounded release candidate, so adoption guidance is simple: + +- install from source +- start with the LangChain quickstart path +- treat `bundle`, `receipt`, and `summary` as the only primary outputs +- treat supporting files as optional adjunct material +- use Review Pack only as a packaging/rendering layer above those artifacts + +For OpenAI-compatible adoption: + +- use the provider-agnostic wrapper +- keep provider client logic outside core evidence logic +- treat current examples as bounded configuration surfaces, not as a claim of + live-provider certification + +## 9. Recommended Next Follow-Ups After RC + +At most three low-risk follow-ups should be prioritized next: + +1. Reduce the current Python 3.14 `langchain_core` warning surface. +2. Add one bounded release-facing note or packaging step around the `v0.1` RC + position. +3. Harden non-live OpenAI-compatible edge-case tests without changing the + artifact contract. diff --git a/docs/release/v0.1-rc-ship-checklist.md b/docs/release/v0.1-rc-ship-checklist.md new file mode 100644 index 0000000..3076580 --- /dev/null +++ b/docs/release/v0.1-rc-ship-checklist.md @@ -0,0 +1,186 @@ +# v0.1 RC Ship Checklist + +Scope: bounded `agent-evidence v0.1 RC` only. + +This checklist defines the exact pre-ship, publish, and rollback steps for the +current release candidate. It assumes the current `v0.1` release gate and RC +release notes are already accepted. + +## 1. RC Tag Target + +### Proposed tag name + +- `v0.1.0-rc1` + +### Release title + +- `agent-evidence v0.1.0 RC1` + +### Scope statement + +- bounded single-repo release candidate +- local-first +- adapter-first +- review-pack-enabled +- no hosted control plane +- no new canonical artifact type beyond `bundle`, `receipt`, and `summary` + +## 2. 
Required Pre-Ship Checks + +### Exact test command already considered sufficient + +Run this exact bounded release-path suite: + +```bash +python -m pytest \ + tests/test_quickstart_smoke.py \ + tests/test_langchain_adapter.py \ + tests/test_langchain_integration.py \ + tests/test_openai_compatible_adapter.py \ + tests/test_review_pack_assembler.py \ + tests/test_review_pack_renderer.py \ + tests/test_review_pack_example_smoke.py +``` + +Ship target for this suite: + +- all tests pass +- no new failing warnings beyond the currently known Python 3.14 + `langchain_core` warnings + +### Doc checks + +Confirm these docs exist and match the same bounded story: + +- `README.md` +- `docs/quickstart.md` +- `docs/artifacts/artifact-contract-draft.md` +- `docs/reports/v0.1-release-gate.md` +- `docs/release/v0.1-rc-release-notes.md` + +### Artifact boundary checks + +Confirm all release-facing materials preserve these boundaries: + +- primary outputs remain: + - `bundle` + - `receipt` + - `summary` +- supporting files remain supporting only: + - manifest sidecar + - verification public key + - runtime JSONL capture + - local signing private key +- Review Pack renderer/layout fields do not become schema fields +- private key remains excluded by default from Review Pack + +## 3. 
Release Assets + +### Should be linked in the release draft + +- `docs/release/v0.1-rc-release-notes.md` +- `docs/reports/v0.1-release-gate.md` +- `docs/quickstart.md` +- `docs/artifacts/artifact-contract-draft.md` +- `docs/reports/review-pack-scope.md` + +### Should be included in the release framing + +- quickstart path +- `LangChainAdapter` +- `OpenAICompatibleAdapter` +- Review Pack path +- bounded artifact contract: + - `bundle` + - `receipt` + - `summary` + +### Should explicitly not be included in the release framing + +- hosted delivery claims +- live-provider certification claims +- cross-repo integration claims +- LangGraph-specific implementation claims +- Review Pack CLI claims +- OpenAI-compatible CLI claims +- any statement that supporting files are primary outputs + +## 4. Known Warnings And Limitations + +- Python 3.14 currently emits non-blocking `langchain_core` compatibility and + deprecation warnings in the bounded test path +- no hosted delivery +- no live-provider tests +- no CLI expansion +- no cross-repo integration +- no LangGraph-specific implementation surface + +These limitations are acceptable for the bounded RC and should be stated +explicitly, not hidden. + +## 5. Publish Sequence + +### 1. Local verification + +- ensure the working tree matches the intended RC contents +- run the bounded release-path test suite +- confirm release docs are present and internally consistent + +### 2. Tag + +- create the RC tag: + - `v0.1.0-rc1` + +### 3. GitHub release draft + +- create a draft release tied to `v0.1.0-rc1` +- title it: + - `agent-evidence v0.1.0 RC1` +- use the bounded scope statement from this checklist and the RC notes + +### 4. Release notes attachment + +- attach or paste the contents of: + - `docs/release/v0.1-rc-release-notes.md` +- link the release gate and quickstart docs + +### 5. 
Post-publish verification + +- confirm the tag resolves to the intended commit +- confirm the GitHub release draft shows the right title and notes +- confirm linked docs paths are valid in the tagged tree +- confirm the published framing still uses `bundle` / `receipt` / `summary` + as the only primary outputs + +## 6. Rollback Criteria + +### Block shipping if any of these are true before tag/release + +- the bounded release-path test suite fails +- release-facing docs disagree on primary outputs or scope +- supporting files are presented as primary outputs +- Review Pack is described as a new canonical artifact type +- release framing implies hosted delivery, live-provider validation, or + cross-repo integration that the repo does not provide + +### Withdraw or redo the tag/release if any of these are discovered immediately after publish + +- the release notes materially misstate current supported surfaces +- the tagged tree is not the intended RC commit +- linked docs for quickstart, release notes, or artifact contract are broken +- the published release text breaks the bounded artifact contract + +## 7. Immediate Post-RC Follow-Ups + +At most three follow-ups should be kept in scope immediately after RC: + +1. Reduce the current Python 3.14 `langchain_core` warning surface. +2. Add one minimal packaging/publish note around the RC without changing the + artifact contract. +3. Harden non-live OpenAI-compatible edge-case tests. + +Non-goals for those follow-ups: + +- no new architecture work +- no schema changes +- no hosted delivery work diff --git a/docs/reports/langchain-adapter-inventory.md b/docs/reports/langchain-adapter-inventory.md new file mode 100644 index 0000000..eda6661 --- /dev/null +++ b/docs/reports/langchain-adapter-inventory.md @@ -0,0 +1,322 @@ +# LangChain Adapter Inventory + +Scope: `agent-evidence` only. + +Method: local inventory of current LangChain / LangGraph-related code, docs, examples, CLI references, and tests. 
This document distinguishes between current working surfaces and a proposed future adapter API. It does not change code, schema, tests, or directory structure. + +## 1. Current LangChain / LangGraph Touchpoints + +### Current LangChain touchpoints + +| Path | Surface type | Current role | Notes | +| --- | --- | --- | --- | +| `pyproject.toml` | packaging | declares `langchain` extra | Current install surface is `pip install -e ".[langchain,signing]"`; no `langgraph` extra exists. | +| `README.md` | product entry | names LangChain / LangGraph as the first integration priority | Product-facing priority statement only. | +| `docs/quickstart.md` | active product doc | current first-run path | Uses `examples/langchain_minimal_evidence.py` plus `agent-evidence verify-export`. | +| `docs/cookbooks/langchain_minimal_evidence.md` | active how-to doc | detailed LangChain-first recipe | Explicitly says the path stays outside LangGraph persistence and checkpointer internals. | +| `examples/langchain_minimal_evidence.py` | runnable example | current de facto product path | Callback capture -> local JSONL -> JSON `bundle` -> in-process verify -> `summary`. | +| `agent_evidence/integrations/langchain.py` | integration module | current LangChain capture primitives | Exposes stream-event normalization helpers and `EvidenceCallbackHandler`. | +| `agent_evidence/integrations/__init__.py` | public integration exports | re-exports LangChain helpers | Makes LangChain surfaces available from `agent_evidence.integrations`. | +| `agent_evidence/__init__.py` | public package exports | exposes generic record/export/verify primitives | Current example path imports generic primitives from here, not from a LangChain-specific adapter entry. | +| `integrations/langchain/export_evidence.py` | runnable integration helper | alternate LangChain path via `EvidenceBundleBuilder` | Produces an AEP bundle directory, not the current JSON `bundle` quickstart surface. 
| +| `integrations/langchain/README.md` | helper doc | documents the AEP bundle path | Uses `verify-bundle`; this is parallel to, not the same as, the quickstart path. | +| `agent_evidence/cli/main.py` | CLI | generic verify/export commands used by LangChain paths | No LangChain-specific CLI subcommand exists. | +| `tests/test_langchain_integration.py` | test | lower-level LangChain coverage | Covers callback capture, stream event normalization, AEP bundle builder path, and example smoke. | +| `tests/test_quickstart_smoke.py` | test | current first-run smoke gate | Covers the docs quickstart path only. | +| `tests/test_aep_profile.py` and `tests/fixtures/agent_evidence_profile/*` | test / fixtures | supporting AEP bundle verification path | LangChain-labeled payloads exist here, but this is not the current quickstart product path. | + +### Current LangGraph touchpoints + +Actual LangGraph-specific implementation surfaces are absent. + +Current LangGraph-related references are limited to: + +- `README.md` + - priority statement: `LangChain / LangGraph` first +- `docs/cookbooks/langchain_minimal_evidence.md` + - explicit boundary: the current path stays outside LangGraph persistence and checkpointer internals + - explicit non-goal: this is not a LangGraph persistence or checkpointer integration + +There is currently: + +- no `langgraph` dependency in `pyproject.toml` +- no `agent_evidence/integrations/langgraph.py` +- no LangGraph example +- no LangGraph-specific test +- no LangGraph-specific CLI surface + +## 2. 
Current Responsibilities by Surface + +| Surface | Capture | Export | Verify | Summary / review | Supporting materials | +| --- | --- | --- | --- | --- | --- | +| `agent_evidence/integrations/langchain.py` | Yes | Indirectly, via `bundle_builder` branch in callback handler | No direct CLI-level verify | No | tags, event mapping, span metadata, redaction flags | +| `examples/langchain_minimal_evidence.py` | Yes, via `EvidenceCallbackHandler(recorder=...)` | Yes, via `export_json_bundle` | Yes, via `verify_json_bundle` and later `verify-export` command | Yes, writes `summary.json` | key generation, manifest sidecar, runtime JSONL | +| `docs/quickstart.md` | Documents current path | Documents current path | Documents explicit `verify-export` step | Documents review step | names manifest, key, runtime capture as supporting files | +| `docs/cookbooks/langchain_minimal_evidence.md` | Documents callback capture | Documents signed JSON bundle export | Documents offline verify | Documents reviewer-oriented output artifacts | installation notes, artifact locations, boundary notes | +| `integrations/langchain/export_evidence.py` | Yes, via `EvidenceCallbackHandler(bundle_builder=...)` | Yes, via `EvidenceBundleBuilder.write_bundle()` | Yes, via `verify_bundle` | No normalized `summary` output | AEP bundle directory contents | +| `integrations/langchain/README.md` | Documents AEP callback path | Documents AEP bundle write | Documents `verify-bundle` | No | points to fixture gate | +| `agent_evidence/cli/main.py` | No LangChain-specific capture | Generic export of stored records | Generic `verify-export` and `verify-bundle` | JSON verification result only | keyring, signer config, archive packaging | +| `tests/test_langchain_integration.py` | Yes | Yes | Yes | Partial | validates callback/event surfaces and example behavior | +| `tests/test_quickstart_smoke.py` | Yes, by executing the example | Yes, by asserting `bundle` exists | Yes, via `verify-export` | Yes, by asserting 
`summary` exists | subprocess and temp output directory | + +## 3. Which Surfaces Are Active, Helper, or Confusing + +### Active product surface + +- `README.md` + - current product entry and priority statement +- `docs/quickstart.md` + - current first-run path +- `docs/cookbooks/langchain_minimal_evidence.md` + - current detailed LangChain-first recipe +- `examples/langchain_minimal_evidence.py` + - current runnable example for `bundle / receipt / summary` +- `agent_evidence/integrations/langchain.py` + - current capture primitive +- `tests/test_quickstart_smoke.py` + - current smoke gate for the first-run path + +### Helper / example surface + +- `tests/test_langchain_integration.py` + - low-level coverage and API characterization +- `agent_evidence/integrations/__init__.py` + - public re-export layer +- `agent_evidence/__init__.py` + - generic primitives that the example currently composes manually +- `pyproject.toml` + - install-time support surface + +### Legacy / duplicated / confusing surface + +- `integrations/langchain/export_evidence.py` + - alternate LangChain path that writes an AEP bundle directory +- `integrations/langchain/README.md` + - documents the alternate AEP bundle path, not the quickstart path +- `agent_evidence.aep.EvidenceBundleBuilder` + - still active in tests and the integration helper, but not aligned with the current `bundle / receipt / summary` quickstart language +- `verify-bundle` + - valid command, but it verifies the alternate AEP bundle line rather than the current quickstart JSON `bundle` +- `README.md` and docs using `LangChain / LangGraph` together + - currently overstate parity because only LangChain has real implementation surfaces +- `examples/README.md` + - current primary example index does not mention `examples/langchain_minimal_evidence.py`, which makes the actual first-run path harder to discover + +## 4. 
Current De Facto Integration Path + +### What a developer must actually do today + +The current first-run path is: + +1. install the LangChain + signing extras +2. import generic primitives from `agent_evidence` +3. import `EvidenceCallbackHandler` from `agent_evidence.integrations` +4. attach the handler to LangChain callbacks +5. run the LangChain workflow +6. export a JSON `bundle` +7. verify that `bundle` +8. write a `summary` + +In code, that means the current de facto path is composed from multiple surfaces: + +```python +from agent_evidence import ( + EvidenceRecorder, + LocalEvidenceStore, + export_json_bundle, + verify_json_bundle, +) +from agent_evidence.integrations import EvidenceCallbackHandler +``` + +Then the example itself is still responsible for: + +- output directory creation +- signing key generation +- manifest sidecar path selection +- receipt generation or re-verification command construction +- summary file construction + +### Where that path is fragmented + +- The active capture primitive is LangChain-specific, but export and verify primitives are generic top-level functions. +- There is no single adapter object that owns the whole LangChain path. +- The repo exposes two parallel LangChain output lines: + - quickstart line: JSON `bundle` + `verify-export` + - AEP line: bundle directory + `verify-bundle` +- There is no dedicated LangChain CLI entry such as `agent-evidence langchain ...`. +- Discovery is split across `README.md`, `docs/quickstart.md`, `docs/cookbooks/langchain_minimal_evidence.md`, and `integrations/langchain/README.md`. +- `examples/README.md` does not surface the current LangChain example at all. + +## 5. Missing or Weak Integration Points + +### API gaps + +- No single LangChain adapter entry point wraps capture, export, verify, and summary generation. +- No reusable summary helper exists for the current LangChain-first path; `examples/langchain_minimal_evidence.py` assembles the summary inline. 
+- No reusable quickstart-oriented signer/output configuration helper exists; the example owns key generation and output naming. +- Stream-event normalization and callback capture are exposed as separate helper surfaces without one recommended primary API. + +### Naming gaps + +- `bundle` means two different implementation shapes today: + - JSON export artifact in the quickstart path + - AEP bundle directory in the older path +- `verify-export` and `verify-bundle` both look like primary verification commands, but they serve different artifact lines. +- `LangChain / LangGraph` is used as one priority label even though only LangChain has implemented surfaces. + +### Runtime / config gaps + +- No canonical config object exists for output directory, signing, redaction, or summary behavior in the current LangChain path. +- The example assumes local PEM key generation inside the example itself. +- The example uses direct callback method calls for the mocked model step rather than one reusable adapter lifecycle. + +### Test gaps + +- Quickstart smoke coverage now exists, which is good. +- There is still no test for one unified LangChain adapter API because that API does not exist yet. +- There is no LangGraph-specific test surface. +- There is no regression test asserting that the documented current example index points to the LangChain quickstart path. + +### Documentation gaps + +- `examples/README.md` omits the current LangChain runnable example. +- `integrations/langchain/README.md` documents a valid but alternate path without clearly marking it as secondary to the quickstart path. +- The repo does not yet have one explicit inventory document separating: + - active LangChain path + - alternate AEP LangChain path + - LangGraph not-yet-implemented scope + +## 6. Proposed Minimal Unified Adapter API + +This section is a recommendation for B2. It is not a description of current code. 
+ +### Recommendation + +Use one stateful adapter object in `agent_evidence.integrations.langchain` as the single recommended entry point: + +- `LangChainAdapter` +- `LangChainArtifacts` + +### Recommended entry points only + +```python +from agent_evidence.integrations.langchain import LangChainAdapter + +adapter = LangChainAdapter.for_output_dir( + output_dir="./artifacts/langchain-run", + digest_only=True, + omit_request=False, + omit_response=False, +) + +callbacks = [adapter.callback_handler()] + +# user runs LangChain / LangGraph workflow with these callbacks + +artifacts = adapter.finalize() +``` + +### Suggested API shape + +`LangChainAdapter.for_output_dir(...)` + +Inputs: + +- `output_dir` +- optional signing material or signer config +- optional redaction flags: + - `digest_only` + - `omit_request` + - `omit_response` +- optional callback behavior: + - `capture_stream_tokens` +- optional default metadata/tags + +Behavior: + +- internally creates the current recommended capture path +- owns the local store, export step, verify step, and summary write +- returns one normalized artifact result + +`adapter.callback_handler()` + +Output: + +- one callback handler to place into LangChain or LangGraph callback configuration + +`adapter.finalize()` + +Output: + +- `LangChainArtifacts` + +Suggested `LangChainArtifacts` fields: + +- `bundle_path` +- `receipt` +- `receipt_path` +- `summary` +- `summary_path` +- `supporting_files` + +### How outputs stay normalized + +The adapter should normalize outputs to: + +- `bundle` + - exported JSON artifact path +- `receipt` + - machine-readable verification result +- `summary` + - reviewer-facing output + +Supporting files such as manifest sidecars, keys, and runtime JSONL should remain supporting files, not first-class product outputs. + +### Why this is the right minimum + +- It matches the current quickstart path instead of inventing a new artifact line. 
+- It keeps `EvidenceCallbackHandler` as the underlying capture primitive rather than replacing it. +- It gives LangGraph a place to land later without promising a separate API now; if LangGraph continues to use LangChain callback semantics, it can use the same adapter entry point. +- It avoids exposing developers to the current choice between generic record/export primitives and the older `EvidenceBundleBuilder` line. + +## 7. Lowest-Risk Implementation Sequence for B2 + +### Implement first + +1. Add the one recommended adapter wrapper in `agent_evidence.integrations.langchain`. +2. Build it on top of the current quickstart path only: + - callback capture + - local JSONL store + - `export_json_bundle` + - `verify_json_bundle` + - summary write +3. Update `examples/langchain_minimal_evidence.py` to use that wrapper internally. +4. Add focused tests for the new adapter object while keeping the current quickstart smoke gate. + +### Explicitly wait + +- any LangGraph persistence / checkpointer integration +- any LangGraph-specific adapter module +- any OpenAI-compatible unification work +- any new CLI surface for LangChain +- any attempt to merge the JSON quickstart path and the AEP bundle directory path in one patch +- any cross-repo demo synchronization + +### What should not be changed + +- canonical schema +- normalized output language: `bundle / receipt / summary` +- current quickstart smoke semantics +- current generic export/verify functions as underlying building blocks +- current `EvidenceCallbackHandler` event mapping unless a bug is found + +## 8. 
Non-Goals + +- no schema changes +- no exporter expansion +- no cross-repo changes +- no OpenAI-compatible work yet +- no demo repo changes yet +- no directory moves +- no attempt to productize LangGraph before the repo has real LangGraph surfaces diff --git a/docs/reports/openai-compatible-inventory.md b/docs/reports/openai-compatible-inventory.md new file mode 100644 index 0000000..3112808 --- /dev/null +++ b/docs/reports/openai-compatible-inventory.md @@ -0,0 +1,380 @@ +# OpenAI-Compatible Inventory + +Scope: `agent-evidence` only. + +Method: local inventory of current OpenAI / provider-related code, docs, examples, CLI references, and tests. This document distinguishes between current working helper surfaces and a proposed future OpenAI-compatible adapter API. It does not change code, schema, tests, or directory structure. + +## 1. Current OpenAI / Provider-Related Touchpoints + +### Product and packaging surfaces + +| Path | Surface type | Current role | Notes | +| --- | --- | --- | --- | +| `README.md` | product entry | names OpenAI-compatible runtimes as the second integration priority | Priority statement only. It does not point to a current runnable OpenAI-compatible path. | +| `pyproject.toml` | packaging | declares `openai-agents` extra | There is no `openai` SDK extra and no `openai-compatible` extra today. | +| `agent_evidence/integrations/__init__.py` | public integration exports | re-exports `openai_agents` helper surfaces | Makes the SDK-specific tracing processor visible from `agent_evidence.integrations`. | + +### Current OpenAI-related implementation surfaces + +| Path | Surface type | Current role | Notes | +| --- | --- | --- | --- | +| `agent_evidence/integrations/openai_agents.py` | integration module | OpenAI Agents SDK tracing processor | Converts OpenAI Agents traces/spans into evidence records through `EvidenceRecorder`. This is SDK-specific, not provider-agnostic. 
| +| `examples/openai_agents/basic_export.py` | runnable helper example | minimal local export path using the OpenAI Agents SDK | Exports a JSON `bundle` and prints verification output, but does not write a normalized `receipt` file or `summary`. | +| `tests/test_openai_agents_integration.py` | test | coverage for the OpenAI Agents tracing processor | Covers trace/span recording, exported summaries, and SDK registration. | + +### Current helper / prototype / legacy surfaces + +| Path | Surface type | Current role | Notes | +| --- | --- | --- | --- | +| `integrations/openai-agents/README.md` | helper doc | older prototype doc for OpenAI Agents export | Explicitly describes a legacy `Execution Evidence Object` wording surface. | +| `integrations/openai-agents/export_evidence.py` | prototype script | converts a mocked OpenAI Agents runtime trace into an older object-model export | Does not use the current `bundle / receipt / summary` product language. | +| `examples/openai-agent-run.json` | sample data | mocked OpenAI Agents runtime trace | Historical/example payload only. | +| `examples/evidence-object-openai-run.json` | sample data | mocked `execution-evidence-object` export | Historical/example payload only. | +| `demo/run_operation_accountability_demo.py` | demo | includes `"runtime": "openai-agents"` in one statement | This is a scenario label inside the profile demo, not an actual provider integration path. | + +### CLI surfaces + +Current CLI surfaces are generic only: + +- `agent-evidence verify-export` +- `agent-evidence verify-bundle` +- `agent-evidence validate-profile` + +There is currently: + +- no `agent-evidence openai ...` subcommand +- no `agent-evidence provider ...` subcommand +- no provider-specific export or capture CLI entry point + +## 2. 
What Already Exists vs What Is Absent + +### What already exists + +- one SDK-specific tracing helper for the OpenAI Agents SDK: + - `AgentEvidenceTracingProcessor` + - `install_openai_agents_processor(...)` +- generic evidence building blocks already used by other paths: + - `EvidenceRecorder` + - `LocalEvidenceStore` + - `export_json_bundle(...)` + - `verify_json_bundle(...)` +- JSON-safe serialization helpers: + - `to_jsonable(...)` + - `ensure_json_object(...)` +- redaction behavior for sensitive fields in `agent_evidence/serialization.py` + - currently includes `api_key`, `authorization`, `token`, and `prompt` +- one OpenAI Agents example and one OpenAI Agents integration test module + +### What is absent + +- no provider-agnostic module such as `agent_evidence/integrations/openai_compatible/` +- no `openai` SDK dependency in `pyproject.toml` +- no provider/client abstraction for: + - `api_key` + - `base_url` + - `model` + - `provider_label` +- no abstraction for raw OpenAI-compatible request/response capture +- no normalized summary writer for the current OpenAI-related example path +- no explicit `receipt.json` writer in the current OpenAI-related example path +- no provider-agnostic compatibility tests +- no active quickstart doc for an OpenAI-compatible first-run path + +### Current response-handling assumptions + +Current OpenAI-related code assumes one thing only: + +- if the OpenAI Agents SDK emits trace/span objects with `.export()` payloads, those payloads can be converted into evidence events + +Current code does not assume or provide: + +- a Responses API wrapper +- a Chat Completions wrapper +- streaming delta aggregation for OpenAI-compatible clients +- tool-call normalization across providers +- a stable provider-neutral response shape + +### Current signing / export / verify reuse points + +The Phase C wrapper should reuse the same stable building blocks already used by the LangChain path: + +- `EvidenceRecorder` +- `LocalEvidenceStore` +- 
`export_json_bundle(...)` +- `verify_json_bundle(...)` + +Those reuse points already produce the repo’s current product outputs: + +- `bundle` +- `receipt` +- `summary` + +## 3. Proposed Minimal Adapter API + +This section is a recommendation for Phase C. It is not a description of current code. + +### Recommendation + +Use one provider-agnostic adapter module: + +- `agent_evidence/integrations/openai_compatible/` + +Use one recommended public entry point: + +- `OpenAICompatibleAdapter` + +Use one normalized artifact result object: + +- `OpenAICompatibleArtifacts` + +### Recommended public API + +```python +from agent_evidence.integrations.openai_compatible import OpenAICompatibleAdapter + +adapter = OpenAICompatibleAdapter.for_output_dir( + output_dir="./artifacts/openai-compatible-run", + provider_label="openai", + model="gpt-4.1-mini", + api_key=os.environ["OPENAI_API_KEY"], + base_url="https://api.openai.com/v1", + digest_only=True, + omit_request=False, + omit_response=False, +) + +response = adapter.record_call( + operation="responses.create", + request={"input": "hello world"}, + invoke=lambda: client.responses.create(model="gpt-4.1-mini", input="hello world"), +) + +artifacts = adapter.finalize() +``` + +### Why this shape is the right minimum + +- It gives Phase C one recommended entry point instead of many provider-specific helpers. +- It keeps provider client invocation outside core business logic. +- It can reuse the current local-first artifact path: + - capture one provider call + - write a JSON `bundle` + - verify the exported `bundle` + - write a `receipt` + - write a `summary` +- It avoids treating the OpenAI Agents SDK tracing processor as the universal Phase C entry point. 
+ +### Suggested API behavior + +`OpenAICompatibleAdapter.for_output_dir(...)` + +Inputs: + +- `output_dir` +- `provider_label` +- `model` +- `api_key` +- `base_url` +- optional redaction flags: + - `digest_only` + - `omit_request` + - `omit_response` +- optional request defaults: + - `temperature` + - `top_p` + - `max_output_tokens` + - `tool_choice` + - `timeout` + +Behavior: + +- stores provider/runtime metadata needed for the evidence context +- records one or more provider calls through a provider-neutral capture method +- keeps the actual provider client object outside core +- owns export, verify, and summary write + +`adapter.record_call(...)` + +Inputs: + +- `operation` +- `request` +- `invoke` +- optional metadata/tags + +Behavior: + +- records a start/end call boundary using generic evidence events +- normalizes request and response payloads through existing serialization helpers +- redacts or digests sensitive payload fields using the adapter settings +- returns the provider response to the caller + +`adapter.finalize()` + +Output: + +- `OpenAICompatibleArtifacts` + +Suggested `OpenAICompatibleArtifacts` fields: + +- `bundle_path` +- `receipt` +- `receipt_path` +- `summary` +- `summary_path` +- `supporting_files` + +### How outputs remain normalized + +The OpenAI-compatible adapter should keep the same product language already used elsewhere in this repo: + +- `bundle` + - exported evidence artifact +- `receipt` + - machine-readable verification result +- `summary` + - reviewer-facing output + +Supporting files such as manifest sidecars, keys, and runtime JSONL should remain supporting files, not additional primary outputs. + +## 4. 
Configuration Model + +### Required configuration + +- `api_key` + - accepted as adapter input + - must never be persisted raw into evidence artifacts + - if it appears in captured structures, current redaction should replace it with `[REDACTED]` +- `base_url` + - first-class setting; may be omitted only when the default OpenAI endpoint is used + - required for non-default OpenAI-compatible providers +- `model` + - required + - should be preserved in evidence metadata/context +- `provider_label` + - required + - should be a stable provider-neutral label such as: + - `openai` + - `azure-openai` + - `vllm` + - `lm-studio` + - `openrouter` + +### Optional request settings + +The adapter should allow provider-neutral pass-through settings such as: + +- `temperature` +- `top_p` +- `max_output_tokens` +- `tool_choice` +- `parallel_tool_calls` +- `timeout` + +These should remain request metadata, not canonical schema fields. + +### What must remain provider-agnostic + +The core adapter should not bake in: + +- Azure deployment naming rules +- provider-specific auth headers beyond the generic `api_key` input +- Responses API as the only supported operation surface +- Chat Completions as the only supported operation surface +- provider-specific retry or rate-limit policy inside core evidence logic + +The core contract should care about: + +- that a provider call happened +- what request/response boundary was captured +- how that call becomes `bundle`, `receipt`, and `summary` + +## 5. 
Risks / Gaps + +### Provider-specific assumptions currently baked into code or docs + +- the only real implementation surface is named `openai_agents`, which is SDK-specific rather than provider-agnostic +- `README.md` says “OpenAI-compatible runtimes”, but the repo currently exposes only an OpenAI Agents SDK helper +- `integrations/openai-agents/export_evidence.py` and `examples/evidence-object-openai-run.json` still use the older `execution-evidence-object` wording +- `demo/run_operation_accountability_demo.py` uses `"runtime": "openai-agents"` as a scenario label, which could be misread as an implemented Phase C runtime path + +### Test gaps + +- no test for a provider-agnostic adapter object +- no test for provider config fields such as `base_url`, `provider_label`, and `model` +- no test for request/response capture across multiple OpenAI-compatible shapes +- no compatibility test for fake OpenAI-compatible clients + +### Artifact-structure risks + +- the current OpenAI Agents example exports a `bundle` and prints verify output, but does not write a normalized `receipt.json` +- the current OpenAI Agents example does not write a `summary` +- older OpenAI-related prototype files still point toward object-model exports rather than the current `bundle / receipt / summary` contract + +### Naming risks + +- the repo currently mixes: + - `openai-agents` + - `openai_agents` + - “OpenAI-compatible runtimes” +- that naming mix can blur the difference between: + - one SDK-specific tracing helper + - one future provider-agnostic adapter line + +### Security / serialization risks + +- current serialization redacts `prompt` globally + - that is safe by default, but it means provider request capture may lose reviewer-facing detail unless the adapter defines a clear digest-only / omit / inline policy +- current code has no explicit boundary yet for which provider config fields are safe to include in `summary` + +## 6. 
Lowest-Risk Implementation Sequence + +### C1 wrapper + +Implement one provider-agnostic wrapper only: + +- module path: + - `agent_evidence/integrations/openai_compatible/` +- public entry: + - `OpenAICompatibleAdapter` +- build it on existing stable primitives only: + - `EvidenceRecorder` + - `LocalEvidenceStore` + - `export_json_bundle(...)` + - `verify_json_bundle(...)` + +### C2 provider config examples + +Add one or two small example surfaces after the wrapper exists: + +- one default OpenAI-compatible example +- one alternate `base_url` example + +These examples should demonstrate config only. They should not introduce provider-specific branching into core. + +### C3 compatibility tests + +Add tests for: + +- adapter artifact generation +- request/response redaction behavior +- config propagation for `provider_label`, `model`, and `base_url` +- fake-client compatibility without live network calls + +### What should explicitly wait + +- any OpenAI-compatible CLI subcommand +- any live-provider integration test +- any cross-repo demo synchronization +- any attempt to merge `openai-agents` tracing and raw client wrapping into one patch +- any schema change + +## 7. Non-Goals + +- no schema changes +- no exporter expansion +- no LangGraph work +- no cross-repo demo changes +- no provider-specific business logic in core +- no attempt to recast `openai-agents` tracing as the final OpenAI-compatible API +- no attempt to absorb the older `execution-evidence-object` prototype surfaces into the Phase C wrapper patch diff --git a/docs/reports/repo-map-audit.md b/docs/reports/repo-map-audit.md new file mode 100644 index 0000000..95006ef --- /dev/null +++ b/docs/reports/repo-map-audit.md @@ -0,0 +1,174 @@ +# Repo Map Audit + +Scope: `agent-evidence` only. + +Method: filesystem scan, `README.md`, `pyproject.toml`, `.github/workflows/`, `docs/lineage.md`, and current directory contents. 
This audit classifies the repository into canonical core, active product surface, and history/frozen material. It does not propose cross-repo changes. + +Current working assumption: the current primary implementation line is the `Execution Evidence and Operation Accountability Profile v0.1` path, while older `Execution Evidence Object` and legacy AEP surfaces remain in-repo for lineage and reproducibility. + +## 1. Current top-level directory map + +Operational directories such as `.git/`, `.venv/`, `.pytest_cache/`, and `.ruff_cache/` are excluded from the product map below. + +| Path | Current role | Dominant class | Audit note | +| --- | --- | --- | --- | +| `.github/` | CI and repo automation | Active support surface | Mixed current CI and legacy prototype checks coexist here. | +| `agent_evidence/` | Installable Python package, CLI, validator, storage, library integrations | Canonical core | This is the main code surface. | +| `demo/` | Runnable single-path demo and generated artifacts | Active product surface | Fits the current minimal closed-loop story. | +| `docs/` | Product docs, status docs, lineage docs, EDC notes, outreach notes | Active product surface, mixed | Keep active, but treat some subtrees as frozen/reference only. | +| `examples/` | Valid/invalid evidence statements and runnable examples | Active product surface | Part of the current verification entry path. | +| `integrations/` | Framework-specific exporter demos | Active product surface | Useful developer surface, but should stay narrow. | +| `paper/` | Manuscript workspaces and flagship planning | Historical / paper / lineage | Important provenance, not primary product entry. | +| `plans/` | Implementation planning | Active support surface | Current execution-control surface for Phase A work. | +| `poster/` | Poster text and visual assets | Historical / outreach | Not part of the active product surface. | +| `proposal/` | Early proposal material | Historical / lineage | Background only. 
| +| `release/` | Release notes, outward positioning, frozen `v0.1-live-chain` package | Historical / frozen | Includes retained frozen package material. | +| `research/` | Research support notes and release materials | Historical / support | Supporting material, not a first-time developer path. | +| `roadmap/` | Roadmap and standardization notes | Historical / planning | Useful background, but not active product surface. | +| `schema/` | Canonical JSON schemas | Canonical core | Normative contract layer. | +| `scripts/` | Gates, verification helpers, historical prototype scripts | Mixed support surface | Highest confusion directory because current and legacy scripts share one path. | +| `speaking/` | Talk scripts and demo speaking notes | Historical / outreach | Not part of the product entry. | +| `spec/` | Canonical profile/spec documents | Canonical core | Holds both current and historical normative text. | +| `submission/` | Submission and handoff package materials | Historical / paper / release | Important for provenance, not for install/run entry. | +| `tests/` | Automated verification and regression tests | Canonical core | Protects current package behavior. | + +## 2. Canonical core directories + +The canonical core is the smallest set of directories that define what `agent-evidence` is as a package and what its current contract means. + +- `agent_evidence/` + - Main installable implementation surface. + - Includes the CLI, validator, storage, bundle verification path, and library-side integrations. +- `spec/` + - Normative profile/spec layer. + - Current primary spec is `execution-evidence-operation-accountability-profile-v0.1.md`. +- `schema/` + - Normative schema layer. + - Current primary schema is `execution-evidence-operation-accountability-profile-v0.1.schema.json`. +- `tests/` + - Regression guardrail for the canonical package. 
+ +Important note: these directories also contain retained historical files, especially `Execution Evidence Object` assets in `spec/` and `schema/`. They remain canonical for lineage, but they are not the current primary surface. + +## 3. Active product surface directories + +These directories are the ones a first-time developer or platform engineer should encounter after the canonical core. + +- `demo/` + - Current single-path demo and artifact output surface. +- `examples/` + - Current valid/invalid examples and profile-facing specimen surface. +- `integrations/` + - Runnable framework exporter demos. + - Current priority should remain narrow: LangChain / LangGraph first, OpenAI-compatible second. +- `docs/` + - Active only in a selective sense. + - Current primary docs appear to be `docs/STATUS.md`, `docs/ACCEPTANCE-CHECKLIST.md`, `docs/high-risk-scenario-entry.md`, and `docs/cookbooks/`. +- `plans/` + - Active coordination surface during the current tightening phase. +- `.github/` + - Active support surface because CI and repo gates shape what is treated as live. + +Important note: `docs/` is not a uniformly active product directory. It currently mixes active product docs with lineage and frozen-reference subtrees. + +## 4. Historical / paper / lineage / frozen asset directories + +These directories matter for provenance, publications, prior framing, and outward communication, but they should not read as the active implementation line. + +- `paper/` + - Manuscript workspaces, flagship planning, and submission assembly. +- `poster/` + - Poster and figure production assets. +- `proposal/` + - Early proposal framing. +- `release/` + - Historical release-facing material, including frozen `v0.1-live-chain`. +- `research/` + - Research support material. +- `roadmap/` + - Standardization and future planning notes. +- `speaking/` + - Talk scripts and presentation aids. +- `submission/` + - Submission and handoff pack material. 
+ +Frozen or lineage-heavy subtrees inside otherwise active directories: + +- `docs/architecture/` + - Historical `Execution Evidence Object` framing. +- `docs/edc/` + - Frozen asset/reference surface, not an active code line. +- `docs/fdo-mapping/` + - Supporting lineage/reference material. +- `docs/outreach/` + - Outward positioning rather than product entry. +- `release/v0.1-live-chain/` + - Frozen legacy AEP package surface. + +Mixed historical files inside otherwise canonical or active paths: + +- `spec/execution-evidence-object.md` +- `schema/execution-evidence-object.schema.json` +- `examples/evidence-object-openai-run.json` +- `scripts/verify_evidence_object.py` +- `scripts/demo_execution_evidence_object.py` + +## 5. Keep / weaken / move / archive recommendations + +### Keep + +- Keep `agent_evidence/`, `spec/`, `schema/`, and `tests/` as the canonical core. +- Keep `examples/`, `demo/`, `integrations/`, and selected `docs/` pages as the active developer surface. +- Keep `plans/` during Phase A because the repo is still tightening its primary entry path. +- Keep historical and paper material in-repo for provenance; this repo clearly serves both product and manuscript evidence functions. + +### Weaken + +- Weaken `paper/`, `poster/`, `proposal/`, `release/`, `research/`, `roadmap/`, `speaking/`, and `submission/` from any primary install/run navigation. +- Weaken `docs/edc/` to explicit frozen-reference status. +- Weaken `docs/architecture/`, `docs/fdo-mapping/`, and `docs/outreach/` from the primary product path. +- Weaken legacy prototype commands and wording that still make `Execution Evidence Object` or older AEP surfaces look co-equal with the current v0.1 path. +- Weaken legacy-only repo gates if they remain visible as if they define the active product contract. + +### Move + +- Do not do broad directory moves in Phase A. 
+- The first future move candidate is inside `scripts/`: separate current support scripts from legacy prototype scripts behind a clear legacy boundary. +- The second future move candidate is a navigation move before it is a filesystem move: route all historical prototype references through `docs/lineage.md` instead of the top-level entry path. + +### Archive + +- Treat `proposal/`, `poster/`, `speaking/`, and completed submission/release packs as archived-from-entry-surface material. +- Archive here means “kept for provenance, but not presented as active product surface,” not “delete” and not “move out of this repository now.” + +## 6. Minimal move plan with lowest-risk sequencing + +1. Freeze the classification first. + - Use this audit as the repo boundary note. + - Do not move directories yet. + +2. Tighten navigation before paths. + - Rewrite the top-level README to point only to the active product path. + - Keep historical and frozen links, but put them behind explicit labels such as lineage, frozen asset, or background. + +3. Keep mixed directories stable during Phase A. + - `docs/` and `scripts/` are mixed, but moving them now creates avoidable churn. + - Use labeling first, not path reshaping first. + +4. After README and quickstart are stable, do only surgical cleanup if still needed. + - Separate legacy prototype scripts from current support scripts. + - Optionally isolate frozen package/release material behind one clearer archive convention. + +5. Defer any large top-level consolidation until after the Phase A gate. + - The current problem is entry confusion, not lack of storage hierarchy. + - Large moves would break links and distract from the current developer-product tightening work. + +## 7. Risks if no changes are made + +- First-time developers will continue to see multiple competing narratives at once: package, prototype, paper, roadmap, release, and outreach. 
+- Historical `Execution Evidence Object` and legacy AEP surfaces will keep reading as if they are equal to the current `v0.1` package line. +- `docs/edc/` and related frozen material can be misread as active implementation commitments. +- Mixed support surfaces such as `scripts/` and `.github/workflows/` will keep blurring which checks define the current canonical contract. +- Future README and quickstart work will have to fight the repo shape every time, instead of benefiting from a stable active/frozen boundary. + +Bottom line: the repository does not need a large restructuring first. It needs a clear active/frozen boundary, narrowed primary navigation, and only then selective cleanup of mixed directories. diff --git a/docs/reports/review-pack-scope.md b/docs/reports/review-pack-scope.md new file mode 100644 index 0000000..cefe7c1 --- /dev/null +++ b/docs/reports/review-pack-scope.md @@ -0,0 +1,303 @@ +# Review Pack Scope + +Scope: `agent-evidence` only. + +Grounding surfaces used for this draft: + +- `docs/artifacts/artifact-contract-draft.md` +- `docs/quickstart.md` +- `README.md` +- `examples/langchain_minimal_evidence.py` +- `agent_evidence/integrations/langchain.py` +- `agent_evidence/integrations/openai_compatible/` +- `tests/test_quickstart_smoke.py` +- `tests/test_openai_compatible_adapter.py` + +This draft defines the first commercial Review Pack as a packaging and rendering +layer above the current `bundle` / `receipt` / `summary` contract. It does not +define a new schema or a new canonical artifact type. + +## 1. What Review Pack Is + +### Product goal + +Review Pack is the first commercial layer that turns current machine-facing +artifacts into a reviewer-facing handoff package. 
+ +Its job is to: + +- package the current primary outputs together +- render verification facts into a readable review surface +- preserve a clear line back to the underlying evidence artifacts + +### Target reader + +The first target readers are: + +- engineering managers +- AI platform engineers +- internal reviewers or audit-adjacent operators + +The target reader is someone who needs to decide whether a run is reviewable, +verifiable, and handoff-ready without manually navigating raw artifact files. + +### Why it is not just raw artifacts + +The current raw artifacts are already correct, but they still require the +reader to do manual correlation: + +- open the `summary` +- inspect the `receipt` +- find the right `bundle` +- translate machine-readable failures into a reviewer-facing explanation + +Review Pack is the layer that assembles those materials into one bounded +review surface without changing what counts as canonical evidence. + +## 2. Review Pack Primary Contents + +### Included primary contents + +The first Review Pack should include the existing primary outputs only: + +- `bundle` +- `receipt` +- `summary` + +In addition, it should include one rendered review report derived from those +three sources. + +That rendered review report is product-facing, but it is not a new canonical +artifact type. It is a packaging/rendering file inside the Review Pack. + +### Optional or supporting contents + +Supporting files should remain optional/supporting: + +- manifest sidecar +- verification public key +- runtime JSONL capture + +Supporting files may help with: + +- offline re-verification +- deeper diagnostics +- provenance of how the pack was assembled + +They should not become additional primary outputs. + +### Explicit exclusions + +The local signing private key should not be included in a commercial Review Pack +by default, even if it exists in local demo/example surfaces today. + +## 3. 
Source-of-Truth Mapping + +### What comes from `bundle` + +`bundle` remains the source of truth for: + +- exported evidence records +- event ordering and chain continuity +- manifest digests and counts +- signature material attached to the manifest +- runtime/context fields captured by the adapter path + +### What comes from `receipt` + +`receipt` remains the source of truth for: + +- pass/fail verification outcome +- machine-readable issue lists +- signature verification results +- chain verification result +- record counts and latest chain hash +- verification scope such as export/profile format context + +### What comes from `summary` + +`summary` remains the source of truth for reviewer orientation only: + +- which run/package is being discussed +- where the `bundle` and `receipt` live +- high-level counts such as `record_count`, `signature_count`, or `call_count` +- adapter-facing run context such as `provider_label`, `model`, or `base_url` +- re-run hints such as `verify_command` + +### What must never become a new canonical evidence field + +The following must remain outside canonical evidence: + +- Review Pack folder layout fields +- rendered headings and section labels +- severity labels added for reviewer readability +- report prose or recommended next steps +- checklists, badges, or status decorations +- local file path conventions +- source filename normalization used only by the package + +## 4. 
Failure Taxonomy Surface + +### Failure classes to expose to reviewers + +The first Review Pack should expose reviewer-facing failure classes such as: + +- integrity failure +- chain continuity failure +- signature failure +- signature policy failure +- profile validation failure +- packaging incompleteness warning + +### Which are verification facts + +Verification facts must come from `receipt` only: + +- `ok` +- `issues[]` +- signature verification flags and counts +- chain verification outputs +- profile-validation issue payloads when that receipt type is in scope + +### Which are presentation labels + +Presentation labels may be introduced by Review Pack, but only as render-time +labels above receipt facts: + +- `Integrity failed` +- `Signature missing` +- `Receipt passed with warnings` +- `Review blocked` +- `Optional support material missing` + +These labels must be reproducible from existing receipt facts and must not be +written back into canonical schema or evidence records. + +## 5. Verification Report Shape + +The first rendered review report should include at least these sections: + +### 1. Verdict + +- overall pass/fail result +- one short reviewer-facing explanation + +### 2. What Was Reviewed + +- artifact identifiers or normalized artifact names +- adapter/runtime context from `summary` +- high-level counts such as records and signatures + +### 3. Verification Findings + +- failure classes or warnings +- linked machine-readable issues from `receipt` +- signature and chain status + +### 4. Evidence References + +- pointer to the included `bundle` +- pointer to the included `receipt` +- pointer to the included `summary` +- optional pointer to supporting materials when present + +### 5. Reviewer-Facing Explanation + +- a short explanation of why the package passed or failed +- next review step guidance such as: + - inspect the bundle + - rerun verification + - escalate signature failure + +This explanatory text is presentation-only. 
It must not replace receipt facts. + +## 6. Packaging/Layout Proposal + +### Minimal folder/file layout + +The first Review Pack should normalize its root layout to reviewer-facing names: + +```text +review-pack/ + bundle.json + receipt.json + summary.json + review-report.md + supporting/ + manifest.json + manifest-public.pem + runtime-events.jsonl +``` + +### Naming rules + +Product-facing names inside the pack root should stay normalized: + +- `bundle.json` +- `receipt.json` +- `summary.json` +- `review-report.md` + +Implementation-specific source filenames such as +`langchain-evidence.bundle.json` should be treated as source-path details, not +as the product-facing naming layer of the pack. + +### Product-facing vs implementation-facing + +Product-facing: + +- normalized root artifact names +- rendered report layout +- reviewer headings and summaries + +Implementation-facing: + +- source output filenames from examples/adapters +- local output directories +- private adapter temp paths +- pack assembly internals + +## 7. Lowest-Risk Implementation Sequence + +### Implement first + +1. Define one adapter-agnostic pack assembler that consumes current + `bundle` / `receipt` / `summary` without requiring schema changes. +2. Generate one rendered review report from existing receipt + summary fields. +3. Package normalized root files plus optional supporting materials. + +### Implement second + +1. Add fixture-based tests that prove both current adapter lines can assemble + the same Review Pack shape: + - LangChain path + - OpenAI-compatible path +2. Verify that rendered failure classes are derived from receipt facts rather + than invented independently. 
+ +### What should explicitly wait + +- any new canonical artifact type +- any CLI surface for Review Pack +- any live-provider hardening work +- any hosted delivery or sharing workflow +- any review annotations or collaboration workflow +- any cross-repo review-pack integration + +### What must not be changed + +- canonical schema +- current `bundle` / `receipt` / `summary` contract +- adapter-specific artifact generation logic +- existing quickstart semantics +- supporting-files-as-supporting rule + +## 8. Non-Goals + +- no schema changes +- no exporter expansion +- no CLI changes +- no cross-repo work +- no live-provider hardening work +- no new canonical artifact types +- no attempt to push Review Pack labels back into core evidence fields diff --git a/docs/reports/v0.1-release-gate.md b/docs/reports/v0.1-release-gate.md new file mode 100644 index 0000000..66c5eb4 --- /dev/null +++ b/docs/reports/v0.1-release-gate.md @@ -0,0 +1,282 @@ +# v0.1 Release Gate + +Scope: `agent-evidence` only. + +Grounding surfaces used for this gate: + +- `README.md` +- `docs/quickstart.md` +- `docs/artifacts/artifact-contract-draft.md` +- `docs/reports/review-pack-scope.md` +- `agent_evidence/integrations/langchain.py` +- `agent_evidence/integrations/openai_compatible/` +- `agent_evidence/review_pack/` +- `tests/test_quickstart_smoke.py` +- `tests/test_langchain_adapter.py` +- `tests/test_langchain_integration.py` +- `tests/test_openai_compatible_adapter.py` +- `tests/test_review_pack_assembler.py` +- `tests/test_review_pack_renderer.py` +- `tests/test_review_pack_example_smoke.py` + +This gate assesses whether the current repository surface is ready to be treated +as a bounded `v0.1` release candidate. It does not change code, schema, tests, +or directory structure. + +## 1. 
Release Candidate Scope + +### In scope for `v0.1` + +- install from source +- one bounded quickstart path based on + `examples/langchain_minimal_evidence.py` +- one recommended LangChain adapter entry point: + `LangChainAdapter` +- one recommended OpenAI-compatible adapter entry point: + `OpenAICompatibleAdapter` +- one bounded Review Pack path above the current artifact contract: + - `ReviewPackAssembler` + - deterministic `review/report.md` + - `examples/review_pack/build_review_pack.py` +- normalized primary outputs: + - `bundle` + - `receipt` + - `summary` + +### Explicitly out of scope for `v0.1` + +- LangGraph-specific implementation surface +- OpenAI-compatible CLI +- Review Pack CLI +- hosted delivery +- live-provider integration tests +- cross-repo demo integration +- exporter expansion +- schema changes +- new canonical artifact types +- private-key inclusion in the default Review Pack path + +## 2. Primary Product Surfaces + +### Install / quickstart + +Current install and first-run path are present and bounded: + +- source install from `README.md` +- one quickstart path in `docs/quickstart.md` +- one runnable example: + `examples/langchain_minimal_evidence.py` + +This quickstart already produces the normalized outputs used by the repo today: + +- `bundle` +- `receipt` +- `summary` + +### LangChain adapter + +Current recommended entry point: + +- `agent_evidence.integrations.langchain.LangChainAdapter` + +Current role: + +- capture local LangChain runtime events +- export a signed JSON `bundle` +- verify that `bundle` +- write a reviewer-facing `summary` + +### OpenAI-compatible adapter + +Current recommended entry point: + +- `agent_evidence.integrations.openai_compatible.OpenAICompatibleAdapter` + +Current role: + +- wrap provider calls without moving provider logic into core evidence code +- preserve config fields such as `provider_label`, `model`, and `base_url` +- export the same normalized artifact contract: + - `bundle` + - `receipt` + - 
`summary` + +### Review Pack path + +Current recommended path: + +- `agent_evidence.review_pack.ReviewPackAssembler` +- `agent_evidence.review_pack.ReviewPackRenderer` +- `examples/review_pack/build_review_pack.py` + +Current role: + +- package existing `bundle`, `receipt`, and `summary` +- generate deterministic `review/report.md` +- keep supporting files optional +- exclude private keys by default + +## 3. Artifact Contract Boundary + +The current release candidate continues to respect the existing artifact +contract boundary: + +- `bundle`, `receipt`, and `summary` remain the only primary outputs +- supporting files remain supporting only: + - manifest sidecar + - verification public key + - runtime JSONL capture + - local private key + +The current surface also keeps source-of-truth boundaries intact: + +- evidence payload remains in `bundle` +- machine-readable verification facts remain in `receipt` +- reviewer orientation remains in `summary` +- Review Pack layout fields, renderer labels, local paths, and reviewer notes do + not become canonical schema fields + +## 4. Test And Gate Status + +This gate was grounded in the current bounded release path and its focused test +surfaces. 
+ +### Quickstart smoke + +- `tests/test_quickstart_smoke.py` +- status: pass +- covers: + - install assumptions already satisfied in the active environment + - LangChain minimal example run + - `bundle` generation + - standalone `receipt` generation + - `summary` generation + +### LangChain adapter + +- `tests/test_langchain_adapter.py` +- `tests/test_langchain_integration.py` +- status: pass +- covers: + - wrapper entry point behavior + - callback capture + - export / verify flow + - current example path + - stream-event normalization + +### OpenAI-compatible adapter + +- `tests/test_openai_compatible_adapter.py` +- status: pass +- covers: + - fake-provider switching + - config propagation + - artifact contract invariance + - no raw `api_key` persistence + - no live network dependency + +### Review Pack + +- `tests/test_review_pack_assembler.py` +- `tests/test_review_pack_renderer.py` +- `tests/test_review_pack_example_smoke.py` +- status: pass +- covers: + - stable pack layout + - primary vs supporting separation + - deterministic reviewer-facing render + - example-level pack generation + - private key excluded by default + +### Current release-gate test run + +The current gate was rechecked with: + +```bash +python -m pytest \ + tests/test_quickstart_smoke.py \ + tests/test_langchain_adapter.py \ + tests/test_langchain_integration.py \ + tests/test_openai_compatible_adapter.py \ + tests/test_review_pack_assembler.py \ + tests/test_review_pack_renderer.py \ + tests/test_review_pack_example_smoke.py +``` + +Result: + +- `15 passed` + +## 5. 
Known Limitations + +- no Review Pack CLI +- no OpenAI-compatible CLI +- no hosted delivery +- no live-provider tests +- no cross-repo integration +- no LangGraph-specific implementation surface +- no guarantee that current Python 3.14 warnings are fully resolved + +Current warning/deprecation notes seen in the release-gate test run: + +- `langchain_core` emits a Python 3.14 compatibility warning tied to Pydantic v1 +- `langchain_core` also emits deprecation warnings around + `asyncio.iscoroutinefunction` + +These warnings do not block the current bounded test suite, but they should be +tracked as post-`v0.1` hardening work rather than silently ignored forever. + +## 6. Ship Decision + +### Recommendation + +Release candidate now. + +### Why + +The bounded `v0.1` criteria are met: + +- one installable source path exists +- one honest quickstart path exists and is gated +- LangChain has one recommended adapter entry point +- OpenAI-compatible has one recommended adapter entry point +- the artifact contract stays normalized to `bundle` / `receipt` / `summary` +- Review Pack now has: + - scope + - assembler + - deterministic renderer + - example/cookbook wiring + - smoke gate +- current focused release-path tests pass together + +### Conditions on that recommendation + +Treat `v0.1` as a bounded release candidate, not as a claim of full platform +coverage. + +It should be shipped with explicit positioning: + +- local-first +- adapter-first +- review-pack-enabled +- no hosted control plane +- no live-provider validation guarantee + +If that bounded positioning is acceptable, there is no technical reason in the +current repo surface to hold the release candidate. + +## 7. Lowest-Risk Follow-Ups After `v0.1` + +At most three follow-ups should be prioritized next: + +1. Resolve or reduce the current Python 3.14 `langchain_core` warning surface. +2. Add one bounded doc/release note that makes the `v0.1` limits explicit to + users without changing the artifact contract. +3. 
Add one non-live provider hardening pass around edge-case response shapes for + `OpenAICompatibleAdapter`. + +Non-goal for these follow-ups: + +- no schema changes +- no new architecture work +- no hosted delivery work diff --git a/docs/review-pack-cn-report-notes.md b/docs/review-pack-cn-report-notes.md new file mode 100644 index 0000000..7dc0227 --- /dev/null +++ b/docs/review-pack-cn-report-notes.md @@ -0,0 +1,27 @@ +# 审阅报告中文优先说明 + +## 目标 + +Review Pack 中的 `report.md` 现在以中文作为正式交付文本的主语言,面向中国客户或内部审阅对象时,默认先给出中文结论、中文状态和中文交付说明。 + +## 语言边界 + +- 审阅报告正文默认中文优先。 +- 英文术语只在必要时作为括号辅助说明,不再作为主标题或主标签。 +- `bundle / receipt / summary` 继续保留为底层主输出名称,但在审阅报告中不作为第一层客户语言。 + +## 保持不变的部分 + +- `bundle`、`receipt`、`summary` 仍然是主输出。 +- Review Pack 仍然只是整理与渲染层。 +- `report.md` 仍然只是审阅输出,不是新的 schema。 +- 机器可读事实仍然来自既有输入: + - `receipt` 提供校验事实 + - `bundle` 提供证据引用 + - `summary` 提供审阅说明与上下文 + +## 当前交付原则 + +- 第一眼先看到中文结论,而不是英文标题。 +- 先看总体状态、问题摘要和交付物清单,再决定是否下钻技术细节。 +- 需要保留技术定位能力时,在中文标签后附上括号中的英文术语或字段名。 diff --git a/docs/review-pack-pdf-notes.md b/docs/review-pack-pdf-notes.md new file mode 100644 index 0000000..f3aef3b --- /dev/null +++ b/docs/review-pack-pdf-notes.md @@ -0,0 +1,22 @@ +# 审阅报告 PDF 交付说明 + +## 当前原则 + +- `review/report.pdf` 现在是 Review Pack 中的正式交付格式。 +- `review/report.md` 继续保留,作为中间文本格式和技术检查格式。 +- 两者同时存在,互不替代。 + +## 当前实现 + +- PDF 由现有中文优先的审阅报告正文直接生成。 +- 当前优先保证: + - 可稳定生成 + - 中文可正常显示 + - 文件路径稳定 + - 可直接进入 Review Pack + +## 当前限制 + +- 当前 PDF 以稳定交付为第一目标,不追求复杂排版或品牌化设计。 +- 本轮不涉及 Windows。 +- 本轮不涉及签名、公证或对外分发包装。 diff --git a/examples/README.md b/examples/README.md index 5dbecbf..f8a962b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,32 @@ -# Operation Accountability Statement Examples +# Examples + +## Current primary runnable example + +The current primary runnable example in this repository is: + +- [`langchain_minimal_evidence.py`](./langchain_minimal_evidence.py) + - Recommended LangChain entry point for the current quickstart path. 
+ - Produces the normalized outputs used in this repo today: + `bundle`, `receipt`, and `summary`. + - Uses the recommended public API: + + ```python + from agent_evidence.integrations.langchain import LangChainAdapter + + adapter = LangChainAdapter.for_output_dir("./artifacts/langchain-run") + callbacks = [adapter.callback_handler()] + artifacts = adapter.finalize() + ``` + + - Supporting files such as the manifest sidecar, local keys, and runtime JSONL + remain supporting materials rather than additional primary outputs. + +Related docs: + +- [`../docs/cookbooks/langchain_minimal_evidence.md`](../docs/cookbooks/langchain_minimal_evidence.md) +- [`../integrations/langchain/README.md`](../integrations/langchain/README.md) + +## Operation Accountability Statement examples These examples are the current primary example surface for `execution-evidence-operation-accountability-profile@0.1`. @@ -18,6 +46,15 @@ Each file is intended to produce one validation report. - Passes the same profile checks in a second context: one dataset package subject, two input references, and one retention decision output. - Main value: second-context validity evidence for the same minimal profile. +- `valid-high-risk-payment-review-evidence.json` + - Passes the same profile checks in a high-risk, reviewer-facing context: + one flagged payment case subject, two input references, and one review decision output. + - Main value: a discoverable high-risk scenario entry without changing the v0.1 boundary. +- `valid-trust-binding-evidence.json` + - Passes the same local profile checks while also carrying one optional + `validation.trust_bindings[]` entry. + - Main value: shows how to point to an external trust source without making + that source mandatory for local conformance. - `invalid-missing-required.json` - Fails because `validation.method` is intentionally removed. - Main broken rule: required field completeness. @@ -29,6 +66,14 @@ Each file is intended to produce one validation report. 
- Fails because `evidence.policy_ref` points to `policy:stale-metadata-v1` instead of the declared `policy.id`. - Main broken rule: policy/evidence link consistency. +- `invalid-high-risk-unclosed-reference.json` + - Fails because the operation output reference points to + `ref:missing-review-decision`, which is not defined in `evidence.references`. + - Main broken rule: reference closure. +- `invalid-high-risk-policy-link-broken.json` + - Fails because `evidence.policy_ref` points to + `policy:stale-payment-review-v1` instead of the declared `policy.id`. + - Main broken rule: policy/evidence link consistency. - `invalid-provenance-output-mismatch.json` - Fails because `provenance.output_refs` points to `ref:input-note` instead of matching `operation.output_refs`. @@ -37,15 +82,30 @@ Each file is intended to produce one validation report. - Fails because `validation.provenance_ref` points to `prov:missing-metadata-enrich-001`, which is not defined locally. - Main broken rule: validation/provenance reference closure. +- `invalid-trust-binding-digest-mismatch.json` + - Fails because `validation.trust_bindings[0].target_digest` does not match + the local digest of the target statement. + - Main broken rule: trust-binding/local-target consistency. + +Command note: + +- The commands below assume `agent-evidence` is installed in the active environment. +- If you are using the repository virtualenv directly, run + `.venv/bin/agent-evidence ...` instead. 
## Validate ```bash agent-evidence validate-profile examples/minimal-valid-evidence.json agent-evidence validate-profile examples/valid-retention-review-evidence.json +agent-evidence validate-profile examples/valid-high-risk-payment-review-evidence.json +agent-evidence validate-profile examples/valid-trust-binding-evidence.json agent-evidence validate-profile examples/invalid-missing-required.json agent-evidence validate-profile examples/invalid-unclosed-reference.json agent-evidence validate-profile examples/invalid-policy-link-broken.json +agent-evidence validate-profile examples/invalid-high-risk-unclosed-reference.json +agent-evidence validate-profile examples/invalid-high-risk-policy-link-broken.json agent-evidence validate-profile examples/invalid-provenance-output-mismatch.json agent-evidence validate-profile examples/invalid-validation-provenance-link-broken.json +agent-evidence validate-profile examples/invalid-trust-binding-digest-mismatch.json ``` diff --git a/examples/invalid-high-risk-policy-link-broken.json b/examples/invalid-high-risk-policy-link-broken.json new file mode 100644 index 0000000..0ca28a8 --- /dev/null +++ b/examples/invalid-high-risk-policy-link-broken.json @@ -0,0 +1,120 @@ +{ + "actor": { + "id": "actor:payment-reviewer", + "name": "payment-reviewer", + "runtime": "openai-agents", + "type": "agent" + }, + "constraints": [ + { + "id": "constraint:approved-review-codes", + "description": "Only approved payment-review decision codes may be assigned." + }, + { + "id": "constraint:no-direct-settlement", + "description": "Flagged payments must not be settled directly by this operation." 
+ } + ], + "evidence": { + "artifacts": [ + { + "artifact_id": "artifact:payment-review-report-001", + "digest": "sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + "locator": "urn:demo:payment-review-report-001", + "type": "review-report" + } + ], + "id": "evidence:payment-review-001", + "integrity": { + "references_digest": "sha256:d45164cb5a7904af3e0f4c6b469d557d2169cae65e01b2a77c6b1f321e38248d", + "artifacts_digest": "sha256:b3d4643ff1a9ec504881fe2b826a19c586f48aeeb366dde766a01d849218aafa", + "statement_digest": "sha256:429e45523ff1cc7b16dc391ed94ed98f2ddbd60745c77a04d2489cbd30b6e868" + }, + "operation_ref": "op:payment-review-001", + "policy_ref": "policy:stale-payment-review-v1", + "references": [ + { + "digest": "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "locator": "urn:demo:payment-case-042", + "object_id": "obj:payment-case-042", + "ref_id": "ref:input-payment-case", + "role": "input" + }, + { + "digest": "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "locator": "urn:demo:risk-note-042", + "object_id": "obj:risk-note-042", + "ref_id": "ref:input-risk-note", + "role": "input" + }, + { + "digest": "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + "locator": "urn:demo:payment-review-decision-042", + "object_id": "obj:payment-review-decision-042", + "ref_id": "ref:output-review-decision", + "role": "output" + } + ], + "subject_ref": "obj:payment-case-042" + }, + "operation": { + "description": "Review one flagged payment case and emit one review decision.", + "id": "op:payment-review-001", + "input_refs": [ + "ref:input-payment-case", + "ref:input-risk-note" + ], + "output_refs": [ + "ref:output-review-decision" + ], + "policy_ref": "policy:payment-review-v1", + "result": { + "status": "succeeded", + "summary": "one payment review decision object emitted" + }, + "subject_ref": "obj:payment-case-042", + "type": "payment.review" + }, + "policy": { 
+ "constraint_refs": [ + "constraint:approved-review-codes", + "constraint:no-direct-settlement" + ], + "id": "policy:payment-review-v1", + "name": "payment-review-policy" + }, + "profile": { + "name": "execution-evidence-operation-accountability-profile", + "version": "0.1" + }, + "provenance": { + "actor_ref": "actor:payment-reviewer", + "id": "prov:payment-review-001", + "input_refs": [ + "ref:input-payment-case", + "ref:input-risk-note" + ], + "operation_ref": "op:payment-review-001", + "output_refs": [ + "ref:output-review-decision" + ], + "subject_ref": "obj:payment-case-042" + }, + "statement_id": "eeoap:payment-review-001", + "subject": { + "digest": "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "id": "obj:payment-case-042", + "locator": "urn:demo:payment-case-042", + "type": "fdo-record" + }, + "timestamp": "2026-04-13T00:00:00Z", + "validation": { + "evidence_ref": "evidence:payment-review-001", + "id": "validation:payment-review-001", + "method": "schema+reference+consistency", + "policy_ref": "policy:payment-review-v1", + "provenance_ref": "prov:payment-review-001", + "status": "verifiable", + "validator": "agent-evidence validate-profile" + } +} diff --git a/examples/invalid-high-risk-unclosed-reference.json b/examples/invalid-high-risk-unclosed-reference.json new file mode 100644 index 0000000..6c21329 --- /dev/null +++ b/examples/invalid-high-risk-unclosed-reference.json @@ -0,0 +1,120 @@ +{ + "actor": { + "id": "actor:payment-reviewer", + "name": "payment-reviewer", + "runtime": "openai-agents", + "type": "agent" + }, + "constraints": [ + { + "id": "constraint:approved-review-codes", + "description": "Only approved payment-review decision codes may be assigned." + }, + { + "id": "constraint:no-direct-settlement", + "description": "Flagged payments must not be settled directly by this operation." 
+ } + ], + "evidence": { + "artifacts": [ + { + "artifact_id": "artifact:payment-review-report-001", + "digest": "sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + "locator": "urn:demo:payment-review-report-001", + "type": "review-report" + } + ], + "id": "evidence:payment-review-001", + "integrity": { + "references_digest": "sha256:d45164cb5a7904af3e0f4c6b469d557d2169cae65e01b2a77c6b1f321e38248d", + "artifacts_digest": "sha256:b3d4643ff1a9ec504881fe2b826a19c586f48aeeb366dde766a01d849218aafa", + "statement_digest": "sha256:ee93200ca4b41f13bb91e9e3365524005c1e1d97a0557aa54237ddf975b3577a" + }, + "operation_ref": "op:payment-review-001", + "policy_ref": "policy:payment-review-v1", + "references": [ + { + "digest": "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "locator": "urn:demo:payment-case-042", + "object_id": "obj:payment-case-042", + "ref_id": "ref:input-payment-case", + "role": "input" + }, + { + "digest": "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "locator": "urn:demo:risk-note-042", + "object_id": "obj:risk-note-042", + "ref_id": "ref:input-risk-note", + "role": "input" + }, + { + "digest": "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + "locator": "urn:demo:payment-review-decision-042", + "object_id": "obj:payment-review-decision-042", + "ref_id": "ref:output-review-decision", + "role": "output" + } + ], + "subject_ref": "obj:payment-case-042" + }, + "operation": { + "description": "Review one flagged payment case and emit one review decision.", + "id": "op:payment-review-001", + "input_refs": [ + "ref:input-payment-case", + "ref:input-risk-note" + ], + "output_refs": [ + "ref:missing-review-decision" + ], + "policy_ref": "policy:payment-review-v1", + "result": { + "status": "succeeded", + "summary": "one payment review decision object emitted" + }, + "subject_ref": "obj:payment-case-042", + "type": "payment.review" + }, + "policy": { + 
"constraint_refs": [ + "constraint:approved-review-codes", + "constraint:no-direct-settlement" + ], + "id": "policy:payment-review-v1", + "name": "payment-review-policy" + }, + "profile": { + "name": "execution-evidence-operation-accountability-profile", + "version": "0.1" + }, + "provenance": { + "actor_ref": "actor:payment-reviewer", + "id": "prov:payment-review-001", + "input_refs": [ + "ref:input-payment-case", + "ref:input-risk-note" + ], + "operation_ref": "op:payment-review-001", + "output_refs": [ + "ref:missing-review-decision" + ], + "subject_ref": "obj:payment-case-042" + }, + "statement_id": "eeoap:payment-review-001", + "subject": { + "digest": "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "id": "obj:payment-case-042", + "locator": "urn:demo:payment-case-042", + "type": "fdo-record" + }, + "timestamp": "2026-04-13T00:00:00Z", + "validation": { + "evidence_ref": "evidence:payment-review-001", + "id": "validation:payment-review-001", + "method": "schema+reference+consistency", + "policy_ref": "policy:payment-review-v1", + "provenance_ref": "prov:payment-review-001", + "status": "verifiable", + "validator": "agent-evidence validate-profile" + } +} diff --git a/examples/invalid-trust-binding-digest-mismatch.json b/examples/invalid-trust-binding-digest-mismatch.json new file mode 100644 index 0000000..f97c3dc --- /dev/null +++ b/examples/invalid-trust-binding-digest-mismatch.json @@ -0,0 +1,122 @@ +{ + "actor": { + "id": "actor:metadata-enricher", + "name": "metadata-enricher", + "runtime": "openai-agents", + "type": "agent" + }, + "constraints": [ + { + "description": "Only approved metadata fields may be added.", + "id": "constraint:approved-fields" + }, + { + "description": "The note body must remain unchanged.", + "id": "constraint:no-content-rewrite" + } + ], + "evidence": { + "artifacts": [ + { + "artifact_id": "artifact:operation-log-001", + "digest": 
"sha256:4444444444444444444444444444444444444444444444444444444444444444", + "locator": "urn:demo:operation-log-001", + "type": "execution-log" + } + ], + "id": "evidence:metadata-enrich-001", + "integrity": { + "artifacts_digest": "sha256:d0470d93af9a5d6df99bf51369de7656ae8c84e8c65e9c1301cf6b095ea5010d", + "references_digest": "sha256:60ae1db47aa67231dd84e0c77a7d5a5a470e7f4474bee9922fb2a78499f75dfc", + "statement_digest": "sha256:a1ed81dc475aa6c0c82e30487c48c4ec31a800869220aeedfa871ebc03928a89" + }, + "operation_ref": "op:metadata-enrich-001", + "policy_ref": "policy:approved-metadata-v1", + "references": [ + { + "digest": "sha256:2222222222222222222222222222222222222222222222222222222222222222", + "locator": "urn:demo:client-note-001", + "object_id": "obj:client-note-001", + "ref_id": "ref:input-note", + "role": "input" + }, + { + "digest": "sha256:3333333333333333333333333333333333333333333333333333333333333333", + "locator": "urn:demo:client-note-001-derived", + "object_id": "obj:client-note-001-derived", + "ref_id": "ref:output-note", + "role": "output" + } + ], + "subject_ref": "obj:client-note-001" + }, + "operation": { + "description": "Add approved metadata tags to one client note object.", + "id": "op:metadata-enrich-001", + "input_refs": [ + "ref:input-note" + ], + "output_refs": [ + "ref:output-note" + ], + "policy_ref": "policy:approved-metadata-v1", + "result": { + "status": "succeeded", + "summary": "one derived note object emitted" + }, + "subject_ref": "obj:client-note-001", + "type": "metadata.enrich" + }, + "policy": { + "constraint_refs": [ + "constraint:approved-fields", + "constraint:no-content-rewrite" + ], + "id": "policy:approved-metadata-v1", + "name": "approved-metadata-policy" + }, + "profile": { + "name": "execution-evidence-operation-accountability-profile", + "version": "0.1" + }, + "provenance": { + "actor_ref": "actor:metadata-enricher", + "id": "prov:metadata-enrich-001", + "input_refs": [ + "ref:input-note" + ], + "operation_ref": 
"op:metadata-enrich-001", + "output_refs": [ + "ref:output-note" + ], + "subject_ref": "obj:client-note-001" + }, + "statement_id": "eeoap:metadata-demo-001", + "subject": { + "digest": "sha256:1111111111111111111111111111111111111111111111111111111111111111", + "id": "obj:client-note-001", + "locator": "urn:demo:client-note-001", + "type": "fdo-record" + }, + "timestamp": "2026-03-30T00:00:00Z", + "validation": { + "evidence_ref": "evidence:metadata-enrich-001", + "id": "validation:metadata-enrich-001", + "method": "schema+reference+consistency+trust-binding", + "policy_ref": "policy:approved-metadata-v1", + "provenance_ref": "prov:metadata-enrich-001", + "status": "verifiable", + "trust_bindings": [ + { + "binding_id": "trust:sigstore-rekor-entry-001", + "locator": "https://rekor.example.com/api/v1/log/entries/demo-001", + "mechanism": "sigstore", + "proof_type": "transparency-log-entry", + "target_digest": "sha256:5555555555555555555555555555555555555555555555555555555555555555", + "target_ref": "eeoap:metadata-demo-001", + "verifier_hint": "Recompute evidence.integrity.statement_digest before external verification." 
+ } + ], + "validator": "agent-evidence validate-profile" + } +} diff --git a/examples/langchain_minimal_evidence.py b/examples/langchain_minimal_evidence.py index 22031c3..0f6dc76 100644 --- a/examples/langchain_minimal_evidence.py +++ b/examples/langchain_minimal_evidence.py @@ -2,26 +2,10 @@ import argparse import json -import shutil from pathlib import Path from uuid import uuid4 -from agent_evidence import ( - EvidenceRecorder, - LocalEvidenceStore, - export_json_bundle, - verify_json_bundle, -) -from agent_evidence.integrations import EvidenceCallbackHandler - -try: - from cryptography.hazmat.primitives import serialization - from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey -except ModuleNotFoundError as exc: # pragma: no cover - runtime dependency guard - raise ModuleNotFoundError( - "cryptography is required for this example. Install agent-evidence with " - "the [signing] or [dev] extra." - ) from exc +from agent_evidence.integrations import LangChainAdapter try: from langchain_core.runnables import RunnableLambda @@ -43,37 +27,18 @@ def multiply(x: int, y: int) -> int: return x * y -def _write_ed25519_keypair(output_dir: Path) -> tuple[Path, Path, bytes, bytes]: - private_key = Ed25519PrivateKey.generate() - private_pem = private_key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption(), - ) - public_pem = private_key.public_key().public_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PublicFormat.SubjectPublicKeyInfo, - ) - private_key_path = output_dir / "manifest-private.pem" - public_key_path = output_dir / "manifest-public.pem" - private_key_path.write_bytes(private_pem) - public_key_path.write_bytes(public_pem) - return private_key_path, public_key_path, private_pem, public_pem - - def run_example(output_dir: str | Path = DEFAULT_OUTPUT_DIR) -> dict[str, object]: - output_root = Path(output_dir) - if 
output_root.exists(): - shutil.rmtree(output_root) - output_root.mkdir(parents=True, exist_ok=True) - - store_path = output_root / "runtime-events.jsonl" - bundle_path = output_root / "langchain-evidence.bundle.json" - manifest_path = output_root / "langchain-evidence.manifest.json" - - store = LocalEvidenceStore(store_path) - recorder = EvidenceRecorder(store) - handler = EvidenceCallbackHandler(recorder=recorder, base_tags=["cookbook", "local-first"]) + adapter = LangChainAdapter.for_output_dir( + output_dir, + digest_only=True, + omit_request=False, + omit_response=False, + base_tags=["cookbook", "local-first"], + key_id="langchain-cookbook-demo", + signer="local-demo", + role="attestor", + ) + handler = adapter.callback_handler() uppercase = RunnableLambda(lambda text: text.upper()).with_config({"run_name": "uppercase"}) failing = RunnableLambda( @@ -110,47 +75,8 @@ def run_example(output_dir: str | Path = DEFAULT_OUTPUT_DIR) -> dict[str, object except RuntimeError: pass - records = store.list() - private_key_path, public_key_path, private_pem, public_pem = _write_ed25519_keypair(output_root) - - bundle = export_json_bundle( - records, - bundle_path, - filters={"source": "langchain", "limit": len(records)}, - private_key_pem=private_pem, - key_id="langchain-cookbook-demo", - signer="local-demo", - role="attestor", - manifest_output_path=manifest_path, - ) - verify_result = verify_json_bundle(bundle_path, public_key_pem=public_pem) - - verify_command = ( - f"agent-evidence verify-export --bundle {bundle_path} --public-key {public_key_path}" - ) - summary = { - "ok": verify_result["ok"], - "output_dir": str(output_root), - "store_path": str(store_path), - "bundle_path": str(bundle_path), - "manifest_path": str(manifest_path), - "private_key_path": str(private_key_path), - "public_key_path": str(public_key_path), - "record_count": len(records), - "signature_count": len(bundle.signatures), - "verify_command": verify_command, - "verify_result": verify_result, - 
"anchor_note": ( - "Detached anchoring is not implemented in this repository. Use the exported " - "bundle digest and signed manifest as the handoff point if you want to anchor " - "it in an external timestamp or registry system." - ), - } - (output_root / "summary.json").write_text( - json.dumps(summary, indent=2, sort_keys=True) + "\n", - encoding="utf-8", - ) - return summary + artifacts = adapter.finalize() + return artifacts.summary def main() -> int: diff --git a/examples/openai_compatible/alternate_base_url.py b/examples/openai_compatible/alternate_base_url.py new file mode 100644 index 0000000..7ed3a90 --- /dev/null +++ b/examples/openai_compatible/alternate_base_url.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + +from agent_evidence.integrations.openai_compatible import OpenAICompatibleAdapter + +DEFAULT_OUTPUT_DIR = ( + Path(__file__).resolve().parents[1] / "artifacts" / "openai-compatible-alt-base-url" +) +DEFAULT_MODEL = "gpt-4.1-mini" +DEFAULT_PROMPT = "Reply with exactly: hello from an OpenAI-compatible base URL." + + +def _require_env(name: str) -> str: + value = os.environ.get(name) + if value: + return value + raise RuntimeError( + f"{name} is required for this example. Export it in your shell before running." + ) + + +def _build_client(*, api_key: str, base_url: str): + try: + from openai import OpenAI + except ModuleNotFoundError as exc: # pragma: no cover - depends on local environment + raise ModuleNotFoundError( + "The `openai` package is required for this example. Install it locally with " + "`pip install openai`." 
+ ) from exc + return OpenAI(api_key=api_key, base_url=base_url) + + +def run_example(output_dir: str | Path = DEFAULT_OUTPUT_DIR) -> dict[str, object]: + api_key = _require_env("OPENAI_API_KEY") + base_url = _require_env("OPENAI_COMPAT_BASE_URL") + provider_label = _require_env("OPENAI_COMPAT_PROVIDER_LABEL") + model = os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) + prompt = os.environ.get("OPENAI_PROMPT", DEFAULT_PROMPT) + + client = _build_client(api_key=api_key, base_url=base_url) + adapter = OpenAICompatibleAdapter.for_output_dir( + output_dir, + provider_label=provider_label, + model=model, + api_key=api_key, + base_url=base_url, + digest_only=True, + omit_request=False, + omit_response=False, + base_tags=["example", "openai-compatible"], + ) + + response = adapter.record_call( + operation="chat.completions.create", + request={ + "model": model, + "messages": [{"role": "user", "content": prompt}], + "base_url": base_url, + "provider_label": provider_label, + }, + invoke=lambda: client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + ), + metadata={"example": "alternate_base_url"}, + tags=["alternate-base-url"], + ) + + artifacts = adapter.finalize() + summary = dict(artifacts.summary) + summary["response_id"] = getattr(response, "id", None) + summary["provider_operation"] = "chat.completions.create" + return summary + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Run a minimal OpenAI-compatible export against an alternate base URL." + ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT_DIR, + help=f"Directory for generated artifacts. 
Default: {DEFAULT_OUTPUT_DIR}", + ) + args = parser.parse_args() + + summary = run_example(args.output_dir) + print(json.dumps(summary, indent=2, sort_keys=True)) + return 0 if summary["ok"] else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/examples/openai_compatible/basic_export.py b/examples/openai_compatible/basic_export.py new file mode 100644 index 0000000..666233f --- /dev/null +++ b/examples/openai_compatible/basic_export.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + +from agent_evidence.integrations.openai_compatible import OpenAICompatibleAdapter + +DEFAULT_OUTPUT_DIR = Path(__file__).resolve().parents[1] / "artifacts" / "openai-compatible-basic" +DEFAULT_MODEL = "gpt-4.1-mini" +DEFAULT_PROMPT = "Reply with exactly: hello from agent-evidence." + + +def _require_env(name: str) -> str: + value = os.environ.get(name) + if value: + return value + raise RuntimeError( + f"{name} is required for this example. Export it in your shell before running." + ) + + +def _build_client(*, api_key: str): + try: + from openai import OpenAI + except ModuleNotFoundError as exc: # pragma: no cover - depends on local environment + raise ModuleNotFoundError( + "The `openai` package is required for this example. Install it locally with " + "`pip install openai`." 
+ ) from exc + return OpenAI(api_key=api_key) + + +def run_example(output_dir: str | Path = DEFAULT_OUTPUT_DIR) -> dict[str, object]: + api_key = _require_env("OPENAI_API_KEY") + model = os.environ.get("OPENAI_MODEL", DEFAULT_MODEL) + prompt = os.environ.get("OPENAI_PROMPT", DEFAULT_PROMPT) + + client = _build_client(api_key=api_key) + adapter = OpenAICompatibleAdapter.for_output_dir( + output_dir, + provider_label="openai", + model=model, + api_key=api_key, + digest_only=True, + omit_request=False, + omit_response=False, + base_tags=["example", "openai-compatible"], + ) + + response = adapter.record_call( + operation="chat.completions.create", + request={ + "model": model, + "messages": [{"role": "user", "content": prompt}], + }, + invoke=lambda: client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + ), + metadata={"example": "basic_export"}, + tags=["default-config"], + ) + + artifacts = adapter.finalize() + summary = dict(artifacts.summary) + summary["response_id"] = getattr(response, "id", None) + summary["provider_operation"] = "chat.completions.create" + return summary + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Run a minimal OpenAI-compatible export with default OpenAI settings." + ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT_DIR, + help=f"Directory for generated artifacts. 
Default: {DEFAULT_OUTPUT_DIR}", + ) + args = parser.parse_args() + + summary = run_example(args.output_dir) + print(json.dumps(summary, indent=2, sort_keys=True)) + return 0 if summary["ok"] else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/examples/review_pack/build_review_pack.py b/examples/review_pack/build_review_pack.py new file mode 100644 index 0000000..07be08a --- /dev/null +++ b/examples/review_pack/build_review_pack.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +from agent_evidence.review_pack import ReviewPackAssembler + +DEFAULT_OUTPUT_DIR = Path(__file__).resolve().parents[1] / "artifacts" / "review-pack" + + +def _supporting_files_from_args(args: argparse.Namespace) -> dict[str, Path]: + supporting_files: dict[str, Path] = {} + for name in ("manifest", "public_key", "runtime_events", "private_key"): + value = getattr(args, f"{name}_path") + if value is not None: + supporting_files[name] = value + return supporting_files + + +def build_review_pack(args: argparse.Namespace) -> dict[str, object]: + assembler = ReviewPackAssembler.for_output_dir(args.output_dir) + pack = assembler.assemble( + bundle_path=args.bundle_path, + receipt_path=args.receipt_path, + summary_path=args.summary_path, + supporting_files=_supporting_files_from_args(args), + include_private_key=args.include_private_key, + ) + + result = { + "pack_dir": str(pack.pack_dir), + "index_path": str(pack.index_path), + "report_path": str(pack.report_path), + "primary_files": {name: str(path) for name, path in pack.primary_files.items()}, + "supporting_files": {name: str(path) for name, path in pack.supporting_files.items()}, + } + return result + + +def main() -> int: + parser = argparse.ArgumentParser( + description=( + "Build a Review Pack from an existing bundle, receipt, and summary " + "without changing their schemas." 
+ ) + ) + parser.add_argument("--bundle-path", type=Path, required=True, help="Path to the bundle.") + parser.add_argument( + "--receipt-path", + type=Path, + required=True, + help="Path to the machine-readable receipt.", + ) + parser.add_argument( + "--summary-path", + type=Path, + required=True, + help="Path to the reviewer-facing summary.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT_DIR, + help=f"Directory for the assembled review pack. Default: {DEFAULT_OUTPUT_DIR}", + ) + parser.add_argument( + "--manifest-path", + type=Path, + help="Optional path to the manifest sidecar to include as supporting material.", + ) + parser.add_argument( + "--public-key-path", + type=Path, + help="Optional path to the verification public key to include as supporting material.", + ) + parser.add_argument( + "--runtime-events-path", + type=Path, + help="Optional path to runtime JSONL capture to include as supporting material.", + ) + parser.add_argument( + "--private-key-path", + type=Path, + help=( + "Optional path to the local signing private key. It is excluded by default and " + "only copied if --include-private-key is also set." + ), + ) + parser.add_argument( + "--include-private-key", + action="store_true", + help="Include the private key supporting file. 
Default is excluded.", + ) + args = parser.parse_args() + + result = build_review_pack(args) + print(json.dumps(result, indent=2, sort_keys=True)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/examples/valid-high-risk-payment-review-evidence.json b/examples/valid-high-risk-payment-review-evidence.json new file mode 100644 index 0000000..a74c113 --- /dev/null +++ b/examples/valid-high-risk-payment-review-evidence.json @@ -0,0 +1,120 @@ +{ + "actor": { + "id": "actor:payment-reviewer", + "name": "payment-reviewer", + "runtime": "openai-agents", + "type": "agent" + }, + "constraints": [ + { + "id": "constraint:approved-review-codes", + "description": "Only approved payment-review decision codes may be assigned." + }, + { + "id": "constraint:no-direct-settlement", + "description": "Flagged payments must not be settled directly by this operation." + } + ], + "evidence": { + "artifacts": [ + { + "artifact_id": "artifact:payment-review-report-001", + "digest": "sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + "locator": "urn:demo:payment-review-report-001", + "type": "review-report" + } + ], + "id": "evidence:payment-review-001", + "integrity": { + "references_digest": "sha256:d45164cb5a7904af3e0f4c6b469d557d2169cae65e01b2a77c6b1f321e38248d", + "artifacts_digest": "sha256:b3d4643ff1a9ec504881fe2b826a19c586f48aeeb366dde766a01d849218aafa", + "statement_digest": "sha256:429e45523ff1cc7b16dc391ed94ed98f2ddbd60745c77a04d2489cbd30b6e868" + }, + "operation_ref": "op:payment-review-001", + "policy_ref": "policy:payment-review-v1", + "references": [ + { + "digest": "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "locator": "urn:demo:payment-case-042", + "object_id": "obj:payment-case-042", + "ref_id": "ref:input-payment-case", + "role": "input" + }, + { + "digest": "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "locator": "urn:demo:risk-note-042", + 
"object_id": "obj:risk-note-042", + "ref_id": "ref:input-risk-note", + "role": "input" + }, + { + "digest": "sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + "locator": "urn:demo:payment-review-decision-042", + "object_id": "obj:payment-review-decision-042", + "ref_id": "ref:output-review-decision", + "role": "output" + } + ], + "subject_ref": "obj:payment-case-042" + }, + "operation": { + "description": "Review one flagged payment case and emit one review decision.", + "id": "op:payment-review-001", + "input_refs": [ + "ref:input-payment-case", + "ref:input-risk-note" + ], + "output_refs": [ + "ref:output-review-decision" + ], + "policy_ref": "policy:payment-review-v1", + "result": { + "status": "succeeded", + "summary": "one payment review decision object emitted" + }, + "subject_ref": "obj:payment-case-042", + "type": "payment.review" + }, + "policy": { + "constraint_refs": [ + "constraint:approved-review-codes", + "constraint:no-direct-settlement" + ], + "id": "policy:payment-review-v1", + "name": "payment-review-policy" + }, + "profile": { + "name": "execution-evidence-operation-accountability-profile", + "version": "0.1" + }, + "provenance": { + "actor_ref": "actor:payment-reviewer", + "id": "prov:payment-review-001", + "input_refs": [ + "ref:input-payment-case", + "ref:input-risk-note" + ], + "operation_ref": "op:payment-review-001", + "output_refs": [ + "ref:output-review-decision" + ], + "subject_ref": "obj:payment-case-042" + }, + "statement_id": "eeoap:payment-review-001", + "subject": { + "digest": "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "id": "obj:payment-case-042", + "locator": "urn:demo:payment-case-042", + "type": "fdo-record" + }, + "timestamp": "2026-04-13T00:00:00Z", + "validation": { + "evidence_ref": "evidence:payment-review-001", + "id": "validation:payment-review-001", + "method": "schema+reference+consistency", + "policy_ref": "policy:payment-review-v1", + "provenance_ref": 
"prov:payment-review-001", + "status": "verifiable", + "validator": "agent-evidence validate-profile" + } +} diff --git a/examples/valid-trust-binding-evidence.json b/examples/valid-trust-binding-evidence.json new file mode 100644 index 0000000..58a7c76 --- /dev/null +++ b/examples/valid-trust-binding-evidence.json @@ -0,0 +1,122 @@ +{ + "actor": { + "id": "actor:metadata-enricher", + "name": "metadata-enricher", + "runtime": "openai-agents", + "type": "agent" + }, + "constraints": [ + { + "description": "Only approved metadata fields may be added.", + "id": "constraint:approved-fields" + }, + { + "description": "The note body must remain unchanged.", + "id": "constraint:no-content-rewrite" + } + ], + "evidence": { + "artifacts": [ + { + "artifact_id": "artifact:operation-log-001", + "digest": "sha256:4444444444444444444444444444444444444444444444444444444444444444", + "locator": "urn:demo:operation-log-001", + "type": "execution-log" + } + ], + "id": "evidence:metadata-enrich-001", + "integrity": { + "artifacts_digest": "sha256:d0470d93af9a5d6df99bf51369de7656ae8c84e8c65e9c1301cf6b095ea5010d", + "references_digest": "sha256:60ae1db47aa67231dd84e0c77a7d5a5a470e7f4474bee9922fb2a78499f75dfc", + "statement_digest": "sha256:a1ed81dc475aa6c0c82e30487c48c4ec31a800869220aeedfa871ebc03928a89" + }, + "operation_ref": "op:metadata-enrich-001", + "policy_ref": "policy:approved-metadata-v1", + "references": [ + { + "digest": "sha256:2222222222222222222222222222222222222222222222222222222222222222", + "locator": "urn:demo:client-note-001", + "object_id": "obj:client-note-001", + "ref_id": "ref:input-note", + "role": "input" + }, + { + "digest": "sha256:3333333333333333333333333333333333333333333333333333333333333333", + "locator": "urn:demo:client-note-001-derived", + "object_id": "obj:client-note-001-derived", + "ref_id": "ref:output-note", + "role": "output" + } + ], + "subject_ref": "obj:client-note-001" + }, + "operation": { + "description": "Add approved metadata tags to 
one client note object.", + "id": "op:metadata-enrich-001", + "input_refs": [ + "ref:input-note" + ], + "output_refs": [ + "ref:output-note" + ], + "policy_ref": "policy:approved-metadata-v1", + "result": { + "status": "succeeded", + "summary": "one derived note object emitted" + }, + "subject_ref": "obj:client-note-001", + "type": "metadata.enrich" + }, + "policy": { + "constraint_refs": [ + "constraint:approved-fields", + "constraint:no-content-rewrite" + ], + "id": "policy:approved-metadata-v1", + "name": "approved-metadata-policy" + }, + "profile": { + "name": "execution-evidence-operation-accountability-profile", + "version": "0.1" + }, + "provenance": { + "actor_ref": "actor:metadata-enricher", + "id": "prov:metadata-enrich-001", + "input_refs": [ + "ref:input-note" + ], + "operation_ref": "op:metadata-enrich-001", + "output_refs": [ + "ref:output-note" + ], + "subject_ref": "obj:client-note-001" + }, + "statement_id": "eeoap:metadata-demo-001", + "subject": { + "digest": "sha256:1111111111111111111111111111111111111111111111111111111111111111", + "id": "obj:client-note-001", + "locator": "urn:demo:client-note-001", + "type": "fdo-record" + }, + "timestamp": "2026-03-30T00:00:00Z", + "validation": { + "evidence_ref": "evidence:metadata-enrich-001", + "id": "validation:metadata-enrich-001", + "method": "schema+reference+consistency+trust-binding", + "policy_ref": "policy:approved-metadata-v1", + "provenance_ref": "prov:metadata-enrich-001", + "status": "verifiable", + "trust_bindings": [ + { + "binding_id": "trust:sigstore-rekor-entry-001", + "locator": "https://rekor.example.com/api/v1/log/entries/demo-001", + "mechanism": "sigstore", + "proof_type": "transparency-log-entry", + "target_digest": "sha256:a1ed81dc475aa6c0c82e30487c48c4ec31a800869220aeedfa871ebc03928a89", + "target_ref": "eeoap:metadata-demo-001", + "verifier_hint": "Recompute evidence.integrity.statement_digest before external verification." 
+ } + ], + "validator": "agent-evidence validate-profile" + } +} diff --git a/integrations/langchain/README.md b/integrations/langchain/README.md index 26b2ae4..d66c2df 100644 --- a/integrations/langchain/README.md +++ b/integrations/langchain/README.md @@ -1,24 +1,64 @@ # LangChain Integration -This integration writes a digest-only Agent Evidence Profile bundle from one -LangChain run. It is an integrity-verifiable evidence bundle, not a hosted -tracing platform and not a court-grade non-repudiation claim. +The single recommended LangChain entry point in this repository is +`LangChainAdapter` in `agent_evidence.integrations.langchain`. -The callback handler is narrowed to one integration point: +Current recommended public API: -- `EvidenceCallbackHandler` -- default `digest_only=True` -- optional `omit_request` / `omit_response` -- no token-by-token bundle persistence +```python +from agent_evidence.integrations.langchain import LangChainAdapter -Run the demo: +adapter = LangChainAdapter.for_output_dir( + "./artifacts/langchain-run", + digest_only=True, + omit_request=False, + omit_response=False, +) + +callbacks = [adapter.callback_handler()] +artifacts = adapter.finalize() +``` + +This adapter keeps the current quickstart path on one surface: + +- callback capture through `EvidenceCallbackHandler` +- JSON `bundle` export +- machine-readable `receipt` +- reviewer-facing `summary` + +Supporting files such as the manifest sidecar, local keys, and runtime JSONL +remain supporting materials, not additional primary outputs. + +Primary runnable example: + +```bash +python examples/langchain_minimal_evidence.py +``` + +See also: + +- `examples/langchain_minimal_evidence.py` +- `docs/cookbooks/langchain_minimal_evidence.md` + +## Alternate / secondary path + +The older AEP bundle directory path remains available as an alternate surface. +It is not the recommended starting point for the current LangChain-first flow. 
+ +Alternate commands: ```bash python integrations/langchain/export_evidence.py agent-evidence verify-bundle --bundle-dir integrations/langchain/langchain-evidence-bundle ``` -Run the fixture gate: +Use that alternate path only if you specifically need the older bundle-directory +shape. The recommended LangChain entry point for current docs and examples is +still `LangChainAdapter` producing `bundle`, `receipt`, and `summary`. + +## Fixture gate + +Run the existing fixture gate: ```bash python scripts/run_profile_gate.py diff --git a/plans/implementation-plan.md b/plans/implementation-plan.md index 8b96136..3b20d33 100644 --- a/plans/implementation-plan.md +++ b/plans/implementation-plan.md @@ -56,3 +56,77 @@ - 明确提出 minimal verification boundary、failure taxonomy、external validation agenda - 保持 Chinese-first、plain language、结构紧凑 - 不修改现有 `paper/submission_tosem/`、blind package 或 review artifacts + +## M8 可选 trust binding 扩展 +- 输入: + - 当前 OAP v0.1 spec/schema/validator/examples/demo + - README 与 cookbook 中现有签名、离线验证、detached anchor 说明 +- 输出: + - `validation.trust_bindings[]` 可选字段 + - 对应 schema 与 validator 校验 + - 1 个 valid trust-binding 样例与 1 个 single-failure invalid 样例 + - README / cookbook / demo / STATUS 中的边界澄清 +- 验收条件: + - trust binding 明确是可选外部验证挂接点,不是强制签名系统 + - 支持多个机制标签,不内置绑定任何单一信任系统 + - validator 只检查本地目标引用与 digest 一致性,不伪装成外部系统验证器 + - 最小 demo 与既有 local signing / verify-export 路径保持不变 + +## M9 EDC augmentation 边界、最小 profile 草案与最小 demo 路径 +- 输入: + - 官方 EDC 文档中的 control plane、extensions、events / callbacks 材料 + - 官方 DSP 规范中的 protocol scope 材料 + - 当前仓库已有 `spec/`、`schema/`、`examples/`、`demo/`、`docs/` 结构 +- 输出: + - `docs/edc/EDC_AUGMENTATION_BOUNDARY.md` + - `docs/edc/edc_minimal_evidence_profile_draft.md` + - `docs/edc/edc_demo_minimal_path.md` + - `README.md` 最小导航入口 + - `docs/STATUS.md` 里程碑记录 +- 验收条件: + - 明确 EDC 是 `agent-evidence` 的 execution-evidence augmentation layer,而不是新主线 + - 明确 EDC 与 `agent-evidence` 的职责边界:前者负责 exchange / contract / transfer governance,后者负责执行证据 + - 
最小 profile 草案只保留独立验证所需字段,不带 secrets、privateProperties、内部实现细节 + - 最小 demo 路径清楚描述 asset -> policy / contract definition -> contract agreement -> transfer process -> evidence bundle -> independent verify + - 明确首个推荐接入面是 control-plane event extension / exporter,而不是 persistence 或 data plane + - 仅引用官方 EDC / DSP 公开材料,不基于二手解读 + +## M10 EDC control-plane event extension 草图与 event mapping +- 输入: + - 官方 EDC adopter / contributor 文档中的 control plane、extensions、events / callbacks 材料 + - 官方 EDC 仓库中 `ServiceExtension`、`EventRouter`、`EventSubscriber`、`EventEnvelope` 与 control-plane event family 源码 + - M9 已产出的边界文档、最小 profile 草案、最小 demo 路径 +- 输出: + - `docs/edc/edc_control_plane_event_extension_sketch.md` + - `docs/edc/edc_event_to_evidence_mapping.md` + - `docs/edc/edc_extension_minimal_structure.md` + - `docs/edc/edc_demo_minimal_path.md` 的 event-extension 视角更新 + - `README.md` 的最小导航补充 + - `docs/STATUS.md` 里程碑记录 +- 验收条件: + - 明确当前只讨论 control plane,不碰 persistence store 改造,不碰 data plane / provisioner / connector 产品化 + - 明确为什么首个推荐切口是 `ServiceExtension` + `EventRouter` + - 明确 `Event` 与 `EventEnvelope` 的职责区分,以及它们对 evidence 去重和时间锚的意义 + - 覆盖 Asset、PolicyDefinition、ContractDefinition、ContractNegotiation、TransferProcess 五个事件面 + - 给出 negotiation / transfer 的最小状态链、哪些事件暂时不进最小 demo、以及两级去重策略 + - 推荐最终 bundle grouping key,并说明为什么 + - 不写 schema JSON,不写 Java 可运行代码 + +## M11 FDO-facing registration / outreach / proposal skeleton +- 输入: + - 当前 canonical package:`Execution Evidence and Operation Accountability Profile v0.1` + - 已有 `spec/`、`schema/`、`examples/`、`demo/`、`submission/`、`docs/fdo-mapping/` + - 用户给出的外部执行目标:GitHub 仓库表面、FDO Testbed 注册、Peter/Sven 外联、LDT4SSC/DS4SSCC 提案 +- 输出: + - `docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md` + - `submission/fdo-testbed-registration-draft.md` + - `submission/peter-sven-outreach-draft.md` + - `submission/ldt4ssc-ds4sscc-module-pitch.md` + - `README.md` / `README.zh-CN.md` 的最小导航补充 + - `docs/STATUS.md` 里程碑记录 +- 验收条件: + - 不新建第二套 profile / 
validator / examples 体系 + - 明确当前 canonical package 名称与 FDO-facing 对象名之间的映射关系 + - 把外部注册、外联、项目插入申请收敛为可直接复用的最小文稿骨架 + - 明确哪些步骤可以在仓库内完成,哪些步骤仍需要登录、审批或人工发送 + - 不虚构已经完成的 GitHub/FDO Testbed/邮件外部动作 diff --git a/pyproject.toml b/pyproject.toml index 7f30e0b..aab699d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "click>=8.1", "jsonschema>=4.23", "pydantic>=2.7", + "reportlab>=4.2,<5.0", ] [project.optional-dependencies] @@ -52,6 +53,7 @@ postgres = [ [project.scripts] agent-evidence = "agent_evidence.cli.main:main" +ae-review-pack-engine = "agent_evidence.review_pack.engine:main" [tool.setuptools] include-package-data = true diff --git a/schema/execution-evidence-operation-accountability-profile-v0.1.schema.json b/schema/execution-evidence-operation-accountability-profile-v0.1.schema.json index ed9c4a9..2a7c9c4 100644 --- a/schema/execution-evidence-operation-accountability-profile-v0.1.schema.json +++ b/schema/execution-evidence-operation-accountability-profile-v0.1.schema.json @@ -435,6 +435,51 @@ "verifiable", "unverifiable" ] + }, + "trust_bindings": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "binding_id", + "mechanism", + "target_ref", + "target_digest", + "locator" + ], + "properties": { + "binding_id": { + "type": "string", + "minLength": 1 + }, + "mechanism": { + "type": "string", + "minLength": 1 + }, + "proof_type": { + "type": "string", + "minLength": 1 + }, + "target_ref": { + "type": "string", + "minLength": 1 + }, + "target_digest": { + "type": "string", + "pattern": "^sha256:[0-9a-f]{64}$" + }, + "locator": { + "type": "string", + "minLength": 1 + }, + "verifier_hint": { + "type": "string", + "minLength": 1 + } + } + } } } } diff --git a/spec/execution-evidence-operation-accountability-profile-v0.1.md b/spec/execution-evidence-operation-accountability-profile-v0.1.md index c78f91b..bd1e415 100644 --- 
a/spec/execution-evidence-operation-accountability-profile-v0.1.md +++ b/spec/execution-evidence-operation-accountability-profile-v0.1.md @@ -19,6 +19,10 @@ It is intentionally narrow. It covers only: It does not attempt to define a general registry, a full governance platform, or a full cryptographic trust fabric. +This profile may optionally carry external trust bindings, but those bindings +are only pointers to an external verification source. They are not the same as +the local manifest-signing flow implemented elsewhere in this repository. + ## 2. Core Object Model One operation accountability statement consists of the following top-level @@ -93,6 +97,7 @@ The minimal required fields are: The profile keeps optional fields to a minimum: - `operation.description` +- `validation.trust_bindings[]` The required locator fields remain flexible in value shape: @@ -100,6 +105,19 @@ The required locator fields remain flexible in value shape: - `evidence.references[].locator` may be a URI, path, or persistent identifier placeholder - `evidence.artifacts[].locator` may be a URI, path, or persistent identifier placeholder +When present, each `validation.trust_bindings[]` entry is one optional external +verification hook. The shape is intentionally generic so it can point to +different trust systems such as Sigstore, Notary v2, KERI, or a future +registry-specific verifier without forcing any of them into the core profile. + +The minimal suggested fields are: + +- `binding_id`: local identifier for the binding entry +- `mechanism`: free-text trust mechanism label, for example `sigstore` +- `target_ref`: local target this binding covers +- `target_digest`: digest expected by the external trust system +- `locator`: URI or other pointer to the external proof material + No optional extension fields are required for conformance in v0.1. ## 5. 
Field Relationships @@ -121,6 +139,12 @@ The minimum link rules are: - `validation.evidence_ref` must equal `evidence.id` - `validation.provenance_ref` must equal `provenance.id` - `validation.policy_ref` must equal `policy.id` +- if `validation.trust_bindings[]` is present, every `target_ref` must resolve + either to `statement_id` or to one `evidence.artifacts[].artifact_id` +- if a trust binding targets `statement_id`, `target_digest` must equal + `evidence.integrity.statement_digest` +- if a trust binding targets an artifact, `target_digest` must equal that + artifact's `digest` ## 6. Compliance Conditions @@ -142,6 +166,12 @@ following hold: 10. `evidence.integrity.statement_digest` equals the canonical digest of the statement core: `actor`, `subject`, `operation`, `policy`, `constraints`, and `provenance`. +11. If `validation.trust_bindings[]` is present, each trust binding resolves to + a local statement or artifact target and carries the matching local digest. + +The profile does not require the validator to verify the external system named +by `validation.trust_bindings[]`. In v0.1, the validator only checks that the +binding is well-formed and internally consistent with the local statement. ## 7. Failure Conditions @@ -154,6 +184,8 @@ Validation fails when at least one of the following occurs: - an output ref points to a non-output evidence reference - `policy`, `provenance`, and `evidence` do not agree on the linked entities - any integrity digest fails recomputation +- a trust binding points to a target that is not defined locally +- a trust binding carries a digest that does not match the resolved local target ## 8. 
Minimal JSON Expression Suggestion @@ -250,7 +282,17 @@ Validation fails when at least one of the following occurs: "policy_ref": "policy:approved-metadata-v1", "validator": "agent-evidence validate-profile", "method": "schema+reference+consistency", - "status": "verifiable" + "status": "verifiable", + "trust_bindings": [ + { + "binding_id": "trust:sigstore-demo-001", + "mechanism": "sigstore", + "proof_type": "transparency-log-entry", + "target_ref": "eeoap:demo-001", + "target_digest": "sha256:", + "locator": "https://rekor.example.com/api/v1/log/entries/demo-001" + } + ] } } ``` diff --git a/submission/fdo-testbed-registration-draft.md b/submission/fdo-testbed-registration-draft.md new file mode 100644 index 0000000..b566223 --- /dev/null +++ b/submission/fdo-testbed-registration-draft.md @@ -0,0 +1,44 @@ +# FDO Testbed Registration Draft + +Status: draft only. No external submission has been performed from this +repository workspace. + +## Registration Target + +- Registry surface: FDO Testbed Type Registry +- Page flow expected by user task: `All Objects` -> `Add New Object` + +## Suggested Form Values + +| Field | Value | +| --- | --- | +| Object name | `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` | +| Object type | `Profile` | +| Short description | `A minimal profile for recording and validating one policy-constrained agent operation with explicit policy, provenance, evidence, and validation links.` | +| Canonical package name | `Execution Evidence and Operation Accountability Profile v0.1` | +| Canonical profile id | `execution-evidence-operation-accountability-profile@0.1` | +| Repository URL | `https://github.com/joy7758/agent-evidence` | +| Spec URL | `https://github.com/joy7758/agent-evidence/blob/main/spec/execution-evidence-operation-accountability-profile-v0.1.md` | +| Schema URL | `https://github.com/joy7758/agent-evidence/blob/main/schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` | +| Example URL | 
`https://github.com/joy7758/agent-evidence/blob/main/examples/minimal-valid-evidence.json` | +| Validator URL | `https://github.com/joy7758/agent-evidence/blob/main/agent_evidence/oap.py` | +| Demo URL | `https://github.com/joy7758/agent-evidence/blob/main/demo/README.md` | + +## Submission Notes + +- Keep the registry-facing name as `FDO_OPERATION_EVIDENCE_PROFILE_V0_1`. +- Keep the implementation-facing name unchanged inside the repository. +- Present it as complementary to the already registered + `ARO_AUDIT_PROFILE_V1`, not as a rename or replacement. +- Do not claim general governance coverage, registry infrastructure, or full + cryptographic trust infrastructure. +- Position the object as a minimal verifiable profile for one operation + accountability statement. + +## Manual Submit Checklist + +1. Confirm the public GitHub repository URL. +2. Paste the URLs above into the registry form. +3. Submit the object for review. +4. Capture the resulting registry link. +5. Update outreach material with the final registry link. diff --git a/submission/ldt4ssc-ds4sscc-module-pitch.md b/submission/ldt4ssc-ds4sscc-module-pitch.md new file mode 100644 index 0000000..abe632a --- /dev/null +++ b/submission/ldt4ssc-ds4sscc-module-pitch.md @@ -0,0 +1,74 @@ +# LDT4SSC / DS4SSCC Module Pitch Skeleton + +## Working Title + +`FDO_OPERATION_EVIDENCE_PROFILE_V0_1 as a minimal AI evidence component` + +## One-Paragraph Pitch + +This module proposes a minimal, verifiable profile for one policy-constrained +agent operation. It does not try to replace broader project architectures. It +adds a small accountability layer that records who executed an operation, which +object was acted on, which policy constrained the action, which evidence +artifacts were produced, and how an independent validator can check the result. 
+ +## Problem + +LDT4SSC and DS4SSCC style environments may already manage objects, policies, or +workflow context, but they still need a compact artifact that can answer one +bounded question after the fact: what exactly happened in this operation, under +which rule set, and with which verifiable evidence? + +## Proposed Module + +- profile: one operation accountability statement +- schema: machine-checkable JSON contract +- validator: structure, required fields, reference closure, and policy / + provenance / evidence consistency checks +- demo: one end-to-end path from object load to validation report + +## What The Module Adds + +- a narrow AI evidence boundary +- a stable handoff artifact for third-party review +- machine-readable validation output with explicit error codes +- a path to attach optional external trust bindings without making them + mandatory for local conformance + +## Relationship To Existing Audit-facing Objects + +- `FDO_OPERATION_EVIDENCE_PROFILE_V0_1` is the operation-evidence profile in + this package. +- `ARO_AUDIT_PROFILE_V1` remains the audit-facing sibling object already used + for audit-ready declarations and audit pointers. +- The proposed module should be framed as adding an operation-level evidence + layer alongside `ARO_AUDIT_PROFILE_V1`, not replacing it. + +## What The Module Does Not Add + +- a general governance platform +- a new registry infrastructure +- a full multi-agent orchestration layer +- a complete cryptographic trust fabric +- a full cross-flavor FDO mapping + +## Integration Boundary + +The recommended insertion point is after one concrete operation has completed +and before evidence leaves the local runtime boundary. The host project keeps +its own orchestration, object lifecycle, and policy systems. This module only +adds the minimal accountability statement and validator path. 
+ +## Review Package + +- repository URL: `https://github.com/joy7758/agent-evidence` +- spec: `spec/execution-evidence-operation-accountability-profile-v0.1.md` +- schema: `schema/execution-evidence-operation-accountability-profile-v0.1.schema.json` +- example: `examples/minimal-valid-evidence.json` +- validator: `agent_evidence/oap.py` +- demo: `demo/run_operation_accountability_demo.py` + +## Proposed Next Step + +Request a short technical review focused on fit and insertion boundary, not on +full standardization or productization. diff --git a/submission/manuscript-baselines.md b/submission/manuscript-baselines.md new file mode 100644 index 0000000..c5dd71a --- /dev/null +++ b/submission/manuscript-baselines.md @@ -0,0 +1,28 @@ +# Manuscript baselines + +## B1-minimal-frozen + +- naming: `Execution Evidence and Operation Accountability Profile v0.1` +- counts: `1 valid / 3 invalid / 1 demo` +- claim: minimal verification boundary +- do not mention: high-risk scenario entry, `3 valid / 7 invalid` current-main counts, AEP live-chain language + +## B2-extended-middle + +- naming: `A Minimal Verifiable Profile for Operation Accountability in FDO-Based Agent Systems` +- counts: `2 valid / 5 invalid` +- status: parked unless fully rewritten +- rule: do not partially mix with B1 or B4 + +## B3-aep-live-chain + +- naming: `Agent Evidence Profile (AEP)` +- claim: runtime evidence bundle, offline verification, tamper failure, runtime provenance +- do not mention: operation-accountability example counts + +## B4-high-risk-current-main + +- naming: operation-accountability current-main high-risk scenario entry +- counts: `3 valid / 7 invalid` +- claim: reviewer-facing high-risk scenario entry +- best fit: future AI Act / high-risk / compliance-interface manuscripts diff --git a/submission/package-manifest.md b/submission/package-manifest.md index fa77c14..6ebbee8 100644 --- a/submission/package-manifest.md +++ b/submission/package-manifest.md @@ -25,3 +25,7 @@ - final 
handoff -> `submission/final-handoff.md` - commit / PR note -> `submission/commit-and-pr-note.md` - package manifest -> `submission/package-manifest.md` +- FDO registration pack -> `docs/fdo-mapping/fdo-operation-evidence-profile-registration-pack.md` +- FDO Testbed registration draft -> `submission/fdo-testbed-registration-draft.md` +- Peter / Sven outreach draft -> `submission/peter-sven-outreach-draft.md` +- LDT4SSC / DS4SSCC module pitch -> `submission/ldt4ssc-ds4sscc-module-pitch.md` diff --git a/submission/peter-sven-outreach-draft.md b/submission/peter-sven-outreach-draft.md new file mode 100644 index 0000000..9d03cbb --- /dev/null +++ b/submission/peter-sven-outreach-draft.md @@ -0,0 +1,52 @@ +# Peter / Sven Outreach Draft + +## Subject + +`Draft registration pack for FDO_OPERATION_EVIDENCE_PROFILE_V0_1` + +## Email Body + +Hello Peter and Sven, + +I have prepared a minimal registration pack for +`FDO_OPERATION_EVIDENCE_PROFILE_V0_1`, using the current repository package +`Execution Evidence and Operation Accountability Profile v0.1` as the +canonical implementation surface. + +The package currently includes: + +- a profile spec +- a JSON schema +- one valid example and single-failure invalid examples +- a profile-aware validator with machine-readable JSON output and explicit error codes +- one runnable end-to-end demo + +Repository entry: + +`https://github.com/joy7758/agent-evidence` + +Suggested registry-facing description: + +`A minimal profile for recording and validating one policy-constrained agent operation with explicit policy, provenance, evidence, and validation links.` + +I would appreciate your feedback on two points: + +1. whether this object is scoped correctly for an FDO Testbed `Profile` entry +2. 
whether it could be positioned as a technical module for LDT4SSC or DS4SSCC, + specifically as an AI evidence / operation-accountability component + +If useful, I can also share the spec, schema, example, validator entry, and a +short demo path as a compact review package. + +Best regards, + +`[name]` + +## Attach / Link Checklist + +- repository URL +- registry URL once available +- spec link +- schema link +- valid example link +- demo link diff --git a/tests/test_langchain_adapter.py b/tests/test_langchain_adapter.py new file mode 100644 index 0000000..fb586bb --- /dev/null +++ b/tests/test_langchain_adapter.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import json +from pathlib import Path +from uuid import uuid4 + +from langchain_core.runnables import RunnableLambda +from langchain_core.tools import tool + +from agent_evidence.integrations.langchain import ( + EvidenceCallbackHandler, + LangChainAdapter, +) + + +@tool +def multiply(x: int, y: int) -> int: + """Multiply two integers.""" + + return x * y + + +def test_langchain_adapter_finalize_writes_normalized_artifacts(tmp_path: Path) -> None: + adapter = LangChainAdapter.for_output_dir( + tmp_path / "langchain-run", + digest_only=True, + omit_request=False, + omit_response=False, + base_tags=["baseline"], + ) + handler = adapter.callback_handler() + + uppercase = RunnableLambda(lambda text: text.upper()).with_config({"run_name": "uppercase"}) + run_config = { + "callbacks": [handler], + "metadata": {"scenario": "langchain-adapter-test"}, + "tags": ["baseline"], + } + + uppercase.invoke("hello world", config=run_config) + multiply.invoke({"x": 6, "y": 7}, config=run_config) + + model_run_id = uuid4() + handler.on_chat_model_start( + serialized={"name": "mock-model"}, + messages=[[{"type": "human", "content": "hello world"}]], + run_id=model_run_id, + name="mock-model", + metadata={"scenario": "langchain-adapter-test"}, + ) + handler.on_llm_end( + {"text": "HELLO WORLD"}, + run_id=model_run_id, + 
name="mock-model", + metadata={"scenario": "langchain-adapter-test"}, + ) + + artifacts = adapter.finalize() + + assert artifacts.bundle_path.exists() + assert artifacts.receipt_path.exists() + assert artifacts.summary_path.exists() + assert artifacts.receipt["ok"] is True + assert artifacts.summary["ok"] is True + assert artifacts.summary["receipt_path"] == str(artifacts.receipt_path) + assert artifacts.summary["bundle_path"] == str(artifacts.bundle_path) + assert artifacts.summary["verify_result"] == artifacts.receipt + assert artifacts.supporting_files["manifest"].exists() + assert artifacts.supporting_files["public_key"].exists() + assert artifacts.supporting_files["private_key"].exists() + assert artifacts.supporting_files["runtime_events"].exists() + + written_receipt = json.loads(artifacts.receipt_path.read_text(encoding="utf-8")) + written_summary = json.loads(artifacts.summary_path.read_text(encoding="utf-8")) + assert written_receipt == artifacts.receipt + assert written_summary == artifacts.summary + + +def test_langchain_adapter_reuses_handler_and_finalize_result(tmp_path: Path) -> None: + adapter = LangChainAdapter.for_output_dir(tmp_path / "langchain-run") + + first_handler = adapter.callback_handler() + second_handler = adapter.callback_handler() + + assert isinstance(first_handler, EvidenceCallbackHandler) + assert first_handler is second_handler + + chain = RunnableLambda(lambda text: text.upper()).with_config({"run_name": "uppercase"}) + chain.invoke("hello", config={"callbacks": [first_handler]}) + + first_artifacts = adapter.finalize() + second_artifacts = adapter.finalize() + + assert first_artifacts is second_artifacts + assert first_artifacts.receipt["ok"] is True diff --git a/tests/test_openai_compatible_adapter.py b/tests/test_openai_compatible_adapter.py new file mode 100644 index 0000000..bee40aa --- /dev/null +++ b/tests/test_openai_compatible_adapter.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +import json +from 
dataclasses import dataclass +from pathlib import Path +from typing import Any + +from agent_evidence.integrations.openai_compatible import OpenAICompatibleAdapter + + +class FakeChatCompletionResponse: + def __init__(self, *, id: str, model: str, content: str) -> None: + self.id = id + self.model = model + self.content = content + + def model_dump(self, mode: str = "python") -> dict[str, Any]: + return { + "id": self.id, + "model": self.model, + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": self.content}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 4, "completion_tokens": 3}, + } + + +class FakeChatCompletionsClient: + def __init__(self, response: Any): + self.calls: list[dict[str, Any]] = [] + self._response = response + + def create(self, **kwargs: Any) -> Any: + self.calls.append(kwargs) + return self._response + + +class FakeResponsesClient: + def __init__(self, response: Any): + self.calls: list[dict[str, Any]] = [] + self._response = response + + def create(self, **kwargs: Any) -> Any: + self.calls.append(kwargs) + return self._response + + +@dataclass(frozen=True) +class ProviderCase: + name: str + provider_label: str + model: str + base_url: str | None + operation: str + request: dict[str, Any] + response: Any + + +def _bundle_contract_shape(bundle_payload: dict[str, Any]) -> dict[str, Any]: + return { + "top_level_keys": sorted(bundle_payload.keys()), + "manifest_keys": sorted(bundle_payload["manifest"].keys()), + "signature_keys": sorted(bundle_payload["signatures"][0].keys()), + "record_keys": [sorted(record.keys()) for record in bundle_payload["records"]], + "event_keys": [sorted(record["event"].keys()) for record in bundle_payload["records"]], + "context_keys": [ + sorted(record["event"]["context"].keys()) for record in bundle_payload["records"] + ], + "hash_keys": [sorted(record["hashes"].keys()) for record in bundle_payload["records"]], + "event_types": [record["event"]["event_type"] for record in 
bundle_payload["records"]], + } + + +def _run_provider_case( + tmp_path: Path, + case: ProviderCase, + *, + api_key: str, +) -> dict[str, Any]: + adapter = OpenAICompatibleAdapter.for_output_dir( + tmp_path / case.name, + provider_label=case.provider_label, + model=case.model, + api_key=api_key, + base_url=case.base_url, + digest_only=False, + omit_request=False, + omit_response=False, + temperature=0.2, + max_output_tokens=64, + ) + + if case.operation == "chat.completions.create": + client = FakeChatCompletionsClient(case.response) + + def invoke() -> Any: + return client.create(model=case.model, messages=case.request["messages"]) + + else: + client = FakeResponsesClient(case.response) + + def invoke() -> Any: + return client.create(model=case.model, input=case.request["input"]) + + response = adapter.record_call( + operation=case.operation, + request={**case.request, "api_key": api_key}, + invoke=invoke, + metadata={"case": case.name}, + tags=["compatibility"], + ) + artifacts = adapter.finalize() + bundle_payload = json.loads(artifacts.bundle_path.read_text(encoding="utf-8")) + receipt_payload = json.loads(artifacts.receipt_path.read_text(encoding="utf-8")) + summary_payload = json.loads(artifacts.summary_path.read_text(encoding="utf-8")) + runtime_text = artifacts.supporting_files["runtime_events"].read_text(encoding="utf-8") + + return { + "adapter": adapter, + "artifacts": artifacts, + "bundle_payload": bundle_payload, + "receipt_payload": receipt_payload, + "summary_payload": summary_payload, + "runtime_text": runtime_text, + "response": response, + "client_calls": client.calls, + } + + +def test_openai_compatible_adapter_records_provider_call_and_redacts_api_key( + tmp_path: Path, +) -> None: + api_key = "sk-test-123" + adapter = OpenAICompatibleAdapter.for_output_dir( + tmp_path / "openai-compatible-run", + provider_label="openai", + model="gpt-4.1-mini", + api_key=api_key, + base_url="https://api.openai.com/v1", + digest_only=False, + omit_request=False, 
+ omit_response=False, + ) + + response = adapter.record_call( + operation="responses.create", + request={ + "input": "hello world", + "api_key": api_key, + "prompt": "sensitive prompt text", + }, + invoke=lambda: { + "id": "resp_123", + "output_text": "HELLO WORLD", + "usage": {"input_tokens": 2, "output_tokens": 2}, + }, + metadata={"request_id": "req-123"}, + tags=["smoke"], + ) + + assert response["id"] == "resp_123" + + records = adapter.store.list() + assert [record.event.event_type for record in records] == [ + "provider.call.start", + "provider.call.end", + ] + + start_record, end_record = records + assert start_record.event.context.source == "openai_compatible" + assert start_record.event.context.component == "provider_call" + assert start_record.event.context.source_event_type == "on_provider_call_start" + assert start_record.event.context.name == "responses.create" + assert "openai-compatible" in start_record.event.context.tags + assert "openai" in start_record.event.context.tags + assert start_record.event.metadata["provider_label"] == "openai" + assert start_record.event.metadata["model"] == "gpt-4.1-mini" + assert start_record.event.metadata["base_url"] == "https://api.openai.com/v1" + assert start_record.event.inputs["mode"] == "inline" + assert start_record.event.inputs["content"]["api_key"] == "[REDACTED]" + assert start_record.event.inputs["content"]["prompt"] == "[REDACTED]" + assert end_record.event.outputs["content"]["output_text"] == "HELLO WORLD" + + +def test_openai_compatible_adapter_preserves_artifact_contract_across_provider_switches( + tmp_path: Path, +) -> None: + api_key = "sk-provider-switch-should-not-appear" + default_case = ProviderCase( + name="default-openai", + provider_label="openai", + model="gpt-4.1-mini", + base_url=None, + operation="chat.completions.create", + request={"messages": [{"role": "user", "content": "hello from default config"}]}, + response=FakeChatCompletionResponse( + id="chatcmpl_default", + 
model="gpt-4.1-mini", + content="hello from default config", + ), + ) + alternate_case = ProviderCase( + name="alternate-base-url", + provider_label="lm-studio", + model="qwen2.5-7b-instruct", + base_url="http://localhost:1234/v1", + operation="responses.create", + request={"input": "hello from alternate config"}, + response={ + "id": "resp_alternate", + "output": [{"type": "output_text", "text": "hello from alternate config"}], + "usage": {"input_tokens": 5, "output_tokens": 4}, + }, + ) + + default_result = _run_provider_case(tmp_path, default_case, api_key=api_key) + alternate_result = _run_provider_case(tmp_path, alternate_case, api_key=api_key) + + for result, case in ( + (default_result, default_case), + (alternate_result, alternate_case), + ): + artifacts = result["artifacts"] + bundle_payload = result["bundle_payload"] + receipt_payload = result["receipt_payload"] + summary_payload = result["summary_payload"] + runtime_text = result["runtime_text"] + + assert len(result["client_calls"]) == 1 + assert artifacts.bundle_path.exists() + assert artifacts.receipt_path.exists() + assert artifacts.summary_path.exists() + assert artifacts.supporting_files["manifest"].exists() + assert artifacts.supporting_files["public_key"].exists() + assert artifacts.supporting_files["private_key"].exists() + assert artifacts.supporting_files["runtime_events"].exists() + + assert receipt_payload["ok"] is True + assert summary_payload["ok"] is True + assert summary_payload["provider_label"] == case.provider_label + assert summary_payload["model"] == case.model + assert summary_payload["base_url"] == case.base_url + assert summary_payload["call_count"] == 1 + assert summary_payload["receipt_path"] == str(artifacts.receipt_path) + assert summary_payload["verify_result"] == receipt_payload + + start_record, end_record = bundle_payload["records"] + assert start_record["event"]["event_type"] == "provider.call.start" + assert end_record["event"]["event_type"] == "provider.call.end" + 
assert start_record["event"]["metadata"]["provider_label"] == case.provider_label + assert start_record["event"]["metadata"]["model"] == case.model + assert ( + start_record["event"]["context"]["attributes"]["provider_label"] == case.provider_label + ) + assert start_record["event"]["context"]["attributes"]["model"] == case.model + + if case.base_url is None: + assert "base_url" not in start_record["event"]["metadata"] + assert "base_url" not in start_record["event"]["context"]["attributes"] + else: + assert start_record["event"]["metadata"]["base_url"] == case.base_url + assert start_record["event"]["context"]["attributes"]["base_url"] == case.base_url + + bundle_text = artifacts.bundle_path.read_text(encoding="utf-8") + summary_text = artifacts.summary_path.read_text(encoding="utf-8") + assert api_key not in bundle_text + assert api_key not in summary_text + assert api_key not in runtime_text + + default_contract = _bundle_contract_shape(default_result["bundle_payload"]) + alternate_contract = _bundle_contract_shape(alternate_result["bundle_payload"]) + + assert default_contract == alternate_contract + assert set(default_result["receipt_payload"]) == set(alternate_result["receipt_payload"]) + assert set(default_result["summary_payload"]) == set(alternate_result["summary_payload"]) + assert set(default_result["artifacts"].supporting_files) == set( + alternate_result["artifacts"].supporting_files + ) + + default_end_record = default_result["bundle_payload"]["records"][1] + alternate_end_record = alternate_result["bundle_payload"]["records"][1] + assert ( + default_end_record["event"]["outputs"]["content"]["choices"][0]["message"]["content"] + == "hello from default config" + ) + assert alternate_end_record["event"]["outputs"]["content"]["output"][0]["text"] == ( + "hello from alternate config" + ) diff --git a/tests/test_operation_accountability_profile.py b/tests/test_operation_accountability_profile.py index 1c83d0d..da12ebb 100644 --- 
a/tests/test_operation_accountability_profile.py +++ b/tests/test_operation_accountability_profile.py @@ -15,6 +15,8 @@ [ "minimal-valid-evidence.json", "valid-retention-review-evidence.json", + "valid-high-risk-payment-review-evidence.json", + "valid-trust-binding-evidence.json", ], ) def test_valid_operation_accountability_profile_passes(filename: str) -> None: @@ -36,6 +38,17 @@ def test_valid_operation_accountability_profile_passes(filename: str) -> None: "unresolved_validation_provenance_ref", 1, ), + ("invalid-high-risk-unclosed-reference.json", "unresolved_output_ref", 1), + ( + "invalid-high-risk-policy-link-broken.json", + "unresolved_evidence_policy_ref", + 1, + ), + ( + "invalid-trust-binding-digest-mismatch.json", + "trust_binding_target_digest_mismatch", + 1, + ), ], ) def test_invalid_operation_accountability_profiles_fail( @@ -49,12 +62,19 @@ def test_invalid_operation_accountability_profiles_fail( assert expected_code in issue_codes -def test_validate_profile_cli_command() -> None: +@pytest.mark.parametrize( + "filename", + [ + "minimal-valid-evidence.json", + "valid-high-risk-payment-review-evidence.json", + ], +) +def test_validate_profile_cli_command(filename: str) -> None: runner = CliRunner() result = runner.invoke( main, - ["validate-profile", str(EXAMPLES / "minimal-valid-evidence.json")], + ["validate-profile", str(EXAMPLES / filename)], ) assert result.exit_code == 0, result.output diff --git a/tests/test_quickstart_smoke.py b/tests/test_quickstart_smoke.py new file mode 100644 index 0000000..b2b0f1e --- /dev/null +++ b/tests/test_quickstart_smoke.py @@ -0,0 +1,81 @@ +import json +import shutil +import subprocess +import sys +from pathlib import Path + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[1] + + +def _agent_evidence_cli() -> Path: + candidates = [ + Path(sys.executable).with_name("agent-evidence"), + Path(sys.executable).with_name("agent-evidence.exe"), + 
Path(sys.executable).resolve().with_name("agent-evidence"), + Path(sys.executable).resolve().with_name("agent-evidence.exe"), + ] + for candidate in candidates: + if candidate.exists(): + return candidate + + resolved = shutil.which("agent-evidence") + if resolved is None: + raise AssertionError( + "agent-evidence CLI was not found in the active test environment. " + "This smoke test assumes the quickstart install step is already satisfied." + ) + return Path(resolved) + + +def test_quickstart_langchain_minimal_path_smoke(tmp_path: Path) -> None: + repo_root = _repo_root() + output_dir = tmp_path / "quickstart-output" + example_path = repo_root / "examples" / "langchain_minimal_evidence.py" + cli_path = _agent_evidence_cli() + + example_result = subprocess.run( + [sys.executable, str(example_path), "--output-dir", str(output_dir)], + cwd=repo_root, + capture_output=True, + text=True, + check=False, + ) + assert example_result.returncode == 0, example_result.stderr + + example_payload = json.loads(example_result.stdout) + assert example_payload["ok"] is True + + bundle_path = output_dir / "langchain-evidence.bundle.json" + public_key_path = output_dir / "manifest-public.pem" + receipt_path = output_dir / "receipt.json" + summary_path = output_dir / "summary.json" + + assert bundle_path.exists() + assert public_key_path.exists() + + with receipt_path.open("w", encoding="utf-8") as receipt_file: + verify_result = subprocess.run( + [ + str(cli_path), + "verify-export", + "--bundle", + str(bundle_path), + "--public-key", + str(public_key_path), + ], + cwd=repo_root, + stdout=receipt_file, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + assert verify_result.returncode == 0, verify_result.stderr + + receipt_payload = json.loads(receipt_path.read_text(encoding="utf-8")) + assert receipt_payload["ok"] is True + + assert summary_path.exists() + summary_payload = json.loads(summary_path.read_text(encoding="utf-8")) + assert summary_payload["ok"] is True diff --git 
a/tests/test_review_pack_assembler.py b/tests/test_review_pack_assembler.py new file mode 100644 index 0000000..42f03b1 --- /dev/null +++ b/tests/test_review_pack_assembler.py @@ -0,0 +1,229 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from agent_evidence.integrations import LangChainAdapter +from agent_evidence.integrations.openai_compatible import OpenAICompatibleAdapter +from agent_evidence.review_pack import ReviewPackAssembler + +try: + from langchain_core.runnables import RunnableLambda + from langchain_core.tools import tool +except ModuleNotFoundError as exc: # pragma: no cover - runtime dependency guard + raise ModuleNotFoundError( + "langchain-core is required for test_review_pack_assembler. Install agent-evidence " + "with the [langchain] or [dev] extra." + ) from exc + + +@tool +def multiply(x: int, y: int) -> int: + """Multiply two integers.""" + + return x * y + + +def _build_langchain_artifacts(tmp_path: Path): + adapter = LangChainAdapter.for_output_dir( + tmp_path / "langchain-run", + digest_only=True, + omit_request=False, + omit_response=False, + base_tags=["review-pack"], + ) + handler = adapter.callback_handler() + uppercase = RunnableLambda(lambda text: text.upper()).with_config({"run_name": "uppercase"}) + + uppercase.invoke( + "hello world", + config={ + "callbacks": [handler], + "metadata": {"scenario": "review-pack"}, + "tags": ["review-pack"], + }, + ) + multiply.invoke( + {"x": 6, "y": 7}, + config={ + "callbacks": [handler], + "metadata": {"scenario": "review-pack"}, + "tags": ["review-pack"], + }, + ) + + return adapter.finalize() + + +def _build_openai_compatible_artifacts(tmp_path: Path): + adapter = OpenAICompatibleAdapter.for_output_dir( + tmp_path / "openai-compatible-run", + provider_label="openai", + model="gpt-4.1-mini", + api_key="sk-review-pack-should-not-appear", + base_url="https://api.openai.com/v1", + digest_only=True, + omit_request=False, + omit_response=False, + ) + 
adapter.record_call( + operation="responses.create", + request={"input": "hello world", "api_key": "sk-review-pack-should-not-appear"}, + invoke=lambda: { + "id": "resp_review_pack", + "output": [{"type": "output_text", "text": "HELLO WORLD"}], + }, + ) + return adapter.finalize() + + +def test_review_pack_assembler_packages_primary_outputs_and_excludes_private_key_by_default( + tmp_path: Path, +) -> None: + artifacts = _build_langchain_artifacts(tmp_path) + assembler = ReviewPackAssembler.for_output_dir(tmp_path / "review-pack") + + pack = assembler.assemble( + bundle_path=artifacts.bundle_path, + receipt_path=artifacts.receipt_path, + summary_path=artifacts.summary_path, + supporting_files={ + "manifest": artifacts.supporting_files["manifest"], + "public_key": artifacts.supporting_files["public_key"], + "runtime_events": artifacts.supporting_files["runtime_events"], + "private_key": artifacts.supporting_files["private_key"], + }, + ) + + assert pack.primary_files["bundle"].exists() + assert pack.primary_files["receipt"].exists() + assert pack.primary_files["summary"].exists() + assert pack.report_path.exists() + assert pack.report_pdf_path.exists() + assert pack.report_pdf_path.read_bytes().startswith(b"%PDF") + assert pack.index_path.exists() + assert "private_key" not in pack.supporting_files + assert pack.supporting_files["manifest"].exists() + assert pack.supporting_files["public_key"].exists() + assert pack.supporting_files["runtime_events"].exists() + assert not (pack.pack_dir / "supporting" / "manifest-private.pem").exists() + + index_payload = json.loads(pack.index_path.read_text(encoding="utf-8")) + receipt_payload = json.loads(pack.primary_files["receipt"].read_text(encoding="utf-8")) + assert index_payload["primary_files"] == { + "bundle": "primary/bundle.json", + "receipt": "primary/receipt.json", + "summary": "primary/summary.json", + } + assert index_payload["supporting_files"] == { + "manifest": "supporting/manifest.json", + "public_key": 
"supporting/manifest-public.pem", + "runtime_events": "supporting/runtime-events.jsonl", + } + assert index_payload["excluded_supporting_files"] == ["private_key"] + assert index_payload["receipt_facts"]["ok"] == receipt_payload["ok"] + assert index_payload["receipt_facts"]["issues"] == receipt_payload["issues"] + assert index_payload["receipt_facts"]["record_count"] == receipt_payload["record_count"] + + report_text = pack.report_path.read_text(encoding="utf-8") + assert "# 审阅报告" in report_text + assert "## 总体状态" in report_text + assert "## 交付物清单" in report_text + assert "## 校验结果" in report_text + assert "## 问题摘要" in report_text + assert "## 证据引用" in report_text + assert "## 审阅备注" in report_text + assert "primary/receipt.json" in report_text + + +def test_review_pack_assembler_supporting_files_are_optional(tmp_path: Path) -> None: + artifacts = _build_openai_compatible_artifacts(tmp_path) + assembler = ReviewPackAssembler.for_output_dir(tmp_path / "review-pack") + + pack = assembler.assemble( + bundle_path=artifacts.bundle_path, + receipt_path=artifacts.receipt_path, + summary_path=artifacts.summary_path, + supporting_files={"manifest": tmp_path / "missing.manifest.json"}, + ) + + assert pack.primary_files["bundle"].exists() + assert pack.primary_files["receipt"].exists() + assert pack.primary_files["summary"].exists() + assert pack.report_pdf_path.exists() + assert pack.report_pdf_path.read_bytes().startswith(b"%PDF") + assert pack.supporting_files == {} + assert not (pack.pack_dir / "supporting").exists() + + index_payload = json.loads(pack.index_path.read_text(encoding="utf-8")) + assert index_payload["supporting_files"] == {} + assert index_payload["missing_supporting_files"] == ["manifest"] + + report_text = pack.report_path.read_text(encoding="utf-8") + assert "缺少可选附属文件" in report_text + + +def test_review_pack_assembler_preserves_pack_shape_across_adapter_lines(tmp_path: Path) -> None: + langchain_artifacts = _build_langchain_artifacts(tmp_path / 
"langchain") + openai_artifacts = _build_openai_compatible_artifacts(tmp_path / "openai") + + langchain_pack = ReviewPackAssembler.for_output_dir(tmp_path / "langchain-pack").assemble( + bundle_path=langchain_artifacts.bundle_path, + receipt_path=langchain_artifacts.receipt_path, + summary_path=langchain_artifacts.summary_path, + supporting_files={ + "manifest": langchain_artifacts.supporting_files["manifest"], + "public_key": langchain_artifacts.supporting_files["public_key"], + "runtime_events": langchain_artifacts.supporting_files["runtime_events"], + }, + ) + openai_pack = ReviewPackAssembler.for_output_dir(tmp_path / "openai-pack").assemble( + bundle_path=openai_artifacts.bundle_path, + receipt_path=openai_artifacts.receipt_path, + summary_path=openai_artifacts.summary_path, + supporting_files={ + "manifest": openai_artifacts.supporting_files["manifest"], + "public_key": openai_artifacts.supporting_files["public_key"], + "runtime_events": openai_artifacts.supporting_files["runtime_events"], + }, + ) + + langchain_index = json.loads(langchain_pack.index_path.read_text(encoding="utf-8")) + openai_index = json.loads(openai_pack.index_path.read_text(encoding="utf-8")) + + assert ( + set(langchain_pack.primary_files) + == set(openai_pack.primary_files) + == { + "bundle", + "receipt", + "summary", + } + ) + assert ( + set(langchain_pack.supporting_files) + == set(openai_pack.supporting_files) + == { + "manifest", + "public_key", + "runtime_events", + } + ) + assert set(langchain_index["receipt_facts"]) == set(openai_index["receipt_facts"]) + assert set(langchain_index["summary_orientation"]) == set(openai_index["summary_orientation"]) + assert set(langchain_index["primary_files"]) == set(openai_index["primary_files"]) + + langchain_report = langchain_pack.report_path.read_text(encoding="utf-8") + openai_report = openai_pack.report_path.read_text(encoding="utf-8") + assert langchain_pack.report_pdf_path.exists() + assert openai_pack.report_pdf_path.exists() + for 
heading in ( + "## 总体状态", + "## 交付物清单", + "## 校验结果", + "## 问题摘要", + "## 证据引用", + "## 审阅备注", + ): + assert heading in langchain_report + assert heading in openai_report diff --git a/tests/test_review_pack_example_smoke.py b/tests/test_review_pack_example_smoke.py new file mode 100644 index 0000000..daf2335 --- /dev/null +++ b/tests/test_review_pack_example_smoke.py @@ -0,0 +1,131 @@ +import json +import subprocess +import sys +from pathlib import Path + + +def _repo_root() -> Path: + return Path(__file__).resolve().parents[1] + + +def _run_json_command(args: list[str], *, cwd: Path) -> dict[str, object]: + result = subprocess.run( + args, + cwd=cwd, + capture_output=True, + text=True, + check=False, + ) + assert result.returncode == 0, result.stderr + return json.loads(result.stdout) + + +def test_review_pack_example_smoke(tmp_path: Path) -> None: + repo_root = _repo_root() + run_dir = tmp_path / "langchain-run" + pack_dir = tmp_path / "review-pack" + primary_only_pack_dir = tmp_path / "review-pack-primary-only" + + langchain_example = repo_root / "examples" / "langchain_minimal_evidence.py" + review_pack_example = repo_root / "examples" / "review_pack" / "build_review_pack.py" + + example_payload = _run_json_command( + [sys.executable, str(langchain_example), "--output-dir", str(run_dir)], + cwd=repo_root, + ) + assert example_payload["ok"] is True + + bundle_path = run_dir / "langchain-evidence.bundle.json" + receipt_path = run_dir / "receipt.json" + summary_path = run_dir / "summary.json" + manifest_path = run_dir / "langchain-evidence.manifest.json" + public_key_path = run_dir / "manifest-public.pem" + runtime_events_path = run_dir / "runtime-events.jsonl" + private_key_path = run_dir / "manifest-private.pem" + + for path in ( + bundle_path, + receipt_path, + summary_path, + manifest_path, + public_key_path, + runtime_events_path, + private_key_path, + ): + assert path.exists() + + pack_payload = _run_json_command( + [ + sys.executable, + 
str(review_pack_example), + "--bundle-path", + str(bundle_path), + "--receipt-path", + str(receipt_path), + "--summary-path", + str(summary_path), + "--manifest-path", + str(manifest_path), + "--public-key-path", + str(public_key_path), + "--runtime-events-path", + str(runtime_events_path), + "--private-key-path", + str(private_key_path), + "--output-dir", + str(pack_dir), + ], + cwd=repo_root, + ) + + pack_root = Path(str(pack_payload["pack_dir"])) + index_path = Path(str(pack_payload["index_path"])) + report_path = Path(str(pack_payload["report_path"])) + assert pack_root.exists() + assert index_path.exists() + assert report_path.exists() + assert (pack_root / "primary" / "bundle.json").exists() + assert (pack_root / "primary" / "receipt.json").exists() + assert (pack_root / "primary" / "summary.json").exists() + assert (pack_root / "review" / "report.md").exists() + assert (pack_root / "review" / "report.pdf").exists() + assert (pack_root / "review" / "report.pdf").read_bytes().startswith(b"%PDF") + assert (pack_root / "index.json").exists() + assert not (pack_root / "supporting" / "manifest-private.pem").exists() + + pack_index = json.loads(index_path.read_text(encoding="utf-8")) + assert pack_index["excluded_supporting_files"] == ["private_key"] + assert pack_index["supporting_files"] == { + "manifest": "supporting/manifest.json", + "public_key": "supporting/manifest-public.pem", + "runtime_events": "supporting/runtime-events.jsonl", + } + assert "private_key" not in pack_payload["supporting_files"] + + primary_only_payload = _run_json_command( + [ + sys.executable, + str(review_pack_example), + "--bundle-path", + str(bundle_path), + "--receipt-path", + str(receipt_path), + "--summary-path", + str(summary_path), + "--output-dir", + str(primary_only_pack_dir), + ], + cwd=repo_root, + ) + + primary_only_root = Path(str(primary_only_payload["pack_dir"])) + assert primary_only_root.exists() + assert (primary_only_root / "primary" / "bundle.json").exists() + assert 
(primary_only_root / "primary" / "receipt.json").exists() + assert (primary_only_root / "primary" / "summary.json").exists() + assert (primary_only_root / "review" / "report.md").exists() + assert (primary_only_root / "review" / "report.pdf").exists() + assert (primary_only_root / "review" / "report.pdf").read_bytes().startswith(b"%PDF") + assert (primary_only_root / "index.json").exists() + assert not (primary_only_root / "supporting").exists() + assert primary_only_payload["supporting_files"] == {} diff --git a/tests/test_review_pack_renderer.py b/tests/test_review_pack_renderer.py new file mode 100644 index 0000000..4f48f32 --- /dev/null +++ b/tests/test_review_pack_renderer.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from agent_evidence.review_pack import ReviewPackAssembler, ReviewPackRenderer + + +def _bundle_payload(*, record_count: int = 2) -> dict[str, object]: + records = [] + for index in range(record_count): + records.append( + { + "schema_version": "1.0", + "event": { + "event_id": f"evt-{index + 1}", + "event_type": "provider.call" if index == 0 else "provider.result", + "actor": "openai", + }, + "hashes": { + "event_hash": f"event-{index + 1}", + "previous_event_hash": None if index == 0 else f"event-{index}", + "chain_hash": f"chain-{index + 1}", + }, + } + ) + return { + "manifest": { + "export_format": "json", + "record_count": record_count, + "artifact_digest": "artifact-digest-123", + "latest_chain_hash": f"chain-{record_count}", + }, + "records": records, + "signatures": [{"algorithm": "ed25519", "signed_at": "2026-04-16T00:00:00Z"}], + } + + +def _summary_payload() -> dict[str, object]: + return { + "ok": True, + "provider_label": "openai", + "model": "gpt-4.1-mini", + "base_url": "https://api.openai.com/v1", + "record_count": 2, + "signature_count": 1, + "call_count": 1, + "verify_command": ( + "agent-evidence verify-export --bundle ./bundle.json --public-key ./manifest-public.pem" + ), 
+ } + + +def _write_json(path: Path, payload: dict[str, object]) -> Path: + path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + return path + + +def test_review_pack_renderer_renders_deterministic_success_report() -> None: + renderer = ReviewPackRenderer() + bundle = _bundle_payload() + receipt = { + "ok": True, + "issues": [], + "record_count": 2, + "signature_count": 1, + "required_signature_count": 1, + "signature_verified": True, + "latest_chain_hash": "chain-2", + } + summary = _summary_payload() + + first = renderer.render( + bundle=bundle, + receipt=receipt, + summary=summary, + primary_files={ + "bundle": "primary/bundle.json", + "receipt": "primary/receipt.json", + "summary": "primary/summary.json", + }, + supporting_files={"manifest": "supporting/manifest.json"}, + missing_supporting=[], + ) + second = renderer.render( + bundle=bundle, + receipt=receipt, + summary=summary, + primary_files={ + "bundle": "primary/bundle.json", + "receipt": "primary/receipt.json", + "summary": "primary/summary.json", + }, + supporting_files={"manifest": "supporting/manifest.json"}, + missing_supporting=[], + ) + + assert first.markdown == second.markdown + assert first.taxonomy_labels == ("Verification passed",) + assert "# 审阅报告" in first.markdown + assert "## 总体状态" in first.markdown + assert "## 交付物清单" in first.markdown + assert "## 校验结果" in first.markdown + assert "## 问题摘要" in first.markdown + assert "## 证据引用" in first.markdown + assert "## 审阅备注" in first.markdown + assert "签名校验结果(receipt.signature_verified):`True`" in first.markdown + assert ( + "交付摘要指纹(bundle.manifest.artifact_digest):`artifact-digest-123`" in first.markdown + ) + assert "回执未报告问题。" in first.markdown + assert "结果:`校验通过`" in first.markdown + + +def test_review_pack_renderer_keeps_taxonomy_labels_in_renderer_only(tmp_path: Path) -> None: + bundle_path = _write_json(tmp_path / "bundle.json", _bundle_payload()) + receipt_path = _write_json( + tmp_path / "receipt.json", + 
{ + "ok": False, + "issues": [ + "chain: record 1: chain_hash mismatch", + "signature verification failed", + ], + "record_count": 2, + "signature_verified": False, + "latest_chain_hash": "broken-chain", + }, + ) + summary_path = _write_json(tmp_path / "summary.json", _summary_payload()) + + pack = ReviewPackAssembler.for_output_dir(tmp_path / "review-pack").assemble( + bundle_path=bundle_path, + receipt_path=receipt_path, + summary_path=summary_path, + supporting_files={"manifest": tmp_path / "missing.manifest.json"}, + ) + + report_text = pack.report_path.read_text(encoding="utf-8") + index_payload = json.loads(pack.index_path.read_text(encoding="utf-8")) + + assert "校验未通过" in report_text + assert "链路连续性异常" in report_text + assert "签名校验失败" in report_text + assert "缺少可选附属文件" in report_text + assert "chain: record 1: chain_hash mismatch" in report_text + assert "signature verification failed" in report_text + assert "taxonomy_labels" not in index_payload + assert "renderer_labels" not in index_payload + assert index_payload["receipt_facts"]["ok"] is False + assert index_payload["receipt_facts"]["issues"] == [ + "chain: record 1: chain_hash mismatch", + "signature verification failed", + ]