diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6609ee25..6bd9b6f0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,3 +45,20 @@ jobs: test -f dist/index.js test -f dist/cli/index.js node dist/cli/index.js --help + + dependency-audit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: 'npm' + # `--force` is required on Linux because package.json restricts `os` to darwin; + # plain `npm ci` would exit with EBADPLATFORM. Matches the lint/build jobs above. + - run: npm ci --force + - name: Runtime dependency audit (blocking) + run: npm run audit:prod + - name: Dev dependency hygiene audit (warning only) + continue-on-error: true + run: npm run audit:all diff --git a/.github/workflows/headless-smoke.yml b/.github/workflows/headless-smoke.yml index c08fd1ad..b7ee1481 100644 --- a/.github/workflows/headless-smoke.yml +++ b/.github/workflows/headless-smoke.yml @@ -167,7 +167,18 @@ jobs: - name: Open Safari on smoke target URL run: | set -euo pipefail - xcrun simctl openurl "$SIMULATOR_UDID" https://example.com + for attempt in $(seq 1 3); do + if xcrun simctl openurl "$SIMULATOR_UDID" https://example.com; then + break + fi + if [ "$attempt" = "3" ]; then + echo "::error::simctl openurl failed after ${attempt} attempts" + exit 1 + fi + echo "::warning::simctl openurl attempt ${attempt} failed; retrying after simulator settles" + xcrun simctl bootstatus "$SIMULATOR_UDID" -b 2>/dev/null || true + sleep 10 + done for i in $(seq 1 30); do COUNT=$(curl --silent "http://localhost:$OPENSAFARI_PROXY_PORT/json" 2>/dev/null \ | node -e 'const t = JSON.parse(require("fs").readFileSync(0,"utf8")); process.stdout.write(String(Array.isArray(t) ? t.length : 0));' 2>/dev/null || echo "0") @@ -360,7 +371,7 @@ jobs: # Wait for `flutter run` to write the resolved VM service URI. 
# `--vmservice-out-file` only appears once the engine has actually # bound the port and DDS is ready to accept clients. - for i in $(seq 1 120); do + for i in $(seq 1 180); do if [ -s "$VM_SERVICE_FILE" ]; then echo "VM service file written after ${i}s: $(cat "$VM_SERVICE_FILE")" break @@ -383,14 +394,30 @@ jobs: fi # Trim trailing newline / whitespace; --vmservice-out-file writes # the URL with a trailing newline. - VM_SERVICE_URL=$(cat "$VM_SERVICE_FILE" | tr -d '\n\r ' ) - # Ensure trailing slash so downstream HTTP probes resolve correctly. + VM_SERVICE_RAW_URL=$(cat "$VM_SERVICE_FILE" | tr -d '\n\r ' ) + # `flutter run --vmservice-out-file` writes a websocket URL on recent + # Flutter releases. Runtime code expects OPENSAFARI_VM_SERVICE_URL to + # be the HTTP observatory URL and converts it back to /ws internally. + VM_SERVICE_WS_URL="$VM_SERVICE_RAW_URL" + case "$VM_SERVICE_WS_URL" in + */ws/) ;; + */ws) VM_SERVICE_WS_URL="${VM_SERVICE_WS_URL}/" ;; + */) VM_SERVICE_WS_URL="${VM_SERVICE_WS_URL}ws/" ;; + *) VM_SERVICE_WS_URL="${VM_SERVICE_WS_URL}/ws/" ;; + esac + VM_SERVICE_URL="$VM_SERVICE_WS_URL" case "$VM_SERVICE_URL" in - */) ;; - *) VM_SERVICE_URL="${VM_SERVICE_URL}/" ;; + ws://*) VM_SERVICE_URL="http://${VM_SERVICE_URL#ws://}" ;; + wss://*) VM_SERVICE_URL="https://${VM_SERVICE_URL#wss://}" ;; + esac + case "$VM_SERVICE_URL" in + */ws/) VM_SERVICE_URL="${VM_SERVICE_URL%/ws/}/" ;; + */ws) VM_SERVICE_URL="${VM_SERVICE_URL%/ws}/" ;; esac echo "OPENSAFARI_VM_SERVICE_URL=$VM_SERVICE_URL" >> "$GITHUB_ENV" - echo "Discovered VM Service URL: $VM_SERVICE_URL" + echo "OPENSAFARI_VM_SERVICE_WS_URL=$VM_SERVICE_WS_URL" >> "$GITHUB_ENV" + echo "Discovered VM Service HTTP URL: $VM_SERVICE_URL" + echo "Discovered VM Service websocket URL: $VM_SERVICE_WS_URL" - name: Wait for DDS frontend compiler warmup run: | @@ -399,21 +426,91 @@ jobs: echo "::error::No VM Service URL exported — cannot warm up DDS" exit 1 fi - echo "Probing VM Service at $OPENSAFARI_VM_SERVICE_URL ..." 
- # The DDS HTTP endpoint becomes responsive a few seconds after the - # URL is written. Wait for *any* 200 response before letting jest - # connect — `compileExpression` (the call probeEvaluateCompile - # uses) requires the frontend compiler to have completed its - # initial compile pass, which the warmup sleep covers. + VM_SERVICE_PROBE_URL="${OPENSAFARI_VM_SERVICE_WS_URL:-$OPENSAFARI_VM_SERVICE_URL}" + echo "Probing VM Service at $VM_SERVICE_PROBE_URL ..." + # Dart VM service JSON-RPC traffic is served over the VM service + # websocket path. Some CI runners reject HTTP JSON-RPC probes with + # 405 even after the service is ready, so validate the downstream + # client path directly. for i in $(seq 1 30); do - if curl --silent --max-time 2 --output /dev/null --write-out '%{http_code}' "${OPENSAFARI_VM_SERVICE_URL}" | grep -q '^200$'; then - echo "DDS HTTP endpoint responsive (attempt $i)" + if node <<'NODE' + const WebSocket = require('ws'); + + const rawUrl = process.env.OPENSAFARI_VM_SERVICE_WS_URL || process.env.OPENSAFARI_VM_SERVICE_URL; + if (!rawUrl) { + console.error('OPENSAFARI_VM_SERVICE_URL is empty'); + process.exit(1); + } + + const url = new URL(rawUrl); + if (url.protocol === 'http:') url.protocol = 'ws:'; + if (url.protocol === 'https:') url.protocol = 'wss:'; + if (url.protocol !== 'ws:' && url.protocol !== 'wss:') { + console.error(`Unsupported VM service protocol: ${url.protocol}`); + process.exit(1); + } + if (url.pathname.endsWith('/ws')) { + url.pathname = `${url.pathname}/`; + } else if (!url.pathname.endsWith('/ws/')) { + url.pathname = url.pathname.replace(/\/?$/, '/ws/'); + } + + const ws = new WebSocket(url); + const timer = setTimeout(() => { + console.error(`Timed out waiting for getVM response from ${url.href}`); + ws.terminate(); + process.exit(1); + }, 2000); + + ws.on('open', () => { + ws.send(JSON.stringify({ jsonrpc: '2.0', id: 'warmup', method: 'getVM' })); + }); + + ws.on('message', (data) => { + let message; + try { + message = 
JSON.parse(data.toString()); + } catch (error) { + console.error(`Invalid VM service JSON: ${error.message}`); + return; + } + + if (message.id !== 'warmup') return; + + clearTimeout(timer); + ws.close(); + if (message.error) { + console.error(`getVM returned error: ${JSON.stringify(message.error)}`); + process.exit(1); + } + + const vmType = message.result && message.result.type; + if (vmType !== 'VM') { + console.error(`getVM returned unexpected result type: ${vmType || 'missing'}`); + process.exit(1); + } + + console.log(`VM service websocket getVM accepted at ${url.href}`); + process.exit(0); + }); + + ws.on('error', (error) => { + clearTimeout(timer); + console.error(`VM service websocket error: ${error.message}`); + process.exit(1); + }); + NODE + then + echo "DDS VM service websocket responsive (attempt $i)" sleep 8 exit 0 fi + if [ "$i" -eq 1 ] || [ $((i % 5)) -eq 0 ]; then + echo "DDS VM service websocket not ready (attempt $i)" + fi sleep 2 done - echo "::error::DDS HTTP endpoint did not respond within warmup window" + echo "::error::DDS VM service websocket did not accept getVM within warmup window" exit 1 - name: Run Flutter VM headless input live test diff --git a/.github/workflows/issue-10-healthy.yml b/.github/workflows/issue-10-healthy.yml new file mode 100644 index 00000000..fcf57414 --- /dev/null +++ b/.github/workflows/issue-10-healthy.yml @@ -0,0 +1,75 @@ +# Depends on `npm run verify:issue-10-healthy` added in PR for issue #47 part 1. +# Merge PR#1 before triggering this workflow. 
+ +name: Issue #10 healthy-path verification + +on: + workflow_dispatch: + inputs: + device_id: + description: 'Simulator UDID to target' + required: false + default: '3BEF4E9A-069A-4419-AC62-AB889348EF12' + flutter_bundle_id: + description: 'Bundle ID of the Flutter fixture app' + required: false + default: 'com.example.osftest' + +jobs: + verify: + runs-on: macos-latest + + steps: + - uses: actions/checkout@v4 + + - uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '26.4' + + - uses: actions/setup-node@v4 + with: + node-version: '20' + cache: 'npm' + + - name: Install dependencies + run: npm ci + + - name: Build + run: npm run build + + - name: Boot simulator + run: | + xcrun simctl boot "${{ inputs.device_id }}" || true + xcrun simctl bootstatus "${{ inputs.device_id }}" -b + + - name: Install fixture app + run: | + # TODO: install fixture ${{ inputs.flutter_bundle_id }} — see tests/integration/fixtures/flutter_sample + # Real fixture build/install pipeline is out of scope for this PR. + # The verify:issue-10-healthy script performs a preflight check and + # will fail cleanly with a descriptive error if the app is not installed. 
+ echo "TODO: install fixture ${{ inputs.flutter_bundle_id }} — see tests/integration/fixtures/flutter_sample" + + - name: Run healthy-path verification + run: | + set -o pipefail + npm run verify:issue-10-healthy 2>&1 | tee verify-issue-10-healthy.log + env: + OSF_DEVICE_ID: ${{ inputs.device_id }} + OSF_FLUTTER_BUNDLE_ID: ${{ inputs.flutter_bundle_id }} + + - name: Append output to job summary + if: always() + run: | + echo '## verify:issue-10-healthy output' >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + cat verify-issue-10-healthy.log >> "$GITHUB_STEP_SUMMARY" 2>/dev/null || echo '(no output captured)' >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + + - name: Upload verification report + if: always() + uses: actions/upload-artifact@v4 + with: + name: issue-10-healthy-report + path: scripts/.verify-issue-10-healthy.report.json + if-no-files-found: warn diff --git a/.github/workflows/omofictions-qa.yml b/.github/workflows/omofictions-qa.yml new file mode 100644 index 00000000..a3724c41 --- /dev/null +++ b/.github/workflows/omofictions-qa.yml @@ -0,0 +1,233 @@ +name: Omofictions-App private-route QA + +# Scripted QA lane that closes box 8 of #34. See docs/qa/omofictions-app.md +# for the full environment contract. Non-runtime — only drives an +# already-built Omofictions-App artefact through opensafari's MCP bridge. +# +# Trigger policy (per #44 Clarification 4): +# - workflow_dispatch only until a self-hosted runner with Xcode 26.4 + +# simulator is available and 3 consecutive manual-trigger runs have +# passed. Nightly schedule is added in a follow-up PR that also flips +# the workflow from advisory to required. + +on: + workflow_dispatch: + inputs: + build_url: + description: 'Signed URL to the Omofictions-App `.app.zip` build artefact.' + required: true + build_sha: + description: 'Expected SHA256 of the downloaded artefact (64 hex chars).' 
+ required: true + deeplinks_url: + description: 'Signed URL to the omofictions_deeplinks_qa.json manifest.' + required: true + no_act: + description: 'Run the QA lane in selector-discovery mode (--no-act).' + type: boolean + default: false + pull_request: + # Path-guard job only. It enforces #44 Clarification 5 mechanically on + # every PR that touches this workflow or its scripts/docs. The full QA + # run never fires on PRs — that would require a build artefact that is + # not guaranteed to be available. + paths: + - '.github/workflows/omofictions-qa.yml' + - 'scripts/qa/**' + - 'docs/qa/**' + +jobs: + # ------------------------------------------------------------------------- + # Scope-boundary path guard. + # + # Per #44 Clarification 5, a PR that closes #44 (or extends the QA lane) + # must touch only docs/qa/**, scripts/qa/**, this workflow, or .gitignore. + # If any PR that edits the QA lane also edits src/** or tests/**, the + # change crosses the scope boundary and must be broken out into a + # separate PR that is reviewed on its own merits. + # ------------------------------------------------------------------------- + path-guard: + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Reject runtime-code changes in QA-lane PRs + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + set -euo pipefail + changed=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA") + echo "Changed files in this PR:" + echo "$changed" + offenders=$(echo "$changed" | grep -E '^(src/|tests/)' || true) + if [ -n "$offenders" ]; then + echo "" + echo "::error::QA-lane PRs must not touch src/** or tests/**." + echo "::error::Offending files:" + echo "$offenders" | sed 's/^/::error:: /' + echo "::error::See docs/qa/omofictions-app.md scope boundary and #44 Clarification 5." 
+ exit 1 + fi + echo "Path-guard OK: no runtime code touched." + + # ------------------------------------------------------------------------- + # Full QA lane — only fires on manual dispatch. + # + # Runner contract: self-hosted macOS host carrying Xcode 26.4 and an + # iPhone 16 / iOS 26.4 simulator. If the 'simulator' label is absent, the + # run fails fast — there is no fallback to a github-hosted runner because + # macos-14-large is not guaranteed to ship Xcode 26.4 (per #44 + # Clarification 4). + # ------------------------------------------------------------------------- + qa-lane: + if: github.event_name == 'workflow_dispatch' + runs-on: [self-hosted, macOS, xcode-26.4, simulator] + timeout-minutes: 20 + env: + OMOFICTIONS_QA_BUILD_SHA: ${{ inputs.build_sha }} + OMOFICTIONS_QA_EMAIL: ${{ secrets.OMOFICTIONS_QA_EMAIL }} + OMOFICTIONS_QA_PASSWORD: ${{ secrets.OMOFICTIONS_QA_PASSWORD }} + steps: + - uses: actions/checkout@v4 + + - name: Resolve simulator UDID + id: sim + run: | + set -euo pipefail + udid=$(xcrun simctl list devices --json \ + | /usr/bin/python3 -c " + import json, sys + data = json.load(sys.stdin) + for runtime, devices in data.get('devices', {}).items(): + if 'iOS-26-4' not in runtime: + continue + for d in devices: + if d.get('name') == 'iPhone 16' and d.get('isAvailable'): + print(d['udid']); sys.exit(0) + sys.exit(1) + ") + echo "udid=$udid" >> "$GITHUB_OUTPUT" + xcrun simctl bootstatus "$udid" -b + xcrun simctl storekit clear "$udid" || true + + - name: Download build artefact + id: build + run: | + set -euo pipefail + mkdir -p "$RUNNER_TEMP/omofictions" + curl --fail --location --silent --show-error \ + -o "$RUNNER_TEMP/omofictions/build.zip" \ + "${{ inputs.build_url }}" + actual_sha=$(shasum -a 256 "$RUNNER_TEMP/omofictions/build.zip" | awk '{print $1}') + expected_sha="${{ inputs.build_sha }}" + if [ "$actual_sha" != "$expected_sha" ]; then + echo "::error::build SHA256 mismatch (expected=$expected_sha actual=$actual_sha)" + exit 1 + 
fi + unzip -q "$RUNNER_TEMP/omofictions/build.zip" -d "$RUNNER_TEMP/omofictions/unpacked" + app_path=$(find "$RUNNER_TEMP/omofictions/unpacked" -maxdepth 3 -name '*.app' | head -n1) + if [ -z "$app_path" ]; then + echo "::error::no .app bundle found inside build.zip" + exit 1 + fi + echo "app_path=$app_path" >> "$GITHUB_OUTPUT" + + - name: Download deeplink manifest + id: manifest + run: | + set -euo pipefail + curl --fail --location --silent --show-error \ + -o "$RUNNER_TEMP/omofictions/deeplinks.json" \ + "${{ inputs.deeplinks_url }}" + echo "path=$RUNNER_TEMP/omofictions/deeplinks.json" >> "$GITHUB_OUTPUT" + + - name: Install and launch app + run: | + ./scripts/qa/omofictions-setup.sh \ + --device-id "${{ steps.sim.outputs.udid }}" \ + --build-path "${{ steps.build.outputs.app_path }}" + + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: 'npm' + + - name: Install opensafari and start MCP bridge + run: | + npm ci --force + npm run build + # Streamable HTTP transport on the default port used by the QA + # lane script. Started in the background and given 3 s to bind. + OMOFICTIONS_BRIDGE_PORT=57337 node dist/cli/index.js \ + --transport http --port 57337 & + echo $! > "$RUNNER_TEMP/omofictions/bridge.pid" + sleep 3 + + - name: Run scripted private-route QA lane + id: lane + run: | + set +e + node ./scripts/qa/omofictions-private-route.mjs \ + --device-id "${{ steps.sim.outputs.udid }}" \ + --deeplinks "${{ steps.manifest.outputs.path }}" \ + ${{ inputs.no_act == true && '--no-act' || '' }} \ + > "$RUNNER_TEMP/omofictions/run.log" + code=$? 
+ echo "exit_code=$code" >> "$GITHUB_OUTPUT" + cat "$RUNNER_TEMP/omofictions/run.log" + exit "$code" + + - name: Capture simulator log + if: always() + run: | + set +e + xcrun simctl spawn "${{ steps.sim.outputs.udid }}" \ + log show --style syslog --last 3m \ + > "$RUNNER_TEMP/omofictions/simulator-log.txt" || true + + - name: Write device-meta + if: always() + run: | + set -euo pipefail + cat > "$RUNNER_TEMP/omofictions/device-meta.json" </dev/null || true + fi + + - name: Package evidence artefact + if: always() + run: | + ts=$(date -u +"%Y-%m-%dT%H-%M-%SZ") + mkdir -p "$RUNNER_TEMP/omofictions/screenshots" + ( + cd "$RUNNER_TEMP/omofictions" \ + && zip -qr "artefact-$ts.zip" \ + run.log simulator-log.txt device-meta.json screenshots + ) + echo "artefact_path=$RUNNER_TEMP/omofictions/artefact-$ts.zip" >> "$GITHUB_ENV" + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v4 + with: + name: omofictions-qa-artefact + path: ${{ env.artefact_path }} + if-no-files-found: error + retention-days: 30 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 9b5f6b62..7af2ecb2 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -22,6 +22,8 @@ jobs: - run: npm run lint - run: npm run test:ci - run: npm run build + - name: Runtime dependency audit (release gate) + run: npm run audit:prod - name: Verify dist run: | test -f dist/index.js diff --git a/.gitignore b/.gitignore index 523b5eb6..c2a76851 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,7 @@ npm-debug.log* /tmp/ *.tmp .openchrome/ +.omx/ + +# Generated verification reports +scripts/.verify-issue-10-healthy.report.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 5824f278..2f74589a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,97 @@ All notable changes to this project will be documented in this file. 
## [Unreleased] +## [0.6.1] - 2026-05-17 + +**OpenSafari 0.6.1 is a *catch-up release* that lands every change accumulated on `develop` since 0.5.0 onto `main` and npm.** `v0.6.0` was tagged on 2026-05-16 but never merged to `main` and never published to the npm registry (`npm view opensafari-mcp versions` confirms `0.5.0` was the latest published until this release). Rather than ship a separate 0.6.0 → 0.6.1 pair, 0.6.1 rolls the entire 0.6.0 body forward together with one targeted pasteboard fix uncovered after the 0.6.0 tag. This is a feature-and-fix release; there are no breaking changes since 0.5.0. + +**Headline fix (post-0.6.0)**: + +- **`app_type_element` pasteboard backend now works on `AXSecureTextField` (password) elements (#760, #761)**. The readback contract introduced for #639 PR C compared `endsWith`/`includes` against the focused element's AX value, but iOS masks the value of any secure text field with bullet characters (`••••…`) regardless of plaintext content, so every successful password paste was rejected as `PASTE_NOT_APPLIED`. The same OS-mask divergence on the simhid path was escalating into `TEXT_INPUT_DROPPED` / `TEXT_INPUT_LAYOUT_MISMATCH` with `isError: true`. After this release the verifier detects secure fields by role (`AXSecureTextField`) or trait (`AXSecureTextField`, `secure text field`) and returns silently — the readback contract is inconclusive by design for this element class. The pasteboard backend also now honours the documented `verify: false` parameter symmetrically with the simhid path (it was previously a silent no-op for pasteboard). + +**Headline change (rolled forward from 0.6.0)**: + +- **`ax-bridge` recursive scored content-root search (#40, follow-up to #4)**. Replaces the single-pass immediate-child content-root heuristic with a recursive, deterministic, integer-scored search, closing the silent-empty-content bug that blocked the "Functional success" section of #4 on Xcode 26.4 / iOS 26.4. 
The raw bridge now refuses to return a chrome-only tree: when no subtree exposes any app-level accessibility semantics, it emits a typed `DEVICE_CONTENT_ROOT_EMPTY` error with exit code 1 instead of silently falling back to the bare `AXWindow`. Full per-rubric scoring (iOSContentGroup +10 / fits expected rect +8 / app-semantics descendants +5 capped at +25 / toolbar or menu bar −10 / zero descendants −5) plus chrome denylist now live in both `src/native/ax-bridge.swift` and the TypeScript reference scorer `src/native/ax-bridge-content-root.ts`; the two implementations are kept in lock-step by 6 fixture unit tests in `tests/unit/ax-bridge-content-root.test.ts`. + +### Fixed + +- **`app_type_element` AXSecureTextField paste verification (#760, #761)** — see headline above. `assertPasteApplied` accepts an optional `{ role, traits }` descriptor and returns silently when the descriptor signals a secure text field; `typeViaPasteboard` forwards the inspected node's role/traits into the assert and adds `secureField?: true` to `PasteboardTypeResult`. The tool layer echoes `secureField: true` in the success response so callers can distinguish "no readback because secure field" from "no readback because verify opted out". `verifyTypedText` on the simhid path detects the same signal and returns `{ verified: 'unknown', verify_method: 'ax-value-not-readable' }` instead of escalating to a structured input-error code. Unit coverage: 10 new cases in `tests/unit/pasteboard-input.test.ts`, 2 new cases in `tests/unit/app-type-element.test.ts` (inside the Tier-3 readback describe), both passing alongside the existing 2632 unit tests. +- **PointerService Phase 1 swipe semantics clarified (#649)**. `src/tools/pointer-service-input-backend.ts` previously claimed that a swipe failure under `OPENSAFARI_ENABLE_POINTERSERVICE=1` would "surface via the tier chain when we bubble back up". 
That is not the runtime behaviour: `getInputBackend` caches `PointerServiceInputBackend` as the selected backend, so `swipe()` hard-errors with `HeadlessInputUnavailableError` on Xcode 26+ and does NOT re-enter the tier chain. The comment has been rewritten to match the shipped code and to direct callers to the two working escape hatches (leave the env flag unset, or use an element-targeted swipe). No runtime behaviour changed. +- **Boot / lifecycle / network reliability batch (#752, #753, #754, #755, #756, #757)**. `device_boot` now keeps boot diagnostics reliable when WebKit is late (#756, #757); zombie cleanup no longer spins indefinitely on stale locks (#754, #755); network interception is scoped to MCP sessions and preserves XHR restore semantics under session intercepts (#752, #753); the proxy lifecycle remains stable under parallel startup; Flutter VM resolution remains stable under parallel input. +- **`fix(dom-input)`: always dispatch input event (#726)**. Multiple iterations addressing review feedback — always fire key events with conditional value write, guard `appendChar` before keyboard dispatch. +- **`fix(simulator)`: trailing-bracket strip + bundleId regex + simctl rotate route (#708 series)**. `launchctl` label normalization now strips all trailing bracket groups; `bundleId` regex tightened and not-installed detection centralized; `simctl rotate` routed through `deps.simctl.exec`; shutdown stays best-effort after nuclear erase. +- **`fix(webkit)`**: direct host.emit in EventBridge transport forwarding; `clearCookies` effective on `document.cookie` fallback; throwing protocol-event handler routed through `transport:error`; circular dep resolved, unimplemented `timeoutMs` removed, viewport query unified; RFC 6265 domain matching for `getCookies` filter; `enabledDomainsPerTarget` cleanup on RPC failure. + +### Added + +- **`TRANSITIONAL_STATE_TIMEOUT` classification + `--max-settle-retries` flag for `dist/sim-hid-bridge`** (#46). 
The wrapper now distinguishes "expected app is running but its UI is still loading" from "no AX data at all": when `--expect-bundle <bundle-id>` is supplied and the first settle window returns `FOREGROUND_CONTEXT_UNAVAILABLE` while `<bundle-id>` is in `runningApps`, the wrapper performs one bounded re-probe (another `settleMs` window) and promotes to `TRANSITIONAL_STATE_TIMEOUT` if the tree is still empty. Capped by `--max-settle-retries <0|1|2|3>` (default `1`); set `0` to restore the pre-issue single-probe behaviour byte-for-byte. The surface classifier in `src/tools/raw-mobile-context.ts` stays surface-scoped and never emits the new variant itself — promotion is a wrapper-layer concern. +- **`feat(tap)`: scale AX frame coords from macOS-pt to iOS-pt (#693 WU3, #720)** plus dump-root size emission (#693 WU3-prep, #695) and structured ErrorJSON STDOUT from the ax-bridge wrapper (#693 WU1, #694). Closes the long-standing tap-coordinate offset on retina simulators by carrying the macOS-pt → iOS-pt scale factor through the wrapper and applying it at coordinate dispatch. Regression coverage in `tests/unit/coord-regression.test.ts` (#722). +- **`feat(ax-bridge)`: `--debug` flag emits machine-readable stderr (#660)** plus walker candidate diagnostics. Bare-flag coercion restricted to debug/verbose (#660). Gated ko-KR push-permission live suite added (#660, #692). `localized-button-matcher` gains an extension seam for app-specific labels (#639 follow-up). + +### Refactored — major decomposition batch + +The 0.6.0 cycle landed three large internal decompositions to support the simulator-chrome and reliability work. All public surfaces (tool names, schemas, response shapes) are preserved. + +- **WebKit module split (#706 1/5–5/5)**: error classes (1/5), protocol transport (2/5), target session manager (3/5), browser command implementations (4/5), typed event adapters + finalized facade (5/5). `WebKitClient` is now a thin facade over focused submodules; the public import surface is unchanged. 
Multiple post-merge fixes preserved behaviour contracts flagged in review. +- **Simulator module split (#708 1/4–4/4)**: errors + device catalog (1/4), lifecycle (2/4), app manager (3/4), UI controller + finalized facade (4/4). Hardens `simctl` JSON parsing and the fuzzy device resolver. +- **Input layer split (#707 a/b)**: backends and resolver into focused modules (a); remaining backends consolidated into `src/input/` (b). DOM-input script builders centralized for webkit + native input (#709). Migrated paths now enforce `no-explicit-any` via lint override (#710 b). +- **Protocol typing (#710 a/b)**: typed DTOs + fixture builders for the WebKit RDP boundary (a); typed RDP guards with `console.type` fallback restored. + +### Performance + +- **CLI lazy-load (#700 a, #729; #700 b, #728)**: command implementations and MCP handler implementations are now lazy-loaded behind static schemas; cold-start measurably faster, especially in `audit` and `serve` flows. +- **WebKit fast paths (#702 a/b, #725)**: new `evaluateValue` helper, screenshot fast path, batched navigation-state read, deduplicated domain enables. +- **Native-input batching (#705, #723)**: reduces process spawns via a batching capability on the simctl backend. +- **Proxy readiness (#701, #727)**: split process readiness from target readiness so the proxy can serve target traffic the instant the target is reachable, without waiting for the proxy to settle on its own port. +- **Simulator boot polling (#703, #724)**: bootstatus-aware polling with shared state cache; eliminates redundant `simctl bootstatus` probes when multiple tools observe boot in parallel. +- **Web Inspector socket discovery (#704, #719)**: cached with staged backoff; first-call cost paid once per simulator boot. 
+ +### Security + +- **HTTP MCP transport hardening (#714)** plus follow-ups: tighter `/mcp` auth + insecure-mode posture, hardened HTTP auth comparison, high-risk tool gating in HTTP mode (with blocked-tool hiding), expanded high-risk gate to JS+VM bypass surfaces, `mock_geolocation` gated as HTTP high-risk and non-finite numerics rejected. All HTTP-only — STDIO transport unaffected. +- **Auth profile persistence hardening (#716)** with follow-up review feedback applied. `tests/unit/auth-manager-persistence.test.ts` bounds the atomic temp filename to a fixed-length hash prefix. +- **Audit log retention hardening (#711, #717)**. +- **CI: separate runtime and dev dependency audits (#712, #718)** so dev-only vulnerabilities do not block runtime audit policy. + +### CI / DX + +- Lint enforces `no-explicit-any` on migrated webkit-rdp paths (#710 b); migrated-path override ordered after the tests glob to avoid suppressing the rule in test code (#741 follow-up). +- Audit policy for runtime vs dev dependencies separated. + +### Migration notes + +- **No tool-name or schema changes since 0.5.0.** All `app_*`, `webkit_*`, and bridge tools keep their parameter shapes and response envelopes. +- **`app_type_element` response shape gains an optional `secureField: true` field** on the pasteboard backend when the focused element is an `AXSecureTextField`. Existing callers that ignore unknown response fields are unaffected. +- **The `verify: false` parameter on `app_type_element` is now honoured on both backends** (`auto`/`simhid` and `pasteboard`). Callers that were relying on the previous silent-no-op on pasteboard should review their flows — readback skip is now actually applied. +- **HTTP MCP transport** tightens defaults; STDIO transport (the default) is unaffected. Review the security section above if you operate a forked HTTP deployment. 
+ +## [0.6.0] - 2026-04-20 + +**OpenSafari 0.6.0 is a *simulator-chrome-regression* release.** It replaces the single-pass immediate-child content-root heuristic in `ax-bridge` with a recursive, deterministic, integer-scored search, closing the silent-empty-content bug that blocked the "Functional success" section of #4 on Xcode 26.4 / iOS 26.4. The raw bridge now refuses to return a chrome-only tree: when no subtree exposes any app-level accessibility semantics, it emits a typed `DEVICE_CONTENT_ROOT_EMPTY` error with exit code 1 instead of silently falling back to the bare `AXWindow`. + +### Fixed — `ax-bridge` recursive scored content-root search (#40, follow-up to #4) + +- **`src/native/ax-bridge.swift`**: `findDeviceContentInWindow` replaced with `findDeviceContentRecursively`. Scoring is deterministic and integer-based so fixtures can be asserted exactly: + - `AXGroup`/`AXScrollArea` with `iOSContentGroup` trait → +10 + - frame fits expected device-content rect (±15pt per edge) → +8 + - each app-semantics descendant (`AXTextField`, `AXStaticText`, `AXButton` with non-chrome label, `AXCell`, `AXImage`, `AXLink`) → +5, capped at +25 + - `AXToolbar` / `AXMenuBar` → −10 + - zero descendants → −5 +- **Chrome denylist** rejects exact labels (Action, Home, Save Screen, Rotate, Volume Up/Down, Sleep/Wake, AXCloseButton, AXFullScreenButton, AXMinimizeButton) and the simulator window-title prefix (`"iPhone – iOS "`). `AXMenuBar` and `AXWindow` are rejected at depth > 0. +- **Typed error**: when no candidate subtree contains any app-semantics role, the bridge returns `{"code":"DEVICE_CONTENT_ROOT_EMPTY"}` with exit code 1 instead of falling back to the bare `AXWindow`. The wrapper at `cli/ax-bridge.ts` forwards error JSON untouched. 
+- **Reproduction closed** (Xcode 26.4 / iOS 26.4): `node dist/ax-bridge query --device <udid> --role AXTextField` on a booted simulator with no foreground app now fails fast with `DEVICE_CONTENT_ROOT_EMPTY` (exit 1) instead of returning `{"total":0,"matches":[]}` with exit code 0. + +### Added — TS reference scorer and 6-fixture unit suite + +- **`src/native/ax-bridge-content-root.ts`**: TypeScript port of the Swift rubric so the algorithm is unit-testable from Jest. The two implementations MUST stay in lock-step; any change to the rubric, chrome denylist, geometry formula, or fallback policy must land in both files together. +- **`tests/unit/ax-bridge-content-root.test.ts`**: 6 fixture trees covering (a) empty `iOSContentGroup` between chrome children → `DEVICE_CONTENT_ROOT_EMPTY`, (b) populated Flutter tree with `iOSContentGroup` + ≥ 3 app-semantics descendants, (c) SpringBoard-only, (d) nested content two levels below window, (e) two candidate groups where only one contains app-semantics (DOM-order-independent), (f) Settings-app shape with `AXTable` at top level (no `iOSContentGroup` trait). + +### Unreleased items carried forward into 0.6.0 + +- **`dist/sim-hid-bridge` wrapper CLI documented** in [`docs/headless-architecture.md`](docs/headless-architecture.md#raw-simhid-cli-reference), including `--settle-ms`, response-shape table, and classification table. Adds a cross-reference from [`docs/api-reference.md`](docs/api-reference.md) so MCP consumers can jump to the raw-CLI contract when scripting without the MCP server. Closes #45. +- **`app_tap_element` coordinate fallback now preserves the verified-interaction contract.** When `ax-press` cannot prove a post-action effect and the tool falls back to a coordinate backend, OpenSafari no longer returns a plain clean success by transport alone. 
The response now carries `verified: false` / `effect: "verification_unavailable"` when proof is unavailable, or a typed `TAP_NO_EFFECT` error when the post-tap AX tree stays unchanged. The stricter contract matches `app_tap` and closes the false-positive-success gap for bundle-scoped native taps. +- **Raw mobile-bridge context diagnostics and expect-bundle guards.** `dist/ax-bridge` now exposes `context --device <device> [--expect-bundle <bundle-id>] [--require-match true]`, returning machine-readable foreground classifications and expected-bundle matches for downstream QA. `dist/sim-hid-bridge` now ships as a wrapper around the native bridge and enriches `tap` / `swipe` JSON with post-input `classification`, `verified`, `frontmost`, and `expectedBundleMatched`, plus a matching `context` command. Raw HID commands can now fail fast with `--require-match true` instead of looking like a clean success after the simulator drifts to SpringBoard or chrome. + ## [0.5.0] - 2026-04-17 **OpenSafari 0.5.0 is a *stability-commitments* release.** It closes out the Xcode 26 investigation epic with an authoritative stability table, makes cross-context automation first-class (WebView-in-Flutter and WebView-in-native live harnesses land as product-grade E2E tests), expands alert / PointerService / IAP coverage, and — because we would rather ship truthful docs than broken tools — reverts the `simctl storekit` bindings that could not be made to work against real Xcode. Input telemetry is now on by default for every MCP input tool. Private-API deployment scope, StoreKit posture, and fork-friendly sentinel routing are all documented so teams operating forks or inside regulated orgs get clean upgrade guidance from the release notes alone. diff --git a/README.md b/README.md index 9d5bdd04..abee3d79 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ ## Headless Capabilities -OpenSafari runs fully headless on CI — no display server, no mouse focus, no `Simulator.app` window required.
See [docs/headless-architecture.md](docs/headless-architecture.md) for the full technical design. +OpenSafari runs fully headless on CI — no display server, no mouse focus, no `Simulator.app` window required. See [docs/headless-architecture.md](docs/headless-architecture.md) for the full technical design. For Universal Link channel recipes (Notes paste-and-tap, `captureLogs`, channel matrix), see [docs/recipes/universal-link-channels.md](docs/recipes/universal-link-channels.md). | Scenario | Query (AX Tree) | Input (Tap/Type) | Headless | Backend | |---|---|---|---|---| @@ -190,6 +190,10 @@ opensafari serve → All simulators start already logged in ``` +Auth profiles are JSON files stored under `~/.opensafari/auth/`. Each profile contains the site name, capture timestamp, current URL, cookies, cookie domain groups, localStorage, and sessionStorage needed to restore login state. On POSIX systems, OpenSafari creates new auth profile directories as private `0700` directories and profile files as private `0600` files, then updates profiles with an atomic same-directory replacement. + +Delete saved login state with `opensafari auth delete myapp.com`, or remove the matching `~/.opensafari/auth/myapp.com.json` file. + ### 4. iOS-Specific Auto-Detection Built-in QA checks that run on real Safari — no approximation: @@ -263,6 +267,8 @@ OpenSafari shares battle-tested infrastructure with [OpenChrome](https://github. 
| `device_shutdown` | Shutdown simulator | | `device_rotate` | Toggle portrait/landscape | | `appearance_toggle` | Switch light/dark mode via `simctl ui` | +| `device_network_set` | Toggle host-level network state (`online` / `offline` / `airplane`) so native apps see real `SocketException` / `NSURLErrorNotConnectedToInternet` — see [docs/tools/device-network.md](docs/tools/device-network.md) | +| `device_network_get` | Read the current simulated network state set by `device_network_set` | ### App Lifecycle (Tier 2) @@ -272,7 +278,9 @@ OpenSafari shares battle-tested infrastructure with [OpenChrome](https://github. | `app_terminate` | Terminate a running app by bundle ID | | `app_activate` | Bring app to foreground (launches if not running) | | `app_list_running` | List running foreground apps with PIDs | +| `app_context` | Report the current mobile context and optionally guard on an expected bundle | | `app_reset` | Reset app state: terminate, clear permissions, uninstall | +| `app_notes_paste_and_tap_url` | Reviewer-equivalent Universal Link tap: launches Notes.app, paste-injects the URL, waits for iOS Data Detector to produce an `AXLink`, and taps it — see [docs/recipes/universal-link-channels.md](docs/recipes/universal-link-channels.md) | ### Auth Tools (Tier 3) @@ -372,8 +380,8 @@ npm install -g opensafari-mcp # Run (stdio mode — for MCP clients like Claude Code) opensafari serve -# HTTP mode -opensafari serve --http 3100 +# HTTP mode (binds to 127.0.0.1 and requires a bearer token for /mcp) +OPENSAFARI_HTTP_TOKEN="replace-with-a-random-token" opensafari serve --http 3100 # With all tool tiers exposed opensafari serve --all-tools @@ -385,6 +393,23 @@ opensafari serve --devices "iphone-17e,iphone-17-pro-max" opensafari serve --auth ~/.opensafari/auth/mysite.json ``` + +#### HTTP transport security + +HTTP mode listens on `127.0.0.1` by default. 
The `/health` endpoint is unauthenticated, but `/mcp` requires `Authorization: Bearer <token>` unless you intentionally pass `--http-insecure-local` for local-only testing. Provide the token with `OPENSAFARI_HTTP_TOKEN` or `--http-token`; OpenSafari never prints the token value. + +Browser CORS for `/mcp` is restricted to local origins (`localhost`, `127.0.0.1`, `::1`) plus any comma-separated origins passed with `--http-allow-origin`. Use `--http-host` only when you intentionally need a non-loopback bind. + +HTTP mode also blocks high-risk MCP tools that execute page/app code or move authentication material: `javascript`, `flutter_evaluate`, `auth_save`, `auth_restore`, and `cookies`. Stdio mode is unchanged. To intentionally expose those tools over HTTP, start with `--http-enable-high-risk-tools` or set `OPENSAFARI_HTTP_ENABLE_HIGH_RISK_TOOLS=1`; allowed and blocked high-risk HTTP calls are audit-logged with sensitive arguments redacted. + +```bash +OPENSAFARI_HTTP_TOKEN="$OPENSAFARI_HTTP_TOKEN" opensafari serve --http 3100 +curl -H "Authorization: Bearer $OPENSAFARI_HTTP_TOKEN" \ + -H "Content-Type: application/json" \ + http://127.0.0.1:3100/mcp \ + -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}' +``` + ### MCP Client Configuration ```jsonc @@ -438,7 +463,7 @@ const server = createServer({ await server.start(); // Or start with HTTP transport -await server.start({ transport: 'http', port: 3100 }); +await server.start({ transport: 'http', port: 3100, authToken: process.env.OPENSAFARI_HTTP_TOKEN }); ``` ### WebKitClient @@ -542,7 +567,14 @@ Multiple Claude Code sessions can share the same proxy. When a session detects a OpenSafari dispatches native input (`app_tap`, `app_swipe_native`, `app_scroll_native`, `app_double_tap`, `app_type_text`, `app_key_input`) through a 5-tier fallback chain and surfaces the selected path in each tool -result via a `backend` field. +result via a `backend` field.
Coordinate and element-targeted tap tools also +surface whether the interaction was **verified** by a post-action AX-tree check. +Transport success alone is no longer treated as interaction success when no +observable UI effect can be confirmed. `app_tap_element` now applies the same +contract after it falls back from `ax-press` to a coordinate backend: callers +get `verified: false` with `effect: "verification_unavailable"` when the AX +proof is unavailable, or a typed `TAP_NO_EFFECT` result when the UI stays +unchanged after the dispatched tap. | Tier | Backend | Identifier | Headless? | When used | |------|---------|------------|-----------|-----------| @@ -557,6 +589,15 @@ decision flowchart and the full scenario matrix. Tool responses also include `_meta: { backendKind, headless, deviceId }` so CI can assert `_meta.headless === true`. +Raw bridge consumers can now ask for the same foreground diagnostics directly: + +- `dist/ax-bridge context --device <device> [--expect-bundle <bundle-id>] [--require-match true]` +- `dist/sim-hid-bridge context [--expect-bundle <bundle-id>] [--require-match true]` + +`dist/sim-hid-bridge tap|swipe` also appends `classification`, `verified`, +`frontmost`, and `expectedBundleMatched` to its JSON result so downstream QA can +tell the difference between a clean in-app tap and a wrong-foreground outcome. + Example tool result: ```json @@ -566,10 +607,73 @@ Example tool result: "y": 200, "deviceId": "…", "backend": "simhid", + "verified": true, + "effect": "subtree_changed", "_meta": { "backendKind": "simhid", "headless": true, "deviceId": "…" } } ``` +### Raw `dist/ax-bridge` Contract + +`dist/ax-bridge` is a Node.js wrapper that sits in front of the compiled +Swift binary `dist/ax-bridge-native` (Mach-O). The wrapper intercepts +`--help` / `-h` before argument validation, runs `ensureSemanticsActive()` +for tree-read commands by default (opt out with `--ensure-semantics off`), +then delegates every other invocation unchanged to `dist/ax-bridge-native`.
+ +**Bridge resolution order:** + +1. `dist/ax-bridge-native` next to the script (standard installed layout) +2. Swift interpreter fallback — only when `dist/ax-bridge.swift` is present + +**Contract:** commands that cannot expose app content return a typed error +code as JSON on stdout (exit 1) instead of an empty-success tree: + +| Code | Meaning | +|------|---------| +| `DEVICE_RESOLUTION_FAILED` | Requested device not found / not booted | +| `DEVICE_RESOLUTION_AMBIGUOUS` | Multiple booted simulators match | +| `DEVICE_WINDOW_NOT_FOUND` | No AX window matched the requested device | +| `DEVICE_CONTENT_ROOT_EMPTY` | Window resolved but no app-semantics content (#40) | +| `APP_CONTENT_NOT_EXPOSED` | Tree is Simulator chrome only after bootstrap (#41) | +| `EXPECTED_BUNDLE_MISMATCH` | (context) Expected bundle not foreground | +| `BRIDGE_NOT_FOUND` | `ax-bridge-native` / `ax-bridge.swift` missing | +| `AX_WRAPPER_FAILED` | Wrapper-level unexpected error | +| `BAD_ARGS` | Invalid or missing CLI flags | +| `UNKNOWN_COMMAND` | Command not recognized | + +**Example invocations:** + +```bash +# dump — success: full JSON accessibility tree on stdout, exit 0 +node dist/ax-bridge dump --device booted +# dump — error: chrome-only tree after bootstrap, exit 1 +# stdout: {"error":"...","code":"APP_CONTENT_NOT_EXPOSED"} + +# query — success: matched elements on stdout, exit 0 +node dist/ax-bridge query --device booted --label "Sign In" +# query — error: device not found, exit 1 +# stdout: {"error":"...","code":"DEVICE_RESOLUTION_FAILED"} + +# inspect — success: single element detail on stdout, exit 0 +node dist/ax-bridge inspect --device booted --path "0/1/2" +# inspect — error: empty content root, exit 1 +# stdout: {"error":"...","code":"DEVICE_CONTENT_ROOT_EMPTY"} + +# press — success: {"ok":true,"code":"OK",...} on stdout, exit 0 +node dist/ax-bridge press --device booted --path "0/1/2" + +# context — success: foreground bundle info on stdout, exit 0 +node dist/ax-bridge 
context --device booted +# context — expected bundle mismatch, exit 1 +# stdout: {"error":"...","code":"EXPECTED_BUNDLE_MISMATCH"} +``` + +> **Note:** The higher-level `app_*` MCP tools provide the same semantics +> activation plus richer heuristics (retry, partial-tree promotion, and +> cross-session device resolution). Use the raw bridge for downstream +> harnesses that prefer direct CLI access without the MCP layer. + ### Focus-theft protection (`OPENSAFARI_ALLOW_FOCUS_INPUT`) Tier 3 is **default-deny**. On Xcode 26+ with no Safari connection, diff --git a/cli/ax-bridge.ts b/cli/ax-bridge.ts new file mode 100644 index 00000000..ed9a38cd --- /dev/null +++ b/cli/ax-bridge.ts @@ -0,0 +1,405 @@ +#!/usr/bin/env node + +import { execFile } from 'child_process'; +import { promisify } from 'util'; +import * as path from 'path'; +import * as fs from 'fs'; + +import { ensureSemanticsActive, isLikelyChromeOnlyTree } from '../src/native'; +import { SimulatorManager } from '../src/simulator'; +import { buildRawMobileContext } from '../src/tools/raw-mobile-context'; + +const execFileAsync = promisify(execFile); + +interface ErrorJSON { + error: string; + code: string; +} + +const TOP_LEVEL_HELP = `\ +ax-bridge [flags] + +Commands: + dump Dump accessibility tree for a simulator device + query Query app elements by role/label/text/identifier + inspect Inspect a single element by path + press Issue a press on an element (advanced) + context Report current foreground native context + +Flags: + --device Target simulator (required for most commands) + --max-depth Max tree depth (default: 10) + --role (query) Accessibility role filter + --label