From 0ff03f7bd2dcb9bd84df754ed343cf0d7a8e1779 Mon Sep 17 00:00:00 2001 From: Roman PASSLER Date: Tue, 12 May 2026 20:40:24 +0200 Subject: [PATCH] ci: tighten job timeouts + add step-level limits for timed_out visibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cuts massively-over-budget job-timeouts down to honest worst-case + margin, and pairs each long-running step with its own step-timeout so a hang surfaces as `timed_out` on that step instead of as an opaque job-level `cancelled` after the full budget elapses. - ci.yml `test`: 30 → 25 min job; new 18 min step-timeout on Swift tests (p50 5–7 min, ~3× margin) - e2e.yml `e2e`: 60 → 30 min job; new 25 min step-timeout on Run E2E tests (cold cache covers multi-GB model downloads) - e2e-app.yml `e2e-app`: 15 → 20 min job (the three live-recording lanes now total ~14 min observed); new 10 min step-timeout on each lane so we can tell which one hung Background: docs/plans/.local/open/2026-05-11-job-timeout-visibility.md documents the cancelled-vs-timed_out distinction. Job-level timeouts map to `cancelled` (same as user-cancel and concurrency-cancel) — only step-level timeouts produce `timed_out`. Without step-timeouts, a hung test takes ~30 min to fail AND looks identical to a manual cancel in the UI. --- .github/workflows/ci.yml | 13 +++++++++++-- .github/workflows/e2e-app.yml | 10 +++++++++- .github/workflows/e2e.yml | 11 +++++++---- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 97f783fa..9cea9a4a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,7 +94,12 @@ jobs: needs: changes if: needs.changes.outputs.code == 'true' runs-on: macos-26 - timeout-minutes: 30 + # 25 min job-timeout: a 20 min cold-cache setup leaves ~5 min for tests.
The + # `Swift tests` step below has its own 18 min step-timeout so a hang + # surfaces as `timed_out` (clearly distinguishable from cancellation), + # not the opaque `cancelled` a bare job-timeout produces. See + # docs/plans/.local/open/2026-05-11-job-timeout-visibility.md. + timeout-minutes: 25 permissions: contents: read actions: write @@ -113,14 +118,18 @@ jobs: steps: - uses: actions/checkout@v6 # 20 min covers SPM + ML cache restore plus the cold-cache pre-warm - # download; well below the 30 min job timeout so a hung download + # download; well below the 25 min job timeout so a hung download # surfaces as a step failure, not a job-timeout. - uses: ./.github/actions/setup-swift-test timeout-minutes: 20 with: cache-key-suffix: ${{ matrix.variant }} swift-flags: ${{ matrix.swift-flags }} + # 18 min step-timeout on the actual tests: typical hot-cache run is + # 5–7 min, so this is ~3× margin while still failing fast as + # `timed_out` (not `cancelled`) when a single test hangs. - name: Swift tests + timeout-minutes: 18 run: cd app/MeetingTranscriber && swift test --parallel --skip ModelPreloadTests ${{ matrix.swift-flags }} - name: Cancel run on failure if: failure() diff --git a/.github/workflows/e2e-app.yml b/.github/workflows/e2e-app.yml index 7bd4b7e2..0b12925b 100644 --- a/.github/workflows/e2e-app.yml +++ b/.github/workflows/e2e-app.yml @@ -33,7 +33,12 @@ jobs: # racing for the audio stack — see runner labels via # `gh api /repos/{owner}/{repo}/actions/runners`. runs-on: [self-hosted, macOS, ARM64, audio] - timeout-minutes: 15 + # Three lanes (transcript, record-only, reimport) at ~4–6 min each + # → ~14 min observed total. 20 min job budget gives honest margin; + # each lane has its own 10 min step-timeout below so a hang in lane + # N surfaces as `timed_out` on that step (not as an opaque job-level + # `cancelled`). See docs/plans/.local/open/2026-05-11-job-timeout-visibility.md. 
+ timeout-minutes: 20 steps: - uses: actions/checkout@v6 @@ -69,6 +74,7 @@ jobs: - name: Run live-recording E2E driver env: DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} + timeout-minutes: 10 # `--two-meetings` always: meeting 1 covers fresh-state single-run, # meeting 2 covers cooldown + state reset. ~3 min total wall on Mini # vs ~1 min for single-meeting; signal latency is fine on nightly + @@ -86,6 +92,7 @@ jobs: - name: Run live-recording E2E driver (record-only) env: DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} + timeout-minutes: 10 run: bash scripts/e2e-app.sh --no-build --record-only # Re-import lane: chains a record-only meeting with a POST @@ -101,6 +108,7 @@ jobs: - name: Run live-recording E2E driver (re-import recorded WAV) env: DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} + timeout-minutes: 10 run: bash scripts/e2e-app.sh --no-build --reimport-recorded # Best-effort: capture the simulator's stdout for post-mortem if diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 73b56ed7..8cacc1a0 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -38,10 +38,12 @@ concurrency: jobs: e2e: runs-on: [self-hosted, macOS, ARM64] - # 60 min headroom: cold runs download three multi-GB models - # (WhisperKit ~1 GB, Parakeet ~50 MB, Qwen3 ~1.75 GB). Subsequent - # runs hit the SPM cache + on-disk model cache and finish in <10 min. - timeout-minutes: 60 + # Cold-cache runs download three models, ~2.8 GB total (WhisperKit ~1 GB, + # Parakeet ~50 MB, Qwen3 ~1.75 GB); hot runs <10 min. 30 min job + # budget covers worst-case cold download. The `Run E2E tests` step + # below has a 25 min step-timeout so a hang surfaces as `timed_out` + # — see docs/plans/.local/open/2026-05-11-job-timeout-visibility.md. + timeout-minutes: 30 env: E2E_ENABLED: "1" @@ -69,6 +71,7 @@ jobs: # via HTTPS on hit and waste the multi-GB upload on miss — the # latter timed out the post-action on a real run (5.6 GB → GitHub).
- name: Run E2E tests + timeout-minutes: 25 run: | cd app/MeetingTranscriber swift test --filter '${{ steps.filter.outputs.filter }}'