e2e #302
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: e2e | |
| # Compile-compatibility matrix that runs the E2E suite against multiple gh-aw | |
| # refs × samples modes. The scheduled (nightly) run exercises the full matrix: | |
| # 1. github/gh-aw `main` (source mode) | |
| # 2. the latest gh-aw pre-release tag (extension mode) | |
| # 3. the latest gh-aw stable release (extension mode) | |
| # `main` runs with both samples=false (live engine) and samples=true (deterministic); the pre-release and release entries run with samples=true only. | |
| # | |
| # Execution model | |
| # --------------- | |
| # * SERIAL: the matrix runs with `max-parallel: 1`. Each entry itself dispatches | |
| # dozens of test workflows, so running entries concurrently would flood GitHub | |
| # Actions. Entries are therefore queued one at a time (`fail-fast: false` so a | |
| # single entry's failure doesn't cancel the rest). | |
| # * RUNS FROM MAIN: each entry recompiles its workflows, pushes them to `main`, | |
| # and dispatches every test from `main`. Event-triggered tests (add-comment, | |
| # add-labels, update-issue, etc.) create their own fixtures and wait for the | |
| # GitHub event to fire, exactly as they do when run locally. | |
| # | |
| # Manual dispatch | |
| # --------------- | |
| # `workflow_dispatch` exposes a `selection` choice so a human can run a single | |
| # combination quickly instead of the whole matrix. The default is `quick` | |
| # (main / source / samples=true). `full` runs the entire matrix serially, same | |
| # as the nightly schedule. | |
| # | |
| # Because `$CI` is set to `true` by GitHub Actions, `e2e.sh` runs in CI mode: it | |
| # does NOT mutate the repository's `TEMP_USER_PAT` secret (it relies on the | |
| # pre-configured repo secret + the `GH_AW_TEST_PAT` env var instead). | |
| # | |
| # Required secrets: | |
| # * GH_AW_TEST_PAT (required) — PAT used for all operations except copilot requests | |
# Triggers: a nightly schedule, plus a manual dispatch whose inputs narrow or
# redirect the matrix.
on:
  schedule:
    # Nightly at 03:00 UTC
    - cron: "0 3 * * *"
  workflow_dispatch:
    inputs:
      selection:
        type: choice
        description: "Which part of the matrix to run"
        required: false
        default: quick
        options:
          # main / source / samples=true (fast, deterministic)
          - quick
          # main / source / samples=false (live engine path)
          - main-live
          # latest release / extension / samples=true
          - release
          # latest pre-release / extension / samples=true
          - prerelease
          # full matrix, run serially
          - full
      gh_aw_ref:
        description: >-
          github/gh-aw branch, tag, or SHA to test in source mode
          (e.g. main, a feature branch, or a commit SHA).
          Applies to the quick / main-live selections; defaults to main.
        required: false
        default: ""
      refs:
        description: >-
          Optional comma-separated list of gh-aw refs to test
          (overrides main / latest pre-release / latest release auto-detection).
          Only used when selection=full.
        required: false
        default: ""

# Default GITHUB_TOKEN scopes for every job in this workflow.
permissions:
  actions: read
  contents: read
  issues: write
  pull-requests: read

# A single group per workflow: starting a new run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: true
| jobs: | |
# Computes the JSON matrix consumed by the `e2e` job and exposes the resolved
# gh-aw refs (main SHA, latest release, latest pre-release) as job outputs.
resolve-refs:
  name: Resolve gh-aw refs to test
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.resolve.outputs.matrix }}
    main_sha: ${{ steps.resolve.outputs.main_sha }}
    latest_release: ${{ steps.resolve.outputs.latest_release }}
    latest_prerelease: ${{ steps.resolve.outputs.latest_prerelease }}
  steps:
    - name: Resolve refs
      id: resolve
      env:
        GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
        OVERRIDE: ${{ inputs.refs }}
        # Optional github/gh-aw branch/tag/SHA to test in source mode. Applies
        # to the single-entry quick / main-live selections; empty => main.
        GH_AW_REF_INPUT: ${{ inputs.gh_aw_ref }}
        # On `schedule` (and any non-dispatch event) there is no selection
        # input, so default to the full matrix. On workflow_dispatch this is
        # the user's chosen option (default `quick`).
        SELECTION: ${{ github.event_name == 'workflow_dispatch' && inputs.selection || 'full' }}
      run: |
        set -euo pipefail

        main_sha=$(gh api repos/github/gh-aw/commits/main --jq '.sha' | cut -c1-12)
        latest_release=$(gh release list --repo github/gh-aw --limit 50 \
          --json tagName,isPrerelease,isDraft \
          --jq '[.[] | select(.isPrerelease==false and .isDraft==false)][0].tagName' || echo "")
        latest_prerelease=$(gh release list --repo github/gh-aw --limit 50 \
          --json tagName,isPrerelease,isDraft \
          --jq '[.[] | select(.isPrerelease==true and .isDraft==false)][0].tagName' || echo "")

        if [[ -z "$latest_release" ]]; then
          echo "::error::Could not resolve latest gh-aw release"
          exit 1
        fi
        # No pre-release found: fall back to the stable release. The full
        # matrix de-dupes by ref below, so it is not run twice.
        if [[ -z "$latest_prerelease" ]]; then
          latest_prerelease="$latest_release"
        fi

        echo "Selection: $SELECTION"

        # The single-entry source-mode selections (quick / main-live) test a
        # specific github/gh-aw ref. Default to `main`, but allow the dispatch
        # input `gh_aw_ref` to point them at any branch, tag, or SHA. The label
        # is slugified from the ref because it ends up in job titles and in
        # the artifact name, where characters such as `/` are not allowed.
        src_ref="${GH_AW_REF_INPUT:-main}"
        src_label=$(echo "$src_ref" | tr '/' '-' | tr -cd 'A-Za-z0-9._-')
        # The report job skips entries with an empty label, so never emit one.
        src_label="${src_label:-ref}"
        echo "Source-mode ref: $src_ref (label: $src_label)"

        # Each matrix entry is a single, fully-specified combination:
        #   label   : short human-readable name used in job titles / report
        #   ref     : git ref / tag passed to the build or install step
        #   mode    : 'source'    => check out github/gh-aw at <ref>, `make build`,
        #                            then run e2e.sh --gh-aw-ref <ref>.
        #             'extension' => `gh extension install github/gh-aw --pin <ref>`,
        #                            then run e2e.sh with NO --gh-aw-ref.
        #   samples : true  => run with --use-samples (deterministic, no engine)
        #             false => run against the live AI engine
        #
        # The job that consumes this matrix runs serially (max-parallel: 1):
        # every entry recompiles, pushes to `main` and dispatches from `main`,
        # so entries must never run concurrently.
        #
        # `full` builds the complete matrix. The single-entry selections pick
        # exactly one combination for a fast manual dispatch.
        case "$SELECTION" in
          quick)
            matrix=$(jq -nc --arg ref "$src_ref" --arg label "$src_label" \
              '[{ label: $label, ref: $ref, mode: "source", samples: true }]')
            ;;
          main-live)
            matrix=$(jq -nc --arg ref "$src_ref" --arg label "$src_label" \
              '[{ label: $label, ref: $ref, mode: "source", samples: false }]')
            ;;
          release)
            matrix=$(jq -nc --arg rel "$latest_release" \
              '[{ label: "release", ref: $rel, mode: "extension", samples: true }]')
            ;;
          prerelease)
            matrix=$(jq -nc --arg pre "$latest_prerelease" \
              '[{ label: "pre-release", ref: $pre, mode: "extension", samples: true }]')
            ;;
          full|*)
            # Override format (workflow_dispatch input `refs`): comma-separated
            # tokens. Each token may be a bare ref (mode=source) or
            # "<ref>:source" / "<ref>:extension" to force a mode.
            if [[ -n "$OVERRIDE" ]]; then
              # Labels get the same slug treatment as the quick / main-live
              # label above; `ref` keeps the value exactly as typed.
              base=$(echo "$OVERRIDE" | jq -Rc '
                def slug:
                  gsub("/"; "-")
                  | gsub("[^A-Za-z0-9._-]"; "")
                  | if . == "" then "ref" else . end;
                split(",")
                | map(gsub("^\\s+|\\s+$";""))
                | map(select(length>0))
                | map(
                    if test(":") then
                      (split(":")
                       | { label: (.[0] | slug),
                           ref: .[0],
                           mode: (.[1] | if . == null or . == "" then "source" else . end) })
                    else
                      { label: slug, ref: ., mode: "source" }
                    end
                  )')
              # An empty list would produce an empty job matrix.
              if ! jq -e 'length > 0' <<< "$base" > /dev/null; then
                echo "::error::The 'refs' input did not contain any usable ref: '$OVERRIDE'"
                exit 1
              fi
              # Any other mode would match neither the source nor the
              # extension steps of the e2e job, so gh-aw would never be set up.
              if ! jq -e 'all(.[]; .mode == "source" or .mode == "extension")' <<< "$base" > /dev/null; then
                echo "::error::Each 'refs' token must be '<ref>', '<ref>:source' or '<ref>:extension' (got: '$OVERRIDE')"
                exit 1
              fi
            else
              base=$(jq -nc \
                --arg pre "$latest_prerelease" \
                --arg rel "$latest_release" \
                '[
                   { label: "main",        ref: "main", mode: "source"    },
                   { label: "pre-release", ref: $pre,   mode: "extension" },
                   { label: "release",     ref: $rel,   mode: "extension" }
                 ]
                 # de-dupe by ref so pre-release==release does not run twice
                 | unique_by(.ref)')
            fi
            # Expand each base entry across samples modes:
            #   main                → samples=false AND samples=true (inference + deterministic)
            #   pre-release/release → samples=true only (skip inference runs for speed)
            matrix=$(echo "$base" | jq -c '[ .[] as $e | if $e.label == "main" then (false, true) else true end | $e + { samples: . } ]')
            ;;
        esac

        echo "Resolved:"
        echo "  main:               main ($main_sha)"
        echo "  latest pre-release: $latest_prerelease"
        echo "  latest release:     $latest_release"
        echo "  matrix:             $matrix"
        {
          echo "main_sha=$main_sha"
          echo "latest_release=$latest_release"
          echo "latest_prerelease=$latest_prerelease"
          echo "matrix=$matrix"
        } >> "$GITHUB_OUTPUT"
# Runs the E2E suite once per matrix entry produced by `resolve-refs`.
e2e:
  name: 'E2E: ${{ matrix.entry.label }} (gh-aw@${{ matrix.entry.ref }}, ${{ matrix.entry.mode }}, samples=${{ matrix.entry.samples }})'
  runs-on: ubuntu-latest
  needs: resolve-refs
  timeout-minutes: 90
  strategy:
    # One entry at a time: every entry dispatches many test workflows of its
    # own, so parallel entries would flood GitHub Actions. With fail-fast
    # disabled, a failing entry does not cancel the entries queued behind it.
    max-parallel: 1
    fail-fast: false
    matrix:
      entry: ${{ fromJSON(needs.resolve-refs.outputs.matrix) }}
  steps:
    - name: Checkout gh-aw-test
      uses: actions/checkout@v4
      with:
        # Check out the current tip of main rather than the triggering SHA.
        # Entries run serially, so an earlier entry may already have pushed a
        # recompile commit; a checkout that is behind origin/main would make
        # the later `git push` fail with non-fast-forward.
        ref: main
        fetch-depth: 0
        path: gh-aw-test
        # The PAT lets e2e.sh push the recompiled workflows to main (the
        # default GITHUB_TOKEN only has contents:read in this workflow).
        token: ${{ secrets.GH_AW_TEST_PAT }}
# --- source mode: check out github/gh-aw at the entry's ref and build it ----
- name: Checkout github/gh-aw at ${{ matrix.entry.ref }} (source mode)
  uses: actions/checkout@v4
  if: ${{ matrix.entry.mode == 'source' }}
  with:
    repository: github/gh-aw
    path: gh-aw
    ref: ${{ matrix.entry.ref }}
    fetch-depth: 1
- name: Set up Go (source mode)
  uses: actions/setup-go@v5
  if: ${{ matrix.entry.mode == 'source' }}
  with:
    # Go version and module cache key both come from the gh-aw checkout.
    go-version-file: gh-aw/go.mod
    cache-dependency-path: gh-aw/go.sum
- name: Build gh-aw binary from source (source mode)
  if: ${{ matrix.entry.mode == 'source' }}
  run: make build
  working-directory: gh-aw
# --- extension mode: install published gh extension at the pinned tag ---
- name: Install published gh-aw extension at ${{ matrix.entry.ref }} (extension mode)
  if: matrix.entry.mode == 'extension'
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
    # The ref can originate from the `refs` dispatch input. It is handed to
    # the script through the environment, never interpolated into the script
    # text, so shell metacharacters in it cannot change what runs.
    E2E_REF: ${{ matrix.entry.ref }}
  run: |
    set -euo pipefail
    # Remove any pre-existing install so --pin actually wins
    gh extension remove github/gh-aw 2>/dev/null || true
    # Retry up to 3 times with backoff. Prior matrix entries (especially
    # the 45-minute source-mode run) can exhaust the API rate limit,
    # causing a transient 403 on the release-asset download.
    for _attempt in 1 2 3; do
      if gh extension install github/gh-aw --pin "$E2E_REF"; then
        break
      fi
      if [[ "$_attempt" -eq 3 ]]; then
        echo "::error::Failed to install gh-aw extension after 3 attempts"
        exit 1
      fi
      echo "Install attempt $_attempt failed; waiting 90s for rate limit to recover..."
      sleep 90
    done
    gh aw --version
# --- steps shared by source and extension mode ------------------------------
- name: Set up Node.js
  uses: actions/setup-node@v5
# Prints the authentication state of the PAT-backed gh CLI.
- name: gh auth status
  run: gh auth status
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
# Runs e2e.sh for this matrix entry and writes the per-entry report files.
# The step always exits 0; the real exit code is exposed as
# `steps.run.outputs.exit_code` and stored in report/rc-<slug>.txt.
- name: Run E2E against gh-aw@${{ matrix.entry.ref }} (${{ matrix.entry.mode }} mode)
  id: run
  working-directory: gh-aw-test
  timeout-minutes: 80
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
    GITHUB_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
    GH_AW_TEST_PAT: ${{ secrets.GH_AW_TEST_PAT }}
    E2E_REF: ${{ matrix.entry.ref }}
    E2E_MODE: ${{ matrix.entry.mode }}
    E2E_USE_SAMPLES: ${{ matrix.entry.samples }}
  run: |
    # errexit stays off on purpose: a failing e2e.sh must still reach the
    # report-file section below.
    set +e
    chmod +x ./e2e.sh

    if [[ -z "${GH_AW_TEST_PAT:-}" ]]; then
      echo "::error::GH_AW_TEST_PAT secret is not set on this repository. The matrix runs e2e.sh in CI mode, which does not mutate repo secrets and therefore requires GH_AW_TEST_PAT to be supplied via secrets.GH_AW_TEST_PAT."
      exit 1
    fi

    # e2e.sh commits the recompiled workflows and pushes them to main;
    # CI runners have no default git identity.
    git config user.name "gh-aw-test e2e bot"
    git config user.email "gh-aw-test-e2e@users.noreply.github.com"

    # Slug uniquely identifies this (ref × samples) combination and is used
    # for the per-entry report files. The report job rebuilds the same slug.
    slug=$(echo "${E2E_REF}-samples-${E2E_USE_SAMPLES}" | tr '/' '_' | tr -cd 'A-Za-z0-9._-')

    # The matrix is serial (max-parallel: 1), so each entry can safely
    # recompile, push to main, and dispatch every test from main without
    # clobbering another entry. Running from main tests the common case
    # users actually experience (and keeps create-pull-request's
    # fetch-depth:1 merge-base against origin/main trivially resolvable).

    # Tests to run in every matrix entry.
    # Copilot suite in full; one create-issue test for Claude and Codex.
    TESTS=('test-copilot-*' 'test-claude-create-issue' 'test-codex-create-issue')

    # Assemble the e2e.sh arguments once so there is a single invocation.
    E2E_ARGS=()
    if [[ "$E2E_MODE" == "source" ]]; then
      # source mode: locally-built binary + --gh-aw-ref so lockfiles
      # reference github/gh-aw/actions/setup@<ref>.
      # ($CI=true is set automatically by GitHub Actions, so e2e.sh runs
      # in CI mode: no secret mutation. Recompiled workflows are pushed
      # to main.)
      E2E_ARGS+=(--gh-aw-ref "$E2E_REF")
    fi
    # extension mode adds nothing here: it relies on the installed `gh aw`
    # (pinned to $E2E_REF) with no --gh-aw-ref override — lockfiles will
    # reference the published gh-aw-actions release that ships with that
    # gh-aw version, which is exactly what end users get.

    # --verbose streams the gh-aw build/compile/version diagnostics to
    # the job log (otherwise they'd only land in the e2e-test-*.log
    # artifact, making build failures opaque in CI).
    E2E_ARGS+=(--verbose --batch-size 7)
    if [[ "$E2E_USE_SAMPLES" == "true" ]]; then
      E2E_ARGS+=(--use-samples)
    fi

    ./e2e.sh "${E2E_ARGS[@]}" "${TESTS[@]}" 2>&1 | tee e2e-output.log
    # Must directly follow the pipeline: we want e2e.sh's status, not tee's.
    rc=${PIPESTATUS[0]}
    echo "exit_code=$rc" >> "$GITHUB_OUTPUT"

    mkdir -p ../report
    # Always produce both list files (empty when e2e.sh wrote nothing).
    for kind in fails passes; do
      if [[ -s "${kind}.txt" ]]; then
        cp "${kind}.txt" "../report/${kind}-${slug}.txt"
      else
        : > "../report/${kind}-${slug}.txt"
      fi
    done
    printf '%s' "$E2E_REF"  > "../report/ref-${slug}.txt"
    printf '%s' "$E2E_MODE" > "../report/mode-${slug}.txt"
    printf '%s' "$rc"       > "../report/rc-${slug}.txt"

    # Don't fail the step here — let the report job decide overall pass/fail
    # so the aggregator always runs and the artifact upload happens.
    exit 0
# Writes a short per-entry section (exit code + failed tests) to the job
# summary. Matrix values reach the script via env vars rather than being
# interpolated into the script text, so a label/ref containing `$`, backticks
# or quotes is printed literally instead of being evaluated by the shell.
- name: Append job summary
  if: always()
  working-directory: gh-aw-test
  env:
    E2E_LABEL: ${{ matrix.entry.label }}
    E2E_REF: ${{ matrix.entry.ref }}
    E2E_MODE: ${{ matrix.entry.mode }}
    E2E_USE_SAMPLES: ${{ matrix.entry.samples }}
    E2E_EXIT_CODE: ${{ steps.run.outputs.exit_code }}
  run: |
    {
      echo "## E2E: ${E2E_LABEL} — gh-aw@${E2E_REF} (${E2E_MODE} mode, samples=${E2E_USE_SAMPLES})"
      echo
      echo "Exit code: \`${E2E_EXIT_CODE}\`"
      echo
      if [[ -s fails.txt ]]; then
        echo "### Failed tests"
        echo '```'
        cat fails.txt
        echo '```'
      else
        echo "_No failed tests recorded._"
      fi
    } >> "$GITHUB_STEP_SUMMARY"
# Uploads logs and per-entry report files. The report job looks artifacts up
# by this exact name (e2e-<label>-samples-<bool>).
- name: Upload artifacts for ${{ matrix.entry.label }} (samples=${{ matrix.entry.samples }})
  uses: actions/upload-artifact@v4
  if: ${{ always() }}
  with:
    path: |
      gh-aw-test/e2e-test-*.log
      gh-aw-test/e2e-output.log
      gh-aw-test/fails.txt
      gh-aw-test/passes.txt
      report/
    name: e2e-${{ matrix.entry.label }}-samples-${{ matrix.entry.samples }}
# Aggregates the matrix results into a status-report issue. `always()` keeps
# it running even when matrix entries (or the resolve job) did not succeed.
report:
  name: Publish status report issue
  runs-on: ubuntu-latest
  needs:
    - resolve-refs
    - e2e
  if: ${{ always() }}
  permissions:
    contents: read
    issues: write
  steps:
    # No `name` input: every artifact of the run is downloaded under artifacts/.
    - name: Download all matrix artifacts
      uses: actions/download-artifact@v4
      with:
        path: artifacts
# Builds report.md (the issue body) from the per-entry artifacts and records
# the overall verdict in /tmp/overall_failed (0 = everything passed) for the
# final gate step of this job.
- name: Build status report body
  env:
    MATRIX_JSON: ${{ needs.resolve-refs.outputs.matrix }}
    MAIN_SHA: ${{ needs.resolve-refs.outputs.main_sha }}
    LATEST_RELEASE: ${{ needs.resolve-refs.outputs.latest_release }}
    LATEST_PRERELEASE: ${{ needs.resolve-refs.outputs.latest_prerelease }}
    RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
    SERVER_URL: ${{ github.server_url }}
    REPO: ${{ github.repository }}
    E2E_RESULT: ${{ needs.e2e.result }}
    EVENT_NAME: ${{ github.event_name }}
  run: |
    set -euo pipefail

    # ---------------------------------------------------------------
    # Pass 1: gather per-entry data into TSV accumulators so we can
    # render a headline and the per-test tables.
    # ---------------------------------------------------------------
    entries_tsv=$(mktemp)       # label \t ref \t mode \t samples \t rc \t count \t status
    failed_tests_tsv=$(mktemp)  # test \t label \t ref \t mode \t samples \t run_url
    passed_tests_tsv=$(mktemp)  # test \t label \t ref \t mode \t samples \t run_url

    total_entries=0
    entries_passed=0
    entries_failed=0
    entries_unknown=0
    total_failed_tests=0
    overall_failed=0

    # Appends one TSV row per non-empty line of a fails/passes list file.
    # Each line is "<test-name>[ ... <run-id>]": the first space-separated
    # token is the test name and, when the line has more than one token, the
    # last token is used as the Actions run id for the link.
    # Args: list_file dest_tsv label ref mode samples
    _collect_tests() {
      local _src="$1" _dest="$2" _label="$3" _ref="$4" _mode="$5" _samples="$6"
      local _line _tname _run_url
      [[ -s "$_src" ]] || return 0
      # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline, so
      # the rows collected here always match the `grep -c .` count.
      while IFS= read -r _line || [[ -n "$_line" ]]; do
        [[ -z "$_line" ]] && continue
        _tname="${_line%% *}"
        _run_url=""
        if [[ "$_line" == *" "* ]]; then
          _run_url="${SERVER_URL}/${REPO}/actions/runs/${_line##* }"
        fi
        printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
          "$_tname" "$_label" "$_ref" "$_mode" "$_samples" "$_run_url" >> "$_dest"
      done < "$_src"
      return 0
    }

    while IFS=$'\t' read -r label ref mode samples; do
      [[ -z "$label" ]] && continue
      total_entries=$((total_entries + 1))

      # Same slug / artifact-name scheme as the e2e job.
      slug=$(echo "${ref}-samples-${samples}" | tr '/' '_' | tr -cd 'A-Za-z0-9._-')
      art="artifacts/e2e-${label}-samples-${samples}"
      rc_file="${art}/report/rc-${slug}.txt"
      fails_file="${art}/report/fails-${slug}.txt"
      passes_file="${art}/report/passes-${slug}.txt"

      # A missing rc file means the entry never reached the report stage.
      if [[ -f "$rc_file" ]]; then
        rc=$(cat "$rc_file")
      else
        rc="?"
      fi

      count=0
      if [[ -s "$fails_file" ]]; then
        count=$(grep -c . "$fails_file" || true)
      fi
      _collect_tests "$fails_file"  "$failed_tests_tsv" "$label" "$ref" "$mode" "$samples"
      _collect_tests "$passes_file" "$passed_tests_tsv" "$label" "$ref" "$mode" "$samples"

      # Determine entry status.
      if [[ "$rc" == "?" ]]; then
        status="⚪ unknown"
        entries_unknown=$((entries_unknown + 1))
        overall_failed=1
      elif [[ "$rc" == "0" && "$count" -eq 0 ]]; then
        status="✅ pass"
        entries_passed=$((entries_passed + 1))
      else
        status="❌ fail"
        entries_failed=$((entries_failed + 1))
        overall_failed=1
      fi
      total_failed_tests=$((total_failed_tests + count))

      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "$label" "$ref" "$mode" "$samples" "$rc" "$count" "$status" >> "$entries_tsv"
    done < <(echo "$MATRIX_JSON" | jq -r '.[] | [.label, .ref, .mode, (.samples|tostring)] | @tsv')

    entry_noun="entries"
    if [[ "$total_entries" -eq 1 ]]; then
      entry_noun="entry"
    fi
    if [[ "$total_entries" -eq 0 ]]; then
      # This job runs with `if: always()`, so the matrix output may be empty
      # (e.g. resolve-refs failed). Nothing ran, so this must not read as green.
      overall_failed=1
      headline="⚪ No matrix entries were found (the matrix could not be resolved)"
    elif [[ "$overall_failed" == "0" ]]; then
      headline="✅ All ${total_entries} matrix ${entry_noun} passed"
    else
      # Entries without a result are not passes either; count them too.
      headline="❌ $((entries_failed + entries_unknown)) of ${total_entries} matrix ${entry_noun} did not pass (${entries_failed} failed, ${entries_unknown} without a result; ${total_failed_tests} failed test run(s))"
    fi

    # ---------------------------------------------------------------
    # Build lookup tables for the per-test matrix view.
    # ---------------------------------------------------------------
    declare -A _ref_for_label=()   # label -> ref
    declare -A _mode_for_label=()  # label -> mode
    declare -A _cell_status=()     # "samples:test:label" -> "pass" or "fail"
    declare -A _cell_url=()        # "samples:test:label" -> run URL
    _labels_in_order=()            # labels in matrix order, first occurrence only

    while IFS=$'\t' read -r label ref mode samples rc count status; do
      [[ -z "$label" ]] && continue
      if [[ -z "${_ref_for_label[$label]+set}" ]]; then
        _labels_in_order+=("$label")
      fi
      _ref_for_label["$label"]="$ref"
      _mode_for_label["$label"]="$mode"
    done < "$entries_tsv"

    # Failures are loaded first and passes second, so a test recorded in both
    # lists for the same cell ends up shown as a pass.
    while IFS=$'\t' read -r tname label ref mode samples run_url; do
      [[ -z "$tname" ]] && continue
      _cell_status["${samples}:${tname}:${label}"]="fail"
      _cell_url["${samples}:${tname}:${label}"]="$run_url"
    done < <(sort -f "$failed_tests_tsv")

    while IFS=$'\t' read -r tname label ref mode samples run_url; do
      [[ -z "$tname" ]] && continue
      _cell_status["${samples}:${tname}:${label}"]="pass"
      _cell_url["${samples}:${tname}:${label}"]="$run_url"
    done < <(sort -f "$passed_tests_tsv")

    # Helper: format a ref value as a markdown link.
    # Args: ref [mode]   (mode defaults to "extension")
    #   main             -> the resolved main commit
    #   other source ref -> tree view (valid for branches, tags and SHAs)
    #   extension ref    -> the release page of that tag
    _make_ref_link() {
      local _r="$1" _m="${2:-extension}"
      if [[ "$_r" == "main" ]]; then
        echo "[main](${SERVER_URL}/github/gh-aw/commit/${MAIN_SHA})"
      elif [[ "$_m" == "source" ]]; then
        echo "[${_r}](${SERVER_URL}/github/gh-aw/tree/${_r})"
      else
        echo "[${_r}](${SERVER_URL}/github/gh-aw/releases/tag/${_r})"
      fi
    }

    # Render a test × ref matrix table.
    # Args: samples_value ("true"|"false"), type ("errors"|"successes"), heading
    # Prints nothing when there are no columns or no matching tests.
    _render_matrix_table() {
      local _sv="$1"    # "true" or "false"
      local _type="$2"  # "errors" or "successes"
      local _hdg="$3"
      local _lbl

      # Ordered columns: release → pre-release → main, then any others in
      # matrix order (stable across runs, unlike associative-array order).
      local _cols=()
      for _lbl in "release" "pre-release" "main"; do
        if [[ -n "${_ref_for_label[$_lbl]:-}" ]]; then
          _cols+=("$_lbl")
        fi
      done
      for _lbl in "${_labels_in_order[@]}"; do
        case "$_lbl" in
          release|pre-release|main) ;;
          *) _cols+=("$_lbl") ;;
        esac
      done
      [[ "${#_cols[@]}" -eq 0 ]] && return 0

      # "errors": every test with at least one failure for this samples mode.
      # "successes": tests with a recorded pass and no failure at all.
      local _tests
      if [[ "$_type" == "errors" ]]; then
        _tests=$(awk -F'\t' -v sv="$_sv" '$5==sv{print $1}' "$failed_tests_tsv" | sort -u)
      else
        local _fp _ff
        _fp=$(awk -F'\t' -v sv="$_sv" '$5==sv{print $1}' "$passed_tests_tsv" | sort -u)
        _ff=$(awk -F'\t' -v sv="$_sv" '$5==sv{print $1}' "$failed_tests_tsv" | sort -u)
        if [[ -z "$_fp" ]]; then
          _tests=""
        elif [[ -z "$_ff" ]]; then
          _tests="$_fp"
        else
          _tests=$(comm -23 <(echo "$_fp") <(echo "$_ff"))
        fi
      fi
      [[ -z "$_tests" ]] && return 0

      echo "## ${_hdg}"
      echo
      local _hdr="| test |"
      local _sep="| :--- |"
      local _rlink
      for _lbl in "${_cols[@]}"; do
        _rlink=$(_make_ref_link "${_ref_for_label[$_lbl]}" "${_mode_for_label[$_lbl]:-}")
        _hdr+=" ${_rlink} |"
        _sep+=" :---: |"
      done
      echo "$_hdr"
      echo "$_sep"

      local _tname _test_url _row _key _st _ru _cell
      while IFS= read -r _tname; do
        [[ -z "$_tname" ]] && continue
        _test_url="${SERVER_URL}/${REPO}/blob/main/.github/workflows/${_tname}.md"
        _row="| [${_tname}](${_test_url}) |"
        for _lbl in "${_cols[@]}"; do
          _key="${_sv}:${_tname}:${_lbl}"
          _st="${_cell_status[$_key]:-}"
          _ru="${_cell_url[$_key]:-}"
          if [[ "$_st" == "pass" ]]; then
            [[ -n "$_ru" ]] && _cell="[✅]($_ru)" || _cell="✅"
          elif [[ "$_st" == "fail" ]]; then
            [[ -n "$_ru" ]] && _cell="[❌]($_ru)" || _cell="❌"
          else
            _cell="—"
          fi
          _row+=" ${_cell} |"
        done
        echo "$_row"
      done <<< "$_tests"
      echo
    }

    # ---------------------------------------------------------------
    # Pass 2: render the markdown report.
    # ---------------------------------------------------------------
    {
      echo "### ${headline}"
      echo
      echo "**Run:** [$RUN_URL]($RUN_URL) · **Trigger:** \`${EVENT_NAME}\` · **Generated:** \`$(date -u +%FT%TZ)\` · **Outcome:** \`$E2E_RESULT\`"
      echo

      _render_matrix_table "true"  "errors"    "❌ Test errors — samples mode"
      _render_matrix_table "false" "errors"    "❌ Test errors — inference mode"
      _render_matrix_table "true"  "successes" "✅ Test successes — samples mode"
      _render_matrix_table "false" "successes" "✅ Test successes — inference mode"

      # When anything failed, append a ready-to-paste prompt for a coding
      # agent to triage the failures from the run logs.
      if [[ "$overall_failed" != "0" ]]; then
        echo "## 🤖 Agent triage prompt"
        echo
        echo "Copy the block below into a coding agent (e.g. Copilot) to investigate the failures:"
        echo
        echo '```text'
        echo "You are triaging failures from the gh-aw-test E2E suite."
        echo "Run: $RUN_URL"
        echo "Repository under test: github/gh-aw (the gh-aw CLI/compiler)."
        echo "Test harness repository: $GITHUB_REPOSITORY (this repo; runner is e2e.sh)."
        echo
        echo "Goal: for EACH failed test listed in this status report, access the GitHub"
        echo "Actions logs for the run above (and the per-entry artifacts e2e-<label>-samples-<bool>,"
        echo "which contain e2e-test-*.log, e2e-output.log and fails.txt), determine the root"
        echo "cause, and categorize the failure as exactly one of:"
        echo
        echo "  1. TRANSIENT — flaky/infra/network/rate-limit/timing; not a real defect."
        echo "     Action: note it and recommend a re-run (./e2e.sh rerun)."
        echo "  2. TEST-FRAMEWORK BUG — a defect in this repo's harness (e2e.sh), a workflow"
        echo "     source file (.github/workflows/test-*.md), a sample, or CI config."
        echo "     Action: propose a concrete fix (file + change) in $GITHUB_REPOSITORY."
        echo "  3. GH-AW BUG — a defect in github/gh-aw itself (compiler output, runtime"
        echo "     engine behaviour, safe-output handling, etc.)."
        echo "     Action: open an issue in github/gh-aw with a minimal repro, the failing"
        echo "     test name, the gh-aw ref/mode/samples combination, and links to the"
        echo "     relevant log lines. Check for an existing open issue first and link it"
        echo "     instead of filing a duplicate."
        echo
        echo "Steps:"
        echo "  - Use 'gh run view <run-id> --log' and 'gh run download <run-id>' to fetch logs/artifacts."
        echo "  - Read AGENTS.md in $GITHUB_REPOSITORY for harness conventions before proposing fixes."
        echo "  - Group failures by suspected root cause; the same gh-aw bug may explain several."
        echo "  - Produce a table: test | category | root cause | recommended action | issue/PR link."
        echo "  - Only open github/gh-aw issues for category 3, and only after confirming no duplicate exists."
        echo '```'
      fi
    } > report.md

    # Read by the final gate step of this job.
    echo "$overall_failed" > /tmp/overall_failed

    rm -f "$entries_tsv" "$failed_tests_tsv" "$passed_tests_tsv"
    cat report.md
# Best effort: any failure of `gh label create` (its stderr is discarded) is
# ignored, so this step never fails the job.
- name: Ensure status-report label exists
  run: |
    gh label create e2e-status-report \
      --repo "$GITHUB_REPOSITORY" \
      --description "Automated cross-ref E2E status report" \
      --color BFD4F2 2>/dev/null || true
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
# Exposes the numbers of the currently open status-report issues as a
# comma-separated `numbers` output (empty when there are none).
- name: Find prior open status reports
  id: prior
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
  run: |
    set -euo pipefail
    numbers=$(gh issue list \
      --repo "$GITHUB_REPOSITORY" \
      --state open \
      --label e2e-status-report \
      --limit 50 \
      --json number \
      --jq '[.[].number] | join(",")')
    echo "Prior open status reports: ${numbers:-none}"
    echo "numbers=$numbers" >> "$GITHUB_OUTPUT"
# issue-body.md = report.md, followed by a list of the prior open reports
# (when there are any) that a later step closes.
- name: Compose final issue body (with links to previous reports)
  env:
    PRIOR: ${{ steps.prior.outputs.numbers }}
  run: |
    cat report.md > issue-body.md
    if [[ -n "$PRIOR" ]]; then
      {
        echo
        echo "## Previous status reports (closed by this run)"
        IFS=',' read -ra prior_numbers <<< "$PRIOR"
        for issue in "${prior_numbers[@]}"; do
          echo "* #$issue"
        done
      } >> issue-body.md
    fi
# Opens the new status-report issue and exposes its `number` and `url`.
- name: Create new status report issue
  id: create
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
  run: |
    set -euo pipefail
    today=$(date -u +%F)
    url=$(gh issue create \
      --repo "$GITHUB_REPOSITORY" \
      --label e2e-status-report \
      --title "E2E status report — ${today} (gh-aw matrix)" \
      --body-file issue-body.md)
    echo "Created: $url"
    {
      # `gh issue create` prints the issue URL; its last path segment is the number.
      echo "number=${url##*/}"
      echo "url=$url"
    } >> "$GITHUB_OUTPUT"
# Comments on every prior open report with a pointer to the new issue, then
# closes it. Skipped when no prior report was found.
- name: Close previous status report issues
  if: ${{ steps.prior.outputs.numbers != '' }}
  env:
    GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
    PRIOR: ${{ steps.prior.outputs.numbers }}
    NEW_NUMBER: ${{ steps.create.outputs.number }}
    NEW_URL: ${{ steps.create.outputs.url }}
  run: |
    IFS=',' read -ra prior_numbers <<< "$PRIOR"
    for issue in "${prior_numbers[@]}"; do
      gh issue comment "$issue" --repo "$GITHUB_REPOSITORY" \
        --body "Superseded by #${NEW_NUMBER} — ${NEW_URL}"
      gh issue close "$issue" --repo "$GITHUB_REPOSITORY" \
        --reason "not planned"
    done
# Final gate: the job fails unless the report step recorded "0" in
# /tmp/overall_failed. A missing or unreadable file counts as a failure.
- name: Fail the workflow if any matrix entry reported failures
  run: |
    overall=1
    if [[ -r /tmp/overall_failed ]]; then
      overall=$(cat /tmp/overall_failed)
    fi
    if [[ "$overall" == "0" ]]; then
      exit 0
    fi
    echo "::error::At least one gh-aw matrix entry reported failures (see status report issue)"
    exit 1