e2e

e2e #302

Workflow file for this run

	# Workflow name; `github.workflow` (used below as the concurrency group) resolves to it.
	name: e2e

	# Compile-compatibility matrix that runs the E2E suite against multiple gh-aw
	# refs × samples modes. The scheduled (nightly) run exercises the full matrix:
	# 1. github/gh-aw `main` (source mode)
	# 2. the latest gh-aw pre-release tag (extension mode)
	# 3. the latest gh-aw stable release (extension mode)
	# `main` is run with both samples=false (live engine) and samples=true
	# (deterministic); the pre-release and release entries run with samples=true only.
	#
	# Execution model
	# ---------------
	# * SERIAL: the matrix runs with `max-parallel: 1`. Each entry itself dispatches
	# dozens of test workflows, so running entries concurrently would flood GitHub
	# Actions. Entries are therefore queued one at a time (`fail-fast: false` so a
	# single entry's failure doesn't cancel the rest).
	# * RUNS FROM MAIN: each entry recompiles its workflows, pushes them to `main`,
	# and dispatches every test from `main`. Event-triggered tests (add-comment,
	# add-labels, update-issue, etc.) create their own fixtures and wait for the
	# GitHub event to fire, exactly as they do when run locally.
	#
	# Manual dispatch
	# ---------------
	# `workflow_dispatch` exposes a `selection` choice so a human can run a single
	# combination quickly instead of the whole matrix. The default is `quick`
	# (main / source / samples=true). `full` runs the entire matrix serially, same
	# as the nightly schedule.
	#
	# Because `$CI` is set to `true` by GitHub Actions, `e2e.sh` runs in CI mode: it
	# does NOT mutate the repository's `TEMP_USER_PAT` secret (it relies on the
	# pre-configured repo secret + the `GH_AW_TEST_PAT` env var instead).
	#
	# Required secrets:
	# * GH_AW_TEST_PAT (required) — PAT used for all operations except copilot requests

	# Triggers: a nightly schedule plus a manual dispatch with three optional inputs.
	on:
	schedule:
	- cron: '0 3 * * *' # Nightly at 03:00 UTC
	workflow_dispatch:
	inputs:
	# Which slice of the matrix to run; the resolve-refs job reads it as SELECTION.
	selection:
	description: 'Which part of the matrix to run'
	required: false
	type: choice
	default: quick
	options:
	- quick # main / source / samples=true (fast, deterministic)
	- main-live # main / source / samples=false (live engine path)
	- release # latest release / extension / samples=true
	- prerelease # latest pre-release / extension / samples=true
	- full # full matrix, run serially
	# Source-mode ref for the quick / main-live selections; empty falls back to `main`.
	gh_aw_ref:
	description: 'github/gh-aw branch, tag, or SHA to test in source mode (e.g. main, a feature branch, or a commit SHA). Applies to the quick / main-live selections; defaults to main.'
	required: false
	default: ''
	# Explicit ref list for selection=full. Each token may be a bare ref or
	# `<ref>:source` / `<ref>:extension` (parsed in the resolve-refs job).
	refs:
	description: 'Optional comma-separated list of gh-aw refs to test (overrides main / latest pre-release / latest release auto-detection). Only used when selection=full.'
	required: false
	default: ''

	# A single concurrency group for the whole workflow: starting a new run
	# cancels any run of this workflow that is still in progress.
	# NOTE(review): this means a manual dispatch cancels an in-progress nightly
	# run (possibly between a push to main and its dispatched tests) — confirm
	# that is intended.
	concurrency:
	group: ${{ github.workflow }}
	cancel-in-progress: true

	# Scopes for the default GITHUB_TOKEN. The steps below that push to main or
	# manage issues authenticate with secrets.GH_AW_TEST_PAT instead.
	permissions:
	contents: read
	issues: write
	pull-requests: read
	actions: read

	jobs:
	# Resolves which gh-aw refs to test and publishes the job matrix (compact
	# JSON) that the `e2e` job fans out over and the `report` job reads back.
	# Outputs: matrix, main_sha (12-char short SHA), latest_release,
	# latest_prerelease.
	resolve-refs:
	name: Resolve gh-aw refs to test
	runs-on: ubuntu-latest
	outputs:
	matrix: ${{ steps.resolve.outputs.matrix }}
	main_sha: ${{ steps.resolve.outputs.main_sha }}
	latest_release: ${{ steps.resolve.outputs.latest_release }}
	latest_prerelease: ${{ steps.resolve.outputs.latest_prerelease }}
	steps:
	- name: Resolve refs
	id: resolve
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	OVERRIDE: ${{ inputs.refs }}
	# Optional github/gh-aw branch/tag/SHA to test in source mode. Applies
	# to the single-entry quick / main-live selections; empty => main.
	GH_AW_REF_INPUT: ${{ inputs.gh_aw_ref }}
	# On `schedule` (and any non-dispatch event) there is no selection
	# input, so default to the full matrix. On workflow_dispatch this is
	# the user's chosen option (default `quick`).
	SELECTION: ${{ github.event_name == 'workflow_dispatch' && inputs.selection \|\| 'full' }}
	run: \|
	set -euo pipefail

	main_sha=$(gh api repos/github/gh-aw/commits/main --jq '.sha' \| cut -c1-12)
	latest_release=$(gh release list --repo github/gh-aw --limit 50 \
	--json tagName,isPrerelease,isDraft \
	--jq '[.[] \| select(.isPrerelease==false and .isDraft==false)][0].tagName' \|\| echo "")
	latest_prerelease=$(gh release list --repo github/gh-aw --limit 50 \
	--json tagName,isPrerelease,isDraft \
	--jq '[.[] \| select(.isPrerelease==true and .isDraft==false)][0].tagName' \|\| echo "")

	# A stable release is mandatory; a missing pre-release falls back to it.
	if [[ -z "$latest_release" ]]; then
	echo "::error::Could not resolve latest gh-aw release"
	exit 1
	fi
	if [[ -z "$latest_prerelease" ]]; then
	latest_prerelease="$latest_release"
	fi

	echo "Selection: $SELECTION"

	# The single-entry source-mode selections (quick / main-live) test a
	# specific github/gh-aw ref. Default to `main`, but allow the dispatch
	# input `gh_aw_ref` to point them at any branch, tag, or SHA. The label
	# is slugified from the ref so job titles / artifact names stay valid.
	# (`:-` covers both the unset and the empty case.)
	src_ref="${GH_AW_REF_INPUT:-main}"
	src_label=$(echo "$src_ref" \| tr '/' '-' \| tr -cd 'A-Za-z0-9._-')
	echo "Source-mode ref: $src_ref (label: $src_label)"

	# Each matrix entry is a single, fully-specified combination:
	# label : short human-readable name used in job titles / artifact names / report
	# ref : git ref / tag passed to the build or install step
	# mode : 'source' => check out github/gh-aw at <ref>, `make build`,
	# then run e2e.sh --gh-aw-ref <ref>.
	# 'extension' => `gh extension install github/gh-aw --pin <ref>`,
	# then run e2e.sh with NO --gh-aw-ref.
	# samples : true => run with --use-samples (deterministic, no engine)
	# false => run against the live AI engine
	#
	# The job that consumes this matrix runs serially (max-parallel: 1), so
	# entries never race each other when they push to main.
	#
	# `full` builds the complete (ref × samples) product. The single-entry
	# selections pick exactly one combination for a fast manual dispatch.
	case "$SELECTION" in
	quick)
	matrix=$(jq -nc --arg ref "$src_ref" --arg label "$src_label" \
	'[{ label: $label, ref: $ref, mode: "source", samples: true }]')
	;;
	main-live)
	matrix=$(jq -nc --arg ref "$src_ref" --arg label "$src_label" \
	'[{ label: $label, ref: $ref, mode: "source", samples: false }]')
	;;
	release)
	matrix=$(jq -nc --arg rel "$latest_release" \
	'[{ label: "release", ref: $rel, mode: "extension", samples: true }]')
	;;
	prerelease)
	matrix=$(jq -nc --arg pre "$latest_prerelease" \
	'[{ label: "pre-release", ref: $pre, mode: "extension", samples: true }]')
	;;
	full\|*)
	# Override format (workflow_dispatch input `refs`): comma-separated
	# tokens. Each token may be a bare ref (mode=source) or
	# "<ref>:source" / "<ref>:extension" to force a mode.
	if [[ -n "$OVERRIDE" ]]; then
	# The label is slugified the same way as src_label above: it ends up
	# in the artifact name (e2e-<label>-samples-<bool>), where `/` is
	# not an allowed character.
	base=$(echo "$OVERRIDE" \| jq -Rc '
	split(",")
	\| map(gsub("^\\s+\|\\s+$";""))
	\| map(select(length>0))
	\| map(
	if test(":") then
	(split(":") \| { ref: .[0], mode: (.[1] // "source") })
	else
	{ ref: ., mode: "source" }
	end
	\| . + { label: (.ref \| gsub("/";"-") \| gsub("[^A-Za-z0-9._-]";"")) }
	)')
	# Fail fast on an empty list or an unknown mode: with any other mode
	# neither the source build nor the extension install step would run.
	if ! jq -e 'length > 0 and all(.[]; .mode == "source" or .mode == "extension")' <<< "$base" > /dev/null; then
	echo "::error::Invalid refs override '$OVERRIDE': expected a comma-separated list of <ref>, <ref>:source or <ref>:extension"
	exit 1
	fi
	else
	base=$(jq -nc \
	--arg pre "$latest_prerelease" \
	--arg rel "$latest_release" \
	'[
	{ label: "main", ref: "main", mode: "source" },
	{ label: "pre-release", ref: $pre, mode: "extension" },
	{ label: "release", ref: $rel, mode: "extension" }
	]
	# de-dupe by ref so pre-release==release does not run twice
	\| unique_by(.ref)')
	fi
	# Expand each base entry across samples modes:
	# main → samples=false AND samples=true (inference + deterministic)
	# pre-release/release → samples=true only (skip inference runs for speed)
	matrix=$(echo "$base" \| jq -c '[ .[] as $e \| if $e.label == "main" then (false, true) else true end \| $e + { samples: . } ]')
	;;
	esac

	echo "Resolved:"
	echo " main: main ($main_sha)"
	echo " latest pre-release: $latest_prerelease"
	echo " latest release: $latest_release"
	echo " matrix: $matrix"

	# Publish the step outputs consumed by the `e2e` and `report` jobs.
	{
	echo "main_sha=$main_sha"
	echo "latest_release=$latest_release"
	echo "latest_prerelease=$latest_prerelease"
	echo "matrix=$matrix"
	} >> "$GITHUB_OUTPUT"

	# Runs the E2E suite once per matrix entry produced by resolve-refs.
	e2e:
	name: "E2E: ${{ matrix.entry.label }} (gh-aw@${{ matrix.entry.ref }}, ${{ matrix.entry.mode }}, samples=${{ matrix.entry.samples }})"
	needs: resolve-refs
	runs-on: ubuntu-latest
	# Job-level cap; the "Run E2E" step has its own, tighter 80-minute limit.
	timeout-minutes: 90
	strategy:
	# Run entries one at a time so the suite never floods GitHub Actions with
	# concurrent dispatched test runs (each entry itself dispatches many
	# workflows). fail-fast is off so one entry's failure doesn't cancel the
	# rest of the (serial) queue.
	fail-fast: false
	max-parallel: 1
	matrix:
	# One object per combination: { label, ref, mode, samples }.
	entry: ${{ fromJSON(needs.resolve-refs.outputs.matrix) }}
	steps:
	- name: Checkout gh-aw-test
	uses: actions/checkout@v4
	with:
	# Always checkout the latest HEAD of main, not the trigger SHA.
	# `github.sha` is fixed when the run is triggered, but by the time this
	# job runs (serial matrix, queued), a prior matrix entry may have
	# pushed a recompile commit on top of it. Without `ref: main`, the
	# checkout is behind origin/main and `git push` fails with
	# non-fast-forward.
	ref: main
	path: gh-aw-test
	fetch-depth: 0
	# Use the PAT so e2e.sh can push the recompiled workflows to main
	# (the default GITHUB_TOKEN only has contents:read in this workflow).
	token: ${{ secrets.GH_AW_TEST_PAT }}

	# --- source mode: build gh-aw from a checkout ---------------------------
	# The three steps below run only for entries whose mode is 'source'.
	- name: Checkout github/gh-aw at ${{ matrix.entry.ref }} (source mode)
	if: matrix.entry.mode == 'source'
	uses: actions/checkout@v4
	with:
	repository: github/gh-aw
	ref: ${{ matrix.entry.ref }}
	# Checked out into ./gh-aw, next to the ./gh-aw-test checkout.
	path: gh-aw
	fetch-depth: 1

	- name: Set up Go (source mode)
	if: matrix.entry.mode == 'source'
	uses: actions/setup-go@v5
	with:
	# Go version and cache key both come from the gh-aw checkout.
	go-version-file: gh-aw/go.mod
	cache-dependency-path: gh-aw/go.sum

	- name: Build gh-aw binary from source (source mode)
	if: matrix.entry.mode == 'source'
	working-directory: gh-aw
	run: make build

	# --- extension mode: install published gh extension at the pinned tag ---
	# Runs only for entries whose mode is 'extension'.
	- name: Install published gh-aw extension at ${{ matrix.entry.ref }} (extension mode)
	if: matrix.entry.mode == 'extension'
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	# The ref can originate from the `refs` dispatch input, so it is passed
	# through the environment instead of being interpolated into the script
	# text; a value containing shell metacharacters cannot alter the command.
	E2E_REF: ${{ matrix.entry.ref }}
	run: \|
	set -euo pipefail
	# Remove any pre-existing install so --pin actually wins
	gh extension remove github/gh-aw 2>/dev/null \|\| true
	# Retry up to 3 times with backoff. Prior matrix entries (especially
	# the 45-minute source-mode run) can exhaust the API rate limit,
	# causing a transient 403 on the release-asset download.
	for _attempt in 1 2 3; do
	if gh extension install github/gh-aw --pin "$E2E_REF"; then
	break
	fi
	if [[ "$_attempt" -eq 3 ]]; then
	echo "::error::Failed to install gh-aw extension after 3 attempts"
	exit 1
	fi
	echo "Install attempt $_attempt failed; waiting 90s for rate limit to recover..."
	sleep 90
	done
	# Record the installed version in the job log.
	gh aw --version

	# --- shared steps -------------------------------------------------------
	# NOTE(review): no `node-version` is given, so the Node.js version is
	# whatever setup-node selects by default — confirm that is intended.
	- name: Set up Node.js
	uses: actions/setup-node@v5

	# Shows the authentication state of the PAT in the job log before the suite runs.
	- name: gh auth status
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	run: gh auth status

	# Runs e2e.sh for this matrix entry, records its exit code as the step
	# output `exit_code`, and writes per-entry report files to ../report
	# (fails-, passes-, ref-, mode- and rc-<slug>.txt) for the report job.
	- name: Run E2E against gh-aw@${{ matrix.entry.ref }} (${{ matrix.entry.mode }} mode)
	id: run
	working-directory: gh-aw-test
	timeout-minutes: 80
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	GITHUB_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	GH_AW_TEST_PAT: ${{ secrets.GH_AW_TEST_PAT }}
	E2E_REF: ${{ matrix.entry.ref }}
	E2E_MODE: ${{ matrix.entry.mode }}
	E2E_USE_SAMPLES: ${{ matrix.entry.samples }}
	run: \|
	# errexit is off on purpose: a failing e2e.sh must not abort the script
	# before the exit code and report files have been written.
	set +e
	chmod +x ./e2e.sh
	if [[ -z "${GH_AW_TEST_PAT:-}" ]]; then
	echo "::error::GH_AW_TEST_PAT secret is not set on this repository. The matrix runs e2e.sh in CI mode, which does not mutate repo secrets and therefore requires GH_AW_TEST_PAT to be supplied via secrets.GH_AW_TEST_PAT."
	exit 1
	fi

	# e2e.sh commits the recompiled workflows and pushes them to main;
	# CI runners have no default git identity.
	git config user.name "gh-aw-test e2e bot"
	git config user.email "gh-aw-test-e2e@users.noreply.github.com"

	# Slug uniquely identifies this (ref × samples) combination and is used
	# for the per-entry report files. The report job rebuilds the same slug
	# from the matrix JSON, so the two derivations must stay in sync.
	slug=$(echo "${E2E_REF}-samples-${E2E_USE_SAMPLES}" \| tr '/' '_' \| tr -cd 'A-Za-z0-9._-')

	# The matrix is serial (max-parallel: 1), so each entry can safely
	# recompile, push to main, and dispatch every test from main without
	# clobbering another entry. Running from main tests the common case
	# users actually experience (and keeps create-pull-request's
	# fetch-depth:1 merge-base against origin/main trivially resolvable).

	# Build flags
	SAMPLES_FLAG=""
	if [[ "$E2E_USE_SAMPLES" == "true" ]]; then
	SAMPLES_FLAG="--use-samples"
	fi

	# Tests to run in every matrix entry.
	# Copilot suite in full; one create-issue test for Claude and Codex.
	TESTS=('test-copilot-*' 'test-claude-create-issue' 'test-codex-create-issue')

	# $SAMPLES_FLAG is intentionally unquoted below so that an empty value
	# expands to no argument at all.
	if [[ "$E2E_MODE" == "source" ]]; then
	# source mode: locally-built binary + --gh-aw-ref so lockfiles
	# reference github/gh-aw/actions/setup@<ref>.
	# ($CI=true is set automatically by GitHub Actions, so e2e.sh runs
	# in CI mode: no secret mutation. Recompiled workflows are pushed
	# to main.)
	# --verbose streams the gh-aw build/compile/version diagnostics to
	# the job log (otherwise they'd only land in the e2e-test-*.log
	# artifact, making build failures opaque in CI).
	./e2e.sh --gh-aw-ref "$E2E_REF" --verbose --batch-size 7 $SAMPLES_FLAG "${TESTS[@]}" 2>&1 \| tee e2e-output.log
	else
	# extension mode: rely on the installed `gh aw` (pinned to $E2E_REF),
	# no --gh-aw-ref override — lockfiles will reference the published
	# gh-aw-actions release that ships with that gh-aw version, which is
	# exactly what end users get.
	./e2e.sh --verbose --batch-size 7 $SAMPLES_FLAG "${TESTS[@]}" 2>&1 \| tee e2e-output.log
	fi
	# Exit status of e2e.sh itself, not of `tee`.
	rc=${PIPESTATUS[0]}
	echo "exit_code=$rc" >> "$GITHUB_OUTPUT"

	# Always produce both list files (possibly empty) so the report job can
	# distinguish "ran with no entries" from "artifact missing".
	mkdir -p ../report
	if [[ -s fails.txt ]]; then
	cp fails.txt "../report/fails-${slug}.txt"
	else
	: > "../report/fails-${slug}.txt"
	fi
	if [[ -s passes.txt ]]; then
	cp passes.txt "../report/passes-${slug}.txt"
	else
	: > "../report/passes-${slug}.txt"
	fi
	printf '%s' "$E2E_REF" > "../report/ref-${slug}.txt"
	printf '%s' "$E2E_MODE" > "../report/mode-${slug}.txt"
	printf '%s' "$rc" > "../report/rc-${slug}.txt"

	# Don't fail the step here — let the report job decide overall pass/fail
	# so the aggregator always runs and the artifact upload happens.
	exit 0

	# Appends a per-entry section (heading, e2e.sh exit code, failed tests) to
	# the job summary. Runs even when earlier steps failed.
	- name: Append job summary
	if: always()
	working-directory: gh-aw-test
	env:
	# Matrix values can originate from dispatch inputs, so they are passed
	# through the environment rather than interpolated into the script text.
	ENTRY_LABEL: ${{ matrix.entry.label }}
	ENTRY_REF: ${{ matrix.entry.ref }}
	ENTRY_MODE: ${{ matrix.entry.mode }}
	ENTRY_SAMPLES: ${{ matrix.entry.samples }}
	RUN_EXIT_CODE: ${{ steps.run.outputs.exit_code }}
	run: \|
	{
	echo "## E2E: ${ENTRY_LABEL} — gh-aw@${ENTRY_REF} (${ENTRY_MODE} mode, samples=${ENTRY_SAMPLES})"
	echo
	echo "Exit code: \`${RUN_EXIT_CODE}\`"
	echo
	if [[ -s fails.txt ]]; then
	echo "### Failed tests"
	echo '```'
	cat fails.txt
	echo '```'
	else
	echo "_No failed tests recorded._"
	fi
	} >> "$GITHUB_STEP_SUMMARY"

	# Uploads the logs and the per-entry report directory. The report job
	# locates this artifact by the same `e2e-<label>-samples-<bool>` name.
	- name: Upload artifacts for ${{ matrix.entry.label }} (samples=${{ matrix.entry.samples }})
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: e2e-${{ matrix.entry.label }}-samples-${{ matrix.entry.samples }}
	path: \|
	gh-aw-test/e2e-test-*.log
	gh-aw-test/e2e-output.log
	gh-aw-test/fails.txt
	gh-aw-test/passes.txt
	report/

	# Aggregates every matrix entry's artifacts into one status-report issue,
	# closes the previous report issues, and sets the overall workflow result.
	report:
	name: Publish status report issue
	needs: [resolve-refs, e2e]
	# Run even when the e2e job failed or was cancelled so a report is still produced.
	if: always()
	runs-on: ubuntu-latest
	permissions:
	issues: write
	contents: read
	steps:
	# No artifact name is given; the script below expects one sub-directory
	# per artifact under ./artifacts (artifacts/e2e-<label>-samples-<bool>/).
	- name: Download all matrix artifacts
	uses: actions/download-artifact@v4
	with:
	path: artifacts

	# Builds report.md from the downloaded artifacts and writes the overall
	# pass/fail flag to /tmp/overall_failed for the final step of this job.
	- name: Build status report body
	env:
	MATRIX_JSON: ${{ needs.resolve-refs.outputs.matrix }}
	MAIN_SHA: ${{ needs.resolve-refs.outputs.main_sha }}
	LATEST_RELEASE: ${{ needs.resolve-refs.outputs.latest_release }}
	LATEST_PRERELEASE: ${{ needs.resolve-refs.outputs.latest_prerelease }}
	RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
	SERVER_URL: ${{ github.server_url }}
	REPO: ${{ github.repository }}
	E2E_RESULT: ${{ needs.e2e.result }}
	run: \|
	set -euo pipefail

	# ---------------------------------------------------------------
	# Pass 1: gather per-entry data into TSV accumulators so we can
	# render an aggregate summary and a failures-first report.
	# ---------------------------------------------------------------
	entries_tsv=$(mktemp) # label \t ref \t mode \t samples \t rc \t count \t status
	failed_tests_tsv=$(mktemp) # test \t label \t ref \t mode \t samples \t run_url
	passed_tests_tsv=$(mktemp) # test \t label \t ref \t mode \t samples \t run_url

	total_entries=0
	entries_passed=0
	entries_failed=0
	entries_unknown=0
	total_failed_tests=0
	overall_failed=0

	# One iteration per matrix entry, in matrix order.
	while IFS=$'\t' read -r label ref mode samples; do
	[[ -z "$label" ]] && continue
	total_entries=$((total_entries + 1))
	# Same slug derivation as the "Run E2E" step in the e2e job.
	slug=$(echo "${ref}-samples-${samples}" \| tr '/' '_' \| tr -cd 'A-Za-z0-9._-')
	art="artifacts/e2e-${label}-samples-${samples}"
	rc_file="${art}/report/rc-${slug}.txt"
	fails_file="${art}/report/fails-${slug}.txt"

	# A missing rc file means the entry never reached the reporting stage.
	if [[ -f "$rc_file" ]]; then
	rc=$(cat "$rc_file")
	else
	rc="?"
	fi

	count=0
	if [[ -s "$fails_file" ]]; then
	# Number of non-empty lines; `\|\| true` keeps errexit quiet when grep
	# finds none.
	count=$(grep -c . "$fails_file" \|\| true)
	# Collect each failed test + a link to its latest run.
	while IFS= read -r fline; do
	[[ -z "$fline" ]] && continue
	# First space-separated word is the test name.
	tname="${fline%% *}"
	run_url=""
	# Only lines that contain a space carry extra fields; the last word
	# is then used as the run id. A bare test name gets no link.
	if [[ "$fline" == *" "* ]]; then
	last_id="${fline##* }"
	run_url="${SERVER_URL}/${REPO}/actions/runs/${last_id}"
	fi
	printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
	"$tname" "$label" "$ref" "$mode" "$samples" "$run_url" >> "$failed_tests_tsv"
	done < "$fails_file"
	fi

	# Collect each passed test + a link to its run.
	passes_file="${art}/report/passes-${slug}.txt"
	if [[ -s "$passes_file" ]]; then
	while IFS= read -r fline; do
	[[ -z "$fline" ]] && continue
	tname="${fline%% *}"
	run_url=""
	# Same line format as the fails file: the run id is the last word.
	if [[ "$fline" == *" "* ]]; then
	last_id="${fline##* }"
	run_url="${SERVER_URL}/${REPO}/actions/runs/${last_id}"
	fi
	printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
	"$tname" "$label" "$ref" "$mode" "$samples" "$run_url" >> "$passed_tests_tsv"
	done < "$passes_file"
	fi

	# Determine entry status. Unknown entries count against the overall
	# result just like failed ones.
	if [[ "$rc" == "?" ]]; then
	status="⚪ unknown"
	entries_unknown=$((entries_unknown + 1))
	overall_failed=1
	elif [[ "$rc" == "0" && "$count" -eq 0 ]]; then
	status="✅ pass"
	entries_passed=$((entries_passed + 1))
	else
	status="❌ fail"
	entries_failed=$((entries_failed + 1))
	overall_failed=1
	fi

	total_failed_tests=$((total_failed_tests + count))
	printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
	"$label" "$ref" "$mode" "$samples" "$rc" "$count" "$status" >> "$entries_tsv"
	done < <(echo "$MATRIX_JSON" \| jq -r '.[] \| [.label, .ref, .mode, (.samples\|tostring)] \| @tsv')

	# NOTE(review): `headline` is computed here but nothing in this step
	# writes it to the report — confirm whether it should be emitted.
	if [[ "$overall_failed" == "0" ]]; then
	headline="✅ All ${total_entries} matrix entr$([[ $total_entries -eq 1 ]] && echo y \|\| echo ies) passed"
	else
	headline="❌ ${entries_failed} of ${total_entries} matrix entr$([[ $total_entries -eq 1 ]] && echo y \|\| echo ies) failed (${total_failed_tests} failed test run(s))"
	fi

	# ---------------------------------------------------------------
	# Build lookup tables for the per-test matrix view.
	# ---------------------------------------------------------------
	declare -A _ref_for_label=() # label -> ref
	declare -A _cell_status=() # "samples:test:label" -> "pass" or "fail"
	declare -A _cell_url=() # "samples:test:label" -> run URL
	# Only label and ref are used from each entries row.
	while IFS=$'\t' read -r label ref mode samples rc count status; do
	[[ -z "$label" ]] && continue
	_ref_for_label["$label"]="$ref"
	done < "$entries_tsv"
	while IFS=$'\t' read -r tname label ref mode samples run_url; do
	[[ -z "$tname" ]] && continue
	_cell_status["${samples}:${tname}:${label}"]="fail"
	_cell_url["${samples}:${tname}:${label}"]="$run_url"
	done < <(sort -f "$failed_tests_tsv")
	# Passes are loaded after failures, so a cell recorded in both files ends
	# up as "pass".
	while IFS=$'\t' read -r tname label ref mode samples run_url; do
	[[ -z "$tname" ]] && continue
	_cell_status["${samples}:${tname}:${label}"]="pass"
	_cell_url["${samples}:${tname}:${label}"]="$run_url"
	done < <(sort -f "$passed_tests_tsv")

	# Helper: print a markdown link for a ref.
	# $1 = ref. `main` links to the resolved main commit (MAIN_SHA); every
	# other ref links to the release page of the tag with that name.
	# NOTE(review): a non-main source-mode ref (branch or SHA) also gets a
	# releases/tag link — confirm whether that is acceptable.
	_make_ref_link() {
	local _target="$1"
	case "$_target" in
	main)
	printf '%s\n' "[main](${SERVER_URL}/github/gh-aw/commit/${MAIN_SHA})"
	;;
	*)
	printf '%s\n' "[${_target}](${SERVER_URL}/github/gh-aw/releases/tag/${_target})"
	;;
	esac
	}

	# Render a test × ref matrix table.
	# Args: samples_value ("true"\|"false"), type ("errors"\|"successes"), heading
	# Reads the _ref_for_label / _cell_status / _cell_url tables and the two
	# test TSV files. Prints nothing when there are no columns or no matching
	# tests.
	_render_matrix_table() {
	local _sv="$1" # "true" or "false"
	local _type="$2" # "errors" or "successes"
	local _hdg="$3"
	# Ordered columns: release → pre-release → main, then any others.
	local _cols=()
	for _lbl in "release" "pre-release" "main"; do
	[[ -n "${_ref_for_label[$_lbl]:-}" ]] && _cols+=("$_lbl")
	done
	# Remaining labels follow in associative-array key order.
	for _lbl in "${!_ref_for_label[@]}"; do
	case "$_lbl" in release\|pre-release\|main) ;; *)
	_cols+=("$_lbl") ;;
	esac
	done
	[[ "${#_cols[@]}" -eq 0 ]] && return 0
	local _tests
	if [[ "$_type" == "errors" ]]; then
	# Every test that failed in at least one entry for this samples value.
	_tests=$(awk -F'\t' -v sv="$_sv" '$5==sv{print $1}' "$failed_tests_tsv" \| sort -u)
	else
	# Tests that passed somewhere and failed nowhere for this samples value
	# (passed set minus failed set).
	local _fp _ff
	_fp=$(awk -F'\t' -v sv="$_sv" '$5==sv{print $1}' "$passed_tests_tsv" \| sort -u)
	_ff=$(awk -F'\t' -v sv="$_sv" '$5==sv{print $1}' "$failed_tests_tsv" \| sort -u)
	if [[ -z "$_fp" ]]; then
	_tests=""
	elif [[ -z "$_ff" ]]; then
	_tests="$_fp"
	else
	_tests=$(comm -23 <(echo "$_fp") <(echo "$_ff"))
	fi
	fi
	[[ -z "$_tests" ]] && return 0
	echo "## ${_hdg}"
	echo
	# Header row: one column per label, shown as a link to its ref.
	local _hdr="\| test \|"
	local _sep="\| :--- \|"
	for _lbl in "${_cols[@]}"; do
	local _ref="${_ref_for_label[$_lbl]}"
	local _rlink
	_rlink=$(_make_ref_link "$_ref")
	_hdr+=" ${_rlink} \|"
	_sep+=" :---: \|"
	done
	echo "$_hdr"
	echo "$_sep"
	# One row per test; each cell is ✅ / ❌ (linked when a run URL is known)
	# or — when that entry has no record of the test.
	while IFS= read -r _tname; do
	[[ -z "$_tname" ]] && continue
	local _test_url="${SERVER_URL}/${REPO}/blob/main/.github/workflows/${_tname}.md"
	local _row="\| [${_tname}](${_test_url}) \|"
	for _lbl in "${_cols[@]}"; do
	local _key="${_sv}:${_tname}:${_lbl}"
	local _st="${_cell_status[$_key]:-}"
	local _ru="${_cell_url[$_key]:-}"
	local _cell
	if [[ "$_st" == "pass" ]]; then
	[[ -n "$_ru" ]] && _cell="[✅]($_ru)" \|\| _cell="✅"
	elif [[ "$_st" == "fail" ]]; then
	[[ -n "$_ru" ]] && _cell="[❌]($_ru)" \|\| _cell="❌"
	else
	_cell="—"
	fi
	_row+=" ${_cell} \|"
	done
	echo "$_row"
	done <<< "$_tests"
	echo
	}

	# ---------------------------------------------------------------
	# Pass 2: render the markdown report.
	# ---------------------------------------------------------------
	# Everything echoed inside this group goes to report.md.
	{
	echo "Run: [$RUN_URL]($RUN_URL)  ·  Trigger: \`${{ github.event_name }}\`  ·  Generated: \`$(date -u +%FT%TZ)\`  ·  Outcome: \`$E2E_RESULT\`"
	echo

	# Failures first, then successes; each table is omitted when it is empty.
	_render_matrix_table "true" "errors" "❌ Test errors — samples mode"
	_render_matrix_table "false" "errors" "❌ Test errors — inference mode"
	_render_matrix_table "true" "successes" "✅ Test successes — samples mode"
	_render_matrix_table "false" "successes" "✅ Test successes — inference mode"

	# When anything failed, append a ready-to-paste prompt for a coding
	# agent to triage the failures from the run logs.
	if [[ "$overall_failed" != "0" ]]; then
	echo "## 🤖 Agent triage prompt"
	echo
	echo "Copy the block below into a coding agent (e.g. Copilot) to investigate the failures:"
	echo
	echo '```text'
	echo "You are triaging failures from the gh-aw-test E2E suite."
	echo "Run: $RUN_URL"
	echo "Repository under test: github/gh-aw (the gh-aw CLI/compiler)."
	echo "Test harness repository: $GITHUB_REPOSITORY (this repo; runner is e2e.sh)."
	echo
	echo "Goal: for EACH failed test listed in this status report, access the GitHub"
	echo "Actions logs for the run above (and the per-entry artifacts e2e-<label>-samples-<bool>,"
	echo "which contain e2e-test-*.log, e2e-output.log and fails.txt), determine the root"
	echo "cause, and categorize the failure as exactly one of:"
	echo
	echo " 1. TRANSIENT — flaky/infra/network/rate-limit/timing; not a real defect."
	echo " Action: note it and recommend a re-run (./e2e.sh rerun)."
	echo " 2. TEST-FRAMEWORK BUG — a defect in this repo's harness (e2e.sh), a workflow"
	echo " source file (.github/workflows/test-*.md), a sample, or CI config."
	echo " Action: propose a concrete fix (file + change) in $GITHUB_REPOSITORY."
	echo " 3. GH-AW BUG — a defect in github/gh-aw itself (compiler output, runtime"
	echo " engine behaviour, safe-output handling, etc.)."
	echo " Action: open an issue in github/gh-aw with a minimal repro, the failing"
	echo " test name, the gh-aw ref/mode/samples combination, and links to the"
	echo " relevant log lines. Check for an existing open issue first and link it"
	echo " instead of filing a duplicate."
	echo
	echo "Steps:"
	echo " - Use 'gh run view <run-id> --log' and 'gh run download <run-id>' to fetch logs/artifacts."
	echo " - Read AGENTS.md in $GITHUB_REPOSITORY for harness conventions before proposing fixes."
	echo " - Group failures by suspected root cause; the same gh-aw bug may explain several."
	echo " - Produce a table: test \| category \| root cause \| recommended action \| issue/PR link."
	echo " - Only open github/gh-aw issues for category 3, and only after confirming no duplicate exists."
	echo '```'
	fi

	# Has its own redirect, so this goes to /tmp/overall_failed (read by the
	# last step of this job), not to report.md.
	echo "$overall_failed" > /tmp/overall_failed
	} > report.md

	# Clean up the temporary TSV files and show the report in the job log.
	rm -f "$entries_tsv" "$failed_tests_tsv" "$passed_tests_tsv"
	cat report.md

	# Best-effort label creation: errors (for example, the label already
	# exists) are silenced and never fail the step.
	- name: Ensure status-report label exists
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	run: \|
	gh label create e2e-status-report \
	--color BFD4F2 \
	--description "Automated cross-ref E2E status report" \
	--repo "$GITHUB_REPOSITORY" 2>/dev/null \|\| true

	# Collects the numbers of the currently open status-report issues (at most
	# 50) as a comma-separated list in the step output `numbers`. This runs
	# before the new issue is created, so the new issue is never in the list.
	- name: Find prior open status reports
	id: prior
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	run: \|
	set -euo pipefail
	numbers=$(gh issue list \
	--repo "$GITHUB_REPOSITORY" \
	--label e2e-status-report \
	--state open \
	--limit 50 \
	--json number \
	--jq '[.[].number] \| join(",")')
	echo "numbers=$numbers" >> "$GITHUB_OUTPUT"
	echo "Prior open status reports: ${numbers:-none}"

	# Writes issue-body.md: report.md, followed (only when prior reports
	# exist) by a section listing the issues this run supersedes.
	- name: Compose final issue body (with links to previous reports)
	env:
	PRIOR: ${{ steps.prior.outputs.numbers }}
	run: \|
	cat report.md > issue-body.md
	# Nothing to append when there are no prior reports.
	if [[ -z "$PRIOR" ]]; then
	exit 0
	fi
	{
	echo
	echo "## Previous status reports (closed by this run)"
	IFS=',' read -ra prior_numbers <<< "$PRIOR"
	for issue in "${prior_numbers[@]}"; do
	echo "* #$issue"
	done
	} >> issue-body.md

	# Creates the status-report issue from issue-body.md and exposes its
	# `number` and `url` as step outputs for the closing step.
	- name: Create new status report issue
	id: create
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	run: \|
	set -euo pipefail
	title="E2E status report — $(date -u +%F) (gh-aw matrix)"
	url=$(gh issue create \
	--repo "$GITHUB_REPOSITORY" \
	--title "$title" \
	--label e2e-status-report \
	--body-file issue-body.md)
	# The issue number is taken as the last path segment of the printed URL.
	number="${url##*/}"
	echo "Created: $url"
	echo "number=$number" >> "$GITHUB_OUTPUT"
	echo "url=$url" >> "$GITHUB_OUTPUT"

	# For each previously open report: leave a pointer to the new issue, then
	# close it. Skipped when no prior reports were found.
	- name: Close previous status report issues
	if: steps.prior.outputs.numbers != ''
	env:
	GH_TOKEN: ${{ secrets.GH_AW_TEST_PAT }}
	PRIOR: ${{ steps.prior.outputs.numbers }}
	NEW_NUMBER: ${{ steps.create.outputs.number }}
	NEW_URL: ${{ steps.create.outputs.url }}
	run: \|
	# PRIOR is a comma-separated list of issue numbers.
	IFS=',' read -ra arr <<< "$PRIOR"
	for n in "${arr[@]}"; do
	gh issue comment "$n" \
	--repo "$GITHUB_REPOSITORY" \
	--body "Superseded by #${NEW_NUMBER} — ${NEW_URL}"
	gh issue close "$n" \
	--repo "$GITHUB_REPOSITORY" \
	--reason "not planned"
	done

	# Final gate: turns the flag written by "Build status report body" into
	# the job result. A missing or unreadable flag file counts as a failure.
	- name: Fail the workflow if any matrix entry reported failures
	run: \|
	verdict=$(cat /tmp/overall_failed 2>/dev/null \|\| echo 1)
	# "0" is the only passing value.
	[[ "$verdict" == "0" ]] && exit 0
	echo "::error::At least one gh-aw matrix entry reported failures (see status report issue)"
	exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

e2e #302

Workflow file

e2e #302

Uh oh!

Workflow file for this run