From 7daa534c8677b7f7190ddfc6dede1defbbc9e0db Mon Sep 17 00:00:00 2001
From: Eve Park <eve.park@heygen.com>
Date: Mon, 27 Apr 2026 15:18:38 -0700
Subject: [PATCH 1/4] feat(skills): co-locate references for gh skill
 self-containment

Each skill (heygen-avatar, heygen-video) now bundles its own references/
directory with the docs it links to. This makes both skills install
cleanly via gh skill install (which only copies the skill subdirectory),
without breaking the OpenClaw plugin or direct git clone lanes that use
the repo-root references/ directory.
---
 heygen-avatar/SKILL.md                        |   7 +
 heygen-avatar/references/asset-routing.md     |  86 ++++++
 heygen-avatar/references/avatar-creation.md   | 178 ++++++++++++
 heygen-avatar/references/troubleshooting.md   | 151 ++++++++++
 heygen-video/SKILL.md                         |  63 +++-
 heygen-video/references/asset-routing.md      |  86 ++++++
 heygen-video/references/avatar-discovery.md   | 179 ++++++++++++
 heygen-video/references/frame-check.md        |  98 +++++++
 heygen-video/references/motion-vocabulary.md  | 191 ++++++++++++
 .../references/official-prompt-guide.md       | 116 ++++++++
 heygen-video/references/prompt-craft.md       | 273 ++++++++++++++++++
 heygen-video/references/prompt-styles.md      | 251 ++++++++++++++++
 heygen-video/references/reviewer-prompt.md    |  95 ++++++
 heygen-video/references/troubleshooting.md    | 151 ++++++++++
 heygen-video/scripts/update-check.sh          | 170 +++++++++++
 15 files changed, 2084 insertions(+), 11 deletions(-)
 create mode 100644 heygen-avatar/references/asset-routing.md
 create mode 100644 heygen-avatar/references/avatar-creation.md
 create mode 100644 heygen-avatar/references/troubleshooting.md
 create mode 100644 heygen-video/references/asset-routing.md
 create mode 100644 heygen-video/references/avatar-discovery.md
 create mode 100644 heygen-video/references/frame-check.md
 create mode 100644 heygen-video/references/motion-vocabulary.md
 create mode 100644 heygen-video/references/official-prompt-guide.md
 create mode 100644 heygen-video/references/prompt-craft.md
 create mode 100644 heygen-video/references/prompt-styles.md
 create mode 100644 heygen-video/references/reviewer-prompt.md
 create mode 100644 heygen-video/references/troubleshooting.md
 create mode 100755 heygen-video/scripts/update-check.sh
diff --git a/heygen-avatar/SKILL.md b/heygen-avatar/SKILL.md
index 42d84da..5ccaed3 100644
--- a/heygen-avatar/SKILL.md
+++ b/heygen-avatar/SKILL.md
@@ -1,4 +1,5 @@
 ---
+version: 2.3.0 # x-release-please-version
 name: heygen-avatar
 description: |
   Create a persistent HeyGen avatar — a reusable face + voice identity for the agent,
@@ -204,6 +205,8 @@ For agents and named characters, skip this entire step — go straight to Type A
 
 ### Phase 2 — Avatar Creation
 
+📖 **Full creation API surface (photo / prompt / digital twin), file input formats, identity field → enum mapping, response shape → [references/avatar-creation.md](references/avatar-creation.md)**
+
 Two modes:
 
 **Mode 1 — New character** (omit `avatar_group_id`):
@@ -231,6 +234,8 @@ File options for Type B:
 - `{ "type": "asset_id", "asset_id": "<id>" }` — from `heygen asset create --file <path>`
 - `{ "type": "base64", "media_type": "image/png", "data": "<base64>" }` — inline
 
+📖 **When to use each (URL vs asset_id vs base64), upload routing, and edge cases → [references/asset-routing.md](references/asset-routing.md)**
+
 **Response:** Returns `avatar_item.id` (look ID) and `avatar_item.group_id` (character identity).
 
 Map identity fields to HeyGen enums for the prompt:
@@ -411,3 +416,5 @@ simply `cat AVATAR-AGENT.md` and get whatever the current agent's avatar is.
 - Voice match poor → show all available voices, let user browse
 - Asset upload fails → skip reference image, try prompt-only creation
 - Existing avatar file with stale HeyGen IDs → offer to regenerate or keep
+
+📖 **Known issues, retry patterns, broken voice previews, error → action mapping → [references/troubleshooting.md](references/troubleshooting.md)**
diff --git a/heygen-avatar/references/asset-routing.md b/heygen-avatar/references/asset-routing.md
new file mode 100644
index 0000000..b7eb708
--- /dev/null
+++ b/heygen-avatar/references/asset-routing.md
@@ -0,0 +1,86 @@
+# Asset Handling — The Classification Engine
+
+When the user provides files, URLs, or references, route each asset to the right path. The user should NEVER have to think about this.
+
+## Two Paths
+
+| Path | What happens | When to use |
+|------|-------------|-------------|
+| **A: Contextualize → Prompt** | Read/analyze the asset, extract key info, bake into script. Video Agent never sees the original. | Reference material, auth-walled content, documents where the *information* matters more than the *visual*. |
+| **B: Attach to API** | Upload the raw file via `files[]`. Video Agent analyzes, extracts graphics, uses as frames/B-roll. | Screenshots, branded assets, PDFs with important visual layouts, images the viewer should literally see. |
+| **A+B: Both** | Contextualize for script quality AND attach for visual use. | Long docs where you need to summarize but Video Agent should also have the full source. |
+
+## Classification Flow
+
+```
+1. Can Video Agent access this directly?
+   - Public URL (no auth, no paywall) → YES
+   - Private/internal URL → NO
+   - Local file → NO (must upload first)
+
+2. Should the viewer SEE this asset?
+   - Screenshot, logo, product image, chart → YES → Path B
+   - Research doc, article, context material → NO → Path A
+   - Ambiguous → Path A+B
+
+3. Is the content too long for the prompt?
+   - Short (< 500 words) → fits in prompt
+   - Long (> 500 words) → summarize key points, attach full doc
+```
+
+## Decision Matrix
+
+| Asset Type | Publicly Accessible? | Show On Screen? | Route |
+|-----------|---------------------|----------------|-------|
+| Screenshot / image | N/A | Yes | **B: Attach** + describe in prompt as B-roll |
+| Logo / brand asset | N/A | Yes | **B: Attach** + anchor to intro/outro |
+| Public URL to file (PDF, image, video) | Yes | Maybe | **B: Download → upload (`heygen asset create` or the MCP asset tool) → pass `asset_id`** + summarize |
+| Public URL to web page (HTML) | Yes | No | **A: Fetch and contextualize only.** Do NOT pass HTML URLs in `files[]`. |
+| Auth-walled URL (requires login) | No | No | **A: Ask the user to paste the content.** Never fabricate. |
+| PDF (short, text-heavy) | N/A | No | **A+B: Extract key points** + attach |
+| PDF (long, visual-rich) | N/A | Maybe | **B: Attach** + summarize top points |
+| Raw data / spreadsheet | N/A | Partially | **A: Analyze and describe** key stats. Attach if charts should appear. |
+
+## Executing Routes
+
+### Path A (Contextualize)
+- URLs: Use `web_fetch` to retrieve publicly accessible content
+- For auth-walled content you cannot access: ask the user to paste the text directly
+- Extract 3-5 most important points relevant to the video
+- Weave naturally into the script. Don't dump. Integrate.
+
+### Path B (Attach)
+Upload to HeyGen:
+
+**MCP:** upload via the asset tool (depends on environment).
+**CLI:** `heygen asset create --file /path/to/file.png`
+
+Max 32MB per file. Returns JSON with the new `asset_id`.
+
+Or pass inline in `files[]`:
+```json
+{"type": "url", "url": "https://example.com/image.png"}
+{"type": "asset_id", "asset_id": "<from upload>"}
+{"type": "base64", "data": "<base64>", "content_type": "image/png"}
+```
+
+### Describe Asset Usage in Prompt
+Be SPECIFIC:
+- "Use the uploaded dashboard screenshot as B-roll when discussing analytics"
+- "Display the company logo in the intro and end card"
+
+### Log Classification
+In the learning log entry, record:
+```json
+"assets_classified": [{"type": "image", "route": "attach", "accessible": true, "reason": "product screenshot"}]
+```
+
+## Rules
+
+- **Never ask the user which path unless genuinely 50/50.** You're the producer. Make the call.
+- **When in doubt, do both (A+B).** Over-providing costs nothing.
+- **Always describe attached assets in the prompt.** Uploading without description = ignored.
+- **Auth-walled content is YOUR job.** Bridge the gap between your access and Video Agent's.
+- **URLs that fail:** Try `web_fetch`. If login/paywall/404 → tell the user, ask for content directly. Never silently fabricate.
+- **HTML URLs cannot go in `files[]`.** Video Agent rejects `text/html`. Web pages are ALWAYS Path A only.
+- **Prefer download→upload→asset_id** over `files[]{url}`. HeyGen's servers are often blocked by the source site's CDN/WAF.
diff --git a/heygen-avatar/references/avatar-creation.md b/heygen-avatar/references/avatar-creation.md
new file mode 100644
index 0000000..739f851
--- /dev/null
+++ b/heygen-avatar/references/avatar-creation.md
@@ -0,0 +1,178 @@
+# Avatar Creation API Surface
+
+This guide expands `heygen-avatar` Phase 2 (avatar creation) and Phase 3
+(voice selection) with the full API surface, field mappings, and file
+input formats. The SKILL.md gives the high-level workflow; this file is
+the reference when you need exact arguments, edge cases, or alternative
+creation paths.
+
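+For *avatar discovery* (finding an existing avatar at video time), see the sibling `heygen-video` skill's
+[`references/avatar-discovery.md`](../../heygen-video/references/avatar-discovery.md) (this link resolves only when both skills are installed side by side, e.g. in a full repo clone).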
+
+---
+
+## Avatar Creation: Three Types
+
+`heygen-avatar` Phase 2 supports three creation types. Pick based on what
+the user provides:
+
+| User input | Type | API |
+|---|---|---|
+| A photo of a real person | `photo` | `create_photo_avatar` |
+| A description of an appearance | `prompt` | `create_prompt_avatar` |
+| A short video recording of a real person | `video` | `create_digital_twin` |
+
+All three accept an optional `avatar_group_id`:
+- **Omit it** to create a new character (new group).
+- **Include it** to add a new look (variation) to an existing character.
+
+Always use Mode 2 (with `avatar_group_id`) when the avatar already exists
+and you're creating a variant (different outfit, orientation fix, bg
+change). Only use Mode 1 (new character) for genuinely new identities.
+
+### Photo avatar (from user's photo)
+
+**MCP:** `create_photo_avatar(name=<name>, file=<file_object>, avatar_group_id=<optional>)`
+
+**CLI:**
+```bash
+heygen avatar create -d '{
+  "type": "photo",
+  "name": "My Avatar",
+  "file": {"type": "url", "url": "https://example.com/headshot.jpg"},
+  "avatar_group_id": "<optional>"
+}'
+```
+
+Photo requirements:
+- JPEG or PNG
+- Min 512x512
+- Clear front-facing face
+- Good lighting
+
+### AI-generated avatar (from text prompt)
+
+**MCP:** `create_prompt_avatar(name=<name>, prompt=<appearance>, avatar_group_id=<optional>)`
+
+**CLI:**
+```bash
+heygen avatar create -d '{
+  "type": "prompt",
+  "name": "Tech Presenter",
+  "prompt": "Young professional woman, modern workspace, confident smile",
+  "avatar_group_id": "<optional>"
+}'
+```
+
+Prompt limit: 1000 characters (the API spec says 200 but the actual
+enforced limit is 1000). Be descriptive — include style, features,
+expression, lighting.
+
+Optional: up to 3 `reference_images` to anchor the generated appearance.
+
+### Video avatar / digital twin (from a short recording)
+
+**MCP:** `create_digital_twin(name=<name>, file=<file_object>, avatar_group_id=<optional>)`
+
+**CLI:**
+```bash
+heygen avatar create -d '{
+  "type": "video",
+  "name": "My Video Avatar",
+  "file": {"type": "asset_id", "asset_id": "<uploaded_asset_id>"},
+  "avatar_group_id": "<optional>"
+}'
+```
+
+---
+
+## File Input Formats
+
+`file` accepts three forms:
+
+```jsonc
+// Public URL (no auth, no paywall)
+{ "type": "url", "url": "https://example.com/headshot.jpg" }
+
+// Pre-uploaded asset (from `heygen asset create --file <path>`)
+{ "type": "asset_id", "asset_id": "<id>" }
+
+// Inline base64
+{ "type": "base64", "data": "<base64>", "content_type": "image/png" }
+```
+
+For when each is appropriate, see
+[`references/asset-routing.md`](asset-routing.md).
+
+---
+
+## Response Shape
+
+All three types return:
+```jsonc
+{
+  "avatar_item": {
+    "id": "<look_id>",         // ephemeral — the specific look
+    "group_id": "<group_id>"   // stable — the character identity
+  }
+}
+```
+
+- `id` is the **look_id** — what you pass downstream as `avatar_id` to
+  `create_video_agent` for video generation.
+- `group_id` is the **character identity** — stable across looks. Save
+  this in the AVATAR-<NAME>.md file. Always resolve fresh look_ids at
+  video time via `list_avatar_looks(group_id=<id>)` rather than caching
+  a specific look_id.
+
+---
+
+## Identity Field → HeyGen Enum Mapping
+
+When building a prompt-based avatar, map identity attributes to these
+HeyGen enums:
+
+- **age**: Young Adult | Early Middle Age | Late Middle Age | Senior | Unspecified
+- **gender**: Man | Woman | Unspecified
+- **ethnicity**: White | Black | Asian American | East Asian | South East Asian | South Asian | Middle Eastern | Pacific | Hispanic | Unspecified
+- **style**: Realistic | Pixar | Cinematic | Vintage | Noir | Cyberpunk | Unspecified
+- **orientation**: square | horizontal | vertical
+- **pose**: half_body | close_up | full_body
+
+---
+
+## Voice Selection (during avatar setup)
+
+After the avatar look is created, pair it with a voice. Two paths:
+
+### Path A — Voice Design (preferred)
+
+Find matching voices via semantic search using the Voice section from
+the AVATAR file. This searches HeyGen's full voice library. No new
+voices are generated and no quota is consumed.
+
+**Language matching:** The voice design prompt should specify the target
+language from `user_language`. Example for Japanese: `"A calm, warm
+female voice. Professional but approachable. Japanese speaker."` This
+ensures semantic search returns voices in the correct language.
+
+### Path B — Voice Browse (fallback)
+
+For manual catalog browsing:
+
+**MCP:** `list_voices(type=private)` then `list_voices(type=public, language=<lang>, gender=<gender>)`
+
+**CLI:**
+```bash
+heygen voice list --type private --limit 20
+heygen voice list --type public --engine starfish --language en --gender female --limit 20
+```
+
+**ALWAYS show a playable voice preview.** Each voice response includes
+`preview_audio_url` — share it before committing.
+
+**Handling missing/broken previews:** Some voices return bare `s3://`
+paths or `null`. When this happens: note "(no preview available)" and
+offer to generate a short TTS sample via `create_speech` (MCP) or
+`heygen voice speech create --text "<sample>" --voice-id <id>
+--input-type plain_text --language en --locale en-US` (CLI).
diff --git a/heygen-avatar/references/troubleshooting.md b/heygen-avatar/references/troubleshooting.md
new file mode 100644
index 0000000..b9ad832
--- /dev/null
+++ b/heygen-avatar/references/troubleshooting.md
@@ -0,0 +1,151 @@
+# Known Issues & Troubleshooting
+
+## Known Bug: Video Agent "Talking Photo Not Found"
+
+**Error message:** "The Talking Photo for the current narrator could not be found."
+
+**Root Cause:** Confirmed as a Video Agent backend bug by HeyGen engineering (Jerry Yan). Affects `video_avatar` type narrators and stock avatar auto-selection.
+
+**Workaround:**
+- Prefer explicit `avatar_id` over auto-selection
+- If `video_avatar` fails, retry with a `studio_avatar` or `photo_avatar`
+
+**Status:** Fix in progress at HeyGen.
+
+---
+
+## Weird Pauses / Unnatural Silence in Videos
+
+**Symptom:** Video has awkward pauses or breaks between sentences. Narrator stops speaking but video continues with dead air before next line.
+
+**Root Cause:** When Video Agent receives a script shorter than the target duration, it treats the script as verbatim speech and inserts silence/breaks to stretch it to the exact requested duration. It won't ad-lib or expand — it just pads with dead air.
+
+**Fix:** Add this directive to EVERY prompt:
+> "This script is a concept and theme to convey — not a verbatim transcript. You have full creative freedom to expand, elaborate, add examples, and fill the duration naturally. Do not pad with silence or pauses."
+
+This tells Video Agent it can expand the script naturally instead of treating it as a fixed speech transcript. Per Jerry Yan: "If you tell it it's not a script to be strictly followed but concept or theme or give it green light to expand the script it will do well."
+
+**Status:** Skill-side fix (prompt directive). HeyGen is also tuning the default behavior but the explicit directive is the reliable workaround.
+
+---
+
+## Duration Variance (Expected Behavior)
+
+Video Agent controls final video timing internally. Duration accuracy ranges from 79-174% of target across testing. This is NOT a bug.
+
+**Mitigation:** Variable padding multipliers (Script):
+- ≤30s target: 1.6x padding
+- 31-119s target: 1.4x padding
+- ≥120s target: 1.3x padding
+
+With explicit `avatar_id`: ~97% duration accuracy average.
+Without `avatar_id`: ~80% accuracy average.
+
+---
+
+## Frame Check: Video Agent Not Applying Framing
+
+If the Video Agent ignores the FRAMING NOTE or BACKGROUND NOTE and produces black bars, letterboxing, or mismatched framing:
+
+1. **Ensure the note is appended at the END of the prompt**, after all other content (script, style block, etc.). Video Agent processes instructions sequentially and late-prompt directives have the strongest effect.
+2. **Check that the correction note was actually appended.** Log the final prompt text and verify the FRAMING NOTE / BACKGROUND NOTE block is present.
+3. **photo_avatar does NOT need BACKGROUND NOTE.** Video Agent generates avatar + environment together for photo_avatars. Only append framing notes for orientation mismatches. Background notes are for studio_avatars with transparent/empty backgrounds only.
+
+---
+
+## Stock Avatar Auto-Selection Unreliable
+
+When no `avatar_id` is provided, Video Agent uses narrator tags (`{{@narrator_l0ug91}}`) that sometimes fail to resolve during render.
+
+**Fix:** Always use explicit `avatar_id` from discovery. The only exception is Quick Shot mode where the user explicitly wants speed over reliability.
+
+---
+
+## HTML URLs in files[] Rejected
+
+Video Agent rejects `text/html` content type in the `files[]` array. Web pages (blogs, docs sites, articles) must be handled via Path A (contextualize) only.
+
+**What works in files[]:** Direct file URLs (PDFs, images, videos) — but prefer download→upload→asset_id since CDN/WAF often blocks HeyGen's servers.
+
+---
+
+## Avatar Not Ready for Video Generation
+
+**Symptom:** Video generation fails or produces errors immediately after creating a new avatar. The avatar exists in the HeyGen dashboard but videos referencing it fail.
+
+**Root Cause:** Avatar creation is asynchronous. `heygen avatar create` (and `create_photo_avatar` / `create_prompt_avatar` MCP tools) return success immediately, but the avatar image is still being processed. If you submit a video request before processing completes, it fails.
+
+**Detection:** Poll with `heygen avatar looks list --group-id <group_id>` (or MCP `list_avatar_looks`). The avatar is NOT ready until:
+- `preview_image_url` is non-null
+- `image_width` and `image_height` are non-zero
+
+At the group level (`heygen avatar list`), an unready avatar will have no `preview_image_url` on the group object.
+
+**Fix:** Poll every 10 seconds after creation, wait for preview URL to appear. Typical: 30-90s for photo avatars, 1-3 min for prompt avatars. Timeout at 5 min.
+
+**The heygen-avatar skill handles this automatically.** If you bypass the skill, you must implement this polling yourself.
+
+---
+
+## Interactive Sessions Reliability
+
+Interactive sessions (created without `--wait` and iterated via `heygen video-agent send`) have known issues:
+- Sessions frequently stuck at `processing` status
+- `reviewing` state may never be reached
+- Follow-up messages fail with timing errors
+- Stop command may not trigger video generation
+
+**Recommendation:** Use one-shot mode for production. Interactive sessions are documented here for future use, once HeyGen stabilizes the API.
+
+---
+
+## Error Code → Action
+
+Stable CLI exit codes tell you what to do without parsing messages:
+
+| Exit | Class | Action |
+|------|-------|--------|
+| `0` | ok | Continue |
+| `1` | API / network | Retry with backoff. If persistent, check `--verbose` or contact HeyGen support. |
+| `2` | usage | You passed a bad flag. Run `--help` on the command, fix the args, retry. |
+| `3` | auth | Re-auth: `heygen auth login` or set `HEYGEN_API_KEY`. Verify with `heygen auth status`. |
+| `4` | timeout under `--wait` | Operation still running server-side. stdout contains the partial resource (with `session_id` or `video_id`) — resume polling with `heygen video-agent get <id>` or `heygen video get <id>`. Do NOT re-submit. |
+
+Common API-error hints (surfaced in stderr envelope `{error:{code,message,hint}}`):
+
+- `402` / insufficient credits → tell the user their HeyGen plan is out of credits.
+- `403` / forbidden → the resource is not owned by the caller (wrong `group_id`, private avatar).
+- `404` / not found → ID is stale. Re-fetch via `heygen avatar list`, `heygen video-agent get`, etc.
+
+---
+
+## Polling Cadence
+
+When `--wait` isn't an option (e.g., you want to return control to the user between polls), use a back-off schedule rather than a fixed interval:
+
+| Age of job | Poll interval |
+|------------|---------------|
+| 0–2 min | every 10s |
+| 2–5 min | every 30s |
+| 5–10 min | every 60s |
+| > 10 min | surface "taking longer than usual" once, keep polling at 60s, give up at 15 min |
+
+If a job is stuck at the same status for >5 min, that's a signal to surface a status update or check the dashboard.
+
+**Prefer `--wait`** on creation commands. It handles the polling internally and returns the final resource or exits `4` with a resumable `session_id` / `video_id` on timeout.
+
+---
+
+## Direct Video vs Video Agent — Which Endpoint?
+
+Two ways to generate a video. Different pricing, different trade-offs.
+
+| | **Direct Video** | **Video Agent** |
+|---|-------------------|-----------------|
+| Command / Tool | `heygen video create` / no MCP tool yet | `heygen video-agent create` / `create_video_agent` |
+| Input | Full script + avatar + voice + scene JSON | Prompt + optional avatar/voice/style |
+| Control | You author every scene | Video Agent plans scenes, pacing, motion |
+| Pricing | ~$0.0333/sec | ~$0.10/sec |
+| When to use | Deterministic multi-scene videos, tight control, bulk generation | Creative intros, messages, "make a video about X" requests |
+
+The default in the `heygen-video` skill is **Video Agent** — that skill is built around it. Drop to Direct Video only for batch or highly scripted workflows where Agent's autonomy is overhead.
diff --git a/heygen-video/SKILL.md b/heygen-video/SKILL.md
index c845c80..3816987 100644
--- a/heygen-video/SKILL.md
+++ b/heygen-video/SKILL.md
@@ -1,4 +1,5 @@
 ---
+version: 2.3.0 # x-release-please-version
 name: heygen-video
 description: |
   Generate HeyGen presenter videos via the v3 Video Agent pipeline — handles Frame Check
@@ -45,13 +46,53 @@ You are a video producer. Not a form. Not a CLI wrapper. A producer who understa
 
 ## API Mode Detection
 
-See the root [SKILL.md](../SKILL.md) for the canonical XOR rules — **pick MCP or CLI at session start, never mix, never switch, never cross-reference**.
+**Pick one transport at session start. Never mix, never switch mid-session, never narrate the choice.**
 
-Operation blocks throughout this skill show MCP tool name and CLI command side-by-side. **Read only the column for your detected mode.** If MCP is available, use the `mcp__heygen__*` tools; ignore the CLI column. If CLI is available, run `heygen ...` commands; ignore the MCP column. Never invoke anything from the other column.
+Detect in this order:
 
-**Do not look up API endpoints.** MCP tool names are the contract in MCP mode. `heygen <noun> <verb> --help` is the contract in CLI mode. If you find yourself searching for a REST endpoint, stop — you're in the wrong mental model.
+1. **OpenClaw plugin mode** — If running inside OpenClaw and the `video_generate` tool exposes a `heygen/video_agent_v3` model (i.e. the user has [`@heygen/openclaw-plugin-heygen`](https://github.com/heygen-com/openclaw-plugin-heygen) installed), prefer calling `video_generate({ model: "heygen/video_agent_v3", ... })` directly for video generation. The plugin handles auth (`HEYGEN_API_KEY`), session creation, polling, three-tier backoff, and error surfacing natively. Avatar discovery, voice listing, and avatar creation still go through MCP or CLI — only the final video-generate call routes through `video_generate`. Frame Check still runs before submission.
+2. **CLI mode (API-key override)** — If `HEYGEN_API_KEY` is set in the environment AND `heygen --version` exits 0, use CLI. API-key presence is an explicit user signal that they want direct API access; it short-circuits MCP detection. No question asked.
+3. **MCP mode** — Step 2 did not apply (no `HEYGEN_API_KEY` set, or the key is set but the `heygen` CLI is not installed) AND HeyGen MCP tools are visible in the toolset (tools matching `mcp__heygen__*`). OAuth auth, uses existing plan credits.
+4. **CLI mode (fallback)** — MCP tools NOT available AND `heygen --version` exits 0. Auth via `heygen auth login` (persists to `~/.heygen/credentials`).
+5. **Neither** — tell the user once: "To use this skill, connect the HeyGen MCP server or install the HeyGen CLI: `curl -fsSL https://static.heygen.ai/cli/install.sh | bash` then `heygen auth login`."
 
-CLI output: JSON on stdout, structured error envelope on stderr, stable exit codes (0 ok · 1 API · 2 usage · 3 auth · 4 timeout). See [../references/troubleshooting.md](../references/troubleshooting.md) for error → action mapping and polling cadence. Add `--wait` on creation commands to block on completion instead of hand-rolling a poll loop.
+**Hard rules:**
+- **Never call `curl api.heygen.com/...`** — every mode routes through its own surface.
+- **OpenClaw plugin mode: only use `video_generate` for the generate step.** Never run `heygen ...` CLI for the generate call when the plugin is available. Avatar/voice discovery still uses MCP or CLI.
+- **MCP mode: only use `mcp__heygen__*` tools.** Never run `heygen ...` CLI commands. The MCP tool name IS the API.
+- **CLI mode: only use `heygen ...` commands.** Run `heygen <noun> <verb> --help` to discover arguments.
+- **Never cross over.** Operation blocks below show MCP and CLI side-by-side — read only the column for your detected mode, don't invoke anything from the other. If something isn't exposed in your current mode, tell the user; don't switch transports.
+
+### OpenClaw plugin-mode generate call
+
+```ts
+await video_generate({
+  model: "heygen/video_agent_v3",
+  prompt: scriptWithFrameCheckNotes,
+  aspectRatio: "16:9", // or "9:16"
+  providerOptions: {
+    avatar_id,
+    voice_id,
+    style_id,        // optional
+    callback_url,    // optional async webhook
+    callback_id,     // optional correlation id
+  },
+});
+```
+
+Plugin install (one-time, by the user): `openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen`. Plugin docs: <https://github.com/heygen-com/openclaw-plugin-heygen>.
+
+### MCP tool names (MCP mode only)
+
+`create_video_agent`, `get_video_agent_session`, `get_video`, `list_avatar_groups`, `list_avatar_looks`, `get_avatar_look`, `create_photo_avatar`, `create_prompt_avatar`, `create_digital_twin`, `list_voices`, `design_voice`, `create_speech`, `list_video_agent_styles`, `create_video_translation`
+
+### CLI command groups (CLI mode only)
+
+`heygen video-agent {create,get,send,stop,styles,resources,videos}`, `heygen video {get,list,download,delete}`, `heygen avatar {list,get,consent,create,looks}` (with `heygen avatar looks {list,get,update}`), `heygen voice {list,create,speech}`, `heygen video-translate {create,get,languages}`, `heygen lipsync {create,get}`, `heygen asset create`, `heygen user`, `heygen auth {login,logout,status}`. Every subcommand supports `--help` — that's your reference. Run `heygen --help` to see the full noun list.
+
+**Do not look up API endpoints.** There is no `api-reference.md` lookup step. MCP mode uses tool names. CLI mode uses `heygen ... --help`. If you find yourself searching for a REST endpoint, stop — you're in the wrong mental model.
+
+CLI output: JSON on stdout, `{error:{code,message,hint}}` envelope on stderr, exit codes `0` ok · `1` API · `2` usage · `3` auth · `4` timeout. See [references/troubleshooting.md](references/troubleshooting.md) for error → action mapping and polling cadence. Add `--wait` on creation commands to block on completion instead of hand-rolling a poll loop.
 
 ---
 
@@ -91,7 +132,7 @@ Two paths for every asset:
 - **Path B (Attach):** Upload to HeyGen via `heygen asset create --file <path>` (or include as `files[]` entries on video-agent create). For visuals the viewer should see.
 - **A+B (Both):** Summarize for script AND attach original.
 
-📖 **Full routing matrix and upload examples → [../references/asset-routing.md](../references/asset-routing.md)**
+📖 **Full routing matrix and upload examples → [references/asset-routing.md](references/asset-routing.md)**
 
 **Key rules:**
 - HTML URLs cannot go in `files[]` (Video Agent rejects `text/html`). Web pages are always Path A.
@@ -282,7 +323,7 @@ Snap cuts, flash frames. Zero breathing room.
 
 ### Avatar
 
-📖 **Full avatar discovery flow, creation APIs, voice selection → [../references/avatar-discovery.md](../references/avatar-discovery.md)**
+📖 **Full avatar discovery flow, creation APIs, voice selection → [references/avatar-discovery.md](references/avatar-discovery.md)**
 
 **AVATAR file resolution (run before any external avatar lookup):**
 
@@ -388,7 +429,7 @@ Include an intro sequence, outro sequence, and chapter breaks using Motion Graph
 
 **Brand-specific:** Include hex codes (`#1E40AF`), font families (`Inter`), and which media types to prefer per scene type.
 
-📖 **Style presets (Minimalistic, Cinematic, Bold, etc.) → [../references/official-prompt-guide.md](../references/official-prompt-guide.md)**
+📖 **Style presets (Minimalistic, Cinematic, Bold, etc.) → [references/official-prompt-guide.md](references/official-prompt-guide.md)**
 
 ### Media Type Selection
 
@@ -402,9 +443,9 @@ Video Agent supports three media types. Guide it explicitly or it guesses (often
 
 Be explicit in the prompt: "Use motion graphics for the statistics, stock footage for the office scene, AI-generated visuals for the futuristic concept."
 
-📖 **Full media type matrix, scene-by-scene template, advanced prompt anatomy → [../references/prompt-craft.md](../references/prompt-craft.md)**
+📖 **Full media type matrix, scene-by-scene template, advanced prompt anatomy → [references/prompt-craft.md](references/prompt-craft.md)**
 📖 **Named styles (Deconstructed, Swiss Pulse, etc.) → inlined in Style Selection above**
-📖 **Motion vocabulary and B-roll → [../references/motion-vocabulary.md](../references/motion-vocabulary.md)**
+📖 **Motion vocabulary and B-roll → [references/motion-vocabulary.md](references/motion-vocabulary.md)**
 
 ### Orientation
 
@@ -467,7 +508,7 @@ FRAMING NOTE: The selected avatar image is in {source} orientation but this vide
 BACKGROUND NOTE: The selected avatar has no background or a transparent backdrop. Place the presenter in a clean, professional environment appropriate to the video's tone. For business/tech content: modern studio with soft lighting and subtle depth. For casual content: bright, minimal space with natural light. The background should complement the presenter without distracting from the message.
 ```
 
-📖 **Full correction templates and stacking matrix → [../references/frame-check.md](../references/frame-check.md)**
+📖 **Full correction templates and stacking matrix → [references/frame-check.md](references/frame-check.md)**
 
 ---
 
@@ -571,4 +612,4 @@ If user wants changes: adjust prompt based on feedback, re-generate. Never retry
 - **One idea per video.** Single-topic produces dramatically better results.
 - **Write for the ear.** If you wouldn't say it to a friend, rewrite it.
 
-📖 **Known issues → [../references/troubleshooting.md](../references/troubleshooting.md)**
+📖 **Known issues → [references/troubleshooting.md](references/troubleshooting.md)**
diff --git a/heygen-video/references/asset-routing.md b/heygen-video/references/asset-routing.md
new file mode 100644
index 0000000..b7eb708
--- /dev/null
+++ b/heygen-video/references/asset-routing.md
@@ -0,0 +1,86 @@
+# Asset Handling — The Classification Engine
+
+When the user provides files, URLs, or references, route each asset to the right path. The user should NEVER have to think about this.
+
+## Two Paths
+
+| Path | What happens | When to use |
+|------|-------------|-------------|
+| **A: Contextualize → Prompt** | Read/analyze the asset, extract key info, bake into script. Video Agent never sees the original. | Reference material, auth-walled content, documents where the *information* matters more than the *visual*. |
+| **B: Attach to API** | Upload the raw file via `files[]`. Video Agent analyzes, extracts graphics, uses as frames/B-roll. | Screenshots, branded assets, PDFs with important visual layouts, images the viewer should literally see. |
+| **A+B: Both** | Contextualize for script quality AND attach for visual use. | Long docs where you need to summarize but Video Agent should also have the full source. |
+
+## Classification Flow
+
+```
+1. Can Video Agent access this directly?
+   - Public URL (no auth, no paywall) → YES
+   - Private/internal URL → NO
+   - Local file → NO (must upload first)
+
+2. Should the viewer SEE this asset?
+   - Screenshot, logo, product image, chart → YES → Path B
+   - Research doc, article, context material → NO → Path A
+   - Ambiguous → Path A+B
+
+3. Is the content too long for the prompt?
+   - Short (< 500 words) → fits in prompt
+   - Long (≥ 500 words) → summarize key points, attach full doc
+```
+
+## Decision Matrix
+
+| Asset Type | Publicly Accessible? | Show On Screen? | Route |
+|-----------|---------------------|----------------|-------|
+| Screenshot / image | N/A | Yes | **B: Attach** + describe in prompt as B-roll |
+| Logo / brand asset | N/A | Yes | **B: Attach** + anchor to intro/outro |
+| Public URL to file (PDF, image, video) | Yes | Maybe | **B: Download → upload via `/v3/assets` → pass `asset_id`** + summarize |
+| Public URL to web page (HTML) | Yes | No | **A: Fetch and contextualize only.** Do NOT pass HTML URLs in `files[]`. |
+| Auth-walled URL (requires login) | No | No | **A: Ask the user to paste the content.** Never fabricate. |
+| PDF (short, text-heavy) | N/A | No | **A+B: Extract key points** + attach |
+| PDF (long, visual-rich) | N/A | Maybe | **B: Attach** + summarize top points |
+| Raw data / spreadsheet | N/A | Partially | **A: Analyze and describe** key stats. Attach if charts should appear. |
+
+## Executing Routes
+
+### Path A (Contextualize)
+- URLs: Use `web_fetch` to retrieve publicly accessible content
+- For auth-walled content you cannot access: ask the user to paste the text directly
+- Extract 3-5 most important points relevant to the video
+- Weave naturally into the script. Don't dump. Integrate.
+
+### Path B (Attach)
+Upload to HeyGen:
+
+**MCP:** upload via the asset tool (depends on environment).
+**CLI:** `heygen asset create --file /path/to/file.png`
+
+Max 32MB per file. Returns JSON with the new `asset_id`.
+
+Or pass inline in `files[]`:
+```json
+{"type": "url", "url": "https://example.com/image.png"}
+{"type": "asset_id", "asset_id": "<from upload>"}
+{"type": "base64", "data": "<base64>", "content_type": "image/png"}
+```
+
+### Describe Asset Usage in Prompt
+Be SPECIFIC:
+- "Use the uploaded dashboard screenshot as B-roll when discussing analytics"
+- "Display the company logo in the intro and end card"
+
+### Log Classification
+In the learning log entry, record:
+```json
+"assets_classified": [{"type": "image", "route": "attach", "accessible": true, "reason": "product screenshot"}]
+```
+
+## Rules
+
+- **Never ask the user which path unless genuinely 50/50.** You're the producer. Make the call.
+- **When in doubt, do both (A+B).** Over-providing costs nothing.
+- **Always describe attached assets in the prompt.** Uploading without description = ignored.
+- **Auth-walled content is YOUR job.** Bridge the gap between your access and Video Agent's.
+- **URLs that fail:** Try `web_fetch`. If login/paywall/404 → tell the user, ask for content directly. Never silently fabricate.
+- **HTML URLs cannot go in `files[]`.** Video Agent rejects `text/html`. Web pages are ALWAYS Path A only.
+- **Prefer download→upload→asset_id** over `files[]{url}`. HeyGen's servers are often blocked by the source site's CDN/WAF when fetching a URL directly.
diff --git a/heygen-video/references/avatar-discovery.md b/heygen-video/references/avatar-discovery.md
new file mode 100644
index 0000000..94ba4d1
--- /dev/null
+++ b/heygen-video/references/avatar-discovery.md
@@ -0,0 +1,179 @@
+# Avatar Discovery & Voice Selection (heygen-video)
+
+This guide covers *avatar discovery for video generation* — how heygen-video
+finds an appropriate presenter (or skips presenter entirely) before calling
+the Video Agent. For *avatar creation*, see the `heygen-avatar` skill and
+[`heygen-avatar/references/avatar-creation.md`](../../heygen-avatar/references/avatar-creation.md) (the link resolves only when `heygen-avatar` is installed alongside this skill).
+
+## Path 0: Resolve workspace AVATAR files first
+
+Before any HeyGen catalog lookup, check the workspace root for an
+applicable `AVATAR-*.md` file. These are written by `heygen-avatar`
+and contain `Group ID` + `Voice ID` ready to use, with no API call
+needed.
+
+Resolution precedence:
+
+| Request signal | File to read |
+|---|---|
+| Named subject ("video with Eve", "Cleo's update") | `AVATAR-<NAME>.md` |
+| Agent self-reference ("video of yourself", "give us your update") | `AVATAR-AGENT.md` (symlink) |
+| User self-reference ("video of me", "my video update") | `AVATAR-USER.md` (symlink) |
+| No subject in request | Skip to Path A |
+
+`AVATAR-AGENT.md` and `AVATAR-USER.md` are role-based symlinks maintained
+by `heygen-avatar` Phase 5; they resolve to the current agent's / user's
+named AVATAR file at read time. Treat them like any other AVATAR file
+once read.
+
+If the resolved file has a populated HeyGen section, extract `Group ID`
+and `Voice ID` and proceed to Frame Check. Skip Path A entirely. If the
+file exists but the HeyGen section is empty, run `heygen-avatar` Phase 2
+first.
+
+If no file applies (no name match, no role alias, generic catalog
+browsing requested) — fall through to Path A below.
+
+## Path A: Discover Existing Avatars
+
+### A1: Check for private avatars first
+
+**If user specifies an avatar by name** (e.g. "use Eve's Podcast look"), take the fast path:
+
+**MCP:** `list_avatar_looks(ownership=private)` — filter client-side by name match.
+**CLI:**
+```bash
+heygen avatar looks list --ownership private --limit 50
+```
+Avoids the 2-call group→looks pattern.
+
+**If user wants to browse**, use the group-first flow:
+
+**MCP:**
+1. `list_avatar_groups(ownership=private)` — list groups (each group = one person)
+2. `list_avatar_looks(group_id=<group_id>)` — show looks for chosen group
+
+**CLI:**
+```bash
+heygen avatar list --ownership private --limit 50
+heygen avatar looks list --group-id <group_id> --limit 50
+```
+
+Each look has an `id` — this is the `avatar_id` you pass downstream.
+
+Avatar types: `studio_avatar`, `video_avatar`, `photo_avatar`. Photo avatars support `motion_prompt` and `expressiveness`.
+
+**ALWAYS show the preview image** when presenting an avatar look. Each look response includes `preview_image_url` — display inline.
+
+### A2: Check last-used avatar
+
+Check `heygen-video-log.jsonl` for the last-used `avatar_id`. If found:
+
+**MCP:** `get_avatar_look(look_id=<look_id>)`
+**CLI:** `heygen avatar looks get --look-id <look_id>`
+
+Show preview image: "Last time you used [Avatar Name]. Use this avatar again?"
+
+### A3: Avatar conversation
+
+Ask: "Do you want a visible presenter, or voice-over only?"
+
+If voice-over only → no `avatar_id`. State in prompt: "Voice-over narration only."
+
+If presenter wanted, present private avatars first. For public/stock avatars, browse by group:
+
+**MCP:** `list_avatar_groups(ownership=public)`
+**CLI:**
+```bash
+heygen avatar list --ownership public --limit 20
+```
+
+Show group names + one representative image. Let the user pick a person.
+
+**MCP:** `list_avatar_looks(group_id=<group_id>)`
+**CLI:**
+```bash
+heygen avatar looks list --group-id <group_id> --limit 10
+```
+
+**Why group-first:** The flat `heygen avatar looks list --ownership public` call returns 50+ results for only 3 unique people per page. Group-level browsing (2 calls) gives much better discovery UX.
+
+### A4: Voice direction
+
+After avatar is settled, confirm voice preferences (accent, delivery style, language).
+
+**ALWAYS show a playable voice preview.** Each voice response includes `preview_audio_url` — share it.
+
+**Handling missing/broken previews:** Some voices return bare `s3://` paths or `null`. When this happens: note "(no preview available)" and offer to generate a short TTS sample via `create_speech` (MCP) or `heygen voice speech create --text "<sample>" --voice-id <id> --input-type plain_text --language en --locale en-US` (CLI).
+
+---
+
+## Path B: Create a New Avatar
+
+If no existing avatar fits and the user wants one created, route to the
+`heygen-avatar` skill. See
+[`heygen-avatar/references/avatar-creation.md`](../../heygen-avatar/references/avatar-creation.md)
+for the full creation API surface (photo / prompt / digital twin), file
+input formats, and identity field mappings.
+
+After `heygen-avatar` finishes, an `AVATAR-<NAME>.md` file is written and
+heygen-video resumes here at Path 0 to pick it up.
+
+---
+
+## Path C: Direct Image (Simplest for One-Off)
+
+Skip avatar creation. Pass `image_url` directly:
+
+**MCP:** `create_video_from_image(image_url=<url>, script=<script>, voice_id=<voice_id>, aspect_ratio="16:9")`
+**CLI:**
+```bash
+heygen video create -d '{
+  "image_url": "https://example.com/headshot.jpg",
+  "script": "<script>",
+  "voice_id": "<voice_id>",
+  "aspect_ratio": "16:9"
+}'
+```
+Also accepts `image_asset_id`. Fastest path for one-off talking-head video.
+
+---
+
+## Voice Selection (downstream)
+
+Voice catalog browsing for video generation:
+
+**MCP:** `list_voices(type=private)` then `list_voices(type=public, language=<lang>, gender=<gender>)`
+**CLI:**
+```bash
+heygen voice list --type private --limit 20
+
+# Public voices with filters
+heygen voice list --type public --engine starfish --language en --gender female --limit 20
+```
+
+For voice *design* (semantic search by description) and the full voice
+selection workflow during avatar setup, see
+[`heygen-avatar/references/avatar-creation.md`](../../heygen-avatar/references/avatar-creation.md).
+
+---
+
+## How Avatar/Voice Are Passed
+
+**MCP:** `create_video_agent(prompt=<prompt>, avatar_id=<look_id>, voice_id=<voice_id>, style_id=<optional>, orientation=<orientation>)`
+
+**CLI:** `heygen video-agent create` with flags:
+```bash
+heygen video-agent create \
+  --prompt "..." \
+  --avatar-id "<look_id_from_discovery>" \
+  --voice-id "<voice_id_from_discovery>" \
+  --style-id "<optional_style_id>" \
+  --orientation landscape
+```
+
+- **Custom/stock avatar with known ID** → pass `--avatar-id`. Do NOT describe avatar's appearance in prompt. Only delivery style + background/environment.
+- **No avatar_id (auto-select)** → describe desired presenter in prompt. Less reliable (~80% vs ~97%).
+- **Voice-over only** → omit `--avatar-id`, state in prompt.
+
+> Always provide explicit `--avatar-id` for presenter videos. 97.6% duration accuracy vs ~80% without.
diff --git a/heygen-video/references/frame-check.md b/heygen-video/references/frame-check.md
new file mode 100644
index 0000000..de2a769
--- /dev/null
+++ b/heygen-video/references/frame-check.md
@@ -0,0 +1,98 @@
+# Frame Check — Aspect Ratio & Background Pre-Check
+
+Runs automatically when `avatar_id` is set, before Generate. Appends correction notes to the Video Agent prompt. Does NOT generate images or create new looks.
+
+## Step 1: Fetch the avatar look metadata
+
+**MCP:** `get_avatar_look(look_id=<avatar_id>)`
+**CLI:** `heygen avatar looks get --look-id <avatar_id>`
+
+Extract:
+- `avatar_type`: `"photo_avatar"` | `"studio_avatar"` | `"video_avatar"`
+- `preview_image_url`: use to determine orientation
+- `image_width` and `image_height`: use for orientation calculation
+
+## Step 2: Determine avatar orientation
+
+Use `image_width` and `image_height` from the API response (or fetch the preview image dimensions if those fields are missing).
+- width > height → landscape avatar
+- height > width → portrait avatar
+- width == height → **square avatar** (1:1) → always needs framing correction
+- Fetch fails or no preview → assume portrait (safer default)
+
+## Step 3: Determine if background exists
+
+| `avatar_type` | Background? | Reason |
+|---|---|---|
+| `photo_avatar` | ✅ Handled by Video Agent | Video Agent generates avatar + environment together during video creation. No standalone bg correction needed. |
+| `studio_avatar` | ⚠️ Maybe | Check preview image — if transparent/solid/empty → "No background" → apply Correction C |
+| `video_avatar` | ✅ Yes | Recorded in a real environment |
+
+## Step 4: Append correction notes to prompt
+
+Based on Steps 2-3, append zero or more correction notes **verbatim to the end of the Video Agent prompt text.** Do NOT ask the user. Do NOT generate images. Do NOT create new looks.
+
+### A) Portrait avatar → Landscape video (most common)
+
+```
+FRAMING NOTE: The selected avatar image is in portrait orientation but this video is landscape (16:9). Frame the presenter from the chest up, centered in the landscape canvas. Use AI Image tool to generative fill to extend the scene horizontally with a complementary background environment that matches the video's tone (studio, office, or contextually appropriate setting). Do NOT add black bars or pillarboxing. The avatar should feel natural in the 16:9 frame.
+```
+
+### B) Landscape avatar → Portrait video
+
+```
+FRAMING NOTE: The selected avatar image is in landscape orientation but this video is portrait (9:16). Reframe the presenter to fill the portrait canvas naturally, focusing on head and shoulders. Use AI Image tool to generative fill to extend vertically if needed. Do NOT add letterboxing. The avatar should fill the portrait frame comfortably.
+```
+
+### D) Square avatar → Landscape video
+
+```
+FRAMING NOTE: The selected avatar image is in square (1:1) orientation but this video is landscape (16:9). Frame the presenter from the chest up, centered in the landscape canvas. Use AI Image tool to generative fill to extend the scene horizontally with a complementary background environment that matches the video's tone (studio, office, or contextually appropriate setting). Do NOT add black bars or pillarboxing. The avatar should feel natural in the 16:9 frame.
+```
+
+### E) Square avatar → Portrait video
+
+```
+FRAMING NOTE: The selected avatar image is in square (1:1) orientation but this video is portrait (9:16). Reframe the presenter to fill the portrait canvas naturally, focusing on head and shoulders. Use AI Image tool to generative fill to extend vertically if needed. Do NOT add letterboxing. The avatar should fill the portrait frame comfortably.
+```
+
+### C) Missing background — studio_avatar only
+
+**Only for `studio_avatar` with transparent/solid/empty background. NOT for photo_avatar** (Video Agent handles photo_avatar environments during generation).
+
+```
+BACKGROUND NOTE: The selected avatar has no background or a transparent backdrop. Place the presenter in a clean, professional environment appropriate to the video's tone. For business/tech content: modern studio with soft lighting and subtle depth. For casual content: bright, minimal space with natural light. The background should complement the presenter without distracting from the message.
+```
+
+## Correction Stacking Matrix
+
+Corrections can stack. Use the matrix to determine which notes to append.
+
+| avatar_type | Orientation Match? | Has Background? | Corrections |
+|---|---|---|---|
+| `video_avatar` | ✅ matched | ✅ Yes | None |
+| `video_avatar` | ❌ mismatched | ✅ Yes | Framing only (A or B) |
+| `video_avatar` | ◻ square | ✅ Yes | Framing only (D or E) |
+| `studio_avatar` | ✅ matched | ✅ Yes (check preview) | None |
+| `studio_avatar` | ✅ matched | ❌ No | Background (C) |
+| `studio_avatar` | ❌ mismatched | ✅ Yes | Framing only (A or B) |
+| `studio_avatar` | ❌ mismatched | ❌ No | Framing (A or B) + Background (C) |
+| `studio_avatar` | ◻ square | ✅ Yes | Framing only (D or E) |
+| `studio_avatar` | ◻ square | ❌ No | Framing (D or E) + Background (C) |
+| `photo_avatar` | ✅ matched | (n/a) | **None** — Video Agent handles avatar + environment together |
+| `photo_avatar` | ❌ mismatched | (n/a) | **Framing only (A or B)** |
+| `photo_avatar` | ◻ square | (n/a) | **Framing only (D or E)** |
+
+**How to check if studio_avatar has a background:** Fetch `preview_image_url`. If transparent/checkered, solid color, or cutout → "No background" → append Correction C.
+
+**photo_avatar rule:** Video Agent generates the avatar and its environment together during video creation. Do NOT append Correction C for photo_avatars. Only append framing corrections (A, B, D, or E) if there's an orientation mismatch.
+
+## Step 5: Submit with original avatar_id
+
+After appending correction notes to the prompt, submit the video request using the **original `avatar_id`** (unchanged). Video Agent handles framing and background internally based on the FRAMING NOTE and BACKGROUND NOTE directives in the prompt.
+
+## Step 6: Log the correction
+
+Add to learning log entry:
+- `"aspect_correction"`: `"portrait_to_landscape"` | `"landscape_to_portrait"` | `"square_to_landscape"` | `"square_to_portrait"` | `"background_fill"` | `"both"` (a framing correction plus background fill) | `"none"`
+- `"avatar_type"`: the raw value from the API
diff --git a/heygen-video/references/motion-vocabulary.md b/heygen-video/references/motion-vocabulary.md
new file mode 100644
index 0000000..4d27b30
--- /dev/null
+++ b/heygen-video/references/motion-vocabulary.md
@@ -0,0 +1,191 @@
+---
+name: motion-vocabulary
+description: Motion verbs, the 5-layer visual system, scene types, and anti-patterns for Video Agent prompts
+---
+
+# Motion Vocabulary & Visual Layer System
+
+Video Agent is an HTML interpreter. It renders layouts, typography, and structured content natively. The key to great B-roll: describe elements with **action verbs** ("slams in," "types on," "counts up") not layout specs ("upper-left, 48pt").
+
+Based on patterns from 40+ produced videos.
+
+> **Language note:** Motion verbs MUST remain in English regardless of the video's content language. Video Agent's rendering engine responds to these specific English verbs. Do not translate "SLAMS", "CASCADE", "COUNTS UP", etc. They are API-level commands, not viewer-facing text.
+
+## Motion Verbs
+
+Use these exact verbs in prompts. Video Agent responds to them. Without them, you get static frames.
+
+### High Energy
+| Verb | Example |
+|------|---------|
+| **SLAMS** | `"$95M" SLAMS in from left at -5 degrees` |
+| **CRASHES** | `Title CRASHES in from right, screen-shake on impact` |
+| **PUNCHES** | `Quote card PUNCHES up from bottom` |
+| **STAMPS** | `Data blocks STAMP in staggered 0.4s` |
+| **SHATTERS** | `Text SHATTERS after 1.5s, revealing number underneath` |
+
+### Medium Energy
+| Verb | Example |
+|------|---------|
+| **CASCADE** | `Three cards CASCADE from top, staggered 0.3s` |
+| **SLIDES** | `Ticker SLIDES in from right — continuous scroll` |
+| **DROPS** | `"TIER 1" DROPS in with white flash` |
+| **FILLS** | `Progress bar FILLS 0 to 90% in orange` |
+| **DRAWS** | `Chart line DRAWS itself left to right` |
+
+### Low Energy
+| Verb | Example |
+|------|---------|
+| **types on** | `Quote types on word by word in italic white` |
+| **fades in** | `Logo fades in at center, held for 3 seconds` |
+| **FLOATS** | `Bokeh orbs FLOAT across frame at different speeds` |
+| **morphs** | `Number morphs from 17 to 18.9` |
+| **COUNTS UP** | `"1.85M" COUNTS UP from 0 in amber 96pt` |
+
+## Transition Types
+
+| Transition | Energy | Best With Styles |
+|------------|--------|-----------------|
+| Smash cut | Aggressive | Deconstructed, Maximalist, Carnival Surge |
+| White flash frame | Punchy | Deconstructed, Maximalist |
+| Grid wipe | Systematic | Swiss Pulse, Digital Grid |
+| Hard cut | Clean | Swiss Pulse, Shadow Cut |
+| Liquid dissolve | Elegant | Data Drift, Dream State |
+| Slow cross-dissolve | Refined | Velvet Standard |
+| Pop cut / bounce | Fun | Play Mode, Carnival Surge |
+| Snap cut | Urgent | Red Wire, Contact Sheet |
+| Soft dissolve | Warm | Soft Signal, Warm Grain, Quiet Drama |
+| Iris wipe | Nostalgic | Heritage Reel |
+
+## The 5-Layer Visual System
+
+Break B-roll scenes into 5 stacked layers. This is the most powerful technique for motion graphics.
+
+| Layer | Purpose | Examples |
+|-------|---------|---------|
+| **L1** | Background | Textured surface, grid, gradient, color field |
+| **L2** | Hero content | Main headline/number that dominates the frame |
+| **L3** | Supporting data | Cards, stats, bullet points, secondary information |
+| **L4** | Information bar | Tickers, labels, source attributions, quotes |
+| **L5** | Effects | Particles, glitches, grid animations, ambient motion |
+
+**Rules:**
+- Every B-roll scene: at least 4 layers
+- Every overlay content side: at least 3 layers
+- **Every element must MOVE.** No static frames.
+
+### Example: B-Roll Scene with Layers
+
+```
+SCENE 2 — FULL SCREEN B-ROLL (12s)
+[NO AVATAR — motion graphic only]
+VOICEOVER: "One-point-eight-five million signups. Twenty-eight percent month over month."
+LAYER 1: Dark #1a1a1a background with thin grid lines pulsing at 8% opacity.
+LAYER 2: "1.85M" SLAMS in from left, white Bold 140pt. "+28% MoM" appears in amber.
+LAYER 3: Three stat cards CASCADE from top-right, staggered 0.3s.
+         Each number COUNTS UP from 0.
+LAYER 4: Bottom ticker scrolls: "Non-brand search +36% • Brand impressions 9.2M"
+LAYER 5: Grid lines RIPPLE outward on "1.85M" slam.
+Hard cut.
+```
+
+## Scene Types
+
+| Type | Format | When to Use |
+|------|--------|-------------|
+| **A-ROLL** | Avatar speaking to camera | Intros, key insights, CTAs, emotional beats |
+| **FULL SCREEN B-ROLL** | No avatar, motion graphics only | Data visualization, information-dense content |
+| **A-ROLL + OVERLAY** | Split frame: avatar + content | Presenting data while maintaining human connection |
+
+**Rotation is mandatory.** Never 3+ of the same type in a row. Every video needs at least 2 pure B-roll scenes.
+
+**Voiceover on EVERY scene.** Every B-roll scene MUST include a `VOICEOVER:` line. Silent B-roll = broken video.
+
+## Timing Guidelines
+
+| Content Type | Duration |
+|---|---|
+| Hook/Intro (A-roll) | 6-10 seconds |
+| Data-heavy B-roll | 10-15 seconds (NEVER ≤5s — causes black frames) |
+| A-roll + Overlay | 8-12 seconds |
+| CTA / Close (A-roll) | 6-8 seconds |
+
+**Common video lengths:**
+- Social clip: 30-45s (5-7 scenes)
+- Briefing: 60-75s (7-9 scenes)
+- Deep dive: 90-120s (10-13 scenes)
+
+## Avatar Description Guide
+
+**The avatar is NOT a fixed headshot.** Design it for each video like a movie character.
+
+### Thematic Wardrobe Rule
+
+The avatar's outfit and environment MUST match the content's emotional/cultural context:
+
+| Content Type | Avatar Design | NOT This |
+|---|---|---|
+| Breaking tech news | Field reporter, windswept hair, earpiece, city skyline | "Anchor at a desk" |
+| Data analysis | Black merino turtleneck, minimalist desk, dual monitors with charts | "Business casual" |
+| Product launch | Branded tee, open-plan startup space, product prototype on desk | "Generic office" |
+| Tutorial | Casual hoodie, messy developer desk, sticky notes, coffee mug | "Presenter in a studio" |
+
+### What to Specify
+
+| Element | Weak | Strong |
+|---|---|---|
+| Clothing | "Business casual" | "Black ribbed merino turtleneck, high collar framing jaw" |
+| Environment | "An office" | "Glass-walled conference room. Whiteboard with hand-drawn tier pyramid" |
+| Monitor content | "Computer screens" | "Monitor shows scrolling green terminal text and red security alerts" |
+| Lighting | "Well lit" | "Cool blue monitor glow from left, warm amber desk lamp from right" |
+
+### Template (60-100 words)
+```
+AVATAR: [Clothing — fabric, color, fit, accessories, posture].
+[Setting — specific props, brand logos, what's on the walls].
+[Monitors/desk — content visible on screens, items on desk].
+[Lighting — direction, color temperature]. [Mood of the space].
+```
+
+**Remember:** When `avatar_id` is set as an API parameter, do NOT describe appearance. Only delivery style and environment notes. Say "The selected presenter" instead.
+
+## Critical On-Screen Text
+
+List every piece of text that MUST appear literally on screen:
+
+```
+CRITICAL ON-SCREEN TEXT (display literally):
+- "$141M ARR — All-Time High"
+- "1.85M Signups — +28% MoM"
+- Quote: "Use technology to serve the message, not distract from it." — Shalev Hani
+- "@eve_builds" — exact social handle
+```
+
+Without this block, Video Agent will summarize, round numbers, or rephrase quotes.
+
+**Voiceover number rule:** Spell out numbers in speech ("one-point-eight-five million"), use figures on screen ("1.85M").
+
+## What Doesn't Work
+
+Patterns that consistently produce poor results (from 40+ videos):
+
+**Layout language** — Screen coordinates cause empty/black B-roll:
+```
+❌ "UPPER-LEFT: headline in 48pt Helvetica"
+❌ "CENTER-SCREEN: display at coordinates (400, 300)"
+✅ "135K" SLAMS in from left, white Impact 120pt, fills 40% of frame.
+```
+
+**Named artists without specs** — "Ikko Tanaka style" means nothing to Video Agent. Translate to concrete rules:
+```
+❌ "Use an Ikko Tanaka style"
+✅ "Flat color blocks, maximum 3 colors per frame, 60% negative space, typography as primary element"
+```
+
+**Style examples injected into prompts** — Full example scenes from a style library confuse the agent. Use the style's **rules**, not example scenes.
+
+**Forced short B-roll (≤5 seconds)** — Too short for rendering. Every tested video with 5s B-roll had empty/black screens. Minimum 10s.
+
+**Content as a list, not a story** — "Here are 5 tweets" produces flat videos. Always synthesize a thesis: *"X is happening because Y — here's the proof."*
+
+**Static frames** — Every element must have a motion verb. "Title appears" → dead frame. "Title SLAMS in from left" → alive.
diff --git a/heygen-video/references/official-prompt-guide.md b/heygen-video/references/official-prompt-guide.md
new file mode 100644
index 0000000..15af5dd
--- /dev/null
+++ b/heygen-video/references/official-prompt-guide.md
@@ -0,0 +1,116 @@
+# HeyGen Video Agent — Official Prompt Guide (Complete)
+Source: https://www.notion.so/heygen/Video-Agent-Prompt-Guide-2e6449792c69801d9353c885aad92c9e
+
+## Core Philosophy
+"Video Agent isn't magic; it's a production partner that executes your creative direction."
+"The more specific you are about content, style, media types, and scene structure, the closer you'll get to exactly what you envision."
+
+## Three UI Controls
+1. **Avatar** — select specific avatar, Auto mode, or "no avatar" for voice-over only (MUST explicitly say "no avatar" in prompt)
+2. **Duration** — 30s, 1min, 2min, or Auto (agent follows prompt/script for length, not forced)
+3. **Aspect Ratio** — Portrait, Landscape, or Auto
+
+## Prompting Hierarchy (Basic → Advanced)
+
+### Level 1: Basic Prompt
+Describe the content you want delivered:
+- "Introduce HeyGen to knowledge workers, talk about its Talking Avatar models"
+- "Make a compliance training video and explain phishing in detail"
+
+### Level 2: Script-Driven (STRONGLY RECOMMENDED)
+Paste a full video script. Agent follows scene-by-scene while improving flow, timing, and visuals.
+This is "the single biggest upgrade most people miss."
+
+### Level 3: Scene-by-Scene (Maximum Control)
+```
+Scene 1: [Scene Type]
+  Visual: [Describe exact visual]
+  VO/Script: "[What the avatar says]"
+  Duration: [Approximate length]
+```
+
+## Attachments
+- Images, videos, product screenshots, diagrams
+- PDFs, documents (agent extracts key info)
+- Upload your own photo → agent uses it as the talking avatar
+- ALWAYS add context: "Use the attached screenshots as B-roll when discussing features"
+
+## The "Catchall" Style Block (Personal Favorite of HeyGen Team)
+```
+Use minimal, clean styled visuals. Blue, black, and white as main colors.
+Leverage motion graphics as B-rolls and A-roll overlays. Use AI videos when necessary.
+When real-world footage is needed, use Stock Media.
+Include an intro sequence, outro sequence, and chapter breaks using Motion Graphics.
+```
+
+## Style Descriptor Presets
+| Style | Best For | Prompt Addition |
+|-------|----------|-----------------|
+| Minimalistic | Corporate, Tech, SaaS | "Use minimalistic, clean visuals with lots of white space" |
+| Cartoon/Animated | Education, Kids content | "Use cartoon-style illustrated visuals" |
+| Bold & Vibrant | Marketing, Social | "Use bold, vibrant colors and dynamic visuals" |
+| Cinematic | Brand films, High-end | "Use cinematic quality visuals with dramatic lighting" |
+| Flat Design | Modern, App demos | "Use flat design style with geometric shapes" |
+| Gradient Modern | Tech, Startup | "Use modern gradient backgrounds and sleek transitions" |
+| Retro/Vintage | Nostalgia, Creative | "Use retro-inspired visuals with warm tones" |
+
+## Color Specification
+- Exact hex codes: "Use #1E40AF as primary blue, #F8FAFC as background white, #0F172A for text"
+- Brand colors: "Stick to our brand colors: coral (#FF6B6B), navy (#2C3E50), cream (#FFF5E6)"
+- Font families: "Use Inter font family throughout"
+- WHY: Without defined style, visuals look inconsistent scene-to-scene
+
+## Media Types
+
+### Motion Graphics
+Animated text, icons, charts, shapes, transitions.
+- A-roll overlays: lower thirds, bullet points, animated callouts
+- B-roll scenes: animated explanations, data viz, process flows
+- Chapter cards: section breaks, intros, outros
+- Information display: statistics, comparisons, timelines
+
+### AI-Generated Images & Videos
+- Conceptual illustrations, abstract concepts
+- Custom scenarios stock can't cover
+- Stylized visuals in a particular artistic style
+- Product mockups in various contexts
+
+### Stock Media
+Real-world footage from stock libraries.
+- Authentic scenes (offices, cities, people)
+- Industry-specific (medical, manufacturing, retail)
+- Emotional moments, human connection
+- Establishing shots, locations
+
+### Media Type Decision Matrix
+| Content Type | Motion Graphics | AI Generated | Stock Media |
+|---|---|---|---|
+| Data/Statistics | ✅ Best | ❌ | ❌ |
+| Abstract Concepts | ✅ Good | ✅ Best | ❌ |
+| Real Environments | ❌ | ⚠️ Can work | ✅ Best |
+| Brand Elements | ✅ Best | ❌ | ❌ |
+| Human Emotions | ❌ | ⚠️ Uncanny | ✅ Best |
+| Custom Scenarios | ⚠️ Limited | ✅ Best | ⚠️ May not exist |
+| Technical Diagrams | ✅ Best | ❌ | ❌ |
+
+## Example Prompts (Steal These)
+
+### Compliance Training
+"Use a professional female avatar. Make a compliance training video explaining phishing in detail. Use examples and list top watch-outs. Leverage motion graphics as A-roll overlay and B-roll to help explain core concepts."
+
+### Educational Explainer (Voice-Over Only)
+"Create a 1-minute video about camera aperture. Use minimal science diagrams and visualizations. No avatar needed, only voice-over. Cool neutrals (navy, cyan), thin-line diagrams, and slow elegant motion. B-roll is abstract scientific illustrations. Sequencing: definition → diagram expansion → conceptual layering, with fade-through transitions."
+
+### Brand Story (Animated)
+"Make a video telling the story of how Twitch got started. Use cartoon-style animations and overlays. I want Twitch's iconic colors and fonts. Use motion graphics overlays and AI-generated B-roll."
+
+## Community Pro Tips
+
+### Stack style instructions at the end
+Put content/script first, then add all style directives (colors, motion graphics preferences, media type guidance) as a block at the bottom. Keeps creative intent clean and technical specs organized.
+
+### Save your catchall as a template
+If you find a style combo that works, reuse it across all videos. Consistency builds brand recognition.
+
+### Iterate in conversation
+Video Agent remembers context within a session. After first render: "make the intro shorter" or "swap the B-roll in scene 3 for stock footage" without re-prompting everything.
diff --git a/heygen-video/references/prompt-craft.md b/heygen-video/references/prompt-craft.md
new file mode 100644
index 0000000..c32df69
--- /dev/null
+++ b/heygen-video/references/prompt-craft.md
@@ -0,0 +1,273 @@
+# Prompt Craft Reference
+
+Production-quality prompt engineering for HeyGen Video Agent. Combines official HeyGen guidance with patterns validated across 80+ test videos.
+
+Load this when the user wants cinematic/polished output, scene-by-scene control, or specific visual styles.
+
+---
+
+## Prompting Levels (from HeyGen Official Guide)
+
+### Level 1: Basic
+Just describe content. Video Agent fills in the rest.
+```
+"Introduce HeyGen to knowledge workers, talk about its Talking Avatar models"
+```
+
+### Level 2: Script-Driven (RECOMMENDED DEFAULT)
+Paste a full video script. Agent follows scene-by-scene while improving flow, timing, and visuals.
+This is "the single biggest upgrade most people miss." — HeyGen docs
+
+### Level 3: Scene-by-Scene (Maximum Control)
+```
+Scene 1: [Scene Type]
+  Visual: [Describe exact visual]
+  VO/Script: "[What the avatar says]"
+```
+
+**Official recommendation:** Don't assign per-scene timestamps. Natural flow + tone description outperforms rigid scene structure.
+
+---
+
+## Prompt Anatomy (Production Quality)
+
+```
+FORMAT:    What kind of video, how long, what energy
+TONE:      Emotional register, references
+AVATAR:    "The selected presenter" (when avatar_id set) or delivery style
+STYLE:     Colors, typography, motion rules, transitions (see Style Block)
+CRITICAL ON-SCREEN TEXT:  Exact strings that must appear literally
+SCENE-BY-SCENE:  (if >60s) Individual scene breakdowns with VO and visual type
+MUSIC:     Genre, energy arc
+```
+
+**Rule: Content/script first, style block at the end.** Keeps creative intent clean and technical specs organized.
+
+### Critical On-Screen Text
+
+List every exact string. Without this, Video Agent rephrases, summarizes, or rounds numbers.
+
+```
+CRITICAL ON-SCREEN TEXT (display literally):
+- "$141M ARR — All-Time High"
+- "1.85M Signups — +28% MoM"
+- Quote: "Use technology to serve the message." — Shalev Hani
+```
+
+---
+
+## Style Block
+
+Every prompt should end with a style block. Without one, visuals look inconsistent scene-to-scene.
+
+### The HeyGen Catchall (official team recommendation)
+```
+Use minimal, clean styled visuals. Blue, black, and white as main colors.
+Leverage motion graphics as B-rolls and A-roll overlays. Use AI videos when necessary.
+When real-world footage is needed, use Stock Media.
+Include an intro sequence, outro sequence, and chapter breaks using Motion Graphics.
+```
+
+### Style Presets (from HeyGen docs)
+
+| Style | Best For | Prompt Language |
+|-------|----------|-----------------|
+| Minimalistic | Corporate, Tech, SaaS | "Use minimalistic, clean visuals with lots of white space" |
+| Cartoon/Animated | Education, Kids | "Use cartoon-style illustrated visuals" |
+| Bold & Vibrant | Marketing, Social | "Use bold, vibrant colors and dynamic visuals" |
+| Cinematic | Brand films, High-end | "Use cinematic quality visuals with dramatic lighting" |
+| Flat Design | Modern, App demos | "Use flat design style with geometric shapes" |
+| Gradient Modern | Tech, Startup | "Use modern gradient backgrounds and sleek transitions" |
+| Retro/Vintage | Nostalgia, Creative | "Use retro-inspired visuals with warm tones" |
+
+### Brand Colors
+
+Be explicit with hex codes and fonts:
+```
+Use #1E40AF as primary blue, #F8FAFC as background white, #0F172A for text.
+Font: Inter family throughout.
+```
+
+Without defined colors, visuals look inconsistent scene-to-scene.
+
+---
+
+## Media Types & When to Use Each
+
+Video Agent supports three media types. Guide it explicitly or it guesses (often wrong).
+
+### Motion Graphics
+Animated text, icons, charts, shapes, transitions.
+- **A-roll overlays:** lower thirds, bullet points, animated callouts
+- **B-roll scenes:** animated explanations, data viz, process flows
+- **Chapter cards:** section breaks, intros, outros
+- **Best for:** Data, statistics, brand elements, technical diagrams
+
+### AI-Generated Images & Videos
+- Conceptual illustrations, abstract concepts
+- Custom scenarios stock can't cover
+- Stylized visuals in a particular artistic style
+- **Best for:** Abstract concepts, custom scenarios, product mockups
+
+### Stock Media
+Real-world footage from stock libraries.
+- Authentic scenes (offices, cities, people)
+- Industry-specific (medical, manufacturing, retail)
+- **Best for:** Real environments, human emotions, establishing shots
+
+### Decision Matrix
+
+| Content Type | Motion Graphics | AI Generated | Stock Media |
+|---|---|---|---|
+| Data/Statistics | ✅ Best | ❌ | ❌ |
+| Abstract Concepts | ✅ Good | ✅ Best | ❌ |
+| Real Environments | ❌ | ⚠️ Can work | ✅ Best |
+| Brand Elements | ✅ Best | ❌ | ❌ |
+| Human Emotions | ❌ | ⚠️ Uncanny | ✅ Best |
+| Custom Scenarios | ⚠️ Limited | ✅ Best | ⚠️ May not exist |
+| Technical Diagrams | ✅ Best | ❌ | ❌ |
+
+---
+
+## Scene Types
+
+| Type | Format | When |
+|------|--------|------|
+| **A-ROLL** | Avatar speaking to camera | Intros, key insights, CTAs, emotional beats |
+| **FULL SCREEN B-ROLL** | No avatar, motion graphics only | Data visualization, info-dense content |
+| **A-ROLL + OVERLAY** | Split frame: avatar + content | Presenting data while maintaining human connection |
+
+**Rotation is mandatory.** Never 3+ of the same type in a row.
+
+**Voiceover on EVERY scene.** Silent B-roll = broken video.
+
+### Scene-by-Scene Template (HeyGen Official Format)
+
+```
+Scene 1: [Scene Type]
+  Visual: [Describe exact visual — include media type]
+  VO/Script: "[What the avatar says]"
+```
+
+### Detailed Scene Templates (validated in testing)
+
+**A-ROLL:**
+```
+SCENE 1 — A-ROLL
+[Avatar center-frame, excited, hands gesturing]
+VOICEOVER: "The exact script for this scene."
+Lower-third: "TITLE TEXT" white on blue bar.
+```
+
+**B-ROLL with layered motion:**
+```
+SCENE 2 — FULL SCREEN B-ROLL
+[NO AVATAR — motion graphic only]
+VOICEOVER: "The exact script for this scene."
+Dark background with subtle grid. "HEADLINE" SLAMS in from left.
+Three data cards CASCADE from right, staggered. Bottom ticker SLIDES in.
+```
+
+**A-ROLL + OVERLAY:**
+```
+SCENE 3 — A-ROLL + OVERLAY
+[Avatar LEFT 35%. Content RIGHT 65%.]
+VOICEOVER: "The exact script for this scene."
+RIGHT SIDE: Stats COUNT UP below headline.
+```
+
+---
+
+### Non-English Videos
+
+The same prompt structure applies regardless of language:
+1. **Script/narration:** In the video language
+2. **Style block:** Always English (Video Agent directive)
+3. **Motion verbs:** Always English (SLAMS, CASCADE, etc.)
+4. **Critical on-screen text:** In whatever language should appear on screen
+5. **Scene labels:** English (Scene 1, Scene 2) — structural, not rendered
+
+---
+
+## Example Prompts (from HeyGen Official Guide)
+
+### Compliance Training
+```
+Use a professional female avatar. Make a compliance training video explaining phishing
+in detail. Use examples and list top watch-outs. Leverage motion graphics as A-roll
+overlay and B-roll to help explain core concepts.
+```
+
+### Educational Explainer (Voice-Over Only)
+```
+Create a 1-minute video about camera aperture. Use minimal science diagrams and
+visualizations. No avatar needed, only voice-over. Cool neutrals (navy, cyan),
+thin-line diagrams, and slow elegant motion. B-roll is abstract scientific
+illustrations. Sequencing: definition → diagram expansion → conceptual layering.
+```
+
+### Brand Story (Animated)
+```
+Make a video telling the story of how Twitch got started. Use cartoon-style
+animations and overlays. I want Twitch's iconic colors and fonts. Use motion
+graphics overlays and AI-generated B-roll.
+```
+
+### Product Introduction (Japanese — non-English example)
+```
+日本のナレッジワーカー向けにHeyGenを紹介する1分間のビデオを作成してください。
+トーキングアバターモデルの特徴を説明し、具体的な活用例を3つ含めてください。
+ナレーションは選択されたプレゼンターが担当します。
+
+CRITICAL ON-SCREEN TEXT (display literally):
+- "HeyGen アバター V"
+- "3分で動画作成"
+- "API連携対応"
+
+STYLE — SWISS PULSE (Müller-Brockmann): Black/white + electric blue #0066FF.
+Grid-locked. Helvetica Bold. Animated counters. Diagonal accents.
+Grid wipe transitions.
+```
+
+Note: Script content is in Japanese, but the STYLE block, directive headers (e.g. CRITICAL ON-SCREEN TEXT), and any scene labels remain in English — these are Video Agent directives, not viewer-facing content.
+
+---
+
+## Motion Vocabulary
+
+Every visual element should have a motion verb. Static frames look dead.
+
+### High Energy
+- **SLAMS** — `"$95M" SLAMS in from left at -5 degrees`
+- **CRASHES** — `Title CRASHES in from right, screen-shake on impact`
+- **PUNCHES** — `Quote card PUNCHES up from bottom`
+
+### Medium Energy
+- **CASCADE** — `Three cards CASCADE from top, staggered 0.3s`
+- **SLIDES** — `Ticker SLIDES in from right, continuous scroll`
+- **FILLS** — `Progress bar FILLS 0 to 90% in orange`
+- **DRAWS** — `Chart line DRAWS itself left to right`
+
+### Low Energy
+- **types on** — `Quote types on word by word in italic white`
+- **fades in** — `Logo fades in at center, held 3 seconds`
+- **COUNTS UP** — `"1.85M" COUNTS UP from 0 in amber`
+
+---
+
+## Pro Tips (from HeyGen community)
+
+1. **Save your catchall as a template.** Find a style combo that works, reuse it. Consistency builds brand.
+2. **Iterate in conversation.** Video Agent remembers context within a session. "Make the intro shorter" or "swap B-roll in scene 3 for stock footage" without re-prompting everything.
+3. **Stack style at the end.** Content first, style directives last.
+4. **Describe B-roll as motion verbs** ("slams in," "counts up"), NOT layout coordinates ("upper-left, 48pt").
+
+---
+
+## What Doesn't Work
+
+- **Layout coordinates** — "upper-left: headline in 48pt" → blank frames. Use motion verbs.
+- **Named artists without specs** — "Ikko Tanaka style" means nothing. Translate to colors + shapes + motion.
+- **B-roll under 5 seconds** — Causes black/empty frames. 10s+ minimum.
+- **Static elements** — Every element needs a motion verb.
+- **Per-scene timestamps** — Makes delivery robotic (per HeyGen's own research). Use overall duration only.
diff --git a/heygen-video/references/prompt-styles.md b/heygen-video/references/prompt-styles.md
new file mode 100644
index 0000000..3961fbd
--- /dev/null
+++ b/heygen-video/references/prompt-styles.md
@@ -0,0 +1,251 @@
+---
+name: prompt-styles
+description: 20 named visual styles for Video Agent prompts — mood-first selection, copy-paste STYLE blocks
+---
+
+# Prompt Style Library
+
+Named visual styles you inject directly into the prompt text. Each is inspired by a real graphic designer and tested across 40+ videos.
+
+**These are different from HeyGen API styles (`style_id`).** API styles are curated templates on HeyGen's backend. Prompt styles give you full control over colors, typography, motion, and transitions directly in the prompt.
+
+**How to use:** Pick a style. Copy the STYLE block. Paste it into the prompt after your script content.
+
+**How to pick:** Match mood first, content second. Ask: *"What should the viewer FEEL?"*
+
+> **Language note:** Style blocks stay in English regardless of the video's content language. They are technical directives to Video Agent's rendering engine, not viewer-facing text. The video's script and narration should be in the video language, but the STYLE block at the end is always English.
+
+## Mood-to-Style Guide
+
+| Content feels... | Use... |
+|---|---|
+| Personal, intimate | Soft Signal, Quiet Drama |
+| Natural, earthy | Warm Grain, Earth Pulse |
+| Nostalgic, historical | Heritage Reel |
+| Data-driven, analytical | Swiss Pulse, Digital Grid |
+| Elegant, premium | Velvet Standard, Geometric Bold |
+| Cultural, global | Silk Route, Folk Frequency |
+| Investigative, serious | Contact Sheet, Shadow Cut |
+| Fun, lighthearted | Play Mode, Carnival Surge |
+| Philosophical, abstract | Dream State |
+| Punk, grassroots, raw | Deconstructed |
+| Hype, loud, high-energy | Maximalist Type |
+| Tech-forward, futuristic | Data Drift |
+| Breaking, urgent | Red Wire |
+
+## Quick Reference
+
+| # | Style | Artist | Mood | Best For |
+|---|---|---|---|---|
+| 1 | Soft Signal | Sagmeister | Intimate, warm | Personal stories, wellness |
+| 2 | Warm Grain | Eksell | Organic, friendly | Environmental, sustainability |
+| 3 | Quiet Drama | Ray | Humanist, contemplative | Profiles, biographical |
+| 4 | Heritage Reel | Cassandre | Nostalgic, vintage | History, retrospectives |
+| 5 | Silk Route | Abedini | Flowing, mysterious | Global affairs, cross-cultural |
+| 6 | Swiss Pulse | Müller-Brockmann | Clinical, precise | Data-heavy, analytical |
+| 7 | Geometric Bold | Tanaka | Minimal, elegant | Lifestyle, visual essays |
+| 8 | Velvet Standard | Vignelli | Premium, timeless | Luxury, investor updates |
+| 9 | Digital Grid | Crouwel | Systematic, technical | Infrastructure, engineering |
+| 10 | Contact Sheet | Brodovitch | Editorial, investigative | Journalism, deep dives |
+| 11 | Folk Frequency | Terrazas | Cultural, vivid | Festivals, food, heritage |
+| 12 | Earth Pulse | Ghariokwu | Grounded, communal | Community, grassroots |
+| 13 | Dream State | Tomaszewski | Surreal, poetic | Op-eds, philosophy |
+| 14 | Play Mode | Ahn Sang-soo | Playful, irreverent | Entertainment, pop culture |
+| 15 | Carnival Surge | Lins | Euphoric, celebratory | Milestones, hype |
+| 16 | Shadow Cut | Hillmann | Dark, cinematic | Exposés, investigations |
+| 17 | Deconstructed | Brody | Industrial, raw | Tech news, punk energy |
+| 18 | Maximalist Type | Scher | Loud, kinetic | Big announcements, launches |
+| 19 | Data Drift | Anadol | Futuristic, immersive | AI/tech, innovation |
+| 20 | Red Wire | Tartakover | Urgent, immediate | Breaking news, crisis |
+
+## Production Performance (from 40+ videos)
+
+| Rank | Style | Strength |
+|------|-------|----------|
+| 1 | Deconstructed (Brody) | Most reliable across all topics |
+| 2 | Swiss Pulse (Müller-Brockmann) | Best for data-heavy content |
+| 3 | Digital Grid (Crouwel) | Strong for tech topics |
+| 4 | Geometric Bold (Tanaka) | Elegant and versatile |
+| 5 | Maximalist Type (Scher) | High energy, use sparingly |
+
+---
+
+## Style Blocks (Copy-Paste Ready)
+
+### 1. Soft Signal — Sagmeister
+**Mood:** Intimate, warm | **Best for:** Personal stories, wellness
+```
+STYLE — SOFT SIGNAL (Sagmeister): Warm amber/cream, dusty rose, sage green.
+Handwritten-style text. Close-up framing. Slow drifts and floats.
+Soft dissolves with warm light leaks.
+```
+
+### 2. Warm Grain — Eksell
+**Mood:** Organic, friendly | **Best for:** Environmental, sustainability
+```
+STYLE — WARM GRAIN (Eksell): Earth tones — ochre, forest green, terracotta, cream.
+Organic rounded compositions. 16mm film grain. Rounded sans-serif.
+Gentle wipes and soft cuts.
+```
+
+### 3. Quiet Drama — Ray
+**Mood:** Humanist, contemplative | **Best for:** Profiles, biographical
+```
+STYLE — QUIET DRAMA (Ray): Muted warm — sepia, deep brown, soft gold.
+Portrait framing. Clean serif. Strong single-source contrast.
+Slow fades to black.
+```
+
+### 4. Heritage Reel — Cassandre
+**Mood:** Nostalgic, vintage | **Best for:** History, retrospectives
+```
+STYLE — HERITAGE REEL (Cassandre): Faded gold, burgundy, navy, sepia wash.
+Elegant centered serif. Vignetting and aged film grain.
+Iris wipe transitions.
+```
+
+### 5. Silk Route — Abedini
+**Mood:** Flowing, mysterious | **Best for:** Global affairs, cross-cultural
+```
+STYLE — SILK ROUTE (Abedini): Jewel tones — deep teal, burgundy, gold, lapis blue.
+Layered compositions, all depths active. Elegant spaced type.
+Flowing dissolves and smooth morphs.
+```
+
+### 6. Swiss Pulse — Müller-Brockmann
+**Mood:** Clinical, precise | **Best for:** Data-heavy, analytical, financial
+```
+STYLE — SWISS PULSE (Müller-Brockmann): Black/white + electric blue #0066FF.
+Grid-locked. Helvetica Bold. Animated counters. Diagonal accents.
+Grid wipe transitions.
+```
+
+### 7. Geometric Bold — Tanaka
+**Mood:** Minimal, elegant | **Best for:** Lifestyle, visual essays
+```
+STYLE — GEOMETRIC BOLD (Tanaka): Max 3 flat colors per frame.
+60% negative space. Bold type as primary element.
+Single focal point. Clean cuts on beat.
+```
+
+### 8. Velvet Standard — Vignelli
+**Mood:** Premium, timeless | **Best for:** Luxury, investor updates, keynotes
+```
+STYLE — VELVET STANDARD (Vignelli): Black, white, one accent: gold #c9a84c.
+Thin ALL CAPS, wide spacing. Generous negative space.
+Slow elegant cross-dissolves.
+```
+
+### 9. Digital Grid — Crouwel
+**Mood:** Systematic, technical | **Best for:** Infrastructure, engineering, code
+```
+STYLE — DIGITAL GRID (Crouwel): Monospaced type. Dark #0a0a0a with cyan #00E5FF, amber #FFB300.
+Pixel grid overlays. Terminal aesthetic. Clean wipe transitions.
+```
+
+### 10. Contact Sheet — Brodovitch
+**Mood:** Editorial, investigative | **Best for:** Journalism, deep dives
+```
+STYLE — CONTACT SHEET (Brodovitch): High contrast B&W, desaturated accents.
+Photo-editorial framing. Bold sans-serif annotations. Raw grain.
+Hard cuts on beat. Snap-zooms.
+```
+
+### 11. Folk Frequency — Terrazas
+**Mood:** Cultural, vivid | **Best for:** Festivals, food, heritage
+```
+STYLE — FOLK FREQUENCY (Terrazas): Vivid folk — hot pink, cobalt blue, sun yellow, emerald.
+Bold rounded type. Folk art rhythms. Rich handmade textures.
+Colorful wipes on festive rhythm.
+```
+
+### 12. Earth Pulse — Ghariokwu
+**Mood:** Grounded, communal | **Best for:** Community, grassroots, music/culture
+```
+STYLE — EARTH PULSE (Ghariokwu): Warm saturated — burnt orange, deep green, rich yellow.
+Bold expressive type. Wide community framing.
+Rhythmic cuts on beat. Freeze-frames.
+```
+
+### 13. Dream State — Tomaszewski
+**Mood:** Surreal, poetic | **Best for:** Op-eds, philosophy
+```
+STYLE — DREAM STATE (Tomaszewski): Muted palette + one surreal accent.
+Thin elegant floating type. Soft edges, atmospheric haze.
+Slow morph dissolves — NEVER hard cuts.
+```
+
+### 14. Play Mode — Ahn Sang-soo
+**Mood:** Playful, irreverent | **Best for:** Entertainment, pop culture
+```
+STYLE — PLAY MODE (Ahn Sang-soo): Electric blue, hot pink, lime green.
+Bouncy spring physics. Oversized tilted text. Score cards, XP bars.
+Pop cuts, bounce effects.
+```
+
+### 15. Carnival Surge — Lins
+**Mood:** Euphoric, celebratory | **Best for:** Milestones, hype
+```
+STYLE — CARNIVAL SURGE (Lins): Max color — hot pink #FF1493, yellow #FFE000, teal #00CED1.
+Collage layering. Text MASSIVE at ANGLES. Confetti bursts.
+Smash cuts, flash frames.
+```
+
+### 16. Shadow Cut — Hillmann
+**Mood:** Dark, cinematic | **Best for:** Exposés, investigations
+```
+STYLE — SHADOW CUT (Hillmann): Deep blacks, cold greys + blood red accent.
+Sharp angular text. Heavy shadow. Slow creeping push-ins.
+Hard cuts to black. Film noir tension.
+```
+
+### 17. Deconstructed — Brody
+**Mood:** Industrial, raw | **Best for:** Tech news, punk energy
+```
+STYLE — DECONSTRUCTED (Brody): Dark grey #1a1a1a, rust orange #D4501E.
+Type at angles, overlapping. Gritty textures, scan-line glitch.
+Smash cuts with flash frames.
+```
+
+### 18. Maximalist Type — Scher
+**Mood:** Loud, kinetic | **Best for:** Big announcements, launches
+```
+STYLE — MAXIMALIST TYPE (Scher): Red, yellow, black, white — max contrast.
+Text IS the visual. Overlapping at different scales, 50-80% of frame.
+Kinetic everything. Smash cuts, flash frames.
+```
+
+### 19. Data Drift — Anadol
+**Mood:** Futuristic, immersive | **Best for:** AI/tech, innovation
+```
+STYLE — DATA DRIFT (Anadol): Iridescent — purple #7c3aed, cyan #06b6d4, deep black.
+Fluid morphing compositions. Thin futuristic type.
+Liquid dissolves. Particles coalesce into numbers.
+```
+
+### 20. Red Wire — Tartakover
+**Mood:** Urgent, immediate | **Best for:** Breaking news, crisis
+```
+STYLE — RED WIRE (Tartakover): Red, black, white, emergency yellow.
+Bold condensed all-caps. Split screens, tickers, timestamps.
+Snap cuts, flash frames. Zero breathing room.
+```
+
+---
+
+## Custom Styles
+
+These 20 are starting points. Create your own by combining:
+1. **Named style + designer reference** (grounds the aesthetic)
+2. **Color palette with hex codes** (specific > vague)
+3. **Typography rules** (font style, weight, case, spacing)
+4. **Motion rules** (how elements enter/exit, timing)
+5. **Transition type** (cuts, dissolves, wipes)
+
+Example custom style:
+```
+STYLE — NEON TERMINAL (custom): Black #0a0a0a background, neon green #00FF41 text,
+cyan #00E5FF highlights. Monospaced type throughout. Terminal cursor blinks.
+Text types on character by character. Scan-line overlay at 5% opacity.
+Hard cuts only. Matrix-style code rain in transitions.
+```
diff --git a/heygen-video/references/reviewer-prompt.md b/heygen-video/references/reviewer-prompt.md
new file mode 100644
index 0000000..20d7871
--- /dev/null
+++ b/heygen-video/references/reviewer-prompt.md
@@ -0,0 +1,95 @@
+# Video Producer — Prompt Reviewer
+
+You are a senior video production reviewer. You have been given a prompt that is about to be sent to HeyGen's Video Agent API for one-shot video generation. This is a one-shot API — there is no back-and-forth. The prompt must be as good as possible on the first attempt.
+
+Your job: review this prompt objectively and provide a professional assessment. Be specific. Be critical. Don't rubber-stamp.
+
+## Review Criteria
+
+### 1. Scene Structure
+- Is the prompt structured as individual scenes (Scene 1, Scene 2, etc.) with Visual + VO + Duration per scene?
+- Or is it a flat paragraph? Flat paragraphs produce generic videos. FAIL if flat.
+- Are scene types varied? (Mix of A-roll, B-roll, Motion Graphics, Stock). Monotonous scene types = boring video.
+
+### 2. Opening Hook
+- Does the first scene grab attention in under 10 seconds?
+- Does it lead with the most compelling statement or a question?
+- Or does it start with context-setting ("In today's world...")? Context-setting openings lose viewers.
+
+### 3. Visual Style
+- Is there a visual style block? (Color palette, style descriptor, font preferences)
+- Without one, Video Agent produces inconsistent visuals across scenes.
+- Are the style choices appropriate for the audience? (Tech → minimalistic, Marketing → bold, Education → illustrated)
+
+### 4. Media Type Direction
+- Does each scene specify its media type? (Motion Graphics / Stock Media / AI Generated)
+- Are the media types appropriate for the content? Use this matrix:
+  - Data/Statistics → Motion Graphics (best)
+  - Abstract Concepts → AI Generated (best)
+  - Real Environments → Stock Media (best)
+  - Brand Elements → Motion Graphics (best)
+  - Human Emotions → Stock Media (best), NOT AI Generated (uncanny)
+  - Technical Diagrams → Motion Graphics (best)
+
+### 5. Pacing & Word Count
+- Is the total word count within the 150 words/minute budget?
+- 30s = ~75 words, 60s = ~150 words, 90s = ~225 words, 2min = ~300 words
+- Are scene durations balanced? No single scene should be >30% of total duration.
+
+### 6. Script Quality
+- Is it written for the ear? (Conversational, short sentences, active voice)
+- Or does it read like a written essay? ("It is important to note that..." = bad)
+- Are there natural pauses/transitions between sections?
+
+### 7. Narrator Framing
+- Does the prompt frame the request as narrator-driven? ("A confident narrator explains...")
+- Or does it use generic framing? ("Create a video about..." = weaker)
+
+### 8. Asset Anchoring (if applicable)
+- If assets were mentioned, are they anchored to specific scenes and moments?
+- "Use the screenshot as B-roll when discussing features" >> "include the screenshot"
+
+### 9. Negative Constraints
+- Are negative constraints present only if explicitly requested by the user?
+- "No text overlays" should NOT be a default. Text overlays improve videos.
+
+### 10. Overall Production Quality
+- Would a real video producer approve this prompt?
+- Does it feel like a professional production brief or a casual request?
+- Is there anything missing that would significantly improve the output?
+
+### 11. Language Consistency (non-English videos only)
+- If the video language is not English, is the script/narration in the correct language?
+- Are technical directives (style block, motion verbs, frame check corrections) still in English?
+- Is there a clean separation between content language and directive language?
+- FAIL if the script is in English but the user requested a non-English video.
+- FAIL if technical directives were translated out of English.
+
+## Output Format
+
+Respond with EXACTLY this format:
+
+```
+VERDICT: APPROVE | REVISE | REJECT
+
+SCORE: X/10
+
+STRENGTHS:
+- [specific strength]
+- [specific strength]
+
+ISSUES:
+- [CRITICAL] [specific issue + how to fix]
+- [IMPORTANT] [specific issue + how to fix]
+- [MINOR] [specific issue + how to fix]
+
+REVISED PROMPT (only if VERDICT is REVISE):
+[The full improved prompt, ready to send to the API]
+```
+
+Rules:
+- APPROVE: Score 8+, no CRITICAL issues. Ready to generate.
+- REVISE: Score 5-7, has issues but fixable. Provide the revised prompt.
+- REJECT: Score <5, fundamental problems. List what needs to change.
+- Always provide the REVISED PROMPT if verdict is REVISE. The agent will use your version directly.
+- Be specific in issues. "Could be better" is useless. "Scene 3 uses AI Generated for a real office environment — switch to Stock Media" is useful.
diff --git a/heygen-video/references/troubleshooting.md b/heygen-video/references/troubleshooting.md
new file mode 100644
index 0000000..b9ad832
--- /dev/null
+++ b/heygen-video/references/troubleshooting.md
@@ -0,0 +1,151 @@
+# Known Issues & Troubleshooting
+
+## Known Bug: Video Agent "Talking Photo Not Found"
+
+**Error message:** "The Talking Photo for the current narrator could not be found."
+
+**Root Cause:** Confirmed as a Video Agent backend bug by HeyGen engineering (Jerry Yan). Affects `video_avatar` type narrators and stock avatar auto-selection.
+
+**Workaround:**
+- Prefer explicit `avatar_id` over auto-selection
+- If `video_avatar` fails, retry with a `studio_avatar` or `photo_avatar`
+
+**Status:** Fix in progress at HeyGen.
+
+---
+
+## Weird Pauses / Unnatural Silence in Videos
+
+**Symptom:** Video has awkward pauses or breaks between sentences. Narrator stops speaking but video continues with dead air before next line.
+
+**Root Cause:** When Video Agent receives a script shorter than the target duration, it treats the script as verbatim speech and inserts silence/breaks to stretch it to the exact requested duration. It won't ad-lib or expand — it just pads with dead air.
+
+**Fix:** Add this directive to EVERY prompt:
+> "This script is a concept and theme to convey — not a verbatim transcript. You have full creative freedom to expand, elaborate, add examples, and fill the duration naturally. Do not pad with silence or pauses."
+
+This tells Video Agent it can expand the script naturally instead of treating it as a fixed speech transcript. Per Jerry Yan: "If you tell it it's not a script to be strictly followed but concept or theme or give it green light to expand the script it will do well."
+
+**Status:** Skill-side fix (prompt directive). HeyGen is also tuning the default behavior but the explicit directive is the reliable workaround.
+
+---
+
+## Duration Variance (Expected Behavior)
+
+Video Agent controls final video timing internally. Duration accuracy ranges from 79-174% of target across testing. This is NOT a bug.
+
+**Mitigation:** Variable padding multipliers (Script):
+- ≤30s target: 1.6x padding
+- 31-119s target: 1.4x padding
+- ≥120s target: 1.3x padding
+
+With explicit `avatar_id`: ~97% duration accuracy average.
+Without `avatar_id`: ~80% accuracy average.
+
+---
+
+## Frame Check: Video Agent Not Applying Framing
+
+If the Video Agent ignores the FRAMING NOTE or BACKGROUND NOTE and produces black bars, letterboxing, or mismatched framing:
+
+1. **Ensure the note is appended at the END of the prompt**, after all other content (script, style block, etc.). Video Agent processes instructions sequentially and late-prompt directives have the strongest effect.
+2. **Check that the correction note was actually appended.** Log the final prompt text and verify the FRAMING NOTE / BACKGROUND NOTE block is present.
+3. **photo_avatar does NOT need BACKGROUND NOTE.** Video Agent generates avatar + environment together for photo_avatars. Only append framing notes for orientation mismatches. Background notes are for studio_avatars with transparent/empty backgrounds only.
+
+---
+
+## Stock Avatar Auto-Selection Unreliable
+
+When no `avatar_id` is provided, Video Agent uses narrator tags (`{{@narrator_l0ug91}}`) that sometimes fail to resolve during render.
+
+**Fix:** Always use explicit `avatar_id` from discovery. The only exception is Quick Shot mode where the user explicitly wants speed over reliability.
+
+---
+
+## HTML URLs in files[] Rejected
+
+Video Agent rejects `text/html` content type in the `files[]` array. Web pages (blogs, docs sites, articles) must be handled via Path A (contextualize) only.
+
+**What works in files[]:** Direct file URLs (PDFs, images, videos) — but prefer download→upload→asset_id since CDNs/WAFs often block HeyGen's servers.
+
+---
+
+## Avatar Not Ready for Video Generation
+
+**Symptom:** Video generation fails or produces errors immediately after creating a new avatar. The avatar exists in the HeyGen dashboard but videos referencing it fail.
+
+**Root Cause:** Avatar creation is asynchronous. `heygen avatar create` (and `create_photo_avatar` / `create_prompt_avatar` MCP tools) return success immediately, but the avatar image is still being processed. If you submit a video request before processing completes, it fails.
+
+**Detection:** Poll with `heygen avatar looks list --group-id <group_id>` (or MCP `list_avatar_looks`). The avatar is NOT ready until:
+- `preview_image_url` is non-null
+- `image_width` and `image_height` are non-zero
+
+At the group level (`heygen avatar list`), an unready avatar will have no `preview_image_url` on the group object.
+
+**Fix:** Poll every 10 seconds after creation, wait for preview URL to appear. Typical: 30-90s for photo avatars, 1-3 min for prompt avatars. Timeout at 5 min.
+
+**The heygen-avatar skill handles this automatically.** If you bypass the skill, you must implement this polling yourself.
+
+---
+
+## Interactive Sessions Reliability
+
+Interactive sessions (created without `--wait` and iterated via `heygen video-agent send`) have known issues:
+- Sessions frequently stuck at `processing` status
+- `reviewing` state may never be reached
+- Follow-up messages fail with timing errors
+- Stop command may not trigger video generation
+
+**Recommendation:** Use one-shot mode for production. Interactive sessions are documented here for future use once HeyGen stabilizes the API.
+
+---
+
+## Error Code → Action
+
+Stable CLI exit codes tell you what to do without parsing messages:
+
+| Exit | Class | Action |
+|------|-------|--------|
+| `0` | ok | Continue |
+| `1` | API / network | Retry with backoff. If persistent, check `--verbose` or contact HeyGen support. |
+| `2` | usage | You passed a bad flag. Run `--help` on the command, fix the args, retry. |
+| `3` | auth | Re-auth: `heygen auth login` or set `HEYGEN_API_KEY`. Verify with `heygen auth status`. |
+| `4` | timeout under `--wait` | Operation still running server-side. stdout contains the partial resource (with `session_id` or `video_id`) — resume polling with `heygen video-agent get <id>` or `heygen video get <id>`. Do NOT re-submit. |
+
+Common API-error hints (surfaced in stderr envelope `{error:{code,message,hint}}`):
+
+- `402` / insufficient credits → tell the user their HeyGen plan is out of credits.
+- `403` / forbidden → the resource is not owned by the caller (wrong `group_id`, private avatar).
+- `404` / not found → ID is stale. Re-fetch via `heygen avatar list`, `heygen video-agent get`, etc.
+
+---
+
+## Polling Cadence
+
+When `--wait` isn't an option (e.g., you want to return control to the user between polls), use a back-off schedule rather than a fixed interval:
+
+| Age of job | Poll interval |
+|------------|---------------|
+| 0–2 min | every 10s |
+| 2–5 min | every 30s |
+| 5–10 min | every 60s |
+| > 10 min | surface "taking longer than usual" once, keep polling at 60s, give up at 15 min |
+
+If a job is stuck at the same status for >5 min, that's a signal to surface a status update or check the dashboard.
+
+**Prefer `--wait`** on creation commands. It handles the polling internally and returns the final resource or exits `4` with a resumable `session_id` / `video_id` on timeout.
+
+---
+
+## Direct Video vs Video Agent — Which Endpoint?
+
+Two ways to generate a video. Different pricing, different trade-offs.
+
+| | **Direct Video** | **Video Agent** |
+|---|-------------------|-----------------|
+| Command / Tool | `heygen video create` / no MCP tool yet | `heygen video-agent create` / `create_video_agent` |
+| Input | Full script + avatar + voice + scene JSON | Prompt + optional avatar/voice/style |
+| Control | You author every scene | Video Agent plans scenes, pacing, motion |
+| Pricing | ~$0.0333/sec | ~$0.10/sec |
+| When to use | Deterministic multi-scene videos, tight control, bulk generation | Creative intros, messages, "make a video about X" requests |
+
+The default in this skill is **Video Agent** — it's what `heygen-video` is built around. Drop to Direct Video only for batch or highly scripted workflows where Agent's autonomy is overhead.
diff --git a/heygen-video/scripts/update-check.sh b/heygen-video/scripts/update-check.sh
new file mode 100755
index 0000000..6b69961
--- /dev/null
+++ b/heygen-video/scripts/update-check.sh
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+# heygen-skills update-check — periodic version check for all skills.
+# Inspired by gstack's update-check pattern (MIT license).
+#
+# Output (one line, or nothing):
+#   JUST_UPGRADED <old> <new>       — marker found from recent upgrade
+#   UPGRADE_AVAILABLE <old> <new>   — remote VERSION differs from local
+#   (nothing)                       — up to date, snoozed, disabled, or check skipped
+#
+# Env overrides (for testing):
+#   HEYGEN_SKILL_DIR      — override auto-detected skill dir (default: parent of this script's directory)
+#   HEYGEN_SKILLS_STATE   — override ~/.heygen-skills state directory
+#   HEYGEN_REMOTE_URL     — override remote VERSION URL
+set -euo pipefail
+
+SKILL_DIR="${HEYGEN_SKILL_DIR:-$(cd "$(dirname "$0")/.." && pwd)}"
+STATE_DIR="${HEYGEN_SKILLS_STATE:-$HOME/.heygen-skills}"
+CACHE_FILE="$STATE_DIR/last-update-check"  # last result line; its mtime drives the cache TTL
+MARKER_FILE="$STATE_DIR/just-upgraded-from"  # one-shot marker holding the pre-upgrade version
+SNOOZE_FILE="$STATE_DIR/update-snoozed"  # "<version> <level> <epoch>"
+# Local version resolution order: $SKILL_DIR/VERSION if it exists and is non-empty,
+# else the `version:` frontmatter field of $SKILL_DIR/SKILL.md (gh skill / clawhub subdir-only installs).
+VERSION_FILE="$SKILL_DIR/VERSION"
+LOCAL_SKILL_MD="$SKILL_DIR/SKILL.md"
+REMOTE_URL="${HEYGEN_REMOTE_URL:-https://raw.githubusercontent.com/heygen-com/skills/master/VERSION}"
+
+# ─── Force flag (busts cache + snooze; does NOT override the disabled marker below) ───
+if [ "${1:-}" = "--force" ]; then
+  rm -f "$CACHE_FILE"
+  rm -f "$SNOOZE_FILE"
+fi
+
+# ─── Step 0: Check if updates are disabled (file presence only; content is never read) ───
+if [ -f "$STATE_DIR/update-check-disabled" ]; then
+  exit 0
+fi
+
+# ─── Snooze helper ───────────────────────────────────────────
+# check_snooze <remote_version>
+#   Returns 0 if snoozed (stay quiet), 1 if not snoozed (should output).
+#
+#   Snooze file format: <version> <level> <epoch>
+#   Level durations: 1=24h, 2=48h, 3+=7d
+#   New version resets snooze.
+check_snooze() {
+  local remote_ver="$1"
+  [ -f "$SNOOZE_FILE" ] || return 1  # no snooze file → not snoozed
+  local snoozed_ver snoozed_level snoozed_epoch
+  snoozed_ver="$(awk '{print $1}' "$SNOOZE_FILE" 2>/dev/null || true)"
+  snoozed_level="$(awk '{print $2}' "$SNOOZE_FILE" 2>/dev/null || true)"
+  snoozed_epoch="$(awk '{print $3}' "$SNOOZE_FILE" 2>/dev/null || true)"
+
+  # Validate: all three fields must be non-empty
+  if [ -z "$snoozed_ver" ] || [ -z "$snoozed_level" ] || [ -z "$snoozed_epoch" ]; then
+    return 1
+  fi
+
+  # Validate: level and epoch must be digit-only (also rejects multi-line files, whose fields contain newlines)
+  case "$snoozed_level" in *[!0-9]*) return 1 ;; esac
+  case "$snoozed_epoch" in *[!0-9]*) return 1 ;; esac
+  # Force base 10: a zero-padded field such as "08" would otherwise be read as octal by $(( )) and error out.
+  snoozed_level=$(( 10#$snoozed_level ))
+  snoozed_epoch=$(( 10#$snoozed_epoch ))
+
+  # New version dropped? Ignore snooze.
+  if [ "$snoozed_ver" != "$remote_ver" ]; then
+    return 1
+  fi
+
+  # Compute snooze duration based on level
+  local duration now expires
+  case "$snoozed_level" in
+    1) duration=86400 ;;   # 24 hours
+    2) duration=172800 ;;  # 48 hours
+    *) duration=604800 ;;  # 7 days (level 3+)
+  esac
+
+  now="$(date +%s)"
+  expires=$(( snoozed_epoch + duration ))
+  if [ "$now" -lt "$expires" ]; then
+    return 0  # still snoozed
+  fi
+
+  return 1  # snooze expired
+}
+
+# ─── Step 1: Read local version ──────────────────────────────
+LOCAL=""
+# Prefer $SKILL_DIR/VERSION when present. `|| true`: an unreadable file must fall through, not trip set -e/pipefail.
+if [ -f "$VERSION_FILE" ]; then
+  LOCAL="$(cat "$VERSION_FILE" 2>/dev/null | tr -d '[:space:]' || true)"
+fi
+# Fall back to the SKILL.md frontmatter `version:` (trailing comment, whitespace and YAML quotes stripped) for subdir-only installs.
+if [ -z "$LOCAL" ] && [ -f "$LOCAL_SKILL_MD" ]; then
+  LOCAL="$(awk '/^---/{c++; next} c==1 && /^version:/{gsub(/^version:[[:space:]]*/,""); gsub(/[[:space:]]*#.*$/,""); gsub(/[[:space:]"\047]/,""); print; exit}' "$LOCAL_SKILL_MD" 2>/dev/null || true)"
+fi
+if [ -z "$LOCAL" ]; then
+  exit 0  # No local version in VERSION or SKILL.md frontmatter → skip check
+fi
+
+# ─── Step 2: Check "just upgraded" marker ─────────────────────
+if [ -f "$MARKER_FILE" ]; then
+  OLD="$(cat "$MARKER_FILE" 2>/dev/null | tr -d '[:space:]')"  # reported as <old> in JUST_UPGRADED
+  rm -f "$MARKER_FILE"  # one-shot: the marker is consumed on first read
+  rm -f "$SNOOZE_FILE"  # an upgrade clears any active snooze
+  if [ -n "$OLD" ]; then
+    echo "JUST_UPGRADED $OLD $LOCAL"
+  fi
+fi  # NOTE(review): no exit here, so Steps 3–4 still run and may print a second line (UPGRADE_AVAILABLE) — confirm against the header's "one line" contract
+
+# ─── Step 3: Check cache freshness ──────────────────────────
+# UP_TO_DATE: 60 min TTL (detect new releases quickly)
+# UPGRADE_AVAILABLE: 720 min TTL (keep nagging less often)
+if [ -f "$CACHE_FILE" ]; then
+  CACHED="$(cat "$CACHE_FILE" 2>/dev/null || true)"
+  case "$CACHED" in
+    UP_TO_DATE*)        CACHE_TTL=60 ;;
+    UPGRADE_AVAILABLE*) CACHE_TTL=720 ;;
+    *)                  CACHE_TTL=0 ;;  # unrecognized or empty cache → never trusted, falls through to Step 4
+  esac
+
+  STALE=$(find "$CACHE_FILE" -mmin +$CACHE_TTL 2>/dev/null || true)  # non-empty when the file's mtime is older than the TTL
+  if [ -z "$STALE" ] && [ "$CACHE_TTL" -gt 0 ]; then
+    case "$CACHED" in
+      UP_TO_DATE*)
+        CACHED_VER="$(echo "$CACHED" | awk '{print $2}')"
+        if [ "$CACHED_VER" = "$LOCAL" ]; then  # cache is only honored for the local version it was recorded against
+          exit 0
+        fi
+        ;;
+      UPGRADE_AVAILABLE*)
+        CACHED_OLD="$(echo "$CACHED" | awk '{print $2}')"
+        if [ "$CACHED_OLD" = "$LOCAL" ]; then  # a changed local version invalidates the cached result
+          CACHED_NEW="$(echo "$CACHED" | awk '{print $3}')"
+          if check_snooze "$CACHED_NEW"; then
+            exit 0  # snoozed for this remote version → stay quiet
+          fi
+          echo "$CACHED"  # replay the cached "UPGRADE_AVAILABLE <old> <new>" line without hitting the network
+          exit 0
+        fi
+        ;;
+    esac
+  fi
+fi
+
+# ─── Step 4: Fetch remote version ────────────────────────────
+mkdir -p "$STATE_DIR"
+
+REMOTE=""
+REMOTE="$(curl -sf --max-time 5 "$REMOTE_URL" 2>/dev/null || true)"  # best-effort: network/HTTP failure leaves REMOTE empty
+REMOTE="$(echo "$REMOTE" | tr -d '[:space:]')"
+
+# Validate: must look like a version number (rejects HTML error pages and the empty string from a failed fetch).
+if ! echo "$REMOTE" | grep -qE '^[0-9]+\.[0-9.]+$'; then
+  echo "UP_TO_DATE $LOCAL" > "$CACHE_FILE"  # cached as up to date, so Step 3 suppresses refetching until that entry's TTL lapses
+  exit 0
+fi
+
+if [ "$LOCAL" = "$REMOTE" ]; then
+  echo "UP_TO_DATE $LOCAL" > "$CACHE_FILE"
+  exit 0
+fi
+
+# Versions differ — upgrade available (plain string inequality: a local version newer than remote is reported too)
+echo "UPGRADE_AVAILABLE $LOCAL $REMOTE" > "$CACHE_FILE"
+if check_snooze "$REMOTE"; then
+  exit 0
+fi
+
+echo "UPGRADE_AVAILABLE $LOCAL $REMOTE"

From 3dd7abd4741692f32b1d65945c7423cb0c687e3c Mon Sep 17 00:00:00 2001
From: Eve Park <eve.park@heygen.com>
Date: Mon, 27 Apr 2026 15:21:29 -0700
Subject: [PATCH 2/4] docs: add gh skill install instructions + CI validation

- README + INSTALL.md: list gh skill install as Option 1, alongside
  ClawHub, OpenClaw plugin, and git clone
- Add validate-skills.yml CI: runs gh skill install --from-local for
  both skills and asserts each installed bundle is self-contained
  (no broken ../ refs, every references/ link resolves)
- Cross-skill references in references/ files now use absolute GitHub
  URLs so they remain clickable on GitHub and never produce broken
  links inside an installed bundle
- .gitignore: skip local CI fixtures (.agents/, _ghskill_test/, _install_test/)
---
 .github/workflows/validate-skills.yml       | 136 ++++++++++++++++++++
 .gitignore                                  |   5 +
 INSTALL.md                                  |  44 ++++++-
 README.md                                   |  30 ++++-
 heygen-avatar/references/avatar-creation.md |   2 +-
 heygen-video/references/avatar-discovery.md |   6 +-
 6 files changed, 213 insertions(+), 10 deletions(-)
 create mode 100644 .github/workflows/validate-skills.yml

diff --git a/.github/workflows/validate-skills.yml b/.github/workflows/validate-skills.yml
new file mode 100644
index 0000000..ba557e6
--- /dev/null
+++ b/.github/workflows/validate-skills.yml
@@ -0,0 +1,136 @@
+name: Validate Skills
+
+on:
+  pull_request:
+    paths:
+      - "heygen-avatar/**"
+      - "heygen-video/**"
+      - ".github/workflows/validate-skills.yml"
+      - "SKILL.md"
+  push:
+    branches: [master]
+    paths:
+      - "heygen-avatar/**"
+      - "heygen-video/**"
+      - ".github/workflows/validate-skills.yml"
+      - "SKILL.md"
+
+jobs:
+  self-contained-bundles:
+    name: Skills install cleanly via gh skill (self-contained)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install latest gh
+        run: |
+          curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+            | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
+          sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+            | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
+          sudo apt update
+          sudo apt install -y gh
+          gh --version
+
+      - name: Verify gh skill is available
+        run: |
+          if ! gh skill --help >/dev/null 2>&1; then
+            echo "::error::gh skill subcommand not available in installed gh"
+            exit 1
+          fi
+
+      - name: Stage skills under skills/ for from-local install
+        run: |
+          # gh skill install --from-local requires the skills/<name>/SKILL.md convention.
+          # Stage the in-repo subdir skills under a temporary skills/ root so we can validate.
+          mkdir -p _ghskill_test/skills
+          cp -R heygen-avatar _ghskill_test/skills/heygen-avatar
+          cp -R heygen-video  _ghskill_test/skills/heygen-video
+          ls -la _ghskill_test/skills/
+
+      - name: Install heygen-avatar from local
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          mkdir -p _install_test
+          cd _install_test
+          gh skill install ../_ghskill_test heygen-avatar --from-local --scope project
+          test -f .agents/skills/heygen-avatar/SKILL.md
+          echo "✓ heygen-avatar installed"
+
+      - name: Verify heygen-avatar installed bundle is self-contained
+        run: |
+          set -euo pipefail
+          cd _install_test/.agents/skills/heygen-avatar
+          # No parent-dir references in installed SKILL.md.
+          if grep -nE '\.\./' SKILL.md; then
+            echo "::error::heygen-avatar/SKILL.md contains parent-dir (../) references after install"
+            exit 1
+          fi
+          # Every relative reference in SKILL.md must exist inside the installed bundle.
+          fail=0
+          for ref in $(grep -oE '(references|scripts)/[a-zA-Z0-9_./-]+\.(md|sh)' SKILL.md | sort -u); do
+            if [ ! -f "$ref" ]; then
+              echo "::error::heygen-avatar references $ref but it's not in the installed bundle"
+              fail=1
+            fi
+          done
+          if [ "$fail" -ne 0 ]; then exit 1; fi
+          echo "✓ heygen-avatar bundle is self-contained"
+
+      - name: Install heygen-video from local
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          cd _install_test
+          gh skill install ../_ghskill_test heygen-video --from-local --scope project
+          test -f .agents/skills/heygen-video/SKILL.md
+          echo "✓ heygen-video installed"
+
+      - name: Verify heygen-video installed bundle is self-contained
+        run: |
+          set -euo pipefail
+          cd _install_test/.agents/skills/heygen-video
+          if grep -nE '\.\./' SKILL.md; then
+            echo "::error::heygen-video/SKILL.md contains parent-dir (../) references after install"
+            exit 1
+          fi
+          fail=0
+          for ref in $(grep -oE '(references|scripts)/[a-zA-Z0-9_./-]+\.(md|sh)' SKILL.md | sort -u); do
+            if [ ! -f "$ref" ]; then
+              echo "::error::heygen-video references $ref but it's not in the installed bundle"
+              fail=1
+            fi
+          done
+          if [ "$fail" -ne 0 ]; then exit 1; fi
+          echo "✓ heygen-video bundle is self-contained"
+
+  spec-validate-soft:
+    name: agentskills.io spec validation (advisory)
+    runs-on: ubuntu-latest
+    # Advisory only — fails are reported as warnings, not blocking.
+    # Root SKILL.md will fail validation today (name: heygen-skills doesn't match directory `.`).
+    # That's tracked as a known issue and is not a blocker for gh skill install.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install latest gh
+        run: |
+          curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+            | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
+          sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+            | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
+          sudo apt update
+          sudo apt install -y gh
+
+      - name: Run gh skill publish --dry-run
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh skill publish --dry-run || true
+          echo "::warning::Root SKILL.md does not satisfy gh skill publish naming rules (expected — gh skill publish to agentskills.io registry is a follow-up)."
diff --git a/.gitignore b/.gitignore
index fe7534a..f2701ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,8 @@ node_modules/
 
 evals/
 scripts/release.sh
+
+# Local CI fixtures
+.agents/
+_ghskill_test/
+_install_test/
diff --git a/INSTALL.md b/INSTALL.md
index 6ab2805..cce942d 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -2,15 +2,53 @@
 
 Grab an [API key](https://app.heygen.com/api) and set it in your shell. If you're already on a HeyGen plan with MCP connected to your agent, you can skip the key — MCP will be used automatically.
 
-## Option 1 — ClawHub (recommended)
+The repo ships *two* skills you can install:
+
+- **`heygen-avatar`** — build a persistent digital identity from a photo or description
+- **`heygen-video`** — generate identity-first presenter videos
+
+Most users want both. They chain together: `heygen-avatar` returns an avatar id that `heygen-video` consumes.
+
+## Option 1 — `gh skill install` (works across 12+ agents)
+
+If you have [GitHub CLI](https://cli.github.com) v2.90+ available, this is the most portable install. `gh skill` writes to the right directory for your agent automatically (Claude Code, Cursor, Codex, Gemini CLI, GitHub Copilot, Junie, Goose, OpenHands, Amp, Cline, OpenCode, Warp, and more):
+
+```bash
+gh skill install heygen-com/skills heygen-avatar
+gh skill install heygen-com/skills heygen-video
+```
+
+Project scope (current repo only) is the default. For user scope (every project on this machine):
+
+```bash
+gh skill install heygen-com/skills heygen-avatar --scope user
+gh skill install heygen-com/skills heygen-video  --scope user
+```
+
+Pin to a release tag for reproducibility:
+
+```bash
+gh skill install heygen-com/skills heygen-avatar@v2.3.1 --pin
+gh skill install heygen-com/skills heygen-video@v2.3.1  --pin
+```
+
+## Option 2 — ClawHub
 
 ```bash
 clawhub install heygen-skills
 ```
 
-ClawHub installs to your agent's default skills directory automatically.
+ClawHub installs both skills to your agent's default skills directory automatically.
+
+## Option 3 — OpenClaw plugin
+
+For OpenClaw users who want bundled MCP support too:
+
+```bash
+openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen
+```
 
-## Option 2 — Git clone
+## Option 4 — Git clone
 
 Clone into your agent's skills directory:
 
diff --git a/README.md b/README.md
index e51504a..67b223a 100644
--- a/README.md
+++ b/README.md
@@ -22,21 +22,45 @@ That's it. The agent fetches [INSTALL_FOR_AGENTS.md](./INSTALL_FOR_AGENTS.md) an
 
 Want to install manually instead? Follow the steps below.
 
-### Step 1 — Clone into your skills directory
+### Step 1 — Install the skills
 
-Detect which agent you are and clone to the right path:
+The repo ships two skills, `heygen-avatar` and `heygen-video`. Pick the install path that matches your tooling.
+
+**Option A: `gh skill install`** (works across Claude Code, Cursor, Codex, Gemini CLI, Copilot, Junie, Goose, OpenHands, Amp, Cline, OpenCode, Warp, and more — [agentskills.io](https://agentskills.io)):
+
+```bash
+gh skill install heygen-com/skills heygen-avatar
+gh skill install heygen-com/skills heygen-video
+```
+
+Requires GitHub CLI v2.90+. The CLI writes to the right directory for your agent automatically.
+
+**Option B: ClawHub:**
+
+```bash
+clawhub install heygen-skills
+```
+
+**Option C: OpenClaw plugin** (bundles MCP support):
+
+```bash
+openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen
+```
+
+**Option D: Git clone** (legacy path):
 
 | Agent | Install path |
 |-------|-------------|
 | **Claude Code** | `~/.claude/skills/heygen-skills` |
 | **OpenClaw** | `~/.openclaw/workspace/skills/heygen-skills` |
-| **ClawHub** | Run `clawhub install heygen-skills` and skip to Step 2 |
 | **Other** | Clone anywhere your agent loads skills from |
 
 ```bash
 git clone --single-branch --depth 1 https://github.com/heygen-com/skills.git <install-path>/heygen-skills
 ```
 
+After cloning, the two skills are auto-discovered at `heygen-avatar/SKILL.md` and `heygen-video/SKILL.md`.
+
 ### Step 2 — Get your HeyGen API key
 
 1. Go to **[app.heygen.com/api](https://app.heygen.com/api)** (Settings → API)
diff --git a/heygen-avatar/references/avatar-creation.md b/heygen-avatar/references/avatar-creation.md
index 739f851..df52e62 100644
--- a/heygen-avatar/references/avatar-creation.md
+++ b/heygen-avatar/references/avatar-creation.md
@@ -7,7 +7,7 @@ the reference when you need exact arguments, edge cases, or alternative
 creation paths.
 
 For *avatar discovery* (finding an existing avatar at video time), see
-[`heygen-video/references/avatar-discovery.md`](../../heygen-video/references/avatar-discovery.md).
+[`heygen-video/references/avatar-discovery.md`](https://github.com/heygen-com/skills/blob/master/heygen-video/references/avatar-discovery.md).
 
 ---
 
diff --git a/heygen-video/references/avatar-discovery.md b/heygen-video/references/avatar-discovery.md
index 94ba4d1..2e52344 100644
--- a/heygen-video/references/avatar-discovery.md
+++ b/heygen-video/references/avatar-discovery.md
@@ -3,7 +3,7 @@
 This guide covers *avatar discovery for video generation* — how heygen-video
 finds an appropriate presenter (or skips presenter entirely) before calling
 the Video Agent. For *avatar creation*, see `heygen-avatar` and
-[`heygen-avatar/references/avatar-creation.md`](../../heygen-avatar/references/avatar-creation.md).
+[`heygen-avatar/references/avatar-creation.md`](https://github.com/heygen-com/skills/blob/master/heygen-avatar/references/avatar-creation.md).
 
 ## Path 0: Resolve workspace AVATAR files first
 
@@ -112,7 +112,7 @@ After avatar is settled, confirm voice preferences (accent, delivery style, lang
 
 If no existing avatar fits and the user wants one created, route to the
 `heygen-avatar` skill. See
-[`heygen-avatar/references/avatar-creation.md`](../../heygen-avatar/references/avatar-creation.md)
+[`heygen-avatar/references/avatar-creation.md`](https://github.com/heygen-com/skills/blob/master/heygen-avatar/references/avatar-creation.md)
 for the full creation API surface (photo / prompt / digital twin), file
 input formats, and identity field mappings.
 
@@ -154,7 +154,7 @@ heygen voice list --type public --engine starfish --language en --gender female
 
 For voice *design* (semantic search by description) and the full voice
 selection workflow during avatar setup, see
-[`heygen-avatar/references/avatar-creation.md`](../../heygen-avatar/references/avatar-creation.md).
+[`heygen-avatar/references/avatar-creation.md`](https://github.com/heygen-com/skills/blob/master/heygen-avatar/references/avatar-creation.md).
 
 ---
 

From a2e8d004991c214b0d611fe5aed17a1f089f9269 Mon Sep 17 00:00:00 2001
From: Eve Park <eve.park@heygen.com>
Date: Mon, 27 Apr 2026 15:31:40 -0700
Subject: [PATCH 3/4] fix(ci): isolate install target + add references-in-sync
 drift gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review feedback:

1. CI install path mismatch: gh skill install --scope project resolves
   to the outer repo project root (skills/.agents/), not _install_test/.
   Initialize _install_test as its own git repo so it becomes its own
   project root and the install lands inside it. Verified locally that
   the assertion now actually runs.

2. No drift-prevention CI step: add a references-in-sync job that
   diff-checks all 10 shared files (heygen-{avatar,video}/references/X
   vs root references/X) for byte-identity. Fails CI on drift with a
   helpful diff. New per-skill files (avatar-creation.md, the cleaved
   avatar-discovery.md halves) are intentionally NOT in this matrix —
   only the truly shared docs are gated.

3. Use bash 3.2-portable parallel array (pipe-delimited pairs) instead
   of associative array so the workflow can be sanity-checked locally
   on macOS without bash 5+.
---
 .github/workflows/validate-skills.yml | 61 +++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/validate-skills.yml b/.github/workflows/validate-skills.yml
index ba557e6..5092a22 100644
--- a/.github/workflows/validate-skills.yml
+++ b/.github/workflows/validate-skills.yml
@@ -5,6 +5,8 @@ on:
     paths:
       - "heygen-avatar/**"
       - "heygen-video/**"
+      - "references/**"
+      - "scripts/**"
       - ".github/workflows/validate-skills.yml"
       - "SKILL.md"
   push:
@@ -12,10 +14,53 @@ on:
     paths:
       - "heygen-avatar/**"
       - "heygen-video/**"
+      - "references/**"
+      - "scripts/**"
       - ".github/workflows/validate-skills.yml"
       - "SKILL.md"
 
 jobs:
+  references-in-sync:
+    name: Root references/ stays in sync with subdir copies
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Diff shared references against root
+        run: |
+          set -euo pipefail
+          # These files are deliberately byte-identical between root references/
+          # and the per-skill copies. Any drift here indicates an edit that should
+          # have been propagated. New per-skill files (e.g. avatar-creation.md)
+          # and the cleaved per-skill avatar-discovery.md are intentionally NOT in
+          # this matrix — only the truly shared files are gated.
+          # Format: <subdir-copy>|<canonical-root-source>
+          PAIRS=(
+            'heygen-avatar/references/asset-routing.md|references/asset-routing.md'
+            'heygen-avatar/references/troubleshooting.md|references/troubleshooting.md'
+            'heygen-video/references/asset-routing.md|references/asset-routing.md'
+            'heygen-video/references/frame-check.md|references/frame-check.md'
+            'heygen-video/references/motion-vocabulary.md|references/motion-vocabulary.md'
+            'heygen-video/references/official-prompt-guide.md|references/official-prompt-guide.md'
+            'heygen-video/references/prompt-craft.md|references/prompt-craft.md'
+            'heygen-video/references/prompt-styles.md|references/prompt-styles.md'
+            'heygen-video/references/reviewer-prompt.md|references/reviewer-prompt.md'
+            'heygen-video/references/troubleshooting.md|references/troubleshooting.md'
+          )
+          fail=0
+          for pair in "${PAIRS[@]}"; do
+            sub="${pair%%|*}"
+            root="${pair##*|}"
+            if ! diff -q "$root" "$sub" >/dev/null; then
+              echo "::error::Drift detected — '$sub' differs from canonical '$root'"
+              echo "::error::If you edited one, edit the other (or add a sync script)"
+              diff -u "$root" "$sub" | head -40 || true
+              fail=1
+            fi
+          done
+          if [ "$fail" -ne 0 ]; then exit 1; fi
+          echo "✓ All shared references are byte-identical to canonical root copies"
+
   self-contained-bundles:
     name: Skills install cleanly via gh skill (self-contained)
     runs-on: ubuntu-latest
@@ -47,18 +92,26 @@ jobs:
           mkdir -p _ghskill_test/skills
           cp -R heygen-avatar _ghskill_test/skills/heygen-avatar
           cp -R heygen-video  _ghskill_test/skills/heygen-video
-          ls -la _ghskill_test/skills/
+
+      - name: Init isolated install project
+        run: |
+          # gh skill install --scope project writes to <project_root>/.agents/skills.
+          # Initialize an isolated git repo for the install target so it becomes
+          # its own project root (otherwise gh skill walks up to the outer repo).
+          mkdir -p _install_test
+          cd _install_test
+          git init -q
+          git -c user.email='ci@heygen.com' -c user.name='CI' commit -q --allow-empty -m bootstrap
 
       - name: Install heygen-avatar from local
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           set -euo pipefail
-          mkdir -p _install_test
           cd _install_test
           gh skill install ../_ghskill_test heygen-avatar --from-local --scope project
           test -f .agents/skills/heygen-avatar/SKILL.md
-          echo "✓ heygen-avatar installed"
+          echo "✓ heygen-avatar installed at _install_test/.agents/skills/heygen-avatar/"
 
       - name: Verify heygen-avatar installed bundle is self-contained
         run: |
@@ -88,7 +141,7 @@ jobs:
           cd _install_test
           gh skill install ../_ghskill_test heygen-video --from-local --scope project
           test -f .agents/skills/heygen-video/SKILL.md
-          echo "✓ heygen-video installed"
+          echo "✓ heygen-video installed at _install_test/.agents/skills/heygen-video/"
 
       - name: Verify heygen-video installed bundle is self-contained
         run: |

From 675b0ee936b53ffb5e788cc3431a62b0e68961d9 Mon Sep 17 00:00:00 2001
From: Eve Park <eve.park@heygen.com>
Date: Mon, 27 Apr 2026 15:47:20 -0700
Subject: [PATCH 4/4] chore: purge orphaned references, wire prompt-styles, add
 sync script
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit of references/*.md outcome:
- 7 files: actively linked from skills, kept as-is
- 1 file (prompt-styles.md): linked only from root SKILL.md, orphaned in
  the gh-skill subdir bundle. Wired into heygen-video/SKILL.md alongside
  the existing prompt-craft / motion-vocabulary pointers.
- 1 file (reviewer-prompt.md): zero links from any SKILL.md, dead in
  the workflow. Deleted from root references/ and heygen-video/references/.

Sync workflow (Option 2):
- Add scripts/sync-references.sh — propagates root references/ to per-skill
  copies (--check mode for CI drift detection, no args for sync).
- validate-skills.yml now invokes ./scripts/sync-references.sh --check
  instead of inlining the pair list (single source of truth).
- New CI orphan-check: every bundled references/* / scripts/* file in an
  installed bundle must be linked from SKILL.md.
- CONTRIBUTING.md documents the references layout + editor workflow.
---
 .github/workflows/validate-skills.yml      |  55 ++++-------
 CONTRIBUTING.md                            |  16 +++
 heygen-video/SKILL.md                      |   2 +-
 heygen-video/references/reviewer-prompt.md |  95 ------------------
 references/reviewer-prompt.md              |  95 ------------------
 scripts/sync-references.sh                 | 107 +++++++++++++++++++++
 6 files changed, 143 insertions(+), 227 deletions(-)
 delete mode 100644 heygen-video/references/reviewer-prompt.md
 delete mode 100644 references/reviewer-prompt.md
 create mode 100755 scripts/sync-references.sh

diff --git a/.github/workflows/validate-skills.yml b/.github/workflows/validate-skills.yml
index 5092a22..fba6ebe 100644
--- a/.github/workflows/validate-skills.yml
+++ b/.github/workflows/validate-skills.yml
@@ -26,40 +26,8 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Diff shared references against root
-        run: |
-          set -euo pipefail
-          # These files are deliberately byte-identical between root references/
-          # and the per-skill copies. Any drift here indicates an edit that should
-          # have been propagated. New per-skill files (e.g. avatar-creation.md)
-          # and the cleaved per-skill avatar-discovery.md are intentionally NOT in
-          # this matrix — only the truly shared files are gated.
-          # Format: <subdir-copy>|<canonical-root-source>
-          PAIRS=(
-            'heygen-avatar/references/asset-routing.md|references/asset-routing.md'
-            'heygen-avatar/references/troubleshooting.md|references/troubleshooting.md'
-            'heygen-video/references/asset-routing.md|references/asset-routing.md'
-            'heygen-video/references/frame-check.md|references/frame-check.md'
-            'heygen-video/references/motion-vocabulary.md|references/motion-vocabulary.md'
-            'heygen-video/references/official-prompt-guide.md|references/official-prompt-guide.md'
-            'heygen-video/references/prompt-craft.md|references/prompt-craft.md'
-            'heygen-video/references/prompt-styles.md|references/prompt-styles.md'
-            'heygen-video/references/reviewer-prompt.md|references/reviewer-prompt.md'
-            'heygen-video/references/troubleshooting.md|references/troubleshooting.md'
-          )
-          fail=0
-          for pair in "${PAIRS[@]}"; do
-            sub="${pair%%|*}"
-            root="${pair##*|}"
-            if ! diff -q "$root" "$sub" >/dev/null; then
-              echo "::error::Drift detected — '$sub' differs from canonical '$root'"
-              echo "::error::If you edited one, edit the other (or add a sync script)"
-              diff -u "$root" "$sub" | head -40 || true
-              fail=1
-            fi
-          done
-          if [ "$fail" -ne 0 ]; then exit 1; fi
-          echo "✓ All shared references are byte-identical to canonical root copies"
+      - name: Verify references are in sync (no drift)
+        run: ./scripts/sync-references.sh --check
 
   self-contained-bundles:
     name: Skills install cleanly via gh skill (self-contained)
@@ -130,8 +98,16 @@ jobs:
               fail=1
             fi
           done
+          # Every bundled references/* and scripts/* file must be mentioned in SKILL.md (base name matched as a fixed string, not a regex).
+          while IFS= read -r f; do
+            base=$(basename "$f")
+            if ! grep -qF -- "$base" SKILL.md; then
+              echo "::error::orphaned bundled file (not linked from SKILL.md): $f"
+              fail=1
+            fi
+          done < <(find references scripts -type f 2>/dev/null)
           if [ "$fail" -ne 0 ]; then exit 1; fi
-          echo "✓ heygen-avatar bundle is self-contained"
+          echo "✓ heygen-avatar bundle is self-contained, no orphans"
 
       - name: Install heygen-video from local
         env:
@@ -158,8 +134,15 @@ jobs:
               fail=1
             fi
           done
+          while IFS= read -r f; do
+            base=$(basename "$f")
+            if ! grep -qF -- "$base" SKILL.md; then  # -F: file names contain '.', which must not act as a regex wildcard
+              echo "::error::orphaned bundled file (not linked from SKILL.md): $f"
+              fail=1
+            fi
+          done < <(find references scripts -type f 2>/dev/null)
           if [ "$fail" -ne 0 ]; then exit 1; fi
-          echo "✓ heygen-video bundle is self-contained"
+          echo "✓ heygen-video bundle is self-contained, no orphans"
 
   spec-validate-soft:
     name: agentskills.io spec validation (advisory)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index dc2c3b0..ab3c659 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,6 +41,22 @@ gh pr create --title "Short summary" --body "$(cat <<'EOF'
 - [ ] Full generation tested (video_id if applicable)
 - [ ] SKILL.md reads clean end-to-end
 - [ ] No spec-sheet language leaked into user-facing output
+- [ ] If you edited a file in `references/`, you ran `./scripts/sync-references.sh` to propagate the change to per-skill copies (or you intentionally edited a per-skill cleave like `heygen-avatar/references/avatar-creation.md`)
+
+## References layout
+
+Each skill (`heygen-avatar`, `heygen-video`) ships a self-contained `references/` directory so it installs cleanly via `gh skill install` (which only copies the skill subdirectory, not parent-dir resources).
+
+- **Source of truth** for shared docs: `references/<file>.md` at the repo root.
+- **Per-skill copies** are byte-identical mirrors of the root files.
+- **Per-skill cleaves** (`heygen-avatar/references/avatar-creation.md`, `heygen-video/references/avatar-discovery.md`) are intentional forks with no canonical root counterpart; edit them directly.
+
+**Editor workflow:**
+1. Edit the canonical root file (`references/<file>.md`).
+2. Run `./scripts/sync-references.sh` to propagate the change to per-skill copies.
+3. `git add` everything together and commit.
+
+CI (`.github/workflows/validate-skills.yml`) runs `./scripts/sync-references.sh --check` on every PR and fails on drift.
 
 ## Breaking changes
 
diff --git a/heygen-video/SKILL.md b/heygen-video/SKILL.md
index 3816987..0e09970 100644
--- a/heygen-video/SKILL.md
+++ b/heygen-video/SKILL.md
@@ -444,7 +444,7 @@ Video Agent supports three media types. Guide it explicitly or it guesses (often
 Be explicit in the prompt: "Use motion graphics for the statistics, stock footage for the office scene, AI-generated visuals for the futuristic concept."
 
 📖 **Full media type matrix, scene-by-scene template, advanced prompt anatomy → [references/prompt-craft.md](references/prompt-craft.md)**
-📖 **Named styles (Deconstructed, Swiss Pulse, etc.) → inlined in Style Selection above**
+📖 **20 named visual styles (mood-first selection, copy-paste STYLE blocks) → [references/prompt-styles.md](references/prompt-styles.md)**
 📖 **Motion vocabulary and B-roll → [references/motion-vocabulary.md](references/motion-vocabulary.md)**
 
 ### Orientation
diff --git a/heygen-video/references/reviewer-prompt.md b/heygen-video/references/reviewer-prompt.md
deleted file mode 100644
index 20d7871..0000000
--- a/heygen-video/references/reviewer-prompt.md
+++ /dev/null
@@ -1,95 +0,0 @@
-# Video Producer — Prompt Reviewer
-
-You are a senior video production reviewer. You have been given a prompt that is about to be sent to HeyGen's Video Agent API for one-shot video generation. This is a one-shot API — there is no back-and-forth. The prompt must be as good as possible on the first attempt.
-
-Your job: review this prompt objectively and provide a professional assessment. Be specific. Be critical. Don't rubber-stamp.
-
-## Review Criteria
-
-### 1. Scene Structure
-- Is the prompt structured as individual scenes (Scene 1, Scene 2, etc.) with Visual + VO + Duration per scene?
-- Or is it a flat paragraph? Flat paragraphs produce generic videos. FAIL if flat.
-- Are scene types varied? (Mix of A-roll, B-roll, Motion Graphics, Stock). Monotonous scene types = boring video.
-
-### 2. Opening Hook
-- Does the first scene grab attention in under 10 seconds?
-- Does it lead with the most compelling statement or a question?
-- Or does it start with context-setting ("In today's world...")? Context-setting openings lose viewers.
-
-### 3. Visual Style
-- Is there a visual style block? (Color palette, style descriptor, font preferences)
-- Without one, Video Agent produces inconsistent visuals across scenes.
-- Are the style choices appropriate for the audience? (Tech → minimalistic, Marketing → bold, Education → illustrated)
-
-### 4. Media Type Direction
-- Does each scene specify its media type? (Motion Graphics / Stock Media / AI Generated)
-- Are the media types appropriate for the content? Use this matrix:
-  - Data/Statistics → Motion Graphics (best)
-  - Abstract Concepts → AI Generated (best)
-  - Real Environments → Stock Media (best)
-  - Brand Elements → Motion Graphics (best)
-  - Human Emotions → Stock Media (best), NOT AI Generated (uncanny)
-  - Technical Diagrams → Motion Graphics (best)
-
-### 5. Pacing & Word Count
-- Is the total word count within the 150 words/minute budget?
-- 30s = ~75 words, 60s = ~150 words, 90s = ~225 words, 2min = ~300 words
-- Are scene durations balanced? No single scene should be >30% of total duration.
-
-### 6. Script Quality
-- Is it written for the ear? (Conversational, short sentences, active voice)
-- Or does it read like a written essay? ("It is important to note that..." = bad)
-- Are there natural pauses/transitions between sections?
-
-### 7. Narrator Framing
-- Does the prompt frame the request as narrator-driven? ("A confident narrator explains...")
-- Or does it use generic framing? ("Create a video about..." = weaker)
-
-### 8. Asset Anchoring (if applicable)
-- If assets were mentioned, are they anchored to specific scenes and moments?
-- "Use the screenshot as B-roll when discussing features" >> "include the screenshot"
-
-### 9. Negative Constraints
-- Are negative constraints present only if explicitly requested by the user?
-- "No text overlays" should NOT be a default. Text overlays improve videos.
-
-### 10. Overall Production Quality
-- Would a real video producer approve this prompt?
-- Does it feel like a professional production brief or a casual request?
-- Is there anything missing that would significantly improve the output?
-
-### 11. Language Consistency (non-English videos only)
-- If the video language is not English, is the script/narration in the correct language?
-- Are technical directives (style block, motion verbs, frame check corrections) still in English?
-- Is there a clean separation between content language and directive language?
-- FAIL if the script is in English but the user requested a non-English video.
-- FAIL if technical directives were translated out of English.
-
-## Output Format
-
-Respond with EXACTLY this format:
-
-```
-VERDICT: APPROVE | REVISE | REJECT
-
-SCORE: X/10
-
-STRENGTHS:
-- [specific strength]
-- [specific strength]
-
-ISSUES:
-- [CRITICAL] [specific issue + how to fix]
-- [IMPORTANT] [specific issue + how to fix]
-- [MINOR] [specific issue + how to fix]
-
-REVISED PROMPT (only if VERDICT is REVISE):
-[The full improved prompt, ready to send to the API]
-```
-
-Rules:
-- APPROVE: Score 8+, no CRITICAL issues. Ready to generate.
-- REVISE: Score 5-7, has issues but fixable. Provide the revised prompt.
-- REJECT: Score <5, fundamental problems. List what needs to change.
-- Always provide the REVISED PROMPT if verdict is REVISE. The agent will use your version directly.
-- Be specific in issues. "Could be better" is useless. "Scene 3 uses AI Generated for a real office environment — switch to Stock Media" is useful.
diff --git a/references/reviewer-prompt.md b/references/reviewer-prompt.md
deleted file mode 100644
index 20d7871..0000000
--- a/references/reviewer-prompt.md
+++ /dev/null
@@ -1,95 +0,0 @@
-# Video Producer — Prompt Reviewer
-
-You are a senior video production reviewer. You have been given a prompt that is about to be sent to HeyGen's Video Agent API for one-shot video generation. This is a one-shot API — there is no back-and-forth. The prompt must be as good as possible on the first attempt.
-
-Your job: review this prompt objectively and provide a professional assessment. Be specific. Be critical. Don't rubber-stamp.
-
-## Review Criteria
-
-### 1. Scene Structure
-- Is the prompt structured as individual scenes (Scene 1, Scene 2, etc.) with Visual + VO + Duration per scene?
-- Or is it a flat paragraph? Flat paragraphs produce generic videos. FAIL if flat.
-- Are scene types varied? (Mix of A-roll, B-roll, Motion Graphics, Stock). Monotonous scene types = boring video.
-
-### 2. Opening Hook
-- Does the first scene grab attention in under 10 seconds?
-- Does it lead with the most compelling statement or a question?
-- Or does it start with context-setting ("In today's world...")? Context-setting openings lose viewers.
-
-### 3. Visual Style
-- Is there a visual style block? (Color palette, style descriptor, font preferences)
-- Without one, Video Agent produces inconsistent visuals across scenes.
-- Are the style choices appropriate for the audience? (Tech → minimalistic, Marketing → bold, Education → illustrated)
-
-### 4. Media Type Direction
-- Does each scene specify its media type? (Motion Graphics / Stock Media / AI Generated)
-- Are the media types appropriate for the content? Use this matrix:
-  - Data/Statistics → Motion Graphics (best)
-  - Abstract Concepts → AI Generated (best)
-  - Real Environments → Stock Media (best)
-  - Brand Elements → Motion Graphics (best)
-  - Human Emotions → Stock Media (best), NOT AI Generated (uncanny)
-  - Technical Diagrams → Motion Graphics (best)
-
-### 5. Pacing & Word Count
-- Is the total word count within the 150 words/minute budget?
-- 30s = ~75 words, 60s = ~150 words, 90s = ~225 words, 2min = ~300 words
-- Are scene durations balanced? No single scene should be >30% of total duration.
-
-### 6. Script Quality
-- Is it written for the ear? (Conversational, short sentences, active voice)
-- Or does it read like a written essay? ("It is important to note that..." = bad)
-- Are there natural pauses/transitions between sections?
-
-### 7. Narrator Framing
-- Does the prompt frame the request as narrator-driven? ("A confident narrator explains...")
-- Or does it use generic framing? ("Create a video about..." = weaker)
-
-### 8. Asset Anchoring (if applicable)
-- If assets were mentioned, are they anchored to specific scenes and moments?
-- "Use the screenshot as B-roll when discussing features" >> "include the screenshot"
-
-### 9. Negative Constraints
-- Are negative constraints present only if explicitly requested by the user?
-- "No text overlays" should NOT be a default. Text overlays improve videos.
-
-### 10. Overall Production Quality
-- Would a real video producer approve this prompt?
-- Does it feel like a professional production brief or a casual request?
-- Is there anything missing that would significantly improve the output?
-
-### 11. Language Consistency (non-English videos only)
-- If the video language is not English, is the script/narration in the correct language?
-- Are technical directives (style block, motion verbs, frame check corrections) still in English?
-- Is there a clean separation between content language and directive language?
-- FAIL if the script is in English but the user requested a non-English video.
-- FAIL if technical directives were translated out of English.
-
-## Output Format
-
-Respond with EXACTLY this format:
-
-```
-VERDICT: APPROVE | REVISE | REJECT
-
-SCORE: X/10
-
-STRENGTHS:
-- [specific strength]
-- [specific strength]
-
-ISSUES:
-- [CRITICAL] [specific issue + how to fix]
-- [IMPORTANT] [specific issue + how to fix]
-- [MINOR] [specific issue + how to fix]
-
-REVISED PROMPT (only if VERDICT is REVISE):
-[The full improved prompt, ready to send to the API]
-```
-
-Rules:
-- APPROVE: Score 8+, no CRITICAL issues. Ready to generate.
-- REVISE: Score 5-7, has issues but fixable. Provide the revised prompt.
-- REJECT: Score <5, fundamental problems. List what needs to change.
-- Always provide the REVISED PROMPT if verdict is REVISE. The agent will use your version directly.
-- Be specific in issues. "Could be better" is useless. "Scene 3 uses AI Generated for a real office environment — switch to Stock Media" is useful.
diff --git a/scripts/sync-references.sh b/scripts/sync-references.sh
new file mode 100755
index 0000000..aa7376f
--- /dev/null
+++ b/scripts/sync-references.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# sync-references.sh — propagate canonical root references/ into per-skill copies.
+#
+# Source of truth: references/ at the repo root.
+# Destinations:    heygen-avatar/references/ and heygen-video/references/.
+#
+# Each skill bundles only the references it actually links to. This script
+# enforces that mapping and copies the canonical root file into each
+# destination. It does NOT touch:
+#
+#   - heygen-avatar/references/avatar-creation.md
+#       (per-skill creator-side cleave, no root counterpart)
+#   - heygen-video/references/avatar-discovery.md
+#       (per-skill consumer-side cleave from the original avatar-discovery.md;
+#        the cleave is intentional and the two halves diverge by design)
+#
+# Usage:
+#   ./scripts/sync-references.sh           # propagate root → subdirs
+#   ./scripts/sync-references.sh --check   # exit 1 if any subdir copy drifts from root
+#
+# CI gate: validate-skills.yml runs this with --check on every PR.
+
+set -euo pipefail
+
+# Resolve repo root regardless of where the script is invoked from.
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$REPO_ROOT"
+
+# Format: <subdir-copy>|<canonical-root-source>
+PAIRS=(
+  'heygen-avatar/references/asset-routing.md|references/asset-routing.md'
+  'heygen-avatar/references/troubleshooting.md|references/troubleshooting.md'
+  'heygen-video/references/asset-routing.md|references/asset-routing.md'
+  'heygen-video/references/frame-check.md|references/frame-check.md'
+  'heygen-video/references/motion-vocabulary.md|references/motion-vocabulary.md'
+  'heygen-video/references/official-prompt-guide.md|references/official-prompt-guide.md'
+  'heygen-video/references/prompt-craft.md|references/prompt-craft.md'
+  'heygen-video/references/prompt-styles.md|references/prompt-styles.md'
+  'heygen-video/references/troubleshooting.md|references/troubleshooting.md'
+)
+
+mode="sync"
+case "${1:-}" in
+  --check) mode="check" ;;
+  -h|--help)
+    sed -n '1,28p' "$0"
+    exit 0
+    ;;
+  "") ;;
+  *)
+    echo "unknown argument: $1" >&2
+    echo "usage: $0 [--check]" >&2
+    exit 2
+    ;;
+esac
+
+fail=0
+synced=0
+
+for pair in "${PAIRS[@]}"; do
+  sub="${pair%%|*}"
+  root="${pair##*|}"
+
+  if [ ! -f "$root" ]; then
+    echo "::error::canonical source missing: $root"
+    fail=1
+    continue
+  fi
+
+  if [ "$mode" = "check" ]; then
+    if [ ! -f "$sub" ]; then
+      echo "::error::subdir copy missing: $sub (run scripts/sync-references.sh to create it)"
+      fail=1
+    elif ! diff -q "$root" "$sub" >/dev/null 2>&1; then
+      echo "::error::drift detected — '$sub' differs from canonical '$root'"
+      diff -u "$root" "$sub" | head -40 || true
+      fail=1
+    fi
+  else
+    mkdir -p "$(dirname "$sub")"
+    if [ -f "$sub" ] && diff -q "$root" "$sub" >/dev/null 2>&1; then
+      :
+    else
+      cp "$root" "$sub"
+      echo "synced: $root → $sub"
+      synced=$((synced + 1))
+    fi
+  fi
+done
+
+if [ "$mode" = "check" ]; then
+  if [ "$fail" -ne 0 ]; then
+    echo ""
+    echo "Drift detected. Run: ./scripts/sync-references.sh"
+    exit 1
+  fi
+  echo "✓ All ${#PAIRS[@]} shared references are in sync with canonical root copies."
+else
+  if [ "$synced" -eq 0 ]; then
+    echo "✓ All ${#PAIRS[@]} shared references already in sync — no changes."
+  else
+    echo ""
+    echo "Synced $synced file(s). Review and commit:"
+    echo "  git add heygen-avatar/references heygen-video/references"
+    echo "  git commit"
+  fi
+fi