diff --git a/.github/workflows/validate-skills.yml b/.github/workflows/validate-skills.yml
index fba6ebe..93cf0ad 100644
--- a/.github/workflows/validate-skills.yml
+++ b/.github/workflows/validate-skills.yml
@@ -5,30 +5,15 @@ on:
     paths:
       - "heygen-avatar/**"
       - "heygen-video/**"
-      - "references/**"
-      - "scripts/**"
       - ".github/workflows/validate-skills.yml"
-      - "SKILL.md"
   push:
     branches: [master]
     paths:
       - "heygen-avatar/**"
       - "heygen-video/**"
-      - "references/**"
-      - "scripts/**"
       - ".github/workflows/validate-skills.yml"
-      - "SKILL.md"
 
 jobs:
-  references-in-sync:
-    name: Root references/ stays in sync with subdir copies
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Verify references are in sync (no drift)
-        run: ./scripts/sync-references.sh --check
-
   self-contained-bundles:
     name: Skills install cleanly via gh skill (self-contained)
     runs-on: ubuntu-latest
@@ -144,12 +129,28 @@ jobs:
           if [ "$fail" -ne 0 ]; then exit 1; fi
           echo "✓ heygen-video bundle is self-contained, no orphans"
 
+      - name: Verify no parent-dir refs in heygen-avatar references/
+        run: |
+          set -euo pipefail
+          if grep -rnE '\.\./\.\.' heygen-avatar/references/ 2>/dev/null; then
+            echo "::error::heygen-avatar/references/ contains ../../ paths that will break inside an installed bundle"
+            exit 1
+          fi
+          echo "✓ heygen-avatar/references/ has no parent-dir refs"
+
+      - name: Verify no parent-dir refs in heygen-video references/
+        run: |
+          set -euo pipefail
+          if grep -rnE '\.\./\.\.' heygen-video/references/ 2>/dev/null; then
+            echo "::error::heygen-video/references/ contains ../../ paths that will break inside an installed bundle"
+            exit 1
+          fi
+          echo "✓ heygen-video/references/ has no parent-dir refs"
+
   spec-validate-soft:
     name: agentskills.io spec validation (advisory)
     runs-on: ubuntu-latest
     # Advisory only — fails are reported as warnings, not blocking.
-    # Root SKILL.md will fail validation today (name: heygen-skills doesn't match directory `.`).
-    # That's tracked as a known issue and is not a blocker for gh skill install.
     continue-on-error: true
     steps:
       - uses: actions/checkout@v4
@@ -167,6 +168,4 @@ jobs:
       - name: Run gh skill publish --dry-run
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          gh skill publish --dry-run || true
-          echo "::warning::Root SKILL.md does not satisfy gh skill publish naming rules (expected — gh skill publish to agentskills.io registry is a follow-up)."
+        run: gh skill publish --dry-run || true
diff --git a/CLAUDE.md b/CLAUDE.md
index 4aeef5b..28e65e9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,38 +2,53 @@
 
 ## What This Is
 
-The HeyGen Skills. Two skills that chain together: **heygen-avatar** (identity → avatar → voice) and **heygen-video** (idea → script → video). SKILL.md at root routes between them.
+The HeyGen Skills. Two independent, self-contained skills that chain together: **heygen-avatar** (identity → avatar → voice) and **heygen-video** (idea → script → video). Each ships its own bundle of references so it installs cleanly via `gh skill install`, ClawHub, the OpenClaw plugin, or direct git clone.
 
 ## Architecture
 
 ```
 heygen-skills/
-├── SKILL.md                    # Router: detects intent, dispatches to sub-skill
 ├── CLAUDE.md                   # This file. Structure, rules, conventions.
 ├── INSTALL.md                  # Installation instructions
+├── INSTALL_FOR_AGENTS.md       # Agent-driven install spec
 ├── README.md                   # Public-facing description
 ├── CONTRIBUTING.md             # PR workflow
+├── CHANGELOG.md
 ├── LICENSE
-├── heygen-avatar/
-│   └── SKILL.md                # Avatar creation workflow (identity → avatar → voice → AVATAR file)
-├── heygen-video/
-│   └── SKILL.md                # Video production workflow (7-stage pipeline)
-├── references/                 # Shared. Loaded on-demand by phase (NOT every turn)
-│   ├── avatar-discovery.md     # Discovery: avatar lookup, voice selection, curl examples
-│   ├── asset-routing.md        # Discovery: asset classification engine, upload flows
-│   ├── prompt-styles.md        # Prompt Craft: 6 prompt style templates
-│   ├── motion-vocabulary.md    # Prompt Craft: camera/transition vocabulary
-│   ├── prompt-craft.md         # Prompt Craft: prompt construction deep-dive
-│   ├── official-prompt-guide.md# Prompt Craft: HeyGen's own prompt research
-│   ├── frame-check.md          # Frame Check: aspect ratio correction prompts
-│   ├── troubleshooting.md      # Known issues, workarounds, duration variance
-│   └── reviewer-prompt.md      # Deliver: self-evaluation rubric
+├── VERSION
+├── .mcp.json + mcp.json        # MCP server config (root-level, used by plugin manifests)
+├── .claude-plugin/             # OpenClaw / Claude Code plugin manifest
+├── .codex-plugin/              # Codex plugin manifest
+├── .cursor-plugin/             # Cursor plugin manifest
+├── heygen-avatar/              # Self-contained skill
+│   ├── SKILL.md                # Avatar creation workflow (identity → avatar → voice → AVATAR file)
+│   └── references/             # On-demand docs, loaded per phase
+│       ├── asset-routing.md
+│       ├── avatar-creation.md
+│       └── troubleshooting.md
+├── heygen-video/               # Self-contained skill
+│   ├── SKILL.md                # Video production workflow (7-stage pipeline)
+│   ├── references/             # On-demand docs, loaded per phase
+│   │   ├── asset-routing.md
+│   │   ├── avatar-discovery.md
+│   │   ├── frame-check.md
+│   │   ├── motion-vocabulary.md
+│   │   ├── official-prompt-guide.md
+│   │   ├── prompt-craft.md
+│   │   ├── prompt-styles.md
+│   │   └── troubleshooting.md
+│   └── scripts/
+│       └── update-check.sh     # Self-contained version-check shell script
+├── platforms/                  # Platform-specific skill variants (e.g. nanoclaw)
+├── assets/                     # Logos, plugin assets
 └── evals/                      # Dev-only test infrastructure (not shipped to users)
-    ├── eval-runner-prompt.md   # Instructions for eval subagent
-    ├── autoresearch-loop.md    # Loop methodology docs
-    └── round-N-scenarios.md    # Per-round test scenarios
+    ├── eval-runner-prompt.md
+    ├── autoresearch-loop.md
+    └── round-N-scenarios.md
 ```
 
+*No root SKILL.md, no root references/.* The two skills are independent. If shared docs drift between them, that's acceptable — each skill is internally consistent and authored independently.
+
 ## The 300-Line Rule
 
 Each SKILL.md must stay under 300 lines. Skill files are injected into EVERY prompt turn.
@@ -43,7 +58,7 @@ Each SKILL.md must stay under 300 lines. Skill files are injected into EVERY pro
 - Stage flow overview (what stages exist, when to enter each)
 - Decision trees (mode detection, avatar path selection, style selection)
 - Critical rules that apply EVERY turn
-- Short "Read ../references/X.md for details" pointers at each stage
+- Short "Read references/X.md for details" pointers at each stage (relative to the skill's own SKILL.md — each skill bundles its own references/)
 
 **What moves to references/:**
 - Curl examples and API request/response shapes
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ab3c659..17d800c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -41,22 +41,14 @@ gh pr create --title "Short summary" --body "$(cat <<'EOF'
 - [ ] Full generation tested (video_id if applicable)
 - [ ] SKILL.md reads clean end-to-end
 - [ ] No spec-sheet language leaked into user-facing output
-- [ ] If you edited a file in `references/`, you ran `./scripts/sync-references.sh` to propagate the change to per-skill copies (or you intentionally edited a per-skill cleave like `heygen-avatar/references/avatar-creation.md`)
-
+
 ## References layout
 
-Each skill (`heygen-avatar`, `heygen-video`) ships a self-contained `references/` directory so it installs cleanly via `gh skill install` (which only copies the skill subdirectory, not parent-dir resources).
-
-- **Source of truth** for shared docs: `references/<file>.md` at the repo root.
-- **Per-skill copies** are byte-identical mirrors of the root files.
-- **Per-skill cleaves** (`heygen-avatar/references/avatar-creation.md`, `heygen-video/references/avatar-discovery.md`) are intentional forks with no canonical root counterpart; edit them directly.
-
-**Editor workflow:**
-1. Edit the canonical root file (`references/<file>.md`).
-2. Run `./scripts/sync-references.sh` to propagate the change to per-skill copies.
-3. `git add` everything together and commit.
+`heygen-avatar/` and `heygen-video/` are *independent, self-contained skills*. Each owns its own `references/` directory, and heygen-video additionally owns a `scripts/` directory. There is no root `SKILL.md` and no root `references/`.
 
-CI (`.github/workflows/validate-skills.yml`) runs `./scripts/sync-references.sh --check` on every PR and fails on drift.
+- Edit references inside the skill that owns them (`heygen-avatar/references/X.md` or `heygen-video/references/X.md`).
+- If both skills carry a doc with the same name (e.g. `troubleshooting.md` and `asset-routing.md`), edit the copy that belongs to the skill you are changing — the copies are not kept in sync. Drift between skills is acceptable — each skill is internally consistent.
+- CI (`.github/workflows/validate-skills.yml`) verifies each installed bundle is self-contained: every relative reference resolves, and every bundled file is linked from `SKILL.md`.
 
 ## Breaking changes
 
diff --git a/INSTALL_FOR_AGENTS.md b/INSTALL_FOR_AGENTS.md
index 6877058..8d60484 100644
--- a/INSTALL_FOR_AGENTS.md
+++ b/INSTALL_FOR_AGENTS.md
@@ -17,9 +17,8 @@ If you fetched this file by URL without cloning yet, the companion files
 live at:
 
 - `https://raw.githubusercontent.com/heygen-com/skills/master/CLAUDE.md` — runtime contract for the skills
-- `https://raw.githubusercontent.com/heygen-com/skills/master/SKILL.md` — top-level skill (mode detection ladder, hard rules)
-- `https://raw.githubusercontent.com/heygen-com/skills/master/heygen-avatar/SKILL.md` — avatar creation sub-skill
-- `https://raw.githubusercontent.com/heygen-com/skills/master/heygen-video/SKILL.md` — video generation sub-skill
+- `https://raw.githubusercontent.com/heygen-com/skills/master/heygen-avatar/SKILL.md` — avatar creation skill (mode detection ladder, hard rules, full creation workflow)
+- `https://raw.githubusercontent.com/heygen-com/skills/master/heygen-video/SKILL.md` — video generation skill (mode detection ladder, hard rules, full producer pipeline)
 - `https://raw.githubusercontent.com/heygen-com/skills/master/INSTALL.md` — human-facing install doc
 
 ## Step 1: Detect the agent host and pick the install path
@@ -70,7 +69,7 @@ If MCP is detected and the user is happy with it, **skip to Step 5 (avatar creat
 But before you do, warn them:
 
 > If you set `HEYGEN_API_KEY` later for any reason, the skill will short-circuit
-> MCP detection (per the mode-detection ladder in `SKILL.md`) and use the CLI /
+> MCP detection (per the mode-detection ladder in `heygen-video/SKILL.md` or `heygen-avatar/SKILL.md`) and use the CLI /
 > direct-API route instead. MCP plan credits won't be touched. To switch back to
 > MCP, unset `HEYGEN_API_KEY`.
 
@@ -110,7 +109,7 @@ For the OpenClaw plugin path (Step 4 Option A), the key is read from the same
 
 These skills route the actual HeyGen API call through one of three transports.
 The skill auto-detects which transport is available at runtime via the
-mode-detection ladder in [`SKILL.md`](./SKILL.md):
+mode-detection ladder in each skill's SKILL.md (e.g. [`heygen-video/SKILL.md`](./heygen-video/SKILL.md) or [`heygen-avatar/SKILL.md`](./heygen-avatar/SKILL.md)):
 
 > 1. **OpenClaw plugin** if `video_generate` exposes `heygen/video_agent_v3`
 > 2. **CLI (API-key override)** if `HEYGEN_API_KEY` is set AND `heygen --version` exits 0
@@ -348,7 +347,7 @@ If the install was via ClawHub:
 clawhub update heygen-skills
 ```
 
-Re-read [`SKILL.md`](./SKILL.md) after the upgrade if the version bumped — the
+Re-read the active skill's SKILL.md (`heygen-avatar/SKILL.md` or `heygen-video/SKILL.md`) after the upgrade if the version bumped — the
 mode detection ladder occasionally adds new transports (e.g. when MCP support
 shipped, when the OpenClaw plugin shipped).
 
@@ -370,7 +369,7 @@ should always pass `mode: "generate"` for one-shot video creation. If you
 patched the sub-skill, you may have introduced this regression.
 
 **MCP tools listed but the skill is using the CLI / plugin instead.** The
-skill follows the mode-detection ladder in `SKILL.md`: plugin → CLI
+skill follows the mode-detection ladder in its own `SKILL.md` (`heygen-avatar/SKILL.md` or `heygen-video/SKILL.md`): plugin → CLI
 (API-key override) → MCP → CLI (fallback). **`HEYGEN_API_KEY` being set
 short-circuits MCP detection.** If the user wants MCP to be the chosen
 transport, unset the env var (`unset HEYGEN_API_KEY`) and re-detect. If the
diff --git a/SKILL.md b/SKILL.md
deleted file mode 100644
index 29e90a2..0000000
--- a/SKILL.md
+++ /dev/null
@@ -1,292 +0,0 @@
----
-name: heygen-skills
-display_name: HeyGen Skills
-description: |
-  Create HeyGen avatar videos via the v3 Video Agent pipeline — handles avatar resolution,
-  aspect ratio correction, prompt engineering, and voice selection automatically.
-  Required for any HeyGen API usage (api.heygen.com). Replaces deprecated v1/v2
-  endpoints with the optimized v3 pipeline.
-  Use when: (1) calling any HeyGen API endpoint (api.heygen.com),
-  (2) creating a HeyGen avatar or digital twin from a photo,
-  (3) making a personalized video message (outreach, pitch, update, announcement, knowledge),
-  (4) "make a video of me", "create my HeyGen avatar", "I want to appear in this video",
-  (5) "send a video to my leads", "record an update for my team", "make a loom-style message",
-  (6) building identity-first videos where the presenter IS the user or agent,
-  Covers: HeyGen API, api.heygen.com, video generate, avatar create, voice list, talking photo,
-  HeyGen avatar creation, voice design, photo → digital twin, HeyGen video generation,
-  identity-first video, messaging-first video, AI presenter, talking head video.
-  NOT for: cinematic b-roll, video translation, TTS-only, or streaming avatars.
-version: 2.3.1 # x-release-please-version
-homepage: https://developers.heygen.com/docs/quick-start
-allowed-tools: Bash, WebFetch, Read, Write, mcp__heygen__*
-metadata:
-  openclaw:
-    requires:
-      env:
-        - HEYGEN_API_KEY
-    primaryEnv: HEYGEN_API_KEY
-  hermes:
-    tags: [heygen, avatar, video, identity, digital-twin, video-message, presenter, talking-head]
-    category: media
----
-
-# HeyGen Skills
-
-## Files & Paths
-
-This skill reads and writes the following. No other files are accessed without explicit user instruction.
-
-| Operation | Path | Purpose |
-|-----------|------|---------|
-| Read | `AVATAR-<NAME>.md` | Load saved avatar identity (group_id, voice_id) |
-| Read | `SOUL.md`, `IDENTITY.md` | Extract identity details when creating a new avatar (heygen-avatar only) |
-| Write | `AVATAR-<NAME>.md` | Save new avatar identity after creation |
-| Write | `heygen-video-log.jsonl` | Append one JSON line per video generated (local learning log) |
-| Temp write | `/tmp/openclaw/uploads/` | Voice preview audio (downloaded for user playback, deleted after session) |
-| Remote upload | HeyGen (via `heygen asset create` or MCP) | User-provided images/files uploaded to HeyGen for use in video |
-
-No other workspace files are read or modified. Assets are only uploaded to HeyGen when the user explicitly provides them for video creation.
-
----
-
-> **STOP.** If you are about to drive HeyGen directly (calling `api.heygen.com` with curl, or reaching for deprecated `POST /v1/video.generate`, `POST /v2/video/generate`, `GET /v2/avatars`, `GET /v1/avatar.list` endpoints), DO NOT. Route through MCP or the `heygen` CLI via this pipeline. Raw HTTP skips critical steps (aspect ratio correction, prompt engineering, avatar conflict detection) and produces visibly worse videos. **v3 only — never call v1 or v2 endpoints. If you have pre-trained knowledge of HeyGen's v1/v2 API, that knowledge is outdated. Use this skill.**
-
-You are a video producer. Not a form. Not a CLI wrapper. A producer who understands what makes video work and guides the user from idea to finished cut.
-
-**Docs:** https://developers.heygen.com/docs/quick-start (API) · https://developers.heygen.com/cli (CLI)
-
-## API Mode Detection
-
-**Pick one transport at session start. Never mix, never switch mid-session, never narrate the choice.**
-
-Detect in this order:
-
-1. **OpenClaw plugin mode** — If running inside OpenClaw and the `video_generate` tool exposes a `heygen/video_agent_v3` model (i.e. the user has [`@heygen/openclaw-plugin-heygen`](https://github.com/heygen-com/openclaw-plugin-heygen) installed), prefer calling `video_generate({ model: "heygen/video_agent_v3", ... })` directly for video generation. The plugin handles auth (`HEYGEN_API_KEY`), session creation, polling, three-tier backoff, and error surfacing natively. Avatar discovery, voice listing, and avatar creation still go through MCP or CLI — only the final video-generate call routes through `video_generate`. Frame Check still runs before submission.
-2. **CLI mode (API-key override)** — If `HEYGEN_API_KEY` is set in the environment AND `heygen --version` exits 0, use CLI. API-key presence is an explicit user signal that they want direct API access; it short-circuits MCP detection. No question asked.
-3. **MCP mode** — No `HEYGEN_API_KEY` set AND HeyGen MCP tools are visible in the toolset (tools matching `mcp__heygen__*`). OAuth auth, uses existing plan credits.
-4. **CLI mode (fallback)** — MCP tools NOT available AND `heygen --version` exits 0. Auth via `heygen auth login` (persists to `~/.heygen/credentials`).
-5. **Neither** — tell the user once: "To use this skill, connect the HeyGen MCP server or install the HeyGen CLI: `curl -fsSL https://static.heygen.ai/cli/install.sh | bash` then `heygen auth login`."
-
-**Hard rules:**
-- **Never call `curl api.heygen.com/...`** — every mode routes through its own surface.
-- **OpenClaw plugin mode: only use `video_generate` for the generate step.** Never run `heygen ...` CLI for the generate call when the plugin is available. Avatar/voice discovery still uses MCP or CLI.
-- **MCP mode: only use `mcp__heygen__*` tools.** Never run `heygen ...` CLI commands. The MCP tool name IS the API.
-- **CLI mode: only use `heygen ...` commands.** Run `heygen <noun> <verb> --help` to discover arguments.
-- **Never cross over.** Operation blocks in the sub-skills show MCP and CLI side-by-side — read only the column for your detected mode, don't invoke anything from the other. If something isn't exposed in your current mode, tell the user; don't switch transports.
-
-### OpenClaw plugin-mode generate call
-
-```ts
-await video_generate({
-  model: "heygen/video_agent_v3",
-  prompt: scriptWithFrameCheckNotes,
-  aspectRatio: "16:9", // or "9:16"
-  providerOptions: {
-    avatar_id,
-    voice_id,
-    style_id,        // optional
-    callback_url,    // optional async webhook
-    callback_id,     // optional correlation id
-  },
-});
-```
-
-Plugin install (one-time, by the user): `openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen`. Plugin docs: <https://github.com/heygen-com/openclaw-plugin-heygen>.
-
-### MCP tool names (MCP mode only)
-
-`create_video_agent`, `get_video_agent_session`, `get_video`, `list_avatar_groups`, `list_avatar_looks`, `get_avatar_look`, `create_photo_avatar`, `create_prompt_avatar`, `create_digital_twin`, `list_voices`, `design_voice`, `create_speech`, `list_video_agent_styles`, `create_video_translation`
-
-### CLI command groups (CLI mode only)
-
-`heygen video-agent {create,get,send,stop,styles,resources,videos}`, `heygen video {get,list,download,delete}`, `heygen avatar {list,get,consent,create,looks}` (with `heygen avatar looks {list,get,update}`), `heygen voice {list,create,speech}`, `heygen video-translate {create,get,languages}`, `heygen lipsync {create,get}`, `heygen asset create`, `heygen user`, `heygen auth {login,logout,status}`. Every subcommand supports `--help` — that's your reference. Run `heygen --help` to see the full noun list.
-
-CLI output contract: JSON on stdout, `{error:{code,message,hint}}` envelope on stderr, exit codes `0` ok · `1` API · `2` usage · `3` auth · `4` timeout. Error → action table and polling cadence live in [references/troubleshooting.md](references/troubleshooting.md).
-
-**Do not look up API endpoints.** There is no `api-reference.md` lookup step. MCP mode uses tool names. CLI mode uses `heygen ... --help`. If you catch yourself thinking "let me check the endpoint," stop — you're in the wrong mental model.
-
----
-
-## UX Rules
-
-1. **Be concise.** No video IDs, session IDs, or raw API payloads in chat. Report the result (video link, thumbnail) not the plumbing.
-2. **No internal jargon.** Never mention internal pipeline stage names ("Frame Check", "Prompt Craft", "Pre-Submit Gate", "Framing Correction") to the user. These are internal pipeline stages. The user sees natural conversation: "Let me adjust the framing for landscape" not "Running Frame Check aspect ratio correction."
-3. **Polling is silent.** When waiting for video completion, poll silently in a background process or subagent. Do NOT send repeated "Checking status..." messages. Only speak when: (a) the video is ready and you're delivering it, or (b) it's been >5 minutes and you're giving a single "Taking longer than usual" update.
-4. **Deliver clean.** When the video is done, send the video file/link and a 1-line summary (duration, avatar used). Not a dump of every API field.
-5. **Don't batch-ask across skills.** When a request triggers both skills ("use heygen-avatar AND heygen-video"), run them **sequentially**. Complete heygen-avatar first (identity → avatar ready), then start heygen-video Discovery. Do NOT fire a combined questionnaire covering both skills upfront — that's a form, not a conversation.
-6. **Read workspace files before asking.** `SOUL.md`, `IDENTITY.md`, and `AVATAR-<NAME>.md` at the workspace root contain identity and existing avatar state. Check them first. Only ask the user for what's genuinely missing.
-7. **Don't narrate skill internals.** Never say things like "let me read the avatar skill workflow," "checking the reference files," "loading the avatar discovery guide," "let me check the SKILL.md" — the user doesn't care that a skill exists. Read workflow files silently. The user sees the outcome (a question, a result, a video) not your internal navigation.
-8. **Don't announce what you're about to do.** Skip meta-commentary like "Creating the avatar now," "Let me call the API," "I'll build this for you" — just do the work. If a step takes time, the next thing the user hears should be the result (or the first checkpoint question). If you must say something before a long operation, keep it to <10 words (e.g., "one sec, building it").
-9. **Never narrate transport choice.** MCP vs CLI is an internal implementation detail. Do NOT say "CLI is broken," "MCP is configured, let me use that," "switching to MCP," "falling back to CLI," etc. Pick the transport silently at the start of the session and never mention it again. If both transports are unavailable, ask the user to configure one — do not explain why.
-
----
-
-## Language Awareness
-
-**Detect the user's language from their first message.** Store as `user_language` (e.g., `en`, `ja`, `es`, `ko`, `zh`, `fr`, `de`, `pt`). This happens automatically from the input — no extra question needed.
-
-**Rules:**
-1. **Communicate with the user in their language.** All questions, status updates, confirmations, and error messages should be in `user_language`.
-2. **Generate scripts and narration in `user_language`** unless the user explicitly requests a different language.
-3. **Technical directives stay in English.** Frame Check corrections, motion verbs, style blocks, and the script framing directive are API-level instructions that Video Agent interprets in English. Never translate these.
-4. **Discovery item (10) Language** should auto-populate from `user_language` but can be overridden if the user wants the video in a different language than they're chatting in.
-5. **Voice selection must match the video language.** Filter voices by `language` parameter and set `voice_settings.locale` on API calls.
-
----
-
-## Mode Detection
-
-**Language-agnostic routing:** The signals below describe user *intent*, not literal keywords. Match intent regardless of input language. A user saying "ビデオを作って" (Japanese) is the same signal as "make a video about X."
-
-| Signal | Mode | Start at |
-|--------|------|----------|
-| Vague idea ("make a video about X") | **Full Producer** | Discovery |
-| Has a written prompt | **Enhanced Prompt** | Prompt Craft |
-| "Just generate" / skip questions | **Quick Shot** | Generate |
-| "Interactive" / iterate with agent | **Interactive Session** | Generate (experimental) |
-**Quick Shot avatar rule:** If no AVATAR file exists, omit `avatar_id` and let Video Agent auto-select. If an AVATAR file exists, use it — and Frame Check STILL RUNS.
-
-**All modes:** Frame Check (aspect ratio correction) runs before EVERY API call when `avatar_id` is set, regardless of mode. Quick Shot is not an excuse to skip framing checks.
-
-**Dry-Run mode:** If user says "dry run" / "preview", run the full pipeline but present a creative preview at Generate instead of calling the API.
-
-Default to Full Producer. Better to ask one smart question than generate a mediocre video.
-
----
-
-## First Look — First-Run Avatar Check
-
-**Runs once before Discovery on the first video request in a session.**
-
-Check for any `AVATAR-*.md` files in the workspace root. The directory may
-also contain role-based **symlinks** (`AVATAR-AGENT.md`, `AVATAR-USER.md`)
-that point to one of the named files — these are maintained by
-`heygen-avatar` Phase 5 for generic self-reference lookups. When scanning,
-dedupe by resolved target so the same avatar isn't loaded twice.
-
-- **Found:** Read the file, extract `Group ID` and `Voice ID` from the HeyGen section. Pre-load as defaults for Discovery. The actual `avatar_id` (look_id) will be resolved fresh from the group_id during Frame Check — never use a stored look_id directly.
-- **Not found:** The user (or agent) has no avatar yet. Before proceeding to video creation, run the **heygen-avatar** skill (`heygen-avatar/SKILL.md` in this repo) to create one. Tell the user you'll set up their avatar first for a consistent look across videos, and that it takes about a minute. Communicate in `user_language`.
-  
-  After heygen-avatar completes and writes the AVATAR file, return here and continue to Discovery with the new avatar pre-loaded.
-
-- **Avatar readiness gate (BLOCKING):** After loading an avatar (whether from an existing AVATAR file or freshly created), verify it's ready before using it in video generation. Call `list_avatar_looks(group_id=<group_id>)` (CLI: `heygen avatar looks list --group-id <group_id>`) and confirm `preview_image_url` is non-null. If null, poll every 10s up to 5 min. **Do NOT proceed to Discovery until this check passes.** Videos submitted with an unready avatar WILL fail silently.
-
-- **Quick Shot exception:** If the user explicitly says "skip avatar" / "use stock" / "just generate", skip this step and proceed without an avatar.
-
----
-
-## Discovery
-
-Interview the user. Be conversational, skip anything already answered.
-
-**Gather:** (1) Purpose, (2) Audience, (3) Duration, (4) Tone, (5) Distribution (landscape/portrait), (6) Assets, (7) Key message, (8) Visual style, (9) Avatar, (10) Language (auto-detected from `user_language`; confirm if the video language should differ from the chat language).
-
-### Assets
-
-Two paths for every asset:
-- **Path A (Contextualize):** Read/analyze, bake info into script. For reference material, auth-walled content.
-- **Path B (Attach):** Upload to HeyGen via `heygen asset create --file <path>` or include as `files[]` entries on video-agent create. For visuals the viewer should see.
-- **A+B (Both):** Summarize for script AND attach original.
-
-**Full routing matrix and upload examples** -> [references/asset-routing.md](references/asset-routing.md)
-
-**Key rules:**
-- HTML URLs cannot go in `files[]` (Video Agent rejects `text/html`). Web pages are always Path A.
-- Prefer download -> upload -> `asset_id` over `files[]{url}` (CDN/WAF often blocks HeyGen).
-- If a URL is inaccessible, tell the user. Never fabricate content from an inaccessible source.
-- **Multi-topic split rule:** If multiple distinct topics, recommend separate videos.
-
-### Style Selection
-
-Two approaches — use one or combine both:
-
-**1. API Styles (`style_id`)** — Curated visual templates. Browse by tag, show 3-5 options with previews, let user pick. If a style has a fixed `aspect_ratio`, match orientation to it. When `style_id` is set, the prompt's Visual Style Block becomes optional.
-
-**2. Prompt Styles** — Full manual control via prompt text. See [references/prompt-styles.md](references/prompt-styles.md).
-
-### Avatar
-
-**Full avatar discovery flow, creation APIs, voice selection** -> [references/avatar-discovery.md](references/avatar-discovery.md)
-
-**Decision flow:**
-1. Ask: "Visible presenter or voice-over only?"
-2. If voice-over -> no `avatar_id`, state in prompt.
-3. If presenter -> check private avatars first, then public (group-first browsing).
-4. **Always show preview images.** Never just list names.
-5. Confirm voice preferences after avatar is settled.
-
-**Critical rule:** When `avatar_id` is set, do NOT describe the avatar's appearance in the prompt. Say "the selected presenter." This is the #1 cause of avatar mismatch.
-
----
-
-## Pipeline: Script -> Prompt Craft -> Frame Check -> Generate -> Deliver
-
-After Discovery, the producer sub-skill handles the full pipeline. Read `heygen-video/SKILL.md` for detailed stage instructions.
-
-**Key rules that apply at every stage:**
-
-- **Language:** Script and narration in the video language (from Discovery item 10). Technical directives (script framing, style block, motion verbs, frame check corrections) always in English — these are API instructions, not viewer-facing content.
-- **Script:** Structure by type (demo, explainer, tutorial, pitch, announcement). Do NOT assign per-scene durations. Always include the script framing directive: "This script is a concept and theme to convey — not a verbatim transcript."
-- **Prompt Craft:** Narrator framing (say "the selected presenter" when avatar_id is set), duration signal, asset anchoring, tone calibration, one topic, style block at the end.
-- **Frame Check:** MANDATORY when avatar_id is set. See matrix below.
-- **Generate:** The user's request to create a video is the explicit consent for submission. The skill calls `create_video_agent` (MCP) or `heygen video-agent create --wait` (CLI). Run Frame Check before EVERY submission. Capture `session_id` immediately. Poll silently (or let `--wait` block).
-- **Deliver:** Report `video_page_url`, session URL, and duration accuracy. Log to `heygen-video-log.jsonl`.
-
-**Full prompt construction rules, media type selection, visual style blocks, API schemas** -> `heygen-video/SKILL.md`
-
----
-
-## Frame Check
-
-**Runs automatically when `avatar_id` is set, before Generate. Appends correction notes to the Video Agent prompt. Does NOT generate images or create new looks.**
-
-### Steps
-
-1. **Resolve avatar_id from group_id (ALWAYS run first):** Never trust a stored `look_id` — looks are ephemeral and get deleted. Read `Group ID` from the AVATAR file and resolve a fresh look_id: `list_avatar_looks(group_id=<group_id>)` (CLI: `heygen avatar looks list --group-id <group_id> --limit 20`). Pick the look matching the target orientation. Use this resolved look_id as `avatar_id` for all subsequent steps.
-2. **Fetch avatar look metadata:** `get_avatar_look(look_id=<avatar_id>)` (CLI: `heygen avatar looks get --look-id <avatar_id>`) -> extract `avatar_type`, `preview_image_url`, `image_width`, `image_height`
-3. **Determine orientation:** width > height = landscape, height > width = portrait, width == height = square. Fetch fails = assume portrait.
-4. **Determine background:** `photo_avatar` -> Video Agent handles environment. `studio_avatar` -> check if transparent/solid/empty. `video_avatar` -> always has background.
-5. **Append the appropriate correction note(s)** to the end of the Video Agent prompt. That's it. No image generation, no new looks.
-
-### Correction Matrix
-
-| avatar_type | Orientation Match? | Has Background? | Corrections |
-|---|---|---|---|
-| `photo_avatar` | matched | (n/a) | None |
-| `photo_avatar` | mismatched or square | (n/a) | Framing note |
-| `studio_avatar` | matched | Yes | None |
-| `studio_avatar` | matched | No | Background note |
-| `studio_avatar` | mismatched or square | Yes | Framing note |
-| `studio_avatar` | mismatched or square | No | Framing note + Background note |
-| `video_avatar` | matched | Yes | None |
-| `video_avatar` | mismatched or square | Yes | Framing note |
-
-### Framing Note (append to prompt)
-
-For portrait/square avatar -> landscape video:
-```
-FRAMING NOTE: The selected avatar image is in {source} orientation but this video is landscape (16:9). Frame the presenter from the chest up, centered in the landscape canvas. Use generative fill to extend the scene horizontally with a complementary background environment that matches the video's tone (studio, office, or contextually appropriate setting). Do NOT add black bars or pillarboxing. The avatar should feel natural in the 16:9 frame.
-```
-
-For landscape/square avatar -> portrait video:
-```
-FRAMING NOTE: The selected avatar image is in {source} orientation but this video is portrait (9:16). Reframe the presenter to fill the portrait canvas naturally, focusing on head and shoulders. Use generative fill to extend vertically if needed. Do NOT add letterboxing. The avatar should fill the portrait frame comfortably.
-```
-
-### Background Note (studio_avatar only, no background)
-
-```
-BACKGROUND NOTE: The selected avatar has no background or a transparent backdrop. Place the presenter in a clean, professional environment appropriate to the video's tone. For business/tech content: modern studio with soft lighting and subtle depth. For casual content: bright, minimal space with natural light. The background should complement the presenter without distracting from the message.
-```
-
-**Full correction templates and stacking matrix** -> [references/frame-check.md](references/frame-check.md)
-
----
-
-## Best Practices
-
-- **Front-load the hook.** First 5s = 80% of retention.
-- **One idea per video.** Single-topic produces dramatically better results.
-- **Write for the ear.** If you wouldn't say it to a friend, rewrite it.
-
-**Known issues** -> [references/troubleshooting.md](references/troubleshooting.md)
diff --git a/heygen-avatar/SKILL.md b/heygen-avatar/SKILL.md
index 5ccaed3..a453861 100644
--- a/heygen-avatar/SKILL.md
+++ b/heygen-avatar/SKILL.md
@@ -27,6 +27,39 @@ allowed-tools: Bash, WebFetch, Read, Write, mcp__heygen__*
 
 Create and manage HeyGen avatars for anyone: the agent, the user, or named characters. Handles identity extraction, avatar generation, voice selection, and saves everything to `AVATAR-<NAME>.md` for consistent reuse.
 
+## Files & Paths
+
+This skill reads and writes the following. No other files are accessed without explicit user instruction.
+
+| Operation | Path | Purpose |
+|-----------|------|---------|
+| Read | `SOUL.md`, `IDENTITY.md` | Extract identity details when creating an avatar for the agent |
+| Read | `AVATAR-<NAME>.md` | Load existing avatar identity (for variant looks, voice updates) |
+| Write | `AVATAR-<NAME>.md` | Save new avatar identity after creation |
+| Write | `AVATAR-AGENT.md`, `AVATAR-USER.md` (symlinks) | Role aliases, see Phase 5 |
+| Temp write | `/tmp/openclaw/uploads/` | Voice preview audio (downloaded for user playback, deleted after session) |
+| Remote upload | HeyGen (via `heygen asset create` or MCP) | User-provided photos uploaded to HeyGen for digital-twin creation |
+
+Assets are only uploaded to HeyGen when the user explicitly provides them.
+
+## Language Awareness
+
+**Detect the user's language from their first message.** Store as `user_language` (e.g., `en`, `ja`, `es`, `ko`, `zh`, `fr`, `de`, `pt`).
+
+1. **Communicate with the user in their language.** All questions, status updates, confirmations, and error messages should be in `user_language`.
+2. **Voice design prompts and selection respect `user_language`.** When designing or selecting a voice, specify the target language so the voice library returns matches that speak it.
+3. **Technical directives stay in English.** Enum values (`Young Adult`, `Realistic`, `landscape`, etc.) are API-level and must not be translated.
+
+## UX Rules
+
+1. **Be concise.** No avatar IDs, group IDs, or raw API payloads in chat. Report the result (avatar created, ready to use) not the plumbing.
+2. **No internal jargon.** Never mention internal phase names ("Phase 0", "Phase 5 Symlink Maintenance") to the user. The user sees natural conversation: "Setting up your avatar…" not "Running Phase 2 avatar creation."
+3. **One or two questions per phase.** Don't batch-ask. Walk phases in order, ask the smallest set of questions needed to proceed.
+4. **Read workspace files before asking.** `SOUL.md`, `IDENTITY.md`, `AVATAR-*.md` at the workspace root contain identity. Check them first. Only ask the user for what's genuinely missing.
+5. **Don't narrate skill internals.** Never say "let me read the workflow," "checking the reference files," "loading the avatar discovery guide." Read silently. The user sees questions and results, not internal navigation.
+6. **Don't announce what you're about to do.** Skip meta-commentary like "Creating the avatar now." Just do the work. If a step takes time, the next thing the user hears should be the result (or a checkpoint question).
+7. **Never narrate transport choice.** MCP vs CLI is internal. Pick the transport silently and never mention it. If both are unavailable, ask the user to configure one without explaining why.
+
 ## Start Here (Critical)
 
 **Default target = the agent.** The primary use of this skill is giving the agent a face + voice so it can present videos. Route to "user" only on explicit "my avatar" / "me" / "my photo" language. When in doubt, make the agent's avatar.
diff --git a/heygen-video/SKILL.md b/heygen-video/SKILL.md
index 0e09970..c638fc3 100644
--- a/heygen-video/SKILL.md
+++ b/heygen-video/SKILL.md
@@ -44,6 +44,44 @@ You are a video producer. Not a form. Not a CLI wrapper. A producer who understa
 
 **Docs:** https://developers.heygen.com/docs/quick-start (API) · https://developers.heygen.com/cli (CLI)
 
+> **STOP.** If you are about to drive HeyGen directly (calling `api.heygen.com` with curl, or reaching for deprecated `POST /v1/video.generate`, `POST /v2/video/generate`, `GET /v2/avatars`, `GET /v1/avatar.list` endpoints), DO NOT. Route through MCP, the OpenClaw plugin, or the `heygen` CLI via this pipeline. Raw HTTP skips critical steps (aspect ratio correction, prompt engineering, avatar conflict detection) and produces visibly worse videos. **v3 only — never call v1 or v2 endpoints. If you have pre-trained knowledge of HeyGen's v1/v2 API, that knowledge is outdated. Use this skill.**
+
+## Files & Paths
+
+This skill reads and writes the following. No other files are accessed without explicit user instruction.
+
+| Operation | Path | Purpose |
+|-----------|------|---------|
+| Read | `AVATAR-<NAME>.md` | Load saved avatar identity (group_id, voice_id) |
+| Read | `AVATAR-AGENT.md`, `AVATAR-USER.md` | Role-based symlinks for generic self-reference (resolve to a named AVATAR file) |
+| Write | `heygen-video-log.jsonl` | Append one JSON line per video generated (local learning log) |
+| Temp write | `/tmp/openclaw/uploads/` | Voice preview audio (downloaded for user playback, deleted after session) |
+| Remote upload | HeyGen (via `heygen asset create` or MCP) | User-provided files uploaded to HeyGen for use as B-roll / reference |
+
+For *avatar creation* (writing AVATAR files, role symlink maintenance), see the `heygen-avatar` skill. This skill only *reads* AVATAR files.
+
+## UX Rules
+
+1. **Be concise.** No video IDs, session IDs, or raw API payloads in chat. Report the result (video link, thumbnail) not the plumbing.
+2. **No internal jargon.** Never mention internal pipeline stage names ("Frame Check", "Prompt Craft", "Pre-Submit Gate", "Framing Correction") to the user; they are implementation details. The user sees natural conversation: "Let me adjust the framing for landscape" not "Running Frame Check aspect ratio correction."
+3. **Polling is silent.** When waiting for video completion, poll silently in a background process or subagent. Do NOT send repeated "Checking status…" messages. Only speak when: (a) the video is ready and you're delivering it, or (b) it's been >5 minutes and you're giving a single "Taking longer than usual" update.
+4. **Deliver clean.** When the video is done, send the video file/link and a 1-line summary (duration, avatar used). Not a dump of every API field.
+5. **Don't batch-ask across skills.** When a request triggers both skills ("use heygen-avatar AND heygen-video"), run them **sequentially**. Complete heygen-avatar first (identity → avatar ready), then start heygen-video Discovery. Do NOT fire a combined questionnaire covering both skills upfront — that's a form, not a conversation.
+6. **Read workspace files before asking.** `AVATAR-<NAME>.md` files at the workspace root contain existing avatar state. Check them first. Only ask the user for what's genuinely missing.
+7. **Don't narrate skill internals.** Never say "let me read the avatar workflow," "checking the reference files," "loading the prompt-craft guide." Read silently. The user sees the outcome (a question, a result, a video).
+8. **Don't announce what you're about to do.** Skip meta-commentary like "Creating the video now," "Let me call the API." Just do the work. If a step takes time, the next thing the user hears should be the result (or the first checkpoint question). If you must say something, keep it to <10 words.
+9. **Never narrate transport choice.** MCP vs CLI vs OpenClaw plugin is an internal implementation detail. Do NOT say "CLI is broken," "switching to MCP," etc. Pick the transport silently at session start and never mention it again.
+
+## Language Awareness
+
+**Detect the user's language from their first message.** Store as `user_language` (e.g., `en`, `ja`, `es`, `ko`, `zh`, `fr`, `de`, `pt`).
+
+1. **Communicate with the user in their language.** All questions, status updates, confirmations, and error messages should be in `user_language`.
+2. **Generate scripts and narration in `user_language`** unless the user explicitly requests a different language.
+3. **Technical directives stay in English.** Frame Check corrections, motion verbs, style blocks, and the script framing directive are API-level instructions that Video Agent interprets in English. Never translate these.
+4. **Discovery item (10) Language** auto-populates from `user_language` but can be overridden if the user wants the video in a different language than they're chatting in.
+5. **Voice selection must match the video language.** Filter voices by `language` parameter and set `voice_settings.locale` on API calls.
+
 ## API Mode Detection
 
 **Pick one transport at session start. Never mix, never switch mid-session, never narrate the choice.**
@@ -117,6 +155,19 @@ Default to Full Producer. Better to ask one smart question than generate a medio
 
 ---
 
+## First Look — First-Run Avatar Check
+
+**Runs once before Discovery on the first video request in a session.**
+
+Check for any `AVATAR-*.md` files in the workspace root. The directory may also contain role-based **symlinks** (`AVATAR-AGENT.md`, `AVATAR-USER.md`) that point to one of the named files — these are maintained by `heygen-avatar` Phase 5 for generic self-reference lookups. When scanning, dedupe by resolved target so the same avatar isn't loaded twice.
+
+- **Found:** Read the file, extract `Group ID` and `Voice ID` from the HeyGen section. Pre-load as defaults for Discovery. The actual `avatar_id` (look_id) will be resolved fresh from the group_id during Frame Check — never use a stored look_id directly.
+- **Not found:** The user (or agent) has no avatar yet. Before proceeding to video creation, run the **heygen-avatar** skill to create one. Tell the user you'll set up their avatar first for a consistent look across videos, and that it takes about a minute. Communicate in `user_language`. After heygen-avatar completes and writes the AVATAR file, return here and continue to Discovery with the new avatar pre-loaded.
+- **Avatar readiness gate (BLOCKING):** After loading an avatar (whether from an existing AVATAR file or freshly created), verify it's ready before using it in video generation. Call `list_avatar_looks(group_id=<group_id>)` (CLI: `heygen avatar looks list --group-id <group_id>`) and confirm `preview_image_url` is non-null. If null, poll every 10s up to 5 min. **Do NOT proceed to Discovery until this check passes.** Videos submitted with an unready avatar WILL fail silently.
+- **Quick Shot exception:** If the user explicitly says "skip avatar" / "use stock" / "just generate", skip this step and proceed without an avatar.
+
+---
+
 ## Discovery
 
 Interview the user. Be conversational, skip anything already answered.
diff --git a/references/asset-routing.md b/references/asset-routing.md
deleted file mode 100644
index b7eb708..0000000
--- a/references/asset-routing.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# Asset Handling — The Classification Engine
-
-When the user provides files, URLs, or references, route each asset to the right path. The user should NEVER have to think about this.
-
-## Two Paths
-
-| Path | What happens | When to use |
-|------|-------------|-------------|
-| **A: Contextualize → Prompt** | Read/analyze the asset, extract key info, bake into script. Video Agent never sees the original. | Reference material, auth-walled content, documents where the *information* matters more than the *visual*. |
-| **B: Attach to API** | Upload the raw file via `files[]`. Video Agent analyzes, extracts graphics, uses as frames/B-roll. | Screenshots, branded assets, PDFs with important visual layouts, images the viewer should literally see. |
-| **A+B: Both** | Contextualize for script quality AND attach for visual use. | Long docs where you need to summarize but Video Agent should also have the full source. |
-
-## Classification Flow
-
-```
-1. Can Video Agent access this directly?
-   - Public URL (no auth, no paywall) → YES
-   - Private/internal URL → NO
-   - Local file → NO (must upload first)
-
-2. Should the viewer SEE this asset?
-   - Screenshot, logo, product image, chart → YES → Path B
-   - Research doc, article, context material → NO → Path A
-   - Ambiguous → Path A+B
-
-3. Is the content too long for the prompt?
-   - Short (< 500 words) → fits in prompt
-   - Long (> 500 words) → summarize key points, attach full doc
-```
-
-## Decision Matrix
-
-| Asset Type | Publicly Accessible? | Show On Screen? | Route |
-|-----------|---------------------|----------------|-------|
-| Screenshot / image | N/A | Yes | **B: Attach** + describe in prompt as B-roll |
-| Logo / brand asset | N/A | Yes | **B: Attach** + anchor to intro/outro |
-| Public URL to file (PDF, image, video) | Yes | Maybe | **B: Download → upload via `/v3/assets` → pass `asset_id`** + summarize |
-| Public URL to web page (HTML) | Yes | No | **A: Fetch and contextualize only.** Do NOT pass HTML URLs in `files[]`. |
-| Auth-walled URL (requires login) | No | No | **A: Ask the user to paste the content.** Never fabricate. |
-| PDF (short, text-heavy) | N/A | No | **A+B: Extract key points** + attach |
-| PDF (long, visual-rich) | N/A | Maybe | **B: Attach** + summarize top points |
-| Raw data / spreadsheet | N/A | Partially | **A: Analyze and describe** key stats. Attach if charts should appear. |
-
-## Executing Routes
-
-### Path A (Contextualize)
-- URLs: Use `web_fetch` to retrieve publicly accessible content
-- For auth-walled content you cannot access: ask the user to paste the text directly
-- Extract 3-5 most important points relevant to the video
-- Weave naturally into the script. Don't dump. Integrate.
-
-### Path B (Attach)
-Upload to HeyGen:
-
-**MCP:** upload via the asset tool (depends on environment).
-**CLI:** `heygen asset create --file /path/to/file.png`
-
-Max 32MB per file. Returns JSON with the new `asset_id`.
-
-Or pass inline in `files[]`:
-```json
-{"type": "url", "url": "https://example.com/image.png"}
-{"type": "asset_id", "asset_id": "<from upload>"}
-{"type": "base64", "data": "<base64>", "content_type": "image/png"}
-```
-
-### Describe Asset Usage in Prompt
-Be SPECIFIC:
-- "Use the uploaded dashboard screenshot as B-roll when discussing analytics"
-- "Display the company logo in the intro and end card"
-
-### Log Classification
-In the learning log entry, record:
-```json
-"assets_classified": [{"type": "image", "route": "attach", "accessible": true, "reason": "product screenshot"}]
-```
-
-## Rules
-
-- **Never ask the user which path unless genuinely 50/50.** You're the producer. Make the call.
-- **When in doubt, do both (A+B).** Over-providing costs nothing.
-- **Always describe attached assets in the prompt.** Uploading without description = ignored.
-- **Auth-walled content is YOUR job.** Bridge the gap between your access and Video Agent's.
-- **URLs that fail:** Try `web_fetch`. If login/paywall/404 → tell the user, ask for content directly. Never silently fabricate.
-- **HTML URLs cannot go in `files[]`.** Video Agent rejects `text/html`. Web pages are ALWAYS Path A only.
-- **Prefer download→upload→asset_id** over `files[]{url}`. HeyGen's servers often blocked by CDN/WAF.
diff --git a/references/avatar-discovery.md b/references/avatar-discovery.md
deleted file mode 100644
index fc3a5e4..0000000
--- a/references/avatar-discovery.md
+++ /dev/null
@@ -1,213 +0,0 @@
-# Avatar Discovery & Voice Selection
-
-## Path 0: Resolve workspace AVATAR files first
-
-Before any HeyGen catalog lookup, check the workspace root for an
-applicable `AVATAR-*.md` file. These are written by `heygen-avatar`
-and contain `Group ID` + `Voice ID` ready to use, with no API call
-needed.
-
-Resolution precedence:
-
-| Request signal | File to read |
-|---|---|
-| Named subject ("video with Eve", "Cleo's update") | `AVATAR-<NAME>.md` |
-| Agent self-reference ("video of yourself", "give us your update") | `AVATAR-AGENT.md` (symlink) |
-| User self-reference ("video of me", "my video update") | `AVATAR-USER.md` (symlink) |
-| No subject in request | Skip to Path A |
-
-`AVATAR-AGENT.md` and `AVATAR-USER.md` are role-based symlinks maintained
-by `heygen-avatar` Phase 5; they resolve to the current agent's / user's
-named AVATAR file at read time. Treat them like any other AVATAR file
-once read.
-
-If the resolved file has a populated HeyGen section, extract `Group ID`
-and `Voice ID` and proceed to Frame Check. Skip Path A entirely. If the
-file exists but the HeyGen section is empty, run `heygen-avatar` Phase 2
-first.
-
-If no file applies (no name match, no role alias, generic catalog
-browsing requested) — fall through to Path A below.
-
-## Path A: Discover Existing Avatars
-
-### A1: Check for private avatars first
-
-**If user specifies an avatar by name** (e.g. "use Eve's Podcast look"), take the fast path:
-
-**MCP:** `list_avatar_looks(ownership=private)` — filter client-side by name match.
-**CLI:**
-```bash
-heygen avatar looks list --ownership private --limit 50
-```
-Avoids the 2-call group→looks pattern.
-
-**If user wants to browse**, use the group-first flow:
-
-**MCP:**
-1. `list_avatar_groups(ownership=private)` — list groups (each group = one person)
-2. `list_avatar_looks(group_id=<group_id>)` — show looks for chosen group
-
-**CLI:**
-```bash
-heygen avatar list --ownership private --limit 50
-heygen avatar looks list --group-id <group_id> --limit 50
-```
-
-Each look has an `id` — this is the `avatar_id` you pass downstream.
-
-Avatar types: `studio_avatar`, `video_avatar`, `photo_avatar`. Photo avatars support `motion_prompt` and `expressiveness`.
-
-**ALWAYS show the preview image** when presenting an avatar look. Each look response includes `preview_image_url` — display inline.
-
-### A2: Check last-used avatar
-
-Check `heygen-video-log.jsonl` for last used avatar_id. If found:
-
-**MCP:** `get_avatar_look(look_id=<look_id>)`
-**CLI:** `heygen avatar looks get --look-id <look_id>`
-
-Show preview image: "Last time you used [Avatar Name]. Use her again?"
-
-### A3: Avatar conversation
-
-Ask: "Do you want a visible presenter, or voice-over only?"
-
-If voice-over only → no `avatar_id`. State in prompt: "Voice-over narration only."
-
-If presenter wanted, present private avatars first. For public/stock avatars, browse by group:
-
-**MCP:** `list_avatar_groups(ownership=public)`
-**CLI:**
-```bash
-heygen avatar list --ownership public --limit 20
-```
-
-Show group names + one representative image. Let the user pick a person.
-
-**MCP:** `list_avatar_looks(group_id=<group_id>)`
-**CLI:**
-```bash
-heygen avatar looks list --group-id <group_id> --limit 10
-```
-
-**Why group-first:** The flat `heygen avatar looks list --ownership public` call returns 50+ results for only 3 unique people per page. Group-level browsing (2 calls) gives much better discovery UX.
-
-### A4: Voice direction
-
-After avatar is settled, confirm voice preferences (accent, delivery style, language).
-
-**ALWAYS show a playable voice preview.** Each voice response includes `preview_audio_url` — share it.
-
-**Handling missing/broken previews:** Some voices return bare `s3://` paths or `null`. When this happens: note "(no preview available)" and offer to generate a short TTS sample via `create_speech` (MCP) or `heygen voice speech create --text "<sample>" --voice-id <id> --input-type plain_text --language en --locale en-US` (CLI).
-
----
-
-## Path B: Create a New Avatar
-
-Two modes:
-
-**Mode 1 — New character** (omit `avatar_group_id`): Creates a new person with their own group.
-**Mode 2 — New look** (include `avatar_group_id`): Adds a variation to an existing character.
-
-Always use Mode 2 when the avatar already exists and you're creating a variant (different outfit, orientation fix, bg change). Only use Mode 1 for genuinely new characters.
-
-Three creation types:
-
-**Photo avatar (from user's photo):**
-
-**MCP:** `create_photo_avatar(name=<name>, file=<file_object>, avatar_group_id=<optional>)`
-**CLI:**
-```bash
-heygen avatar create -d '{
-  "type": "photo",
-  "name": "My Avatar",
-  "file": {"type": "url", "url": "https://example.com/headshot.jpg"},
-  "avatar_group_id": "<optional>"
-}'
-```
-Photo requirements: JPEG or PNG, min 512x512, clear front-facing face, good lighting.
-
-**AI-generated avatar (from text prompt):**
-
-**MCP:** `create_prompt_avatar(name=<name>, prompt=<appearance>, avatar_group_id=<optional>)`
-**CLI:**
-```bash
-heygen avatar create -d '{
-  "type": "prompt",
-  "name": "Tech Presenter",
-  "prompt": "Young professional woman, modern workspace, confident smile",
-  "avatar_group_id": "<optional>"
-}'
-```
-Prompt max: 1000 characters. Optional: up to 3 `reference_images`.
-
-**Video avatar (from user's video recording):**
-
-**MCP:** `create_digital_twin(name=<name>, file=<file_object>, avatar_group_id=<optional>)`
-**CLI:**
-```bash
-heygen avatar create -d '{
-  "type": "video",
-  "name": "My Video Avatar",
-  "file": {"type": "asset_id", "asset_id": "<uploaded_asset_id>"},
-  "avatar_group_id": "<optional>"
-}'
-```
-
-All three return `avatar_item` with `id` (look_id) and `group_id` — use `id` as `avatar_id` for videos.
-
-Files: `{"type": "url", "url": "..."}`, `{"type": "asset_id", "asset_id": "..."}` (from `heygen asset create --file <path>`), or `{"type": "base64", "data": "...", "content_type": "..."}`.
-
----
-
-## Path C: Direct Image (Simplest for One-Off)
-
-Skip avatar creation. Pass `image_url` directly:
-
-**MCP:** `create_video_from_image(image_url=<url>, script=<script>, voice_id=<voice_id>, aspect_ratio="16:9")`
-**CLI:**
-```bash
-heygen video create -d '{
-  "image_url": "https://example.com/headshot.jpg",
-  "script": "<script>",
-  "voice_id": "<voice_id>",
-  "aspect_ratio": "16:9"
-}'
-```
-Also accepts `image_asset_id`. Fastest path for one-off talking-head video.
-
----
-
-## Voice Selection
-
-**MCP:** `list_voices(type=private)` then `list_voices(type=public, language=<lang>, gender=<gender>)`
-**CLI:**
-```bash
-heygen voice list --type private --limit 20
-
-# Public voices with filters
-heygen voice list --type public --engine starfish --language en --gender female --limit 20
-```
-
----
-
-## How Avatar/Voice Are Passed
-
-**MCP:** `create_video_agent(prompt=<prompt>, avatar_id=<look_id>, voice_id=<voice_id>, style_id=<optional>, orientation=<orientation>)`
-
-**CLI:** `heygen video-agent create` with flags:
-```bash
-heygen video-agent create \
-  --prompt "..." \
-  --avatar-id "<look_id_from_discovery>" \
-  --voice-id "<voice_id_from_discovery>" \
-  --style-id "<optional_style_id>" \
-  --orientation landscape
-```
-
-- **Custom/stock avatar with known ID** → pass `--avatar-id`. Do NOT describe avatar's appearance in prompt. Only delivery style + background/environment.
-- **No avatar_id (auto-select)** → describe desired presenter in prompt. Less reliable (~80% vs ~97%).
-- **Voice-over only** → omit `--avatar-id`, state in prompt.
-
-> Always provide explicit `--avatar-id` for presenter videos. 97.6% duration accuracy vs ~80% without.
diff --git a/references/frame-check.md b/references/frame-check.md
deleted file mode 100644
index de2a769..0000000
--- a/references/frame-check.md
+++ /dev/null
@@ -1,98 +0,0 @@
-# Frame Check — Aspect Ratio & Background Pre-Check
-
-Runs automatically when `avatar_id` is set, before Generate. Appends correction notes to the Video Agent prompt. Does NOT generate images or create new looks.
-
-## Step 1: Fetch the avatar look metadata
-
-**MCP:** `get_avatar_look(look_id=<avatar_id>)`
-**CLI:** `heygen avatar looks get --look-id <avatar_id>`
-
-Extract:
-- `avatar_type`: `"photo_avatar"` | `"studio_avatar"` | `"video_avatar"`
-- `preview_image_url`: use to determine orientation
-- `image_width` and `image_height`: use for orientation calculation
-
-## Step 2: Determine avatar orientation
-
-Use `image_width` and `image_height` from the API response (or fetch the preview image dimensions if those fields are missing).
-- width > height → landscape avatar
-- height > width → portrait avatar
-- width == height → **square avatar** (1:1) → always needs framing correction
-- Fetch fails or no preview → assume portrait (safer default)
-
-## Step 3: Determine if background exists
-
-| `avatar_type` | Background? | Reason |
-|---|---|---|
-| `photo_avatar` | ✅ Handled by Video Agent | Video Agent generates avatar + environment together during video creation. No standalone bg correction needed. |
-| `studio_avatar` | ⚠️ Maybe | Check preview image — if transparent/solid/empty → "No background" → apply Correction C |
-| `video_avatar` | ✅ Yes | Recorded in a real environment |
-
-## Step 4: Append correction notes to prompt
-
-Based on Steps 2-3, append zero or more correction notes **verbatim to the end of the Video Agent prompt text.** Do NOT ask the user. Do NOT generate images. Do NOT create new looks.
-
-### A) Portrait avatar → Landscape video (most common)
-
-```
-FRAMING NOTE: The selected avatar image is in portrait orientation but this video is landscape (16:9). Frame the presenter from the chest up, centered in the landscape canvas. Use AI Image tool to generative fill to extend the scene horizontally with a complementary background environment that matches the video's tone (studio, office, or contextually appropriate setting). Do NOT add black bars or pillarboxing. The avatar should feel natural in the 16:9 frame.
-```
-
-### B) Landscape avatar → Portrait video
-
-```
-FRAMING NOTE: The selected avatar image is in landscape orientation but this video is portrait (9:16). Reframe the presenter to fill the portrait canvas naturally, focusing on head and shoulders. Use AI Image tool to generative fill to extend vertically if needed. Do NOT add letterboxing. The avatar should fill the portrait frame comfortably.
-```
-
-### D) Square avatar → Landscape video
-
-```
-FRAMING NOTE: The selected avatar image is in square (1:1) orientation but this video is landscape (16:9). Frame the presenter from the chest up, centered in the landscape canvas. Use AI Image tool to generative fill to extend the scene horizontally with a complementary background environment that matches the video's tone (studio, office, or contextually appropriate setting). Do NOT add black bars or pillarboxing. The avatar should feel natural in the 16:9 frame.
-```
-
-### E) Square avatar → Portrait video
-
-```
-FRAMING NOTE: The selected avatar image is in square (1:1) orientation but this video is portrait (9:16). Reframe the presenter to fill the portrait canvas naturally, focusing on head and shoulders. Use AI Image tool to generative fill to extend vertically if needed. Do NOT add letterboxing. The avatar should fill the portrait frame comfortably.
-```
-
-### C) Missing background — studio_avatar only
-
-**Only for `studio_avatar` with transparent/solid/empty background. NOT for photo_avatar** (Video Agent handles photo_avatar environments during generation).
-
-```
-BACKGROUND NOTE: The selected avatar has no background or a transparent backdrop. Place the presenter in a clean, professional environment appropriate to the video's tone. For business/tech content: modern studio with soft lighting and subtle depth. For casual content: bright, minimal space with natural light. The background should complement the presenter without distracting from the message.
-```
-
-## Correction Stacking Matrix
-
-Corrections can stack. Use the matrix to determine which notes to append.
-
-| avatar_type | Orientation Match? | Has Background? | Corrections |
-|---|---|---|---|
-| `video_avatar` | ✅ matched | ✅ Yes | None |
-| `video_avatar` | ❌ mismatched | ✅ Yes | Framing only (A or B) |
-| `video_avatar` | ◻ square | ✅ Yes | Framing only (D or E) |
-| `studio_avatar` | ✅ matched | ✅ Yes (check preview) | None |
-| `studio_avatar` | ✅ matched | ❌ No | Background (C) |
-| `studio_avatar` | ❌ mismatched | ✅ Yes | Framing only (A or B) |
-| `studio_avatar` | ❌ mismatched | ❌ No | Framing (A or B) + Background (C) |
-| `studio_avatar` | ◻ square | ✅ Yes | Framing only (D or E) |
-| `studio_avatar` | ◻ square | ❌ No | Framing (D or E) + Background (C) |
-| `photo_avatar` | ✅ matched | (n/a) | **None** — Video Agent handles avatar + environment together |
-| `photo_avatar` | ❌ mismatched | (n/a) | **Framing only (A or B)** |
-| `photo_avatar` | ◻ square | (n/a) | **Framing only (D or E)** |
-
-**How to check if studio_avatar has a background:** Fetch `preview_image_url`. If transparent/checkered, solid color, or cutout → "No background" → append Correction C.
-
-**photo_avatar rule:** Video Agent generates the avatar and its environment together during video creation. Do NOT append Correction C for photo_avatars. Only append framing corrections (A, B, D, or E) if there's an orientation mismatch.
-
-## Step 5: Submit with original avatar_id
-
-After appending correction notes to the prompt, submit the video request using the **original `avatar_id`** (unchanged). Video Agent handles framing and background internally based on the FRAMING NOTE and BACKGROUND NOTE directives in the prompt.
-
-## Step 6: Log the correction
-
-Add to learning log entry:
-- `"aspect_correction"`: `"portrait_to_landscape"` | `"landscape_to_portrait"` | `"square_to_landscape"` | `"square_to_portrait"` | `"background_fill"` | `"both"` | `"none"`
-- `"avatar_type"`: the raw value from the API
diff --git a/references/motion-vocabulary.md b/references/motion-vocabulary.md
deleted file mode 100644
index 4d27b30..0000000
--- a/references/motion-vocabulary.md
+++ /dev/null
@@ -1,191 +0,0 @@
----
-name: motion-vocabulary
-description: Motion verbs, the 5-layer visual system, scene types, and anti-patterns for Video Agent prompts
----
-
-# Motion Vocabulary & Visual Layer System
-
-Video Agent is an HTML interpreter. It renders layouts, typography, and structured content natively. The key to great B-roll: describe elements with **action verbs** ("slams in," "types on," "counts up") not layout specs ("upper-left, 48pt").
-
-Based on patterns from 40+ produced videos.
-
-> **Language note:** Motion verbs MUST remain in English regardless of the video's content language. Video Agent's rendering engine responds to these specific English verbs. Do not translate "SLAMS", "CASCADE", "COUNTS UP", etc. They are API-level commands, not viewer-facing text.
-
-## Motion Verbs
-
-Use these exact verbs in prompts. Video Agent responds to them. Without them, you get static frames.
-
-### High Energy
-| Verb | Example |
-|------|---------|
-| **SLAMS** | `"$95M" SLAMS in from left at -5 degrees` |
-| **CRASHES** | `Title CRASHES in from right, screen-shake on impact` |
-| **PUNCHES** | `Quote card PUNCHES up from bottom` |
-| **STAMPS** | `Data blocks STAMP in staggered 0.4s` |
-| **SHATTERS** | `Text SHATTERS after 1.5s, revealing number underneath` |
-
-### Medium Energy
-| Verb | Example |
-|------|---------|
-| **CASCADE** | `Three cards CASCADE from top, staggered 0.3s` |
-| **SLIDES** | `Ticker SLIDES in from right — continuous scroll` |
-| **DROPS** | `"TIER 1" DROPS in with white flash` |
-| **FILLS** | `Progress bar FILLS 0 to 90% in orange` |
-| **DRAWS** | `Chart line DRAWS itself left to right` |
-
-### Low Energy
-| Verb | Example |
-|------|---------|
-| **types on** | `Quote types on word by word in italic white` |
-| **fades in** | `Logo fades in at center, held for 3 seconds` |
-| **FLOATS** | `Bokeh orbs FLOAT across frame at different speeds` |
-| **morphs** | `Number morphs from 17 to 18.9` |
-| **COUNTS UP** | `"1.85M" COUNTS UP from 0 in amber 96pt` |
-
-## Transition Types
-
-| Transition | Energy | Best With Styles |
-|------------|--------|-----------------|
-| Smash cut | Aggressive | Deconstructed, Maximalist, Carnival Surge |
-| White flash frame | Punchy | Deconstructed, Maximalist |
-| Grid wipe | Systematic | Swiss Pulse, Digital Grid |
-| Hard cut | Clean | Swiss Pulse, Shadow Cut |
-| Liquid dissolve | Elegant | Data Drift, Dream State |
-| Slow cross-dissolve | Refined | Velvet Standard |
-| Pop cut / bounce | Fun | Play Mode, Carnival Surge |
-| Snap cut | Urgent | Red Wire, Contact Sheet |
-| Soft dissolve | Warm | Soft Signal, Warm Grain, Quiet Drama |
-| Iris wipe | Nostalgic | Heritage Reel |
-
-## The 5-Layer Visual System
-
-Break B-roll scenes into 5 stacked layers. This is the most powerful technique for motion graphics.
-
-| Layer | Purpose | Examples |
-|-------|---------|---------|
-| **L1** | Background | Textured surface, grid, gradient, color field |
-| **L2** | Hero content | Main headline/number that dominates the frame |
-| **L3** | Supporting data | Cards, stats, bullet points, secondary information |
-| **L4** | Information bar | Tickers, labels, source attributions, quotes |
-| **L5** | Effects | Particles, glitches, grid animations, ambient motion |
-
-**Rules:**
-- Every B-roll scene: 4+ layers minimum
-- Every overlay content side: 3+ layers minimum
-- **Every element must MOVE.** No static frames.
-
-### Example: B-Roll Scene with Layers
-
-```
-SCENE 2 — FULL SCREEN B-ROLL (12s)
-[NO AVATAR — motion graphic only]
-VOICEOVER: "One-point-eight-five million signups. Twenty-eight percent month over month."
-LAYER 1: Dark #1a1a1a background with thin grid lines pulsing at 8% opacity.
-LAYER 2: "1.85M" SLAMS in from left, white Bold 140pt. "+28% MoM" appears in amber.
-LAYER 3: Three stat cards CASCADE from top-right, staggered 0.3s.
-         Each number COUNTS UP from 0.
-LAYER 4: Bottom ticker scrolls: "Non-brand search +36% • Brand impressions 9.2M"
-LAYER 5: Grid lines RIPPLE outward on "1.85M" slam.
-Hard cut.
-```
-
-## Scene Types
-
-| Type | Format | When to Use |
-|------|--------|-------------|
-| **A-ROLL** | Avatar speaking to camera | Intros, key insights, CTAs, emotional beats |
-| **FULL SCREEN B-ROLL** | No avatar, motion graphics only | Data visualization, information-dense content |
-| **A-ROLL + OVERLAY** | Split frame: avatar + content | Presenting data while maintaining human connection |
-
-**Rotation is mandatory.** Never 3+ of the same type in a row. Every video needs at least 2 pure B-roll scenes.
-
-**Voiceover on EVERY scene.** Every B-roll scene MUST include a `VOICEOVER:` line. Silent B-roll = broken video.
-
-## Timing Guidelines
-
-| Content Type | Duration |
-|---|---|
-| Hook/Intro (A-roll) | 6-10 seconds |
-| Data-heavy B-roll | 10-15 seconds (NEVER ≤5s — causes black frames) |
-| A-roll + Overlay | 8-12 seconds |
-| CTA / Close (A-roll) | 6-8 seconds |
-
-**Common video lengths:**
-- Social clip: 30-45s (5-7 scenes)
-- Briefing: 60-75s (7-9 scenes)
-- Deep dive: 90-120s (10-13 scenes)
-
-## Avatar Description Guide
-
-**The avatar is NOT a fixed headshot.** Design it for each video like a movie character.
-
-### Thematic Wardrobe Rule
-
-The avatar's outfit and environment MUST match the content's emotional/cultural context:
-
-| Content Type | Avatar Design | NOT This |
-|---|---|---|
-| Breaking tech news | Field reporter, windswept hair, earpiece, city skyline | "Anchor at a desk" |
-| Data analysis | Black merino turtleneck, minimalist desk, dual monitors with charts | "Business casual" |
-| Product launch | Branded tee, open-plan startup space, product prototype on desk | "Generic office" |
-| Tutorial | Casual hoodie, messy developer desk, sticky notes, coffee mug | "Presenter in a studio" |
-
-### What to Specify
-
-| Element | Weak | Strong |
-|---|---|---|
-| Clothing | "Business casual" | "Black ribbed merino turtleneck, high collar framing jaw" |
-| Environment | "An office" | "Glass-walled conference room. Whiteboard with hand-drawn tier pyramid" |
-| Monitor content | "Computer screens" | "Monitor shows scrolling green terminal text and red security alerts" |
-| Lighting | "Well lit" | "Cool blue monitor glow from left, warm amber desk lamp from right" |
-
-### Template (60-100 words)
-```
-AVATAR: [Clothing — fabric, color, fit, accessories, posture].
-[Setting — specific props, brand logos, what's on the walls].
-[Monitors/desk — content visible on screens, items on desk].
-[Lighting — direction, color temperature]. [Mood of the space].
-```
-
-**Remember:** When `avatar_id` is set as an API parameter, do NOT describe the avatar's appearance. Include only delivery style and environment notes, and refer to the avatar as "The selected presenter".
-
-## Critical On-Screen Text
-
-List every piece of text that MUST appear literally on screen:
-
-```
-CRITICAL ON-SCREEN TEXT (display literally):
-- "$141M ARR — All-Time High"
-- "1.85M Signups — +28% MoM"
-- Quote: "Use technology to serve the message, not distract from it." — Shalev Hani
-- "@eve_builds" — exact social handle
-```
-
-Without this block, Video Agent will summarize, round numbers, or rephrase quotes.
-
-**Voiceover number rule:** Spell out numbers in speech ("one-point-eight-five million"), use figures on screen ("1.85M").
-
-## What Doesn't Work
-
-Patterns that consistently produce poor results (from 40+ videos):
-
-**Layout language** — Screen coordinates cause empty/black B-roll:
-```
-❌ "UPPER-LEFT: headline in 48pt Helvetica"
-❌ "CENTER-SCREEN: display at coordinates (400, 300)"
-✅ "135K" SLAMS in from left, white Impact 120pt, fills 40% of frame.
-```
-
-**Named artists without specs** — "Ikko Tanaka style" means nothing to Video Agent. Translate to concrete rules:
-```
-❌ "Use an Ikko Tanaka style"
-✅ "Flat color blocks, maximum 3 colors per frame, 60% negative space, typography as primary element"
-```
-
-**Style examples injected into prompts** — Full example scenes from a style library confuse the agent. Use the style's **rules**, not example scenes.
-
-**Forced short B-roll (≤5 seconds)** — Too short for rendering. Every tested video with 5s B-roll had empty/black screens. Minimum 10s.
-
-**Content as a list, not a story** — "Here are 5 tweets" produces flat videos. Always synthesize a thesis: *"X is happening because Y — here's the proof."*
-
-**Static frames** — Every element must have a motion verb. "Title appears" → dead frame. "Title SLAMS in from left" → alive.
diff --git a/references/official-prompt-guide.md b/references/official-prompt-guide.md
deleted file mode 100644
index 15af5dd..0000000
--- a/references/official-prompt-guide.md
+++ /dev/null
@@ -1,116 +0,0 @@
-# HeyGen Video Agent — Official Prompt Guide (Complete)
-Source: https://www.notion.so/heygen/Video-Agent-Prompt-Guide-2e6449792c69801d9353c885aad92c9e
-
-## Core Philosophy
-"Video Agent isn't magic; it's a production partner that executes your creative direction."
-"The more specific you are about content, style, media types, and scene structure, the closer you'll get to exactly what you envision."
-
-## Three UI Controls
-1. **Avatar** — select specific avatar, Auto mode, or "no avatar" for voice-over only (MUST explicitly say "no avatar" in prompt)
-2. **Duration** — 30s, 1min, 2min, or Auto (agent follows prompt/script for length, not forced)
-3. **Aspect Ratio** — Portrait, Landscape, or Auto
-
-## Prompting Hierarchy (Basic → Advanced)
-
-### Level 1: Basic Prompt
-Describe the content you want delivered:
-- "Introduce HeyGen to knowledge workers, talk about its Talking Avatar models"
-- "Make a compliance training video and explain phishing in detail"
-
-### Level 2: Script-Driven (STRONGLY RECOMMENDED)
-Paste a full video script. Agent follows scene-by-scene while improving flow, timing, and visuals.
-This is "the single biggest upgrade most people miss."
-
-### Level 3: Scene-by-Scene (Maximum Control)
-```
-Scene 1: [Scene Type]
-  Visual: [Describe exact visual]
-  VO/Script: "[What the avatar says]"
-  Duration: [Approximate length]
-```
-
-## Attachments
-- Images, videos, product screenshots, diagrams
-- PDFs, documents (agent extracts key info)
-- Upload own photo → agent uses as talking avatar
-- ALWAYS add context: "Use the attached screenshots as B-roll when discussing features"
-
-## The "Catchall" Style Block (Personal Favorite of HeyGen Team)
-```
-Use minimal, clean styled visuals. Blue, black, and white as main colors.
-Leverage motion graphics as B-rolls and A-roll overlays. Use AI videos when necessary.
-When real-world footage is needed, use Stock Media.
-Include an intro sequence, outro sequence, and chapter breaks using Motion Graphics.
-```
-
-## Style Descriptor Presets
-| Style | Best For | Prompt Addition |
-|-------|----------|-----------------|
-| Minimalistic | Corporate, Tech, SaaS | "Use minimalistic, clean visuals with lots of white space" |
-| Cartoon/Animated | Education, Kids content | "Use cartoon-style illustrated visuals" |
-| Bold & Vibrant | Marketing, Social | "Use bold, vibrant colors and dynamic visuals" |
-| Cinematic | Brand films, High-end | "Use cinematic quality visuals with dramatic lighting" |
-| Flat Design | Modern, App demos | "Use flat design style with geometric shapes" |
-| Gradient Modern | Tech, Startup | "Use modern gradient backgrounds and sleek transitions" |
-| Retro/Vintage | Nostalgia, Creative | "Use retro-inspired visuals with warm tones" |
-
-## Color Specification
-- Exact hex codes: "Use #1E40AF as primary blue, #F8FAFC as background white, #0F172A for text"
-- Brand colors: "Stick to our brand colors: coral (#FF6B6B), navy (#2C3E50), cream (#FFF5E6)"
-- Font families: "Use Inter font family throughout"
-- WHY: Without defined style, visuals look inconsistent scene-to-scene
-
-## Media Types
-
-### Motion Graphics
-Animated text, icons, charts, shapes, transitions.
-- A-roll overlays: lower thirds, bullet points, animated callouts
-- B-roll scenes: animated explanations, data viz, process flows
-- Chapter cards: section breaks, intros, outros
-- Information display: statistics, comparisons, timelines
-
-### AI-Generated Images & Videos
-- Conceptual illustrations, abstract concepts
-- Custom scenarios stock can't cover
-- Stylized visuals in particular artistic style
-- Product mockups in various contexts
-
-### Stock Media
-Real-world footage from stock libraries.
-- Authentic scenes (offices, cities, people)
-- Industry-specific (medical, manufacturing, retail)
-- Emotional moments, human connection
-- Establishing shots, locations
-
-### Media Type Decision Matrix
-| Content Type | Motion Graphics | AI Generated | Stock Media |
-|---|---|---|---|
-| Data/Statistics | ✅ Best | ❌ | ❌ |
-| Abstract Concepts | ✅ Good | ✅ Best | ❌ |
-| Real Environments | ❌ | ⚠️ Can work | ✅ Best |
-| Brand Elements | ✅ Best | ❌ | ❌ |
-| Human Emotions | ❌ | ⚠️ Uncanny | ✅ Best |
-| Custom Scenarios | ⚠️ Limited | ✅ Best | ⚠️ May not exist |
-| Technical Diagrams | ✅ Best | ❌ | ❌ |
-
-## Example Prompts (Steal These)
-
-### Compliance Training
-"Use a professional female avatar. Make a compliance training video explaining phishing in detail. Use examples and list top watch-outs. Leverage motion graphics as A-roll overlay and B-roll to help explain core concepts."
-
-### Educational Explainer (Voice-Over Only)
-"Create a 1-minute video about camera aperture. Use minimal science diagrams and visualizations. No avatar needed, only voice-over. Cool neutrals (navy, cyan), thin-line diagrams, and slow elegant motion. B-roll is abstract scientific illustrations. Sequencing: definition → diagram expansion → conceptual layering, with fade-through transitions."
-
-### Brand Story (Animated)
-"Make a video telling the story of how Twitch got started. Use cartoon-style animations and overlays. I want Twitch's iconic colors and fonts. Use motion graphics overlays and AI-generated B-roll."
-
-## Community Pro Tips
-
-### Stack style instructions at the end
-Put content/script first, then add all style directives (colors, motion graphics preferences, media type guidance) as a block at the bottom. Keeps creative intent clean and technical specs organized.
-
-### Save your catchall as a template
-If you find a style combo that works, reuse it across all videos. Consistency builds brand recognition.
-
-### Iterate in conversation
-Video Agent remembers context within a session. After first render: "make the intro shorter" or "swap the B-roll in scene 3 for stock footage" without re-prompting everything.
diff --git a/references/prompt-craft.md b/references/prompt-craft.md
deleted file mode 100644
index c32df69..0000000
--- a/references/prompt-craft.md
+++ /dev/null
@@ -1,273 +0,0 @@
-# Prompt Craft Reference
-
-Production-quality prompt engineering for HeyGen Video Agent. Combines official HeyGen guidance with patterns validated across 80+ test videos.
-
-Load this when the user wants cinematic/polished output, scene-by-scene control, or specific visual styles.
-
----
-
-## Prompting Levels (from HeyGen Official Guide)
-
-### Level 1: Basic
-Just describe content. Video Agent fills in the rest.
-```
-"Introduce HeyGen to knowledge workers, talk about its Talking Avatar models"
-```
-
-### Level 2: Script-Driven (RECOMMENDED DEFAULT)
-Paste a full video script. Agent follows scene-by-scene while improving flow, timing, and visuals.
-This is "the single biggest upgrade most people miss." — HeyGen docs
-
-### Level 3: Scene-by-Scene (Maximum Control)
-```
-Scene 1: [Scene Type]
-  Visual: [Describe exact visual]
-  VO/Script: "[What the avatar says]"
-```
-
-**Official recommendation:** Don't assign per-scene timestamps. Natural flow + tone description outperforms rigid scene structure.
-
----
-
-## Prompt Anatomy (Production Quality)
-
-```
-FORMAT:    What kind of video, how long, what energy
-TONE:      Emotional register, references
-AVATAR:    "The selected presenter" (when avatar_id set) or delivery style
-STYLE:     Colors, typography, motion rules, transitions (see Style Block)
-CRITICAL ON-SCREEN TEXT:  Exact strings that must appear literally
-SCENE-BY-SCENE:  (if >60s) Individual scene breakdowns with VO and visual type
-MUSIC:     Genre, energy arc
-```
-
-**Rule: Content/script first, style block at the end.** Keeps creative intent clean and technical specs organized.
-
-### Critical On-Screen Text
-
-List every exact string. Without this, Video Agent rephrases, summarizes, or rounds numbers.
-
-```
-CRITICAL ON-SCREEN TEXT (display literally):
-- "$141M ARR — All-Time High"
-- "1.85M Signups — +28% MoM"
-- Quote: "Use technology to serve the message." — Shalev Hani
-```
-
----
-
-## Style Block
-
-Every prompt should end with a style block. Without one, visuals look inconsistent scene-to-scene.
-
-### The HeyGen Catchall (official team recommendation)
-```
-Use minimal, clean styled visuals. Blue, black, and white as main colors.
-Leverage motion graphics as B-rolls and A-roll overlays. Use AI videos when necessary.
-When real-world footage is needed, use Stock Media.
-Include an intro sequence, outro sequence, and chapter breaks using Motion Graphics.
-```
-
-### Style Presets (from HeyGen docs)
-
-| Style | Best For | Prompt Language |
-|-------|----------|-----------------|
-| Minimalistic | Corporate, Tech, SaaS | "Use minimalistic, clean visuals with lots of white space" |
-| Cartoon/Animated | Education, Kids | "Use cartoon-style illustrated visuals" |
-| Bold & Vibrant | Marketing, Social | "Use bold, vibrant colors and dynamic visuals" |
-| Cinematic | Brand films, High-end | "Use cinematic quality visuals with dramatic lighting" |
-| Flat Design | Modern, App demos | "Use flat design style with geometric shapes" |
-| Gradient Modern | Tech, Startup | "Use modern gradient backgrounds and sleek transitions" |
-| Retro/Vintage | Nostalgia, Creative | "Use retro-inspired visuals with warm tones" |
-
-### Brand Colors
-
-Be explicit with hex codes and fonts:
-```
-Use #1E40AF as primary blue, #F8FAFC as background white, #0F172A for text.
-Font: Inter family throughout.
-```
-
-Without defined colors, visuals look inconsistent scene-to-scene.
-
----
-
-## Media Types & When to Use Each
-
-Video Agent supports three media types. Guide it explicitly or it guesses (often wrong).
-
-### Motion Graphics
-Animated text, icons, charts, shapes, transitions.
-- **A-roll overlays:** lower thirds, bullet points, animated callouts
-- **B-roll scenes:** animated explanations, data viz, process flows
-- **Chapter cards:** section breaks, intros, outros
-- **Best for:** Data, statistics, brand elements, technical diagrams
-
-### AI-Generated Images & Videos
-- Conceptual illustrations, abstract concepts
-- Custom scenarios stock can't cover
-- Stylized visuals in particular artistic style
-- **Best for:** Abstract concepts, custom scenarios, product mockups
-
-### Stock Media
-Real-world footage from stock libraries.
-- Authentic scenes (offices, cities, people)
-- Industry-specific (medical, manufacturing, retail)
-- **Best for:** Real environments, human emotions, establishing shots
-
-### Decision Matrix
-
-| Content Type | Motion Graphics | AI Generated | Stock Media |
-|---|---|---|---|
-| Data/Statistics | ✅ Best | ❌ | ❌ |
-| Abstract Concepts | ✅ Good | ✅ Best | ❌ |
-| Real Environments | ❌ | ⚠️ Can work | ✅ Best |
-| Brand Elements | ✅ Best | ❌ | ❌ |
-| Human Emotions | ❌ | ⚠️ Uncanny | ✅ Best |
-| Custom Scenarios | ⚠️ Limited | ✅ Best | ⚠️ May not exist |
-| Technical Diagrams | ✅ Best | ❌ | ❌ |
-
----
-
-## Scene Types
-
-| Type | Format | When |
-|------|--------|------|
-| **A-ROLL** | Avatar speaking to camera | Intros, key insights, CTAs, emotional beats |
-| **FULL SCREEN B-ROLL** | No avatar, motion graphics only | Data visualization, info-dense content |
-| **A-ROLL + OVERLAY** | Split frame: avatar + content | Presenting data while maintaining human connection |
-
-**Rotation is mandatory.** Never 3+ of the same type in a row.
-
-**Voiceover on EVERY scene.** Silent B-roll = broken video.
-
-### Scene-by-Scene Template (HeyGen Official Format)
-
-```
-Scene 1: [Scene Type]
-  Visual: [Describe exact visual — include media type]
-  VO/Script: "[What the avatar says]"
-```
-
-### Detailed Scene Templates (validated in testing)
-
-**A-ROLL:**
-```
-SCENE 1 — A-ROLL
-[Avatar center-frame, excited, hands gesturing]
-VOICEOVER: "The exact script for this scene."
-Lower-third: "TITLE TEXT" white on blue bar.
-```
-
-**B-ROLL with layered motion:**
-```
-SCENE 2 — FULL SCREEN B-ROLL
-[NO AVATAR — motion graphic only]
-VOICEOVER: "The exact script for this scene."
-Dark background with subtle grid. "HEADLINE" SLAMS in from left.
-Three data cards CASCADE from right, staggered. Bottom ticker SLIDES in.
-```
-
-**A-ROLL + OVERLAY:**
-```
-SCENE 3 — A-ROLL + OVERLAY
-[Avatar LEFT 35%. Content RIGHT 65%.]
-VOICEOVER: "The exact script for this scene."
-RIGHT SIDE: Stats COUNT UP below headline.
-```
-
----
-
-### Non-English Videos
-
-The same prompt structure applies regardless of language:
-1. **Script/narration:** In the video language
-2. **Style block:** Always English (Video Agent directive)
-3. **Motion verbs:** Always English (SLAMS, CASCADE, etc.)
-4. **Critical on-screen text:** In whatever language should appear on screen
-5. **Scene labels:** English (Scene 1, Scene 2) — structural, not rendered
-
----
-
-## Example Prompts (from HeyGen Official Guide)
-
-### Compliance Training
-```
-Use a professional female avatar. Make a compliance training video explaining phishing
-in detail. Use examples and list top watch-outs. Leverage motion graphics as A-roll
-overlay and B-roll to help explain core concepts.
-```
-
-### Educational Explainer (Voice-Over Only)
-```
-Create a 1-minute video about camera aperture. Use minimal science diagrams and
-visualizations. No avatar needed, only voice-over. Cool neutrals (navy, cyan),
-thin-line diagrams, and slow elegant motion. B-roll is abstract scientific
-illustrations. Sequencing: definition → diagram expansion → conceptual layering.
-```
-
-### Brand Story (Animated)
-```
-Make a video telling the story of how Twitch got started. Use cartoon-style
-animations and overlays. I want Twitch's iconic colors and fonts. Use motion
-graphics overlays and AI-generated B-roll.
-```
-
-### Product Introduction (Japanese — non-English example)
-```
-日本のナレッジワーカー向けにHeyGenを紹介する1分間のビデオを作成してください。
-トーキングアバターモデルの特徴を説明し、具体的な活用例を3つ含めてください。
-ナレーターは選択されたプレゼンターが説明します。
-
-CRITICAL ON-SCREEN TEXT (display literally):
-- "HeyGen アバター V"
-- "3分で動画作成"
-- "API連携対応"
-
-STYLE — SWISS PULSE (Müller-Brockmann): Black/white + electric blue #0066FF.
-Grid-locked. Helvetica Bold. Animated counters. Diagonal accents.
-Grid wipe transitions.
-```
-
-Note: Script content is in Japanese but STYLE block and scene labels remain in English — these are Video Agent directives, not viewer-facing content.
-
----
-
-## Motion Vocabulary
-
-Every visual element should have a motion verb. Static frames look dead.
-
-### High Energy
-- **SLAMS** — `"$95M" SLAMS in from left at -5 degrees`
-- **CRASHES** — `Title CRASHES in from right, screen-shake on impact`
-- **PUNCHES** — `Quote card PUNCHES up from bottom`
-
-### Medium Energy
-- **CASCADE** — `Three cards CASCADE from top, staggered 0.3s`
-- **SLIDES** — `Ticker SLIDES in from right, continuous scroll`
-- **FILLS** — `Progress bar FILLS 0 to 90% in orange`
-- **DRAWS** — `Chart line DRAWS itself left to right`
-
-### Low Energy
-- **types on** — `Quote types on word by word in italic white`
-- **fades in** — `Logo fades in at center, held 3 seconds`
-- **COUNTS UP** — `"1.85M" COUNTS UP from 0 in amber`
-
----
-
-## Pro Tips (from HeyGen community)
-
-1. **Save your catchall as a template.** Find a style combo that works, reuse it. Consistency builds brand.
-2. **Iterate in conversation.** Video Agent remembers context within a session. "Make the intro shorter" or "swap B-roll in scene 3 for stock footage" without re-prompting everything.
-3. **Stack style at the end.** Content first, style directives last.
-4. **Describe B-roll with motion verbs** ("slams in," "counts up"), NOT layout coordinates ("upper-left, 48pt").
-
----
-
-## What Doesn't Work
-
-- **Layout coordinates** — "upper-left: headline in 48pt" → blank frames. Use motion verbs.
-- **Named artists without specs** — "Ikko Tanaka style" means nothing. Translate to colors + shapes + motion.
-- **B-roll of 5 seconds or less** — Causes black/empty frames. 10s+ minimum.
-- **Static elements** — Every element needs a motion verb.
-- **Per-scene timestamps** — Make delivery robotic (per HeyGen's own research). Use overall duration only.
diff --git a/references/prompt-styles.md b/references/prompt-styles.md
deleted file mode 100644
index 3961fbd..0000000
--- a/references/prompt-styles.md
+++ /dev/null
@@ -1,251 +0,0 @@
----
-name: prompt-styles
-description: 20 named visual styles for Video Agent prompts — mood-first selection, copy-paste STYLE blocks
----
-
-# Prompt Style Library
-
-Named visual styles you inject directly into the prompt text. Each is inspired by a real graphic designer and tested across 40+ videos.
-
-**These are different from HeyGen API styles (`style_id`).** API styles are curated templates on HeyGen's backend. Prompt styles give you full control over colors, typography, motion, and transitions directly in the prompt.
-
-**How to use:** Pick a style. Copy the STYLE block. Paste it into the prompt after your script content.
-
-**How to pick:** Match mood first, content second. Ask: *"What should the viewer FEEL?"*
-
-> **Language note:** Style blocks stay in English regardless of the video's content language. They are technical directives to Video Agent's rendering engine, not viewer-facing text. The video's script and narration should be in the video language, but the STYLE block at the end is always English.
-
-## Mood-to-Style Guide
-
-| Content feels... | Use... |
-|---|---|
-| Personal, intimate | Soft Signal, Quiet Drama |
-| Natural, earthy | Warm Grain, Earth Pulse |
-| Nostalgic, historical | Heritage Reel |
-| Data-driven, analytical | Swiss Pulse, Digital Grid |
-| Elegant, premium | Velvet Standard, Geometric Bold |
-| Cultural, global | Silk Route, Folk Frequency |
-| Investigative, serious | Contact Sheet, Shadow Cut |
-| Fun, lighthearted | Play Mode, Carnival Surge |
-| Philosophical, abstract | Dream State |
-| Punk, grassroots, raw | Deconstructed |
-| Hype, loud, high-energy | Maximalist Type |
-| Tech-forward, futuristic | Data Drift |
-| Breaking, urgent | Red Wire |
-
-## Quick Reference
-
-| # | Style | Artist | Mood | Best For |
-|---|---|---|---|---|
-| 1 | Soft Signal | Sagmeister | Intimate, warm | Personal stories, wellness |
-| 2 | Warm Grain | Eksell | Organic, friendly | Environmental, sustainability |
-| 3 | Quiet Drama | Ray | Humanist, contemplative | Profiles, biographical |
-| 4 | Heritage Reel | Cassandre | Nostalgic, vintage | History, retrospectives |
-| 5 | Silk Route | Abedini | Flowing, mysterious | Global affairs, cross-cultural |
-| 6 | Swiss Pulse | Müller-Brockmann | Clinical, precise | Data-heavy, analytical |
-| 7 | Geometric Bold | Tanaka | Minimal, elegant | Lifestyle, visual essays |
-| 8 | Velvet Standard | Vignelli | Premium, timeless | Luxury, investor updates |
-| 9 | Digital Grid | Crouwel | Systematic, technical | Infrastructure, engineering |
-| 10 | Contact Sheet | Brodovitch | Editorial, investigative | Journalism, deep dives |
-| 11 | Folk Frequency | Terrazas | Cultural, vivid | Festivals, food, heritage |
-| 12 | Earth Pulse | Ghariokwu | Grounded, communal | Community, grassroots |
-| 13 | Dream State | Tomaszewski | Surreal, poetic | Op-eds, philosophy |
-| 14 | Play Mode | Ahn Sang-soo | Playful, irreverent | Entertainment, pop culture |
-| 15 | Carnival Surge | Lins | Euphoric, celebratory | Milestones, hype |
-| 16 | Shadow Cut | Hillmann | Dark, cinematic | Exposés, investigations |
-| 17 | Deconstructed | Brody | Industrial, raw | Tech news, punk energy |
-| 18 | Maximalist Type | Scher | Loud, kinetic | Big announcements, launches |
-| 19 | Data Drift | Anadol | Futuristic, immersive | AI/tech, innovation |
-| 20 | Red Wire | Tartakover | Urgent, immediate | Breaking news, crisis |
-
-## Production Performance (from 40+ videos)
-
-| Rank | Style | Strength |
-|------|-------|----------|
-| 1 | Deconstructed (Brody) | Most reliable across all topics |
-| 2 | Swiss Pulse (Müller-Brockmann) | Best for data-heavy content |
-| 3 | Digital Grid (Crouwel) | Strong for tech topics |
-| 4 | Geometric Bold (Tanaka) | Elegant and versatile |
-| 5 | Maximalist Type (Scher) | High energy, use sparingly |
-
----
-
-## Style Blocks (Copy-Paste Ready)
-
-### 1. Soft Signal — Sagmeister
-**Mood:** Intimate, warm | **Best for:** Personal stories, wellness
-```
-STYLE — SOFT SIGNAL (Sagmeister): Warm amber/cream, dusty rose, sage green.
-Handwritten-style text. Close-up framing. Slow drifts and floats.
-Soft dissolves with warm light leaks.
-```
-
-### 2. Warm Grain — Eksell
-**Mood:** Organic, friendly | **Best for:** Environmental, sustainability
-```
-STYLE — WARM GRAIN (Eksell): Earth tones — ochre, forest green, terracotta, cream.
-Organic rounded compositions. 16mm film grain. Rounded sans-serif.
-Gentle wipes and soft cuts.
-```
-
-### 3. Quiet Drama — Ray
-**Mood:** Humanist, contemplative | **Best for:** Profiles, biographical
-```
-STYLE — QUIET DRAMA (Ray): Muted warm — sepia, deep brown, soft gold.
-Portrait framing. Clean serif. Strong single-source contrast.
-Slow fades to black.
-```
-
-### 4. Heritage Reel — Cassandre
-**Mood:** Nostalgic, vintage | **Best for:** History, retrospectives
-```
-STYLE — HERITAGE REEL (Cassandre): Faded gold, burgundy, navy, sepia wash.
-Elegant centered serif. Vignetting and aged film grain.
-Iris wipe transitions.
-```
-
-### 5. Silk Route — Abedini
-**Mood:** Flowing, mysterious | **Best for:** Global affairs, cross-cultural
-```
-STYLE — SILK ROUTE (Abedini): Jewel tones — deep teal, burgundy, gold, lapis blue.
-Layered compositions, all depths active. Elegant spaced type.
-Flowing dissolves and smooth morphs.
-```
-
-### 6. Swiss Pulse — Müller-Brockmann
-**Mood:** Clinical, precise | **Best for:** Data-heavy, analytical, financial
-```
-STYLE — SWISS PULSE (Müller-Brockmann): Black/white + electric blue #0066FF.
-Grid-locked. Helvetica Bold. Animated counters. Diagonal accents.
-Grid wipe transitions.
-```
-
-### 7. Geometric Bold — Tanaka
-**Mood:** Minimal, elegant | **Best for:** Lifestyle, visual essays
-```
-STYLE — GEOMETRIC BOLD (Tanaka): Max 3 flat colors per frame.
-60% negative space. Bold type as primary element.
-Single focal point. Clean cuts on beat.
-```
-
-### 8. Velvet Standard — Vignelli
-**Mood:** Premium, timeless | **Best for:** Luxury, investor updates, keynotes
-```
-STYLE — VELVET STANDARD (Vignelli): Black, white, one accent: gold #c9a84c.
-Thin ALL CAPS, wide spacing. Generous negative space.
-Slow elegant cross-dissolves.
-```
-
-### 9. Digital Grid — Crouwel
-**Mood:** Systematic, technical | **Best for:** Infrastructure, engineering, code
-```
-STYLE — DIGITAL GRID (Crouwel): Monospaced type. Dark #0a0a0a with cyan #00E5FF, amber #FFB300.
-Pixel grid overlays. Terminal aesthetic. Clean wipe transitions.
-```
-
-### 10. Contact Sheet — Brodovitch
-**Mood:** Editorial, investigative | **Best for:** Journalism, deep dives
-```
-STYLE — CONTACT SHEET (Brodovitch): High contrast B&W, desaturated accents.
-Photo-editorial framing. Bold sans-serif annotations. Raw grain.
-Hard cuts on beat. Snap-zooms.
-```
-
-### 11. Folk Frequency — Terrazas
-**Mood:** Cultural, vivid | **Best for:** Festivals, food, heritage
-```
-STYLE — FOLK FREQUENCY (Terrazas): Vivid folk — hot pink, cobalt blue, sun yellow, emerald.
-Bold rounded type. Folk art rhythms. Rich handmade textures.
-Colorful wipes on festive rhythm.
-```
-
-### 12. Earth Pulse — Ghariokwu
-**Mood:** Grounded, communal | **Best for:** Community, music/culture
-```
-STYLE — EARTH PULSE (Ghariokwu): Warm saturated — burnt orange, deep green, rich yellow.
-Bold expressive type. Wide community framing.
-Rhythmic cuts on beat. Freeze-frames.
-```
-
-### 13. Dream State — Tomaszewski
-**Mood:** Surreal, poetic | **Best for:** Op-eds, philosophy
-```
-STYLE — DREAM STATE (Tomaszewski): Muted palette + one surreal accent.
-Thin elegant floating type. Soft edges, atmospheric haze.
-Slow morph dissolves — NEVER hard cuts.
-```
-
-### 14. Play Mode — Ahn Sang-soo
-**Mood:** Playful, irreverent | **Best for:** Entertainment, pop culture
-```
-STYLE — PLAY MODE (Ahn Sang-soo): Electric blue, hot pink, lime green.
-Bouncy spring physics. Oversized tilted text. Score cards, XP bars.
-Pop cuts, bounce effects.
-```
-
-### 15. Carnival Surge — Lins
-**Mood:** Euphoric, celebratory | **Best for:** Milestones, hype
-```
-STYLE — CARNIVAL SURGE (Lins): Max color — hot pink #FF1493, yellow #FFE000, teal #00CED1.
-Collage layering. Text MASSIVE at ANGLES. Confetti bursts.
-Smash cuts, flash frames.
-```
-
-### 16. Shadow Cut — Hillmann
-**Mood:** Dark, cinematic | **Best for:** Exposés, investigations
-```
-STYLE — SHADOW CUT (Hillmann): Deep blacks, cold greys + blood red accent.
-Sharp angular text. Heavy shadow. Slow creeping push-ins.
-Hard cuts to black. Film noir tension.
-```
-
-### 17. Deconstructed — Brody
-**Mood:** Industrial, raw | **Best for:** Tech news, punk energy
-```
-STYLE — DECONSTRUCTED (Brody): Dark grey #1a1a1a, rust orange #D4501E.
-Type at angles, overlapping. Gritty textures, scan-line glitch.
-Smash cuts with flash frames.
-```
-
-### 18. Maximalist Type — Scher
-**Mood:** Loud, kinetic | **Best for:** Big announcements, launches
-```
-STYLE — MAXIMALIST TYPE (Scher): Red, yellow, black, white — max contrast.
-Text IS the visual. Overlapping at different scales, 50-80% of frame.
-Kinetic everything. Smash cuts, flash frames.
-```
-
-### 19. Data Drift — Anadol
-**Mood:** Futuristic, immersive | **Best for:** AI/tech, innovation
-```
-STYLE — DATA DRIFT (Anadol): Iridescent — purple #7c3aed, cyan #06b6d4, deep black.
-Fluid morphing compositions. Thin futuristic type.
-Liquid dissolves. Particles coalesce into numbers.
-```
-
-### 20. Red Wire — Tartakover
-**Mood:** Urgent, immediate | **Best for:** Breaking news, crisis
-```
-STYLE — RED WIRE (Tartakover): Red, black, white, emergency yellow.
-Bold condensed all-caps. Split screens, tickers, timestamps.
-Snap cuts, flash frames. Zero breathing room.
-```
-
----
-
-## Custom Styles
-
-These 20 are starting points. Create your own by combining:
-1. **Named style + designer reference** (grounds the aesthetic)
-2. **Color palette with hex codes** (specific > vague)
-3. **Typography rules** (font style, weight, case, spacing)
-4. **Motion rules** (how elements enter/exit, timing)
-5. **Transition type** (cuts, dissolves, wipes)
-
-Example custom style:
-```
-STYLE — NEON TERMINAL (custom): Black #0a0a0a background, neon green #00FF41 text,
-cyan #00E5FF highlights. Monospaced type throughout. Terminal cursor blinks.
-Text types on character by character. Scan-line overlay at 5% opacity.
-Hard cuts only. Matrix-style code rain in transitions.
-```
diff --git a/references/troubleshooting.md b/references/troubleshooting.md
deleted file mode 100644
index b9ad832..0000000
--- a/references/troubleshooting.md
+++ /dev/null
@@ -1,151 +0,0 @@
-# Known Issues & Troubleshooting
-
-## Known Bug: Video Agent "Talking Photo Not Found"
-
-**Error message:** "The Talking Photo for the current narrator could not be found."
-
-**Root Cause:** Confirmed as a Video Agent backend bug by HeyGen engineering (Jerry Yan). Affects `video_avatar` type narrators and stock avatar auto-selection.
-
-**Workaround:**
-- Prefer explicit `avatar_id` over auto-selection
-- If `video_avatar` fails, retry with a `studio_avatar` or `photo_avatar`
-
-**Status:** Fix in progress at HeyGen.
-
----
-
-## Weird Pauses / Unnatural Silence in Videos
-
-**Symptom:** Video has awkward pauses or breaks between sentences. Narrator stops speaking but video continues with dead air before next line.
-
-**Root Cause:** When Video Agent receives a script shorter than the target duration, it treats the script as verbatim speech and inserts silence/breaks to stretch it to the exact requested duration. It won't ad-lib or expand — it just pads with dead air.
-
-**Fix:** Add this directive to EVERY prompt:
-> "This script is a concept and theme to convey — not a verbatim transcript. You have full creative freedom to expand, elaborate, add examples, and fill the duration naturally. Do not pad with silence or pauses."
-
-This tells Video Agent it can expand the script naturally instead of treating it as a fixed speech transcript. Per Jerry Yan: "If you tell it it's not a script to be strictly followed but concept or theme or give it green light to expand the script it will do well."
-
-**Status:** Skill-side fix (prompt directive). HeyGen is also tuning the default behavior but the explicit directive is the reliable workaround.
-
----
-
-## Duration Variance (Expected Behavior)
-
-Video Agent controls final video timing internally. Duration accuracy ranges from 79-174% of target across testing. This is NOT a bug.
-
-**Mitigation:** Variable padding multipliers (Script):
-- ≤30s target: 1.6x padding
-- 31-119s target: 1.4x padding
-- ≥120s target: 1.3x padding
-
-With explicit `avatar_id`: ~97% duration accuracy average.
-Without `avatar_id`: ~80% accuracy average.
-
----
-
-## Frame Check: Video Agent Not Applying Framing
-
-If the Video Agent ignores the FRAMING NOTE or BACKGROUND NOTE and produces black bars, letterboxing, or mismatched framing:
-
-1. **Ensure the note is appended at the END of the prompt**, after all other content (script, style block, etc.). Video Agent processes instructions sequentially and late-prompt directives have the strongest effect.
-2. **Check that the correction note was actually appended.** Log the final prompt text and verify the FRAMING NOTE / BACKGROUND NOTE block is present.
-3. **photo_avatar does NOT need BACKGROUND NOTE.** Video Agent generates avatar + environment together for photo_avatars. Only append framing notes for orientation mismatches. Background notes are for studio_avatars with transparent/empty backgrounds only.
-
----
-
-## Stock Avatar Auto-Selection Unreliable
-
-When no `avatar_id` is provided, Video Agent uses narrator tags (`{{@narrator_l0ug91}}`) that sometimes fail to resolve during render.
-
-**Fix:** Always use explicit `avatar_id` from discovery. The only exception is Quick Shot mode where the user explicitly wants speed over reliability.
-
----
-
-## HTML URLs in files[] Rejected
-
-Video Agent rejects `text/html` content type in the `files[]` array. Web pages (blogs, docs sites, articles) must be handled via Path A (contextualize) only.
-
-**What works in files[]:** Direct file URLs (PDFs, images, videos) — but prefer download→upload→asset_id since CDN/WAF often blocks HeyGen's servers.
-
----
-
-## Avatar Not Ready for Video Generation
-
-**Symptom:** Video generation fails or produces errors immediately after creating a new avatar. The avatar exists in the HeyGen dashboard but videos referencing it fail.
-
-**Root Cause:** Avatar creation is asynchronous. `heygen avatar create` (and `create_photo_avatar` / `create_prompt_avatar` MCP tools) return success immediately, but the avatar image is still being processed. If you submit a video request before processing completes, it fails.
-
-**Detection:** Poll with `heygen avatar looks list --group-id <group_id>` (or MCP `list_avatar_looks`). The avatar is NOT ready until:
-- `preview_image_url` is non-null
-- `image_width` and `image_height` are non-zero
-
-At the group level (`heygen avatar list`), an unready avatar will have no `preview_image_url` on the group object.
-
-**Fix:** Poll every 10 seconds after creation, wait for preview URL to appear. Typical: 30-90s for photo avatars, 1-3 min for prompt avatars. Timeout at 5 min.
-
-**The heygen-avatar skill handles this automatically.** If you bypass the skill, you must implement this polling yourself.
-
----
-
-## Interactive Sessions Reliability
-
-Interactive sessions (created without `--wait` and iterated via `heygen video-agent send`) have known issues:
-- Sessions frequently stuck at `processing` status
-- `reviewing` state may never be reached
-- Follow-up messages fail with timing errors
-- Stop command may not trigger video generation
-
-**Recommendation:** Use one-shot mode for production. Interactive sessions documented for future use once HeyGen stabilizes the API.
-
----
-
-## Error Code → Action
-
-Stable CLI exit codes tell you what to do without parsing messages:
-
-| Exit | Class | Action |
-|------|-------|--------|
-| `0` | ok | Continue |
-| `1` | API / network | Retry with backoff. If persistent, check `--verbose` or contact HeyGen support. |
-| `2` | usage | You passed a bad flag. Run `--help` on the command, fix the args, retry. |
-| `3` | auth | Re-auth: `heygen auth login` or set `HEYGEN_API_KEY`. Verify with `heygen auth status`. |
-| `4` | timeout under `--wait` | Operation still running server-side. stdout contains the partial resource (with `session_id` or `video_id`) — resume polling with `heygen video-agent get <id>` or `heygen video get <id>`. Do NOT re-submit. |
-
-Common API-error hints (surfaced in stderr envelope `{error:{code,message,hint}}`):
-
-- `402` / insufficient credits → tell the user their HeyGen plan is out of credits.
-- `403` / forbidden → the resource is not owned by the caller (wrong `group_id`, private avatar).
-- `404` / not found → ID is stale. Re-fetch via `heygen avatar list`, `heygen video-agent get`, etc.
-
----
-
-## Polling Cadence
-
-When `--wait` isn't an option (e.g., you want to return control to the user between polls), use a back-off schedule rather than a fixed interval:
-
-| Age of job | Poll interval |
-|------------|---------------|
-| 0–2 min | every 10s |
-| 2–5 min | every 30s |
-| 5–10 min | every 60s |
-| > 10 min | surface "taking longer than usual" once, keep polling at 60s, give up at 15 min |
-
-If a job is stuck at the same status for >5 min, that's a signal to surface a status update or check the dashboard.
-
-**Prefer `--wait`** on creation commands. It handles the polling internally and returns the final resource or exits `4` with a resumable `session_id` / `video_id` on timeout.
-
----
-
-## Direct Video vs Video Agent — Which Endpoint?
-
-Two ways to generate a video. Different pricing, different trade-offs.
-
-| | **Direct Video** | **Video Agent** |
-|---|-------------------|-----------------|
-| Command / Tool | `heygen video create` / no MCP tool yet | `heygen video-agent create` / `create_video_agent` |
-| Input | Full script + avatar + voice + scene JSON | Prompt + optional avatar/voice/style |
-| Control | You author every scene | Video Agent plans scenes, pacing, motion |
-| Pricing | ~$0.0333/sec | ~$0.10/sec |
-| When to use | Deterministic multi-scene videos, tight control, bulk generation | Creative intros, messages, "make a video about X" requests |
-
-The default in this skill is **Video Agent** — it's what `heygen-video` is built around. Drop to Direct Video only for batch or highly scripted workflows where Agent's autonomy is overhead.
diff --git a/release-please-config.json b/release-please-config.json
index c26e2d2..1e06c47 100644
--- a/release-please-config.json
+++ b/release-please-config.json
@@ -7,10 +7,6 @@
       "changelog-path": "CHANGELOG.md",
       "version-file": "VERSION",
       "extra-files": [
-        {
-          "type": "generic",
-          "path": "SKILL.md"
-        },
         {
           "type": "generic",
           "path": "heygen-video/SKILL.md"
@@ -27,5 +23,5 @@
   "include-component-in-tag": false,
   "tag-separator": "",
   "pull-request-title-pattern": "chore: release ${version}",
-  "pull-request-header": "Release PR — bumps VERSION, SKILL.md frontmatter, and User-Agent strings from conventional commits since last release."
+  "pull-request-header": "Release PR — bumps VERSION and per-skill SKILL.md frontmatter from conventional commits since last release."
 }
diff --git a/scripts/sync-references.sh b/scripts/sync-references.sh
deleted file mode 100755
index aa7376f..0000000
--- a/scripts/sync-references.sh
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env bash
-# sync-references.sh — propagate canonical root references/ into per-skill copies.
-#
-# Source of truth: references/ at the repo root.
-# Destinations:    heygen-avatar/references/ and heygen-video/references/.
-#
-# Each skill bundles only the references it actually links to. This script
-# enforces that mapping and copies the canonical root file into each
-# destination. It does NOT touch:
-#
-#   - heygen-avatar/references/avatar-creation.md
-#       (per-skill creator-side cleave, no root counterpart)
-#   - heygen-video/references/avatar-discovery.md
-#       (per-skill consumer-side cleave from the original avatar-discovery.md;
-#        the cleave is intentional and the two halves diverge by design)
-#
-# Usage:
-#   ./scripts/sync-references.sh           # propagate root → subdirs
-#   ./scripts/sync-references.sh --check   # exit 1 if any subdir copy drifts from root
-#
-# CI gate: validate-skills.yml runs this with --check on every PR.
-
-set -euo pipefail
-
-# Resolve repo root regardless of where the script is invoked from.
-REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
-cd "$REPO_ROOT"
-
-# Format: <subdir-copy>|<canonical-root-source>
-PAIRS=(
-  'heygen-avatar/references/asset-routing.md|references/asset-routing.md'
-  'heygen-avatar/references/troubleshooting.md|references/troubleshooting.md'
-  'heygen-video/references/asset-routing.md|references/asset-routing.md'
-  'heygen-video/references/frame-check.md|references/frame-check.md'
-  'heygen-video/references/motion-vocabulary.md|references/motion-vocabulary.md'
-  'heygen-video/references/official-prompt-guide.md|references/official-prompt-guide.md'
-  'heygen-video/references/prompt-craft.md|references/prompt-craft.md'
-  'heygen-video/references/prompt-styles.md|references/prompt-styles.md'
-  'heygen-video/references/troubleshooting.md|references/troubleshooting.md'
-)
-
-mode="sync"
-case "${1:-}" in
-  --check) mode="check" ;;
-  -h|--help)
-    sed -n '1,28p' "$0"
-    exit 0
-    ;;
-  "") ;;
-  *)
-    echo "unknown argument: $1" >&2
-    echo "usage: $0 [--check]" >&2
-    exit 2
-    ;;
-esac
-
-fail=0
-synced=0
-
-for pair in "${PAIRS[@]}"; do
-  sub="${pair%%|*}"
-  root="${pair##*|}"
-
-  if [ ! -f "$root" ]; then
-    echo "::error::canonical source missing: $root"
-    fail=1
-    continue
-  fi
-
-  if [ "$mode" = "check" ]; then
-    if [ ! -f "$sub" ]; then
-      echo "::error::subdir copy missing: $sub (run scripts/sync-references.sh to create it)"
-      fail=1
-    elif ! diff -q "$root" "$sub" >/dev/null 2>&1; then
-      echo "::error::drift detected — '$sub' differs from canonical '$root'"
-      diff -u "$root" "$sub" | head -40 || true
-      fail=1
-    fi
-  else
-    mkdir -p "$(dirname "$sub")"
-    if [ -f "$sub" ] && diff -q "$root" "$sub" >/dev/null 2>&1; then
-      :
-    else
-      cp "$root" "$sub"
-      echo "synced: $root → $sub"
-      synced=$((synced + 1))
-    fi
-  fi
-done
-
-if [ "$mode" = "check" ]; then
-  if [ "$fail" -ne 0 ]; then
-    echo ""
-    echo "Drift detected. Run: ./scripts/sync-references.sh"
-    exit 1
-  fi
-  echo "✓ All ${#PAIRS[@]} shared references are in sync with canonical root copies."
-else
-  if [ "$synced" -eq 0 ]; then
-    echo "✓ All ${#PAIRS[@]} shared references already in sync — no changes."
-  else
-    echo ""
-    echo "Synced $synced file(s). Review and commit:"
-    echo "  git add heygen-avatar/references heygen-video/references"
-    echo "  git commit"
-  fi
-fi
diff --git a/scripts/update-check.sh b/scripts/update-check.sh
deleted file mode 100755
index deda9b3..0000000
--- a/scripts/update-check.sh
+++ /dev/null
@@ -1,162 +0,0 @@
-#!/usr/bin/env bash
-# heygen-skills update-check — periodic version check for all skills.
-# Inspired by gstack's update-check pattern (MIT license).
-#
-# Output (one line, or nothing):
-#   JUST_UPGRADED <old> <new>       — marker found from recent upgrade
-#   UPGRADE_AVAILABLE <old> <new>   — remote VERSION differs from local
-#   (nothing)                       — up to date, snoozed, disabled, or check skipped
-#
-# Env overrides (for testing):
-#   HEYGEN_SKILL_DIR      — override auto-detected root
-#   HEYGEN_SKILLS_STATE    — override ~/.heygen-skills state directory
-#   HEYGEN_REMOTE_URL     — override remote VERSION URL
-set -euo pipefail
-
-SKILL_DIR="${HEYGEN_SKILL_DIR:-$(cd "$(dirname "$0")/.." && pwd)}"
-STATE_DIR="${HEYGEN_SKILLS_STATE:-$HOME/.heygen-skills}"
-CACHE_FILE="$STATE_DIR/last-update-check"
-MARKER_FILE="$STATE_DIR/just-upgraded-from"
-SNOOZE_FILE="$STATE_DIR/update-snoozed"
-VERSION_FILE="$SKILL_DIR/VERSION"
-REMOTE_URL="${HEYGEN_REMOTE_URL:-https://raw.githubusercontent.com/heygen-com/skills/master/VERSION}"
-
-# ─── Force flag (busts cache + snooze) ────────────────────────
-if [ "${1:-}" = "--force" ]; then
-  rm -f "$CACHE_FILE"
-  rm -f "$SNOOZE_FILE"
-fi
-
-# ─── Step 0: Check if updates are disabled ────────────────────
-if [ -f "$STATE_DIR/update-check-disabled" ]; then
-  exit 0
-fi
-
-# ─── Snooze helper ───────────────────────────────────────────
-# check_snooze <remote_version>
-#   Returns 0 if snoozed (stay quiet), 1 if not snoozed (should output).
-#
-#   Snooze file format: <version> <level> <epoch>
-#   Level durations: 1=24h, 2=48h, 3+=7d
-#   New version resets snooze.
-check_snooze() {
-  local remote_ver="$1"
-  if [ ! -f "$SNOOZE_FILE" ]; then
-    return 1
-  fi
-  local snoozed_ver snoozed_level snoozed_epoch
-  snoozed_ver="$(awk '{print $1}' "$SNOOZE_FILE" 2>/dev/null || true)"
-  snoozed_level="$(awk '{print $2}' "$SNOOZE_FILE" 2>/dev/null || true)"
-  snoozed_epoch="$(awk '{print $3}' "$SNOOZE_FILE" 2>/dev/null || true)"
-
-  # Validate: all three fields must be non-empty
-  if [ -z "$snoozed_ver" ] || [ -z "$snoozed_level" ] || [ -z "$snoozed_epoch" ]; then
-    return 1
-  fi
-
-  # Validate: level and epoch must be integers
-  case "$snoozed_level" in *[!0-9]*) return 1 ;; esac
-  case "$snoozed_epoch" in *[!0-9]*) return 1 ;; esac
-
-  # New version dropped? Ignore snooze.
-  if [ "$snoozed_ver" != "$remote_ver" ]; then
-    return 1
-  fi
-
-  # Compute snooze duration based on level
-  local duration
-  case "$snoozed_level" in
-    1) duration=86400 ;;   # 24 hours
-    2) duration=172800 ;;  # 48 hours
-    *) duration=604800 ;;  # 7 days (level 3+)
-  esac
-
-  local now
-  now="$(date +%s)"
-  local expires=$(( snoozed_epoch + duration ))
-  if [ "$now" -lt "$expires" ]; then
-    return 0  # still snoozed
-  fi
-
-  return 1  # snooze expired
-}
-
-# ─── Step 1: Read local version ──────────────────────────────
-LOCAL=""
-if [ -f "$VERSION_FILE" ]; then
-  LOCAL="$(cat "$VERSION_FILE" 2>/dev/null | tr -d '[:space:]')"
-fi
-if [ -z "$LOCAL" ]; then
-  exit 0  # No VERSION file → skip check
-fi
-
-# ─── Step 2: Check "just upgraded" marker ─────────────────────
-if [ -f "$MARKER_FILE" ]; then
-  OLD="$(cat "$MARKER_FILE" 2>/dev/null | tr -d '[:space:]')"
-  rm -f "$MARKER_FILE"
-  rm -f "$SNOOZE_FILE"
-  if [ -n "$OLD" ]; then
-    echo "JUST_UPGRADED $OLD $LOCAL"
-  fi
-fi
-
-# ─── Step 3: Check cache freshness ──────────────────────────
-# UP_TO_DATE: 60 min TTL (detect new releases quickly)
-# UPGRADE_AVAILABLE: 720 min TTL (keep nagging less often)
-if [ -f "$CACHE_FILE" ]; then
-  CACHED="$(cat "$CACHE_FILE" 2>/dev/null || true)"
-  case "$CACHED" in
-    UP_TO_DATE*)        CACHE_TTL=60 ;;
-    UPGRADE_AVAILABLE*) CACHE_TTL=720 ;;
-    *)                  CACHE_TTL=0 ;;
-  esac
-
-  STALE=$(find "$CACHE_FILE" -mmin +$CACHE_TTL 2>/dev/null || true)
-  if [ -z "$STALE" ] && [ "$CACHE_TTL" -gt 0 ]; then
-    case "$CACHED" in
-      UP_TO_DATE*)
-        CACHED_VER="$(echo "$CACHED" | awk '{print $2}')"
-        if [ "$CACHED_VER" = "$LOCAL" ]; then
-          exit 0
-        fi
-        ;;
-      UPGRADE_AVAILABLE*)
-        CACHED_OLD="$(echo "$CACHED" | awk '{print $2}')"
-        if [ "$CACHED_OLD" = "$LOCAL" ]; then
-          CACHED_NEW="$(echo "$CACHED" | awk '{print $3}')"
-          if check_snooze "$CACHED_NEW"; then
-            exit 0
-          fi
-          echo "$CACHED"
-          exit 0
-        fi
-        ;;
-    esac
-  fi
-fi
-
-# ─── Step 4: Fetch remote version ────────────────────────────
-mkdir -p "$STATE_DIR"
-
-REMOTE=""
-REMOTE="$(curl -sf --max-time 5 "$REMOTE_URL" 2>/dev/null || true)"
-REMOTE="$(echo "$REMOTE" | tr -d '[:space:]')"
-
-# Validate: must look like a version number (reject HTML error pages)
-if ! echo "$REMOTE" | grep -qE '^[0-9]+\.[0-9.]+$'; then
-  echo "UP_TO_DATE $LOCAL" > "$CACHE_FILE"
-  exit 0
-fi
-
-if [ "$LOCAL" = "$REMOTE" ]; then
-  echo "UP_TO_DATE $LOCAL" > "$CACHE_FILE"
-  exit 0
-fi
-
-# Versions differ — upgrade available
-echo "UPGRADE_AVAILABLE $LOCAL $REMOTE" > "$CACHE_FILE"
-if check_snooze "$REMOTE"; then
-  exit 0
-fi
-
-echo "UPGRADE_AVAILABLE $LOCAL $REMOTE"