diff --git a/.github/workflows/validate-skills.yml b/.github/workflows/validate-skills.yml
new file mode 100644
index 0000000..fba6ebe
--- /dev/null
+++ b/.github/workflows/validate-skills.yml
@@ -0,0 +1,184 @@
+name: Validate Skills
+
+# Runs on pull requests, and on pushes to master, that touch a skill directory,
+# the shared references/ or scripts/ trees, the root SKILL.md, or this workflow.
+on:
+  pull_request:
+    paths:
+      - "heygen-avatar/**"
+      - "heygen-video/**"
+      - "references/**"
+      - "scripts/**"
+      - ".github/workflows/validate-skills.yml"
+      - "SKILL.md"
+  push:
+    branches: [master]
+    paths:
+      - "heygen-avatar/**"
+      - "heygen-video/**"
+      - "references/**"
+      - "scripts/**"
+      - ".github/workflows/validate-skills.yml"
+      - "SKILL.md"
+
+jobs:
+  # Drift guard: runs the sync script in --check mode against the checkout.
+  references-in-sync:
+    name: Root references/ stays in sync with subdir copies
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Verify references are in sync (no drift)
+        run: ./scripts/sync-references.sh --check
+
+  # Installs each skill from a local staging tree into a scratch git project,
+  # then checks that the installed SKILL.md only links to files it ships with.
+  self-contained-bundles:
+    name: Skills install cleanly via gh skill (self-contained)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install latest gh
+        run: |
+          curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+            | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
+          sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+            | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
+          sudo apt-get update
+          sudo apt-get install -y gh
+          gh --version
+
+      - name: Verify gh skill is available
+        run: |
+          if ! gh skill --help >/dev/null 2>&1; then
+            echo "::error::gh skill subcommand not available in installed gh"
+            exit 1
+          fi
+
+      - name: Stage skills under skills/ for from-local install
+        run: |
+          # gh skill install --from-local requires the skills/<name>/SKILL.md convention.
+          # Stage the in-repo subdir skills under a temporary skills/ root so we can validate.
+          mkdir -p _ghskill_test/skills
+          cp -R heygen-avatar _ghskill_test/skills/heygen-avatar
+          cp -R heygen-video _ghskill_test/skills/heygen-video
+
+      - name: Init isolated install project
+        run: |
+          # gh skill install --scope project writes to <project-root>/.agents/skills.
+          # Initialize an isolated git repo for the install target so it becomes
+          # its own project root (otherwise gh skill walks up to the outer repo).
+          mkdir -p _install_test
+          cd _install_test
+          git init -q
+          git -c user.email='ci@heygen.com' -c user.name='CI' commit -q --allow-empty -m bootstrap
+
+      - name: Install heygen-avatar from local
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          cd _install_test
+          gh skill install ../_ghskill_test heygen-avatar --from-local --scope project
+          test -f .agents/skills/heygen-avatar/SKILL.md
+          echo "✓ heygen-avatar installed at _install_test/.agents/skills/heygen-avatar/"
+
+      - name: Verify heygen-avatar installed bundle is self-contained
+        run: |
+          set -euo pipefail
+          cd _install_test/.agents/skills/heygen-avatar
+          # No parent-dir references in installed SKILL.md.
+          if grep -nE '\.\./' SKILL.md; then
+            echo "::error::heygen-avatar/SKILL.md contains parent-dir (../) references after install"
+            exit 1
+          fi
+          # Every relative reference in SKILL.md must exist inside the installed bundle.
+          fail=0
+          for ref in $(grep -oE '(references|scripts)/[a-zA-Z0-9_./-]+\.(md|sh)' SKILL.md | sort -u); do
+            if [ ! -f "$ref" ]; then
+              echo "::error::heygen-avatar references $ref but it's not in the installed bundle"
+              fail=1
+            fi
+          done
+          # Every bundled references/* and scripts/* file must be linked from SKILL.md.
+          # -F matches the file name literally, so "." is not a regex wildcard.
+          while IFS= read -r f; do
+            base=$(basename "$f")
+            if ! grep -qF -- "$base" SKILL.md; then
+              echo "::error::orphaned bundled file (not linked from SKILL.md): $f"
+              fail=1
+            fi
+          done < <(find references scripts -type f 2>/dev/null)
+          if [ "$fail" -ne 0 ]; then exit 1; fi
+          echo "✓ heygen-avatar bundle is self-contained, no orphans"
+
+      - name: Install heygen-video from local
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          cd _install_test
+          gh skill install ../_ghskill_test heygen-video --from-local --scope project
+          test -f .agents/skills/heygen-video/SKILL.md
+          echo "✓ heygen-video installed at _install_test/.agents/skills/heygen-video/"
+
+      - name: Verify heygen-video installed bundle is self-contained
+        run: |
+          set -euo pipefail
+          cd _install_test/.agents/skills/heygen-video
+          # No parent-dir references in installed SKILL.md.
+          if grep -nE '\.\./' SKILL.md; then
+            echo "::error::heygen-video/SKILL.md contains parent-dir (../) references after install"
+            exit 1
+          fi
+          # Every relative reference in SKILL.md must exist inside the installed bundle.
+          fail=0
+          for ref in $(grep -oE '(references|scripts)/[a-zA-Z0-9_./-]+\.(md|sh)' SKILL.md | sort -u); do
+            if [ ! -f "$ref" ]; then
+              echo "::error::heygen-video references $ref but it's not in the installed bundle"
+              fail=1
+            fi
+          done
+          # Every bundled references/* and scripts/* file must be linked from SKILL.md.
+          # -F matches the file name literally, so "." is not a regex wildcard.
+          while IFS= read -r f; do
+            base=$(basename "$f")
+            if ! grep -qF -- "$base" SKILL.md; then
+              echo "::error::orphaned bundled file (not linked from SKILL.md): $f"
+              fail=1
+            fi
+          done < <(find references scripts -type f 2>/dev/null)
+          if [ "$fail" -ne 0 ]; then exit 1; fi
+          echo "✓ heygen-video bundle is self-contained, no orphans"
+
+  spec-validate-soft:
+    name: agentskills.io spec validation (advisory)
+    runs-on: ubuntu-latest
+    # Advisory only — fails are reported as warnings, not blocking.
+    # Root SKILL.md will fail validation today (name: heygen-skills doesn't match directory `.`).
+    # That's tracked as a known issue and is not a blocker for gh skill install.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install latest gh
+        run: |
+          curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
+            | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg
+          sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+            | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null
+          sudo apt-get update
+          sudo apt-get install -y gh
+
+      - name: Run gh skill publish --dry-run
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Warn only when the dry run actually fails; a passing run stays quiet.
+          if ! gh skill publish --dry-run; then
+            echo "::warning::Root SKILL.md does not satisfy gh skill publish naming rules (expected — gh skill publish to agentskills.io registry is a follow-up)."
+          fi
diff --git a/.gitignore b/.gitignore index fe7534a..f2701ce 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,8 @@ node_modules/ evals/ scripts/release.sh + +# Local CI fixtures +.agents/ +_ghskill_test/ +_install_test/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dc2c3b0..ab3c659 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -41,6 +41,22 @@ gh pr create --title "Short summary" --body "$(cat <<'EOF' - [ ] Full generation tested (video_id if applicable) - [ ] SKILL.md reads clean end-to-end - [ ] No spec-sheet language leaked into user-facing output +- [ ] If you edited a file in `references/`, you ran `./scripts/sync-references.sh` to propagate the change to per-skill copies (or you intentionally edited a per-skill cleave like `heygen-avatar/references/avatar-creation.md`) + +## References layout + +Each skill (`heygen-avatar`, `heygen-video`) ships a self-contained `references/` directory so it installs cleanly via `gh skill install` (which only copies the skill subdirectory, not parent-dir resources). + +- **Source of truth** for shared docs: `references/.md` at the repo root. 
+- **Per-skill copies** are byte-identical mirrors of the root files. +- **Per-skill cleaves** (`heygen-avatar/references/avatar-creation.md`, `heygen-video/references/avatar-discovery.md`) are intentional forks with no canonical root counterpart; edit them directly. + +**Editor workflow:** +1. Edit the canonical root file (`references/.md`). +2. Run `./scripts/sync-references.sh` to propagate the change to per-skill copies. +3. `git add` everything together and commit. + +CI (`.github/workflows/validate-skills.yml`) runs `./scripts/sync-references.sh --check` on every PR and fails on drift. ## Breaking changes diff --git a/INSTALL.md b/INSTALL.md index 6ab2805..cce942d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,15 +2,53 @@ Grab an [API key](https://app.heygen.com/api) and set it in your shell. If you're already on a HeyGen plan with MCP connected to your agent, you can skip the key — MCP will be used automatically. -## Option 1 — ClawHub (recommended) +The repo ships *two* skills you can install: + +- **`heygen-avatar`** — build a persistent digital identity from a photo or description +- **`heygen-video`** — generate identity-first presenter videos + +Most users want both. They chain together: `heygen-avatar` returns an avatar id that `heygen-video` consumes. + +## Option 1 — `gh skill install` (works across 12+ agents) + +If you have [GitHub CLI](https://cli.github.com) v2.90+ available, this is the most portable install. `gh skill` writes to the right directory for your agent automatically (Claude Code, Cursor, Codex, Gemini CLI, GitHub Copilot, Junie, Goose, OpenHands, Amp, Cline, OpenCode, Warp, and more): + +```bash +gh skill install heygen-com/skills heygen-avatar +gh skill install heygen-com/skills heygen-video +``` + +Project scope (current repo only) is the default. 
For user scope (every project on this machine): + +```bash +gh skill install heygen-com/skills heygen-avatar --scope user +gh skill install heygen-com/skills heygen-video --scope user +``` + +Pin to a release tag for reproducibility: + +```bash +gh skill install heygen-com/skills heygen-avatar@v2.3.1 --pin +gh skill install heygen-com/skills heygen-video@v2.3.1 --pin +``` + +## Option 2 — ClawHub ```bash clawhub install heygen-skills ``` -ClawHub installs to your agent's default skills directory automatically. +ClawHub installs both skills to your agent's default skills directory automatically. + +## Option 3 — OpenClaw plugin + +For OpenClaw users who want bundled MCP support too: + +```bash +openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen +``` -## Option 2 — Git clone +## Option 4 — Git clone Clone into your agent's skills directory: diff --git a/README.md b/README.md index e51504a..67b223a 100644 --- a/README.md +++ b/README.md @@ -22,21 +22,45 @@ That's it. The agent fetches [INSTALL_FOR_AGENTS.md](./INSTALL_FOR_AGENTS.md) an Want to install manually instead? Follow the steps below. -### Step 1 — Clone into your skills directory +### Step 1 — Install the skills -Detect which agent you are and clone to the right path: +The repo ships two skills, `heygen-avatar` and `heygen-video`. Pick the install path that matches your tooling. + +**Option A: `gh skill install`** (works across Claude Code, Cursor, Codex, Gemini CLI, Copilot, Junie, Goose, OpenHands, Amp, Cline, OpenCode, Warp, and more — [agentskills.io](https://agentskills.io)): + +```bash +gh skill install heygen-com/skills heygen-avatar +gh skill install heygen-com/skills heygen-video +``` + +Requires GitHub CLI v2.90+. The CLI writes to the right directory for your agent automatically. 
+ +**Option B: ClawHub:** + +```bash +clawhub install heygen-skills +``` + +**Option C: OpenClaw plugin** (bundles MCP support): + +```bash +openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen +``` + +**Option D: Git clone** (legacy path): | Agent | Install path | |-------|-------------| | **Claude Code** | `~/.claude/skills/heygen-skills` | | **OpenClaw** | `~/.openclaw/workspace/skills/heygen-skills` | -| **ClawHub** | Run `clawhub install heygen-skills` and skip to Step 2 | | **Other** | Clone anywhere your agent loads skills from | ```bash git clone --single-branch --depth 1 https://github.com/heygen-com/skills.git /heygen-skills ``` +After cloning, the two skills are auto-discovered at `heygen-avatar/SKILL.md` and `heygen-video/SKILL.md`. + ### Step 2 — Get your HeyGen API key 1. Go to **[app.heygen.com/api](https://app.heygen.com/api)** (Settings → API) diff --git a/heygen-avatar/SKILL.md b/heygen-avatar/SKILL.md index 42d84da..5ccaed3 100644 --- a/heygen-avatar/SKILL.md +++ b/heygen-avatar/SKILL.md @@ -1,4 +1,5 @@ --- +version: 2.3.0 # x-release-please-version name: heygen-avatar description: | Create a persistent HeyGen avatar — a reusable face + voice identity for the agent, @@ -204,6 +205,8 @@ For agents and named characters, skip this entire step — go straight to Type A ### Phase 2 — Avatar Creation +📖 **Full creation API surface (photo / prompt / digital twin), file input formats, identity field → enum mapping, response shape → [references/avatar-creation.md](references/avatar-creation.md)** + Two modes: **Mode 1 — New character** (omit `avatar_group_id`): @@ -231,6 +234,8 @@ File options for Type B: - `{ "type": "asset_id", "asset_id": "" }` — from `heygen asset create --file ` - `{ "type": "base64", "media_type": "image/png", "data": "" }` — inline +📖 **When to use each (URL vs asset_id vs base64), upload routing, and edge cases → [references/asset-routing.md](references/asset-routing.md)** + **Response:** Returns `avatar_item.id` (look 
ID) and `avatar_item.group_id` (character identity). Map identity fields to HeyGen enums for the prompt: @@ -411,3 +416,5 @@ simply `cat AVATAR-AGENT.md` and get whatever the current agent's avatar is. - Voice match poor → show all available voices, let user browse - Asset upload fails → skip reference image, try prompt-only creation - Existing avatar file with stale HeyGen IDs → offer to regenerate or keep + +📖 **Known issues, retry patterns, broken voice previews, error → action mapping → [references/troubleshooting.md](references/troubleshooting.md)** diff --git a/heygen-avatar/references/asset-routing.md b/heygen-avatar/references/asset-routing.md new file mode 100644 index 0000000..b7eb708 --- /dev/null +++ b/heygen-avatar/references/asset-routing.md @@ -0,0 +1,86 @@ +# Asset Handling — The Classification Engine + +When the user provides files, URLs, or references, route each asset to the right path. The user should NEVER have to think about this. + +## Two Paths + +| Path | What happens | When to use | +|------|-------------|-------------| +| **A: Contextualize → Prompt** | Read/analyze the asset, extract key info, bake into script. Video Agent never sees the original. | Reference material, auth-walled content, documents where the *information* matters more than the *visual*. | +| **B: Attach to API** | Upload the raw file via `files[]`. Video Agent analyzes, extracts graphics, uses as frames/B-roll. | Screenshots, branded assets, PDFs with important visual layouts, images the viewer should literally see. | +| **A+B: Both** | Contextualize for script quality AND attach for visual use. | Long docs where you need to summarize but Video Agent should also have the full source. | + +## Classification Flow + +``` +1. Can Video Agent access this directly? + - Public URL (no auth, no paywall) → YES + - Private/internal URL → NO + - Local file → NO (must upload first) + +2. Should the viewer SEE this asset? 
+ - Screenshot, logo, product image, chart → YES → Path B + - Research doc, article, context material → NO → Path A + - Ambiguous → Path A+B + +3. Is the content too long for the prompt? + - Short (< 500 words) → fits in prompt + - Long (> 500 words) → summarize key points, attach full doc +``` + +## Decision Matrix + +| Asset Type | Publicly Accessible? | Show On Screen? | Route | +|-----------|---------------------|----------------|-------| +| Screenshot / image | N/A | Yes | **B: Attach** + describe in prompt as B-roll | +| Logo / brand asset | N/A | Yes | **B: Attach** + anchor to intro/outro | +| Public URL to file (PDF, image, video) | Yes | Maybe | **B: Download → upload via `/v3/assets` → pass `asset_id`** + summarize | +| Public URL to web page (HTML) | Yes | No | **A: Fetch and contextualize only.** Do NOT pass HTML URLs in `files[]`. | +| Auth-walled URL (requires login) | No | No | **A: Ask the user to paste the content.** Never fabricate. | +| PDF (short, text-heavy) | N/A | No | **A+B: Extract key points** + attach | +| PDF (long, visual-rich) | N/A | Maybe | **B: Attach** + summarize top points | +| Raw data / spreadsheet | N/A | Partially | **A: Analyze and describe** key stats. Attach if charts should appear. | + +## Executing Routes + +### Path A (Contextualize) +- URLs: Use `web_fetch` to retrieve publicly accessible content +- For auth-walled content you cannot access: ask the user to paste the text directly +- Extract 3-5 most important points relevant to the video +- Weave naturally into the script. Don't dump. Integrate. + +### Path B (Attach) +Upload to HeyGen: + +**MCP:** upload via the asset tool (depends on environment). +**CLI:** `heygen asset create --file /path/to/file.png` + +Max 32MB per file. Returns JSON with the new `asset_id`. 
+ +Or pass inline in `files[]`: +```json +{"type": "url", "url": "https://example.com/image.png"} +{"type": "asset_id", "asset_id": ""} +{"type": "base64", "data": "", "content_type": "image/png"} +``` + +### Describe Asset Usage in Prompt +Be SPECIFIC: +- "Use the uploaded dashboard screenshot as B-roll when discussing analytics" +- "Display the company logo in the intro and end card" + +### Log Classification +In the learning log entry, record: +```json +"assets_classified": [{"type": "image", "route": "attach", "accessible": true, "reason": "product screenshot"}] +``` + +## Rules + +- **Never ask the user which path unless genuinely 50/50.** You're the producer. Make the call. +- **When in doubt, do both (A+B).** Over-providing costs nothing. +- **Always describe attached assets in the prompt.** Uploading without description = ignored. +- **Auth-walled content is YOUR job.** Bridge the gap between your access and Video Agent's. +- **URLs that fail:** Try `web_fetch`. If login/paywall/404 → tell the user, ask for content directly. Never silently fabricate. +- **HTML URLs cannot go in `files[]`.** Video Agent rejects `text/html`. Web pages are ALWAYS Path A only. +- **Prefer download→upload→asset_id** over `files[]{url}`. HeyGen's servers often blocked by CDN/WAF. diff --git a/heygen-avatar/references/avatar-creation.md b/heygen-avatar/references/avatar-creation.md new file mode 100644 index 0000000..df52e62 --- /dev/null +++ b/heygen-avatar/references/avatar-creation.md @@ -0,0 +1,178 @@ +# Avatar Creation API Surface + +This guide expands `heygen-avatar` Phase 2 (avatar creation) and Phase 3 +(voice selection) with the full API surface, field mappings, and file +input formats. The SKILL.md gives the high-level workflow; this file is +the reference when you need exact arguments, edge cases, or alternative +creation paths. 
+ +For *avatar discovery* (finding an existing avatar at video time), see +[`heygen-video/references/avatar-discovery.md`](https://github.com/heygen-com/skills/blob/master/heygen-video/references/avatar-discovery.md). + +--- + +## Avatar Creation: Three Types + +`heygen-avatar` Phase 2 supports three creation types. Pick based on what +the user provides: + +| User input | Type | API | +|---|---|---| +| A photo of a real person | `photo` | `create_photo_avatar` | +| A description of an appearance | `prompt` | `create_prompt_avatar` | +| A short video recording of a real person | `video` | `create_digital_twin` | + +All three accept an optional `avatar_group_id`: +- **Omit it** to create a new character (new group). +- **Include it** to add a new look (variation) to an existing character. + +Always use Mode 2 (with `avatar_group_id`) when the avatar already exists +and you're creating a variant (different outfit, orientation fix, bg +change). Only use Mode 1 (new character) for genuinely new identities. + +### Photo avatar (from user's photo) + +**MCP:** `create_photo_avatar(name=, file=, avatar_group_id=)` + +**CLI:** +```bash +heygen avatar create -d '{ + "type": "photo", + "name": "My Avatar", + "file": {"type": "url", "url": "https://example.com/headshot.jpg"}, + "avatar_group_id": "" +}' +``` + +Photo requirements: +- JPEG or PNG +- Min 512x512 +- Clear front-facing face +- Good lighting + +### AI-generated avatar (from text prompt) + +**MCP:** `create_prompt_avatar(name=, prompt=, avatar_group_id=)` + +**CLI:** +```bash +heygen avatar create -d '{ + "type": "prompt", + "name": "Tech Presenter", + "prompt": "Young professional woman, modern workspace, confident smile", + "avatar_group_id": "" +}' +``` + +Prompt limit: 1000 characters (the API spec says 200 but the actual +enforced limit is 1000). Be descriptive — include style, features, +expression, lighting. + +Optional: up to 3 `reference_images` to anchor the generated appearance. 
+ +### Video avatar / digital twin (from a short recording) + +**MCP:** `create_digital_twin(name=, file=, avatar_group_id=)` + +**CLI:** +```bash +heygen avatar create -d '{ + "type": "video", + "name": "My Video Avatar", + "file": {"type": "asset_id", "asset_id": ""}, + "avatar_group_id": "" +}' +``` + +--- + +## File Input Formats + +`file` accepts three forms: + +```jsonc +// Public URL (no auth, no paywall) +{ "type": "url", "url": "https://example.com/headshot.jpg" } + +// Pre-uploaded asset (from `heygen asset create --file `) +{ "type": "asset_id", "asset_id": "" } + +// Inline base64 +{ "type": "base64", "data": "", "content_type": "image/png" } +``` + +For when each is appropriate, see +[`references/asset-routing.md`](asset-routing.md). + +--- + +## Response Shape + +All three types return: +```jsonc +{ + "avatar_item": { + "id": "", // ephemeral — the specific look + "group_id": "" // stable — the character identity + } +} +``` + +- `id` is the **look_id** — what you pass downstream as `avatar_id` to + `create_video_agent` for video generation. +- `group_id` is the **character identity** — stable across looks. Save + this in the AVATAR-.md file. Always resolve fresh look_ids at + video time via `list_avatar_looks(group_id=)` rather than caching + a specific look_id. 
+ +--- + +## Identity Field → HeyGen Enum Mapping + +When building a prompt-based avatar, map identity attributes to these +HeyGen enums: + +- **age**: Young Adult | Early Middle Age | Late Middle Age | Senior | Unspecified +- **gender**: Man | Woman | Unspecified +- **ethnicity**: White | Black | Asian American | East Asian | South East Asian | South Asian | Middle Eastern | Pacific | Hispanic | Unspecified +- **style**: Realistic | Pixar | Cinematic | Vintage | Noir | Cyberpunk | Unspecified +- **orientation**: square | horizontal | vertical +- **pose**: half_body | close_up | full_body + +--- + +## Voice Selection (during avatar setup) + +After the avatar look is created, pair it with a voice. Two paths: + +### Path A — Voice Design (preferred) + +Find matching voices via semantic search using the Voice section from +the AVATAR file. This searches HeyGen's full voice library. No new +voices are generated and no quota is consumed. + +**Language matching:** The voice design prompt should specify the target +language from `user_language`. Example for Japanese: `"A calm, warm +female voice. Professional but approachable. Japanese speaker."` This +ensures semantic search returns voices in the correct language. + +### Path B — Voice Browse (fallback) + +For manual catalog browsing: + +**MCP:** `list_voices(type=private)` then `list_voices(type=public, language=, gender=)` + +**CLI:** +```bash +heygen voice list --type private --limit 20 +heygen voice list --type public --engine starfish --language en --gender female --limit 20 +``` + +**ALWAYS show a playable voice preview.** Each voice response includes +`preview_audio_url` — share it before committing. + +**Handling missing/broken previews:** Some voices return bare `s3://` +paths or `null`. 
When this happens: note "(no preview available)" and +offer to generate a short TTS sample via `create_speech` (MCP) or +`heygen voice speech create --text "" --voice-id +--input-type plain_text --language en --locale en-US` (CLI). diff --git a/heygen-avatar/references/troubleshooting.md b/heygen-avatar/references/troubleshooting.md new file mode 100644 index 0000000..b9ad832 --- /dev/null +++ b/heygen-avatar/references/troubleshooting.md @@ -0,0 +1,151 @@ +# Known Issues & Troubleshooting + +## Known Bug: Video Agent "Talking Photo Not Found" + +**Error message:** "The Talking Photo for the current narrator could not be found." + +**Root Cause:** Confirmed as a Video Agent backend bug by HeyGen engineering (Jerry Yan). Affects `video_avatar` type narrators and stock avatar auto-selection. + +**Workaround:** +- Prefer explicit `avatar_id` over auto-selection +- If `video_avatar` fails, retry with a `studio_avatar` or `photo_avatar` + +**Status:** Fix in progress at HeyGen. + +--- + +## Weird Pauses / Unnatural Silence in Videos + +**Symptom:** Video has awkward pauses or breaks between sentences. Narrator stops speaking but video continues with dead air before next line. + +**Root Cause:** When Video Agent receives a script shorter than the target duration, it treats the script as verbatim speech and inserts silence/breaks to stretch it to the exact requested duration. It won't ad-lib or expand — it just pads with dead air. + +**Fix:** Add this directive to EVERY prompt: +> "This script is a concept and theme to convey — not a verbatim transcript. You have full creative freedom to expand, elaborate, add examples, and fill the duration naturally. Do not pad with silence or pauses." + +This tells Video Agent it can expand the script naturally instead of treating it as a fixed speech transcript. Per Jerry Yan: "If you tell it it's not a script to be strictly followed but concept or theme or give it green light to expand the script it will do well." 
+ +**Status:** Skill-side fix (prompt directive). HeyGen is also tuning the default behavior but the explicit directive is the reliable workaround. + +--- + +## Duration Variance (Expected Behavior) + +Video Agent controls final video timing internally. Duration accuracy ranges from 79-174% of target across testing. This is NOT a bug. + +**Mitigation:** Variable padding multipliers (Script): +- ≤30s target: 1.6x padding +- 31-119s target: 1.4x padding +- ≥120s target: 1.3x padding + +With explicit `avatar_id`: ~97% duration accuracy average. +Without `avatar_id`: ~80% accuracy average. + +--- + +## Frame Check: Video Agent Not Applying Framing + +If the Video Agent ignores the FRAMING NOTE or BACKGROUND NOTE and produces black bars, letterboxing, or mismatched framing: + +1. **Ensure the note is appended at the END of the prompt**, after all other content (script, style block, etc.). Video Agent processes instructions sequentially and late-prompt directives have the strongest effect. +2. **Check that the correction note was actually appended.** Log the final prompt text and verify the FRAMING NOTE / BACKGROUND NOTE block is present. +3. **photo_avatar does NOT need BACKGROUND NOTE.** Video Agent generates avatar + environment together for photo_avatars. Only append framing notes for orientation mismatches. Background notes are for studio_avatars with transparent/empty backgrounds only. + +--- + +## Stock Avatar Auto-Selection Unreliable + +When no `avatar_id` is provided, Video Agent uses narrator tags (`{{@narrator_l0ug91}}`) that sometimes fail to resolve during render. + +**Fix:** Always use explicit `avatar_id` from discovery. The only exception is Quick Shot mode where the user explicitly wants speed over reliability. + +--- + +## HTML URLs in files[] Rejected + +Video Agent rejects `text/html` content type in the `files[]` array. Web pages (blogs, docs sites, articles) must be handled via Path A (contextualize) only. 
+ +**What works in files[]:** Direct file URLs (PDFs, images, videos) — but prefer download→upload→asset_id since CDN/WAF often blocks HeyGen's servers. + +--- + +## Avatar Not Ready for Video Generation + +**Symptom:** Video generation fails or produces errors immediately after creating a new avatar. The avatar exists in the HeyGen dashboard but videos referencing it fail. + +**Root Cause:** Avatar creation is asynchronous. `heygen avatar create` (and `create_photo_avatar` / `create_prompt_avatar` MCP tools) return success immediately, but the avatar image is still being processed. If you submit a video request before processing completes, it fails. + +**Detection:** Poll with `heygen avatar looks list --group-id ` (or MCP `list_avatar_looks`). The avatar is NOT ready until: +- `preview_image_url` is non-null +- `image_width` and `image_height` are non-zero + +At the group level (`heygen avatar list`), an unready avatar will have no `preview_image_url` on the group object. + +**Fix:** Poll every 10 seconds after creation, wait for preview URL to appear. Typical: 30-90s for photo avatars, 1-3 min for prompt avatars. Timeout at 5 min. + +**The heygen-avatar skill handles this automatically.** If you bypass the skill, you must implement this polling yourself. + +--- + +## Interactive Sessions Reliability + +Interactive sessions (created without `--wait` and iterated via `heygen video-agent send`) have known issues: +- Sessions frequently stuck at `processing` status +- `reviewing` state may never be reached +- Follow-up messages fail with timing errors +- Stop command may not trigger video generation + +**Recommendation:** Use one-shot mode for production. Interactive sessions documented for future use once HeyGen stabilizes the API. + +--- + +## Error Code → Action + +Stable CLI exit codes tell you what to do without parsing messages: + +| Exit | Class | Action | +|------|-------|--------| +| `0` | ok | Continue | +| `1` | API / network | Retry with backoff. 
If persistent, check `--verbose` or contact HeyGen support. | +| `2` | usage | You passed a bad flag. Run `--help` on the command, fix the args, retry. | +| `3` | auth | Re-auth: `heygen auth login` or set `HEYGEN_API_KEY`. Verify with `heygen auth status`. | +| `4` | timeout under `--wait` | Operation still running server-side. stdout contains the partial resource (with `session_id` or `video_id`) — resume polling with `heygen video-agent get ` or `heygen video get `. Do NOT re-submit. | + +Common API-error hints (surfaced in stderr envelope `{error:{code,message,hint}}`): + +- `402` / insufficient credits → tell the user their HeyGen plan is out of credits. +- `403` / forbidden → the resource is not owned by the caller (wrong `group_id`, private avatar). +- `404` / not found → ID is stale. Re-fetch via `heygen avatar list`, `heygen video-agent get`, etc. + +--- + +## Polling Cadence + +When `--wait` isn't an option (e.g., you want to return control to the user between polls), use a back-off schedule rather than a fixed interval: + +| Age of job | Poll interval | +|------------|---------------| +| 0–2 min | every 10s | +| 2–5 min | every 30s | +| 5–10 min | every 60s | +| > 10 min | surface "taking longer than usual" once, keep polling at 60s, give up at 15 min | + +If a job is stuck at the same status for >5 min, that's a signal to surface a status update or check the dashboard. + +**Prefer `--wait`** on creation commands. It handles the polling internally and returns the final resource or exits `4` with a resumable `session_id` / `video_id` on timeout. + +--- + +## Direct Video vs Video Agent — Which Endpoint? + +Two ways to generate a video. Different pricing, different trade-offs. 
+ +| | **Direct Video** | **Video Agent** | +|---|-------------------|-----------------| +| Command / Tool | `heygen video create` / no MCP tool yet | `heygen video-agent create` / `create_video_agent` | +| Input | Full script + avatar + voice + scene JSON | Prompt + optional avatar/voice/style | +| Control | You author every scene | Video Agent plans scenes, pacing, motion | +| Pricing | ~$0.0333/sec | ~$0.10/sec | +| When to use | Deterministic multi-scene videos, tight control, bulk generation | Creative intros, messages, "make a video about X" requests | + +The default in this skill is **Video Agent** — it's what `heygen-video` is built around. Drop to Direct Video only for batch or highly scripted workflows where Agent's autonomy is overhead. diff --git a/heygen-video/SKILL.md b/heygen-video/SKILL.md index c845c80..0e09970 100644 --- a/heygen-video/SKILL.md +++ b/heygen-video/SKILL.md @@ -1,4 +1,5 @@ --- +version: 2.3.0 # x-release-please-version name: heygen-video description: | Generate HeyGen presenter videos via the v3 Video Agent pipeline — handles Frame Check @@ -45,13 +46,53 @@ You are a video producer. Not a form. Not a CLI wrapper. A producer who understa ## API Mode Detection -See the root [SKILL.md](../SKILL.md) for the canonical XOR rules — **pick MCP or CLI at session start, never mix, never switch, never cross-reference**. +**Pick one transport at session start. Never mix, never switch mid-session, never narrate the choice.** -Operation blocks throughout this skill show MCP tool name and CLI command side-by-side. **Read only the column for your detected mode.** If MCP is available, use the `mcp__heygen__*` tools; ignore the CLI column. If CLI is available, run `heygen ...` commands; ignore the MCP column. Never invoke anything from the other column. +Detect in this order: -**Do not look up API endpoints.** MCP tool names are the contract in MCP mode. `heygen --help` is the contract in CLI mode. 
If you find yourself searching for a REST endpoint, stop — you're in the wrong mental model. +1. **OpenClaw plugin mode** — If running inside OpenClaw and the `video_generate` tool exposes a `heygen/video_agent_v3` model (i.e. the user has [`@heygen/openclaw-plugin-heygen`](https://github.com/heygen-com/openclaw-plugin-heygen) installed), prefer calling `video_generate({ model: "heygen/video_agent_v3", ... })` directly for video generation. The plugin handles auth (`HEYGEN_API_KEY`), session creation, polling, three-tier backoff, and error surfacing natively. Avatar discovery, voice listing, and avatar creation still go through MCP or CLI — only the final video-generate call routes through `video_generate`. Frame Check still runs before submission. +2. **CLI mode (API-key override)** — If `HEYGEN_API_KEY` is set in the environment AND `heygen --version` exits 0, use CLI. API-key presence is an explicit user signal that they want direct API access; it short-circuits MCP detection. No question asked. +3. **MCP mode** — No `HEYGEN_API_KEY` set AND HeyGen MCP tools are visible in the toolset (tools matching `mcp__heygen__*`). OAuth auth, uses existing plan credits. +4. **CLI mode (fallback)** — MCP tools NOT available AND `heygen --version` exits 0. Auth via `heygen auth login` (persists to `~/.heygen/credentials`). +5. **Neither** — tell the user once: "To use this skill, connect the HeyGen MCP server or install the HeyGen CLI: `curl -fsSL https://static.heygen.ai/cli/install.sh | bash` then `heygen auth login`." -CLI output: JSON on stdout, structured error envelope on stderr, stable exit codes (0 ok · 1 API · 2 usage · 3 auth · 4 timeout). See [../references/troubleshooting.md](../references/troubleshooting.md) for error → action mapping and polling cadence. Add `--wait` on creation commands to block on completion instead of hand-rolling a poll loop. +**Hard rules:** +- **Never call `curl api.heygen.com/...`** — every mode routes through its own surface. 
+- **OpenClaw plugin mode: only use `video_generate` for the generate step.** Never run `heygen ...` CLI for the generate call when the plugin is available. Avatar/voice discovery still uses MCP or CLI. +- **MCP mode: only use `mcp__heygen__*` tools.** Never run `heygen ...` CLI commands. The MCP tool name IS the API. +- **CLI mode: only use `heygen ...` commands.** Run `heygen --help` to discover arguments. +- **Never cross over.** Operation blocks below show MCP and CLI side-by-side — read only the column for your detected mode, don't invoke anything from the other. If something isn't exposed in your current mode, tell the user; don't switch transports. + +### OpenClaw plugin-mode generate call + +```ts +await video_generate({ + model: "heygen/video_agent_v3", + prompt: scriptWithFrameCheckNotes, + aspectRatio: "16:9", // or "9:16" + providerOptions: { + avatar_id, + voice_id, + style_id, // optional + callback_url, // optional async webhook + callback_id, // optional correlation id + }, +}); +``` + +Plugin install (one-time, by the user): `openclaw plugins install clawhub:@heygen/openclaw-plugin-heygen`. Plugin docs: <https://github.com/heygen-com/openclaw-plugin-heygen>. + +### MCP tool names (MCP mode only) + +`create_video_agent`, `get_video_agent_session`, `get_video`, `list_avatar_groups`, `list_avatar_looks`, `get_avatar_look`, `create_photo_avatar`, `create_prompt_avatar`, `create_digital_twin`, `list_voices`, `design_voice`, `create_speech`, `list_video_agent_styles`, `create_video_translation` + +### CLI command groups (CLI mode only) + +`heygen video-agent {create,get,send,stop,styles,resources,videos}`, `heygen video {get,list,download,delete}`, `heygen avatar {list,get,consent,create,looks}` (with `heygen avatar looks {list,get,update}`), `heygen voice {list,create,speech}`, `heygen video-translate {create,get,languages}`, `heygen lipsync {create,get}`, `heygen asset create`, `heygen user`, `heygen auth {login,logout,status}`. Every subcommand supports `--help` — that's your reference.
Run `heygen --help` to see the full noun list. + +**Do not look up API endpoints.** There is no `api-reference.md` lookup step. MCP mode uses tool names. CLI mode uses `heygen ... --help`. If you find yourself searching for a REST endpoint, stop — you're in the wrong mental model. + +CLI output: JSON on stdout, `{error:{code,message,hint}}` envelope on stderr, exit codes `0` ok · `1` API · `2` usage · `3` auth · `4` timeout. See [references/troubleshooting.md](references/troubleshooting.md) for error → action mapping and polling cadence. Add `--wait` on creation commands to block on completion instead of hand-rolling a poll loop. --- @@ -91,7 +132,7 @@ Two paths for every asset: - **Path B (Attach):** Upload to HeyGen via `heygen asset create --file ` (or include as `files[]` entries on video-agent create). For visuals the viewer should see. - **A+B (Both):** Summarize for script AND attach original. -📖 **Full routing matrix and upload examples → [../references/asset-routing.md](../references/asset-routing.md)** +📖 **Full routing matrix and upload examples → [references/asset-routing.md](references/asset-routing.md)** **Key rules:** - HTML URLs cannot go in `files[]` (Video Agent rejects `text/html`). Web pages are always Path A. @@ -282,7 +323,7 @@ Snap cuts, flash frames. Zero breathing room. ### Avatar -📖 **Full avatar discovery flow, creation APIs, voice selection → [../references/avatar-discovery.md](../references/avatar-discovery.md)** +📖 **Full avatar discovery flow, creation APIs, voice selection → [references/avatar-discovery.md](references/avatar-discovery.md)** **AVATAR file resolution (run before any external avatar lookup):** @@ -388,7 +429,7 @@ Include an intro sequence, outro sequence, and chapter breaks using Motion Graph **Brand-specific:** Include hex codes (`#1E40AF`), font families (`Inter`), and which media types to prefer per scene type. -📖 **Style presets (Minimalistic, Cinematic, Bold, etc.) 
→ [../references/official-prompt-guide.md](../references/official-prompt-guide.md)** +📖 **Style presets (Minimalistic, Cinematic, Bold, etc.) → [references/official-prompt-guide.md](references/official-prompt-guide.md)** ### Media Type Selection @@ -402,9 +443,9 @@ Video Agent supports three media types. Guide it explicitly or it guesses (often Be explicit in the prompt: "Use motion graphics for the statistics, stock footage for the office scene, AI-generated visuals for the futuristic concept." -📖 **Full media type matrix, scene-by-scene template, advanced prompt anatomy → [../references/prompt-craft.md](../references/prompt-craft.md)** -📖 **Named styles (Deconstructed, Swiss Pulse, etc.) → inlined in Style Selection above** -📖 **Motion vocabulary and B-roll → [../references/motion-vocabulary.md](../references/motion-vocabulary.md)** +📖 **Full media type matrix, scene-by-scene template, advanced prompt anatomy → [references/prompt-craft.md](references/prompt-craft.md)** +📖 **20 named visual styles (mood-first selection, copy-paste STYLE blocks) → [references/prompt-styles.md](references/prompt-styles.md)** +📖 **Motion vocabulary and B-roll → [references/motion-vocabulary.md](references/motion-vocabulary.md)** ### Orientation @@ -467,7 +508,7 @@ FRAMING NOTE: The selected avatar image is in {source} orientation but this vide BACKGROUND NOTE: The selected avatar has no background or a transparent backdrop. Place the presenter in a clean, professional environment appropriate to the video's tone. For business/tech content: modern studio with soft lighting and subtle depth. For casual content: bright, minimal space with natural light. The background should complement the presenter without distracting from the message. 
``` -📖 **Full correction templates and stacking matrix → [../references/frame-check.md](../references/frame-check.md)** +📖 **Full correction templates and stacking matrix → [references/frame-check.md](references/frame-check.md)** --- @@ -571,4 +612,4 @@ If user wants changes: adjust prompt based on feedback, re-generate. Never retry - **One idea per video.** Single-topic produces dramatically better results. - **Write for the ear.** If you wouldn't say it to a friend, rewrite it. -📖 **Known issues → [../references/troubleshooting.md](../references/troubleshooting.md)** +📖 **Known issues → [references/troubleshooting.md](references/troubleshooting.md)** diff --git a/heygen-video/references/asset-routing.md b/heygen-video/references/asset-routing.md new file mode 100644 index 0000000..b7eb708 --- /dev/null +++ b/heygen-video/references/asset-routing.md @@ -0,0 +1,86 @@ +# Asset Handling — The Classification Engine + +When the user provides files, URLs, or references, route each asset to the right path. The user should NEVER have to think about this. + +## Two Paths + +| Path | What happens | When to use | +|------|-------------|-------------| +| **A: Contextualize → Prompt** | Read/analyze the asset, extract key info, bake into script. Video Agent never sees the original. | Reference material, auth-walled content, documents where the *information* matters more than the *visual*. | +| **B: Attach to API** | Upload the raw file via `files[]`. Video Agent analyzes, extracts graphics, uses as frames/B-roll. | Screenshots, branded assets, PDFs with important visual layouts, images the viewer should literally see. | +| **A+B: Both** | Contextualize for script quality AND attach for visual use. | Long docs where you need to summarize but Video Agent should also have the full source. | + +## Classification Flow + +``` +1. Can Video Agent access this directly? 
+ - Public URL (no auth, no paywall) → YES + - Private/internal URL → NO + - Local file → NO (must upload first) + +2. Should the viewer SEE this asset? + - Screenshot, logo, product image, chart → YES → Path B + - Research doc, article, context material → NO → Path A + - Ambiguous → Path A+B + +3. Is the content too long for the prompt? + - Short (< 500 words) → fits in prompt + - Long (> 500 words) → summarize key points, attach full doc +``` + +## Decision Matrix + +| Asset Type | Publicly Accessible? | Show On Screen? | Route | +|-----------|---------------------|----------------|-------| +| Screenshot / image | N/A | Yes | **B: Attach** + describe in prompt as B-roll | +| Logo / brand asset | N/A | Yes | **B: Attach** + anchor to intro/outro | +| Public URL to file (PDF, image, video) | Yes | Maybe | **B: Download → upload via `/v3/assets` → pass `asset_id`** + summarize | +| Public URL to web page (HTML) | Yes | No | **A: Fetch and contextualize only.** Do NOT pass HTML URLs in `files[]`. | +| Auth-walled URL (requires login) | No | No | **A: Ask the user to paste the content.** Never fabricate. | +| PDF (short, text-heavy) | N/A | No | **A+B: Extract key points** + attach | +| PDF (long, visual-rich) | N/A | Maybe | **B: Attach** + summarize top points | +| Raw data / spreadsheet | N/A | Partially | **A: Analyze and describe** key stats. Attach if charts should appear. | + +## Executing Routes + +### Path A (Contextualize) +- URLs: Use `web_fetch` to retrieve publicly accessible content +- For auth-walled content you cannot access: ask the user to paste the text directly +- Extract 3-5 most important points relevant to the video +- Weave naturally into the script. Don't dump. Integrate. + +### Path B (Attach) +Upload to HeyGen: + +**MCP:** upload via the asset tool (depends on environment). +**CLI:** `heygen asset create --file /path/to/file.png` + +Max 32MB per file. Returns JSON with the new `asset_id`. 
+ +Or pass inline in `files[]`: +```json +{"type": "url", "url": "https://example.com/image.png"} +{"type": "asset_id", "asset_id": "<asset_id>"} +{"type": "base64", "data": "<base64-encoded data>", "content_type": "image/png"} +``` + +### Describe Asset Usage in Prompt +Be SPECIFIC: +- "Use the uploaded dashboard screenshot as B-roll when discussing analytics" +- "Display the company logo in the intro and end card" + +### Log Classification +In the learning log entry, record: +```json +"assets_classified": [{"type": "image", "route": "attach", "accessible": true, "reason": "product screenshot"}] +``` + +## Rules + +- **Never ask the user which path unless genuinely 50/50.** You're the producer. Make the call. +- **When in doubt, do both (A+B).** Over-providing costs nothing. +- **Always describe attached assets in the prompt.** Uploading without description = ignored. +- **Auth-walled content is YOUR job.** Bridge the gap between your access and Video Agent's. +- **URLs that fail:** Try `web_fetch`. If login/paywall/404 → tell the user, ask for content directly. Never silently fabricate. +- **HTML URLs cannot go in `files[]`.** Video Agent rejects `text/html`. Web pages are ALWAYS Path A only. +- **Prefer download→upload→asset_id** over `files[]{url}`. HeyGen's servers are often blocked by CDN/WAF. diff --git a/heygen-video/references/avatar-discovery.md b/heygen-video/references/avatar-discovery.md new file mode 100644 index 0000000..2e52344 --- /dev/null +++ b/heygen-video/references/avatar-discovery.md @@ -0,0 +1,179 @@ +# Avatar Discovery & Voice Selection (heygen-video) + +This guide covers *avatar discovery for video generation* — how heygen-video +finds an appropriate presenter (or skips presenter entirely) before calling +the Video Agent. For *avatar creation*, see `heygen-avatar` and +[`heygen-avatar/references/avatar-creation.md`](https://github.com/heygen-com/skills/blob/master/heygen-avatar/references/avatar-creation.md).
+ +## Path 0: Resolve workspace AVATAR files first + +Before any HeyGen catalog lookup, check the workspace root for an +applicable `AVATAR-*.md` file. These are written by `heygen-avatar` +and contain `Group ID` + `Voice ID` ready to use, with no API call +needed. + +Resolution precedence: + +| Request signal | File to read | +|---|---| +| Named subject ("video with Eve", "Cleo's update") | `AVATAR-<NAME>.md` | +| Agent self-reference ("video of yourself", "give us your update") | `AVATAR-AGENT.md` (symlink) | +| User self-reference ("video of me", "my video update") | `AVATAR-USER.md` (symlink) | +| No subject in request | Skip to Path A | + +`AVATAR-AGENT.md` and `AVATAR-USER.md` are role-based symlinks maintained +by `heygen-avatar` Phase 5; they resolve to the current agent's / user's +named AVATAR file at read time. Treat them like any other AVATAR file +once read. + +If the resolved file has a populated HeyGen section, extract `Group ID` +and `Voice ID` and proceed to Frame Check. Skip Path A entirely. If the +file exists but the HeyGen section is empty, run `heygen-avatar` Phase 2 +first. + +If no file applies (no name match, no role alias, generic catalog +browsing requested) — fall through to Path A below. + +## Path A: Discover Existing Avatars + +### A1: Check for private avatars first + +**If user specifies an avatar by name** (e.g. "use Eve's Podcast look"), take the fast path: + +**MCP:** `list_avatar_looks(ownership=private)` — filter client-side by name match. +**CLI:** +```bash +heygen avatar looks list --ownership private --limit 50 +``` +Avoids the 2-call group→looks pattern. + +**If user wants to browse**, use the group-first flow: + +**MCP:** +1. `list_avatar_groups(ownership=private)` — list groups (each group = one person) +2.
`list_avatar_looks(group_id=<group_id>)` — show looks for chosen group + +**CLI:** +```bash +heygen avatar list --ownership private --limit 50 +heygen avatar looks list --group-id <group_id> --limit 50 +``` + +Each look has an `id` — this is the `avatar_id` you pass downstream. + +Avatar types: `studio_avatar`, `video_avatar`, `photo_avatar`. Photo avatars support `motion_prompt` and `expressiveness`. + +**ALWAYS show the preview image** when presenting an avatar look. Each look response includes `preview_image_url` — display inline. + +### A2: Check last-used avatar + +Check `heygen-video-log.jsonl` for last used avatar_id. If found: + +**MCP:** `get_avatar_look(look_id=<avatar_id>)` +**CLI:** `heygen avatar looks get --look-id <avatar_id>` + +Show preview image: "Last time you used [Avatar Name]. Use her again?" + +### A3: Avatar conversation + +Ask: "Do you want a visible presenter, or voice-over only?" + +If voice-over only → no `avatar_id`. State in prompt: "Voice-over narration only." + +If presenter wanted, present private avatars first. For public/stock avatars, browse by group: + +**MCP:** `list_avatar_groups(ownership=public)` +**CLI:** +```bash +heygen avatar list --ownership public --limit 20 +``` + +Show group names + one representative image. Let the user pick a person. + +**MCP:** `list_avatar_looks(group_id=<group_id>)` +**CLI:** +```bash +heygen avatar looks list --group-id <group_id> --limit 10 +``` + +**Why group-first:** The flat `heygen avatar looks list --ownership public` call returns 50+ results for only 3 unique people per page. Group-level browsing (2 calls) gives much better discovery UX. + +### A4: Voice direction + +After avatar is settled, confirm voice preferences (accent, delivery style, language). + +**ALWAYS show a playable voice preview.** Each voice response includes `preview_audio_url` — share it. + +**Handling missing/broken previews:** Some voices return bare `s3://` paths or `null`.
When this happens: note "(no preview available)" and offer to generate a short TTS sample via `create_speech` (MCP) or `heygen voice speech create --text "" --voice-id --input-type plain_text --language en --locale en-US` (CLI). + +--- + +## Path B: Create a New Avatar + +If no existing avatar fits and the user wants one created, route to the +`heygen-avatar` skill. See +[`heygen-avatar/references/avatar-creation.md`](https://github.com/heygen-com/skills/blob/master/heygen-avatar/references/avatar-creation.md) +for the full creation API surface (photo / prompt / digital twin), file +input formats, and identity field mappings. + +After `heygen-avatar` finishes, an `AVATAR-.md` file is written and +heygen-video resumes here at Path 0 to pick it up. + +--- + +## Path C: Direct Image (Simplest for One-Off) + +Skip avatar creation. Pass `image_url` directly: + +**MCP:** `create_video_from_image(image_url=, script=