diff --git a/packages/cli/src/media-config.ts b/packages/cli/src/media-config.ts
index eccb0a8..c723f53 100644
--- a/packages/cli/src/media-config.ts
+++ b/packages/cli/src/media-config.ts
@@ -14,10 +14,21 @@
 
 import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
 import { join } from 'node:path';
-import { resolveMinimaxCredentials, type MinimaxCredentials } from '@html-video/core';
+import {
+  resolveMinimaxCredentials,
+  type MinimaxCredentials,
+  resolveFishAudioCredentials,
+  type FishAudioCredentials,
+} from '@html-video/core';
+
+/** Which provider synthesizes narration. Music is always MiniMax (FishAudio
+ *  has no music generation). Defaults to 'minimax' for backward compat. */
+export type NarrationProvider = 'minimax' | 'fishaudio';
 
 interface MediaConfig {
   minimax?: { apiKey?: string; baseUrl?: string };
+  fishaudio?: { apiKey?: string; baseUrl?: string };
+  narrationProvider?: NarrationProvider;
 }
 
 export class MediaConfigStore {
@@ -86,6 +97,67 @@ export class MediaConfigStore {
     }
     return resolveMinimaxCredentials();
   }
+
+  // --- FishAudio (narration only; no region — single global host) ----------
+
+  /** Settings-UI view of the FishAudio setup: whether a key exists, where it
+   *  came from (config file wins over env), a masked key and the base URL. */
+  getFishAudioStatus(): { configured: boolean; source: 'config' | 'env' | 'none'; maskedKey: string; baseUrl: string } {
+    const stored = this.read().fishaudio;
+    // A key saved in the config file takes priority over anything in the environment.
+    if (stored?.apiKey) {
+      return { configured: true, source: 'config', maskedKey: mask(stored.apiKey), baseUrl: stored.baseUrl ?? '' };
+    }
+    const fromEnv = resolveFishAudioCredentials();
+    return fromEnv
+      ? { configured: true, source: 'env', maskedKey: mask(fromEnv.apiKey), baseUrl: fromEnv.baseUrl }
+      : { configured: false, source: 'none', maskedKey: '', baseUrl: '' };
+  }
+
+  /** Store a trimmed FishAudio key; a non-blank base URL is saved alongside it. */
+  setFishAudio(apiKey: string, baseUrl?: string): void {
+    const config = this.read();
+    const trimmedUrl = (baseUrl ?? '').trim();
+    // Spread the URL in only when present so no empty `baseUrl` key is written.
+    config.fishaudio = { apiKey: apiKey.trim(), ...(trimmedUrl ? { baseUrl: trimmedUrl } : {}) };
+    this.write(config);
+  }
+
+  /** Drop the saved FishAudio entry from the config; env credentials are not touched here. */
+  clearFishAudio(): void {
+    const config = this.read();
+    delete config.fishaudio;
+    this.write(config);
+  }
+
+  /** Resolve usable FishAudio creds: config file first, then env. The model is
+   *  always env-controlled (FISH_AUDIO_MODEL). A stored key that is blank after
+   *  trimming (e.g. a hand-edited config) is ignored rather than crashing. */
+  resolveFishAudio(): FishAudioCredentials | null {
+    const cfg = this.read().fishaudio;
+    const storedKey = (cfg?.apiKey ?? '').trim();
+    // Inject the stored key into the env resolver so model + base defaults are
+    // computed in one place; fall back to plain env when there is no usable key.
+    const ref = storedKey ? resolveFishAudioCredentials({ ...process.env, FISH_AUDIO_API_KEY: storedKey }) : null;
+    if (!ref) return resolveFishAudioCredentials();
+    // A config baseUrl wins over the env/default one when present.
+    const baseUrl = (cfg?.baseUrl || '').trim().replace(/\/$/, '') || ref.baseUrl;
+    return { apiKey: storedKey, baseUrl, model: ref.model };
+  }
+
+  // --- Active narration provider -------------------------------------------
+
+  /** Active narration backend; any stored value other than 'fishaudio' reads as 'minimax'. */
+  getNarrationProvider(): NarrationProvider {
+    return this.read().narrationProvider !== 'fishaudio' ? 'minimax' : 'fishaudio';
+  }
+
+  /** Save the narration backend; any value other than 'fishaudio' is stored as 'minimax'. */
+  setNarrationProvider(provider: NarrationProvider): void {
+    const config = this.read();
+    config.narrationProvider = provider !== 'fishaudio' ? 'minimax' : 'fishaudio';
+    this.write(config);
+  }
 }
 
 function mask(key: string): string {
diff --git a/packages/cli/src/studio-server.ts b/packages/cli/src/studio-server.ts
index 6888cba..0ce3e34 100644
--- a/packages/cli/src/studio-server.ts
+++ b/packages/cli/src/studio-server.ts
@@ -11,7 +11,15 @@ import { randomUUID } from 'node:crypto';
 import { fileURLToPath } from 'node:url';
 import { tmpdir } from 'node:os';
 import type { CliContext } from './context.js';
-import { AssetStore, generateTts, generateMusic } from '@html-video/core';
+import {
+  AssetStore,
+  generateTts,
+  generateMusic,
+  generateFishTts,
+  listFishVoices,
+  type MinimaxCredentials,
+  type FishAudioCredentials,
+} from '@html-video/core';
 import { extractUrls, fetchSource } from './fetch-source.js';
 import { detectAll, findAgent, spawnAgent } from '@html-video/runtime';
 
@@ -433,23 +441,42 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
         };
         try {
           sse({ type: 'audio_started' });
-          const creds = ctx.mediaConfig.resolveMinimax();
-          if (!creds) {
+          const project = await ctx.orchestrator.load(projectId);
+          const soundtrack = { ...(project.soundtrack ?? {}) };
+          const wantMusic = !!body.music?.prompt?.trim();
+          const wantNarration = !!body.narration?.text?.trim();
+          if (!wantMusic && !wantNarration) {
+            sse({ type: 'audio_failed', message: 'Nothing to generate — provide a music prompt and/or narration text.' });
+            res.end();
+            return;
+          }
+
+          // Music is always MiniMax (FishAudio has no music generation).
+          const musicCreds = wantMusic ? ctx.mediaConfig.resolveMinimax() : null;
+          if (wantMusic && !musicCreds) {
             sse({
               type: 'audio_failed',
-              message:
-                'MiniMax API key not configured — add it in Settings → Audio (or set OD_MINIMAX_API_KEY).',
+              message: 'MiniMax API key not configured — add it in Settings → Audio (or set OD_MINIMAX_API_KEY).',
             });
             res.end();
             return;
           }
 
-          const project = await ctx.orchestrator.load(projectId);
-          const soundtrack = { ...(project.soundtrack ?? {}) };
-          const wantMusic = !!body.music?.prompt?.trim();
-          const wantNarration = !!body.narration?.text?.trim();
-          if (!wantMusic && !wantNarration) {
-            sse({ type: 'audio_failed', message: 'Nothing to generate — provide a music prompt and/or narration text.' });
+          // Narration provider is user-selectable (MiniMax or FishAudio).
+          const narrationProvider = ctx.mediaConfig.getNarrationProvider();
+          const narrationCreds = wantNarration
+            ? narrationProvider === 'fishaudio'
+              ? ctx.mediaConfig.resolveFishAudio()
+              : ctx.mediaConfig.resolveMinimax()
+            : null;
+          if (wantNarration && !narrationCreds) {
+            sse({
+              type: 'audio_failed',
+              message:
+                narrationProvider === 'fishaudio'
+                  ? 'FishAudio API key not configured — add it in Settings → Audio (or set FISH_AUDIO_API_KEY).'
+                  : 'MiniMax API key not configured — add it in Settings → Audio (or set OD_MINIMAX_API_KEY).',
+            });
             res.end();
             return;
           }
@@ -459,7 +486,7 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
             const music = await generateMusic({
               prompt: body.music!.prompt!.trim(),
               instrumental: body.music!.instrumental ?? true,
-              creds,
+              creds: musicCreds!,
             });
             const { asset } = await ctx.orchestrator.addBufferAsset(
               projectId,
@@ -474,13 +501,20 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
           }
 
           if (wantNarration) {
-            sse({ type: 'audio_progress', stage: 'narration', message: 'generating narration…' });
-            const nar = await generateTts({
-              text: body.narration!.text!.trim(),
-              ...(body.narration!.voiceId !== undefined && { voiceId: body.narration!.voiceId }),
-              ...(body.narration!.languageBoost !== undefined && { languageBoost: body.narration!.languageBoost }),
-              creds,
-            });
+            sse({ type: 'audio_progress', stage: 'narration', message: `generating narration (${narrationProvider})…` });
+            const nar =
+              narrationProvider === 'fishaudio'
+                ? await generateFishTts({
+                    text: body.narration!.text!.trim(),
+                    ...(body.narration!.voiceId ? { referenceId: body.narration!.voiceId } : {}),
+                    creds: narrationCreds as FishAudioCredentials,
+                  })
+                : await generateTts({
+                    text: body.narration!.text!.trim(),
+                    ...(body.narration!.voiceId !== undefined && { voiceId: body.narration!.voiceId }),
+                    ...(body.narration!.languageBoost !== undefined && { languageBoost: body.narration!.languageBoost }),
+                    creds: narrationCreds as MinimaxCredentials,
+                  });
             const { asset } = await ctx.orchestrator.addBufferAsset(
               projectId,
               nar.bytes,
@@ -632,6 +666,47 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
         return json(res, 200, ctx.mediaConfig.getMinimaxStatus());
       }
 
+      // FishAudio audio API config — mirrors MiniMax (no region; single host).
+      if (url.pathname === '/api/config/fishaudio' && m === 'GET') {
+        return json(res, 200, ctx.mediaConfig.getFishAudioStatus());
+      }
+      if (url.pathname === '/api/config/fishaudio' && m === 'POST') {
+        const body = (await readBody(req)) as { apiKey?: string; baseUrl?: string };
+        const key = (body.apiKey ?? '').trim();
+        if (!key) return json(res, 400, { error: 'apiKey is required' });
+        ctx.mediaConfig.setFishAudio(key, body.baseUrl);
+        return json(res, 200, ctx.mediaConfig.getFishAudioStatus());
+      }
+      if (url.pathname === '/api/config/fishaudio' && m === 'DELETE') {
+        ctx.mediaConfig.clearFishAudio();
+        return json(res, 200, ctx.mediaConfig.getFishAudioStatus());
+      }
+
+      // Active narration provider (which backend synthesizes voiceover).
+      if (url.pathname === '/api/config/narration-provider' && m === 'GET') {
+        return json(res, 200, { provider: ctx.mediaConfig.getNarrationProvider() });
+      }
+      if (url.pathname === '/api/config/narration-provider' && m === 'POST') {
+        const body = (await readBody(req)) as { provider?: string };
+        const provider = body.provider === 'fishaudio' ? 'fishaudio' : 'minimax';
+        ctx.mediaConfig.setNarrationProvider(provider);
+        return json(res, 200, { provider: ctx.mediaConfig.getNarrationProvider() });
+      }
+
+      // FishAudio voice search — proxies the account's own models server-side so
+      // the browser never sees the key. Returns a trimmed list for the picker.
+      if (url.pathname === '/api/fishaudio/voices' && m === 'GET') {
+        const creds = ctx.mediaConfig.resolveFishAudio();
+        if (!creds) return json(res, 400, { error: 'FishAudio API key not configured' });
+        try {
+          const voices = await listFishVoices({ creds, query: url.searchParams.get('q') ?? '' });
+          return json(res, 200, { voices });
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : String(err);
+          return json(res, 502, { error: msg });
+        }
+      }
+
       // Agents (detected on each call; cheap thanks to the in-process cache)
       if (url.pathname === '/api/agents' && m === 'GET') {
         const force = url.searchParams.get('force') === '1';
diff --git a/packages/core/package.json b/packages/core/package.json
index 5bac310..46a3d60 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -20,7 +20,7 @@
   "scripts": {
     "build": "tsc -p tsconfig.json",
     "typecheck": "tsc -p tsconfig.json --noEmit",
-    "test": "node --test test/"
+    "test": "npm run build && node --test --experimental-strip-types \"test/**/*.test.ts\""
   },
   "dependencies": {
     "@html-video/content-graph": "workspace:*",
diff --git a/packages/core/src/fishaudio.ts b/packages/core/src/fishaudio.ts
new file mode 100644
index 0000000..c381ab2
--- /dev/null
+++ b/packages/core/src/fishaudio.ts
@@ -0,0 +1,213 @@
+/**
+ * @html-video/core — FishAudio TTS provider (narration only).
+ *
+ * FishAudio (https://fish.audio) is a second narration backend alongside
+ * {@link ./minimax.ts}. Unlike MiniMax it does NOT generate music, so this
+ * module only exposes speech synthesis + a voice-listing helper.
+ *
+ * Shape differences from MiniMax that this module absorbs:
+ *   - the model is selected via an HTTP `model` header (s1 / s2-pro), not a
+ *     body field, and is configured through the environment (FISH_AUDIO_MODEL);
+ *   - `/v1/tts` returns the audio as a RAW binary body — no JSON envelope, no
+ *     hex string to decode (MiniMax wraps it in `base_resp` + hex);
+ *   - errors are surfaced as ordinary HTTP status codes (401/402/422);
+ *   - a single global host (no international/China region split).
+ *
+ * Credentials are read from the environment so the studio works without a
+ * config file; a missing key yields `null` from
+ * {@link resolveFishAudioCredentials} and callers report it gracefully.
+ */
+
+import { HtmlVideoError } from './errors.js';
+import type { TtsAudioResult } from './types/index.js';
+
+/** Default host. FishAudio is a single global endpoint (no region split). We
+ *  store the host only and append `/v1/tts` and `/model`; override via
+ *  FISH_AUDIO_BASE_URL (or the Studio Settings UI). */
+const FISH_DEFAULT_BASE_URL = 'https://api.fish.audio';
+/** Default speech model. `s1` is the fast general model; `s2-pro` adds
+ *  multi-speaker. Selected via the `model` header, configured by env. */
+const FISH_DEFAULT_MODEL = 's1';
+/** Hard ceiling for a single TTS request — a request that hasn't returned in
+ *  2 minutes is hung, not slow. */
+const FISH_REQUEST_TIMEOUT_MS = 120_000;
+
+export interface FishAudioCredentials { // everything one FishAudio request needs
+  apiKey: string; // sent as the `authorization: Bearer …` header
+  baseUrl: string; // host only — request paths such as `/v1/tts` are appended to it
+  /** Speech model sent in the `model` header (e.g. 's1' or 's2-pro'). */
+  model: string;
+}
+
+/**
+ * Resolve FishAudio credentials from the environment. Returns `null` (not
+ * throw) when no key is set, so callers can report a missing key gracefully.
+ * Every variable is trimmed BEFORE its fallback is considered, so a blank
+ * (whitespace-only) value counts as unset instead of masking the next choice.
+ *
+ * Key: FISH_AUDIO_API_KEY → FISHAUDIO_API_KEY · Base: FISH_AUDIO_BASE_URL →
+ * default · Model: FISH_AUDIO_MODEL → default (s1)
+ */
+export function resolveFishAudioCredentials(
+  env: NodeJS.ProcessEnv = process.env,
+): FishAudioCredentials | null {
+  const read = (name: string): string => (env[name] ?? '').trim();
+  const apiKey = read('FISH_AUDIO_API_KEY') || read('FISHAUDIO_API_KEY');
+  if (!apiKey) return null;
+  const baseUrl = (read('FISH_AUDIO_BASE_URL') || FISH_DEFAULT_BASE_URL).replace(/\/$/, '');
+  return { apiKey, baseUrl, model: read('FISH_AUDIO_MODEL') || FISH_DEFAULT_MODEL };
+}
+
+/**
+ * Synthesize spoken narration via FishAudio TTS (`POST /v1/tts`).
+ *
+ * The model is sent in the `model` header (from creds.model). The response is
+ * a RAW binary audio body — no JSON envelope — so we read `arrayBuffer()`
+ * directly. A missing `referenceId` falls back to FishAudio's default voice.
+ *
+ * @throws HtmlVideoError `invalid-input` for blank text; `render-failed` for a
+ *   timeout, caller abort, network failure, non-2xx status or empty body.
+ */
+export async function generateFishTts(opts: {
+  text: string;
+  referenceId?: string;
+  creds: FishAudioCredentials;
+  signal?: AbortSignal;
+}): Promise<TtsAudioResult> {
+  const text = (opts.text || '').trim();
+  if (!text) throw new HtmlVideoError('invalid-input', 'narration text is empty');
+  const referenceId = (opts.referenceId || '').trim();
+  const { creds } = opts;
+
+  // `reference_id` is only sent when a voice was chosen.
+  const body = { text, format: 'mp3', ...(referenceId ? { reference_id: referenceId } : {}) };
+
+  const timeoutSignal = AbortSignal.timeout(FISH_REQUEST_TIMEOUT_MS);
+  const effectiveSignal = opts.signal
+    ? AbortSignal.any
+      ? AbortSignal.any([opts.signal, timeoutSignal])
+      : opts.signal
+    : timeoutSignal;
+
+  let resp: Response;
+  try {
+    resp = await fetch(`${creds.baseUrl}/v1/tts`, {
+      method: 'POST',
+      headers: {
+        authorization: `Bearer ${creds.apiKey}`,
+        'content-type': 'application/json',
+        // FishAudio selects the speech model via a header, not a body field.
+        model: creds.model,
+      },
+      body: JSON.stringify(body),
+      signal: effectiveSignal,
+    });
+  } catch (e) {
+    const name = e instanceof Error ? e.name : '';
+    const msg = e instanceof Error ? e.message : String(e);
+    // fetch rejects with the abort reason: `TimeoutError` when our deadline
+    // fired, `AbortError` when the caller's own signal cancelled the request.
+    throw new HtmlVideoError(
+      'render-failed',
+      name === 'TimeoutError'
+        ? `fishaudio tts timed out after ${Math.round(FISH_REQUEST_TIMEOUT_MS / 1000)}s (the API did not respond — try again, or check FISH_AUDIO_BASE_URL)`
+        : name === 'AbortError'
+          ? 'fishaudio tts request was aborted before it completed'
+          : `fishaudio tts request failed: ${msg}`,
+      true,
+    );
+  }
+
+  if (!resp.ok) {
+    const detail = truncate(await resp.text().catch(() => ''), 200);
+    const hint =
+      resp.status === 401
+        ? ' (auth — check the FishAudio API key)'
+        : resp.status === 402
+          ? ' (no credit — check the account balance)'
+          : '';
+    throw new HtmlVideoError(
+      'render-failed',
+      `fishaudio tts ${resp.status}: ${detail || 'request rejected'}${hint}`,
+      resp.status >= 500,
+    );
+  }
+
+  const bytes = Buffer.from(await resp.arrayBuffer());
+  if (bytes.length === 0) throw new HtmlVideoError('render-failed', 'fishaudio tts returned zero audio bytes');
+  return {
+    bytes,
+    ext: '.mp3',
+    providerNote: `fishaudio/${creds.model} · ${referenceId || 'default'} · ${bytes.length} bytes`,
+  };
+}
+
+/** A trimmed FishAudio voice model, for the studio's voice picker. */
+export interface FishVoice {
+  /** The model id — passed back as `reference_id` when synthesizing. */
+  id: string;
+  title: string; // display name; falls back to the id when the model has no title
+  languages: string[]; // as reported by the API; empty when it sent no array
+  /** Preview audio URL (first sample), if the model has one. */
+  sampleUrl?: string;
+}
+
+/**
+ * List the account's own voice models via `GET /model?self=true`, optionally
+ * filtered by a title query, trimmed to the fields the picker needs. The
+ * request is capped at FISH_REQUEST_TIMEOUT_MS (like TTS) so a hung API
+ * cannot stall the caller; a caller `signal` can still cancel it sooner.
+ */
+export async function listFishVoices(opts: {
+  creds: FishAudioCredentials;
+  query?: string;
+  pageSize?: number;
+  signal?: AbortSignal;
+}): Promise<FishVoice[]> {
+  const { creds } = opts;
+  const params = new URLSearchParams({ self: 'true', page_size: String(opts.pageSize ?? 20) });
+  const query = (opts.query || '').trim();
+  if (query) params.set('title', query);
+  const timeoutSignal = AbortSignal.timeout(FISH_REQUEST_TIMEOUT_MS);
+  const signal = opts.signal ? (AbortSignal.any ? AbortSignal.any([opts.signal, timeoutSignal]) : opts.signal) : timeoutSignal;
+
+  let resp: Response;
+  try {
+    resp = await fetch(`${creds.baseUrl}/model?${params.toString()}`, {
+      headers: { authorization: `Bearer ${creds.apiKey}` },
+      signal,
+    });
+  } catch (e) {
+    const timedOut = e instanceof Error && e.name === 'TimeoutError';
+    const msg = timedOut ? `timed out after ${Math.round(FISH_REQUEST_TIMEOUT_MS / 1000)}s` : e instanceof Error ? e.message : String(e);
+    throw new HtmlVideoError('render-failed', `fishaudio list voices request failed: ${msg}`, true);
+  }
+  if (!resp.ok) {
+    const detail = truncate(await resp.text().catch(() => ''), 200);
+    throw new HtmlVideoError('render-failed', `fishaudio list voices ${resp.status}: ${detail || 'request rejected'}`, resp.status >= 500);
+  }
+
+  const data = (await resp.json().catch(() => ({}))) as {
+    items?: Array<{
+      _id?: string;
+      title?: string;
+      languages?: string[];
+      samples?: Array<{ audio?: string }>;
+    }>;
+  };
+  return (data.items ?? [])
+    .filter((m): m is { _id: string } & typeof m => typeof m._id === 'string' && m._id.length > 0)
+    .map((m) => {
+      const sampleUrl = m.samples?.find((s) => typeof s.audio === 'string')?.audio;
+      return {
+        id: m._id,
+        title: (m.title || '').trim() || m._id,
+        languages: Array.isArray(m.languages) ? m.languages : [],
+        ...(sampleUrl ? { sampleUrl } : {}),
+      };
+    });
+}
+
+function truncate(s: string, n: number): string { // cap s at n chars, marking any cut with '…'
+  return s.length <= n ? s : s.slice(0, n) + '…';
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 0339d17..0e9c913 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -19,3 +19,9 @@ export {
   generateMusic,
 } from './minimax.js';
 export type { MinimaxCredentials, MinimaxAudioResult } from './minimax.js';
+export {
+  resolveFishAudioCredentials,
+  generateFishTts,
+  listFishVoices,
+} from './fishaudio.js';
+export type { FishAudioCredentials, FishVoice } from './fishaudio.js';
diff --git a/packages/core/src/minimax.ts b/packages/core/src/minimax.ts
index e5bd0dc..6711622 100644
--- a/packages/core/src/minimax.ts
+++ b/packages/core/src/minimax.ts
@@ -16,6 +16,7 @@
  */
 
 import { HtmlVideoError } from './errors.js';
+import type { TtsAudioResult } from './types/index.js';
 
 /** Default base URL. The old `api.minimaxi.chat` host is RETIRED server-side
  *  (issue #4). MiniMax now has two region-bound endpoints — international
@@ -43,16 +44,9 @@ export interface MinimaxCredentials {
   baseUrl: string;
 }
 
-export interface MinimaxAudioResult {
-  /** Decoded audio bytes (MP3). */
-  bytes: Buffer;
-  /** File extension to store under. */
-  ext: '.mp3';
-  /** Human-readable note of what was produced (provider · model · size). */
-  providerNote: string;
-  /** Reported duration in seconds, if the API surfaced it. */
-  durationSec?: number;
-}
+/** @deprecated Use the shared {@link TtsAudioResult}. Kept as an alias so
+ *  existing imports keep working. */
+export type MinimaxAudioResult = TtsAudioResult;
 
 /**
  * Resolve MiniMax credentials from the environment. Returns `null` (not throw)
diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts
index 22c4a29..b5560d3 100644
--- a/packages/core/src/types/index.ts
+++ b/packages/core/src/types/index.ts
@@ -368,6 +368,22 @@ export interface FrameRecord {
   previewMp4Path?: string;
 }
 
+/**
+ * Result of a TTS / audio synthesis call, shared across narration providers
+ * (MiniMax, FishAudio, …). Providers decode whatever wire format they speak
+ * (MiniMax: JSON+hex envelope; FishAudio: raw binary) into these common bytes.
+ */
+export interface TtsAudioResult {
+  /** Decoded audio bytes. */
+  bytes: Buffer;
+  /** File extension to store under (e.g. '.mp3', '.wav'). */
+  ext: string;
+  /** Human-readable note of what was produced (provider · model · size). */
+  providerNote: string;
+  /** Reported duration in seconds, if the provider surfaced it (cosmetic). */
+  durationSec?: number;
+}
+
 /**
  * v0.9: project-level soundtrack — one background music track + one narration
  * track mixed into the exported MP4. Both reference an entry in `assets[]`
diff --git a/packages/core/test/fishaudio.test.ts b/packages/core/test/fishaudio.test.ts
new file mode 100644
index 0000000..509b1f0
--- /dev/null
+++ b/packages/core/test/fishaudio.test.ts
@@ -0,0 +1,199 @@
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { resolveFishAudioCredentials, generateFishTts, listFishVoices } from '../dist/fishaudio.js';
+import { HtmlVideoError } from '../dist/errors.js';
+
+const CREDS = { apiKey: 'k-secret', baseUrl: 'https://api.fish.audio', model: 's1' };
+
+/** Swap in a stubbed global fetch for the duration of `fn`, recording every
+ *  request made through it. Restores the real fetch afterwards, even on failure. */
+async function withFetch(
+  responder: (url: string, init: RequestInit) => Response,
+  fn: (calls: { url: string; init: RequestInit }[]) => Promise<void>,
+): Promise<void> {
+  const calls: { url: string; init: RequestInit }[] = [];
+  const real = globalThis.fetch;
+  globalThis.fetch = (async (url: string, init: RequestInit) => {
+    calls.push({ url, init });
+    return responder(url, init);
+  }) as unknown as typeof fetch;
+  try {
+    await fn(calls);
+  } finally {
+    globalThis.fetch = real;
+  }
+}
+
+const okAudio = () =>
+  new Response(new Uint8Array([0xff, 0xfb, 0x10, 0x20]), {
+    status: 200,
+    headers: { 'content-type': 'audio/mpeg' },
+  });
+
+// resolveFishAudioCredentials mirrors resolveMinimaxCredentials: it reads creds
+// from the environment, returns null (never throws) when no key is set, and
+// applies env precedence + defaults. FishAudio has a single global host (no
+// region split) and selects the model via env (FISH_AUDIO_MODEL).
+
+test('returns null when no key is set', () => {
+  assert.equal(resolveFishAudioCredentials({}), null);
+});
+
+test('FISH_AUDIO_API_KEY yields default host + default model s1', () => {
+  const c = resolveFishAudioCredentials({ FISH_AUDIO_API_KEY: 'k-123' });
+  assert.deepEqual(c, { apiKey: 'k-123', baseUrl: 'https://api.fish.audio', model: 's1' });
+});
+
+test('FISHAUDIO_API_KEY is accepted as a fallback key name', () => {
+  const c = resolveFishAudioCredentials({ FISHAUDIO_API_KEY: 'k-fallback' });
+  assert.equal(c?.apiKey, 'k-fallback');
+});
+
+test('FISH_AUDIO_API_KEY takes precedence over FISHAUDIO_API_KEY', () => {
+  const c = resolveFishAudioCredentials({
+    FISH_AUDIO_API_KEY: 'primary',
+    FISHAUDIO_API_KEY: 'secondary',
+  });
+  assert.equal(c?.apiKey, 'primary');
+});
+
+test('FISH_AUDIO_BASE_URL overrides the default and strips a trailing slash', () => {
+  const c = resolveFishAudioCredentials({
+    FISH_AUDIO_API_KEY: 'k',
+    FISH_AUDIO_BASE_URL: 'https://proxy.example.com/',
+  });
+  assert.equal(c?.baseUrl, 'https://proxy.example.com');
+});
+
+test('FISH_AUDIO_MODEL overrides the default model', () => {
+  const c = resolveFishAudioCredentials({ FISH_AUDIO_API_KEY: 'k', FISH_AUDIO_MODEL: 's2-pro' });
+  assert.equal(c?.model, 's2-pro');
+});
+
+test('a whitespace-only key is treated as unset', () => {
+  assert.equal(resolveFishAudioCredentials({ FISH_AUDIO_API_KEY: '   ' }), null);
+});
+
+// --- generateFishTts -------------------------------------------------------
+
+test('posts to /v1/tts with bearer auth, the model header, and the right body', async () => {
+  await withFetch(okAudio, async (calls) => {
+    const r = await generateFishTts({ text: 'hello world', referenceId: 'voice-42', creds: CREDS });
+    assert.equal(calls.length, 1);
+    assert.equal(calls[0]!.url, 'https://api.fish.audio/v1/tts');
+    const h = calls[0]!.init.headers as Record<string, string>;
+    assert.equal(h.authorization, 'Bearer k-secret');
+    assert.equal(h.model, 's1');
+    assert.equal(h['content-type'], 'application/json');
+    const body = JSON.parse(calls[0]!.init.body as string);
+    assert.equal(body.text, 'hello world');
+    assert.equal(body.reference_id, 'voice-42');
+    assert.equal(body.format, 'mp3');
+    // decoded the raw binary body into bytes, tagged as mp3
+    assert.ok(Buffer.isBuffer(r.bytes));
+    assert.equal(r.bytes.length, 4);
+    assert.equal(r.ext, '.mp3');
+    assert.match(r.providerNote, /fishaudio\/s1/);
+  });
+});
+
+test('omits reference_id from the body when no voice is given (default voice)', async () => {
+  await withFetch(okAudio, async (calls) => {
+    await generateFishTts({ text: 'no voice', creds: CREDS });
+    const body = JSON.parse(calls[0]!.init.body as string);
+    assert.equal('reference_id' in body, false);
+  });
+});
+
+test('rejects empty text without hitting the network', async () => {
+  await withFetch(okAudio, async (calls) => {
+    await assert.rejects(
+      () => generateFishTts({ text: '   ', creds: CREDS }),
+      (e: unknown) => e instanceof HtmlVideoError && e.code === 'invalid-input',
+    );
+    assert.equal(calls.length, 0);
+  });
+});
+
+test('maps HTTP 401 to a friendly auth error', async () => {
+  const resp401 = () => new Response('unauthorized', { status: 401 });
+  await withFetch(resp401, async () => {
+    await assert.rejects(
+      () => generateFishTts({ text: 'x', creds: CREDS }),
+      (e: unknown) => e instanceof HtmlVideoError && /401|auth|key/i.test(e.message),
+    );
+  });
+});
+
+test('maps HTTP 402 to a friendly credit/balance error', async () => {
+  const resp402 = () => new Response('payment required', { status: 402 });
+  await withFetch(resp402, async () => {
+    await assert.rejects(
+      () => generateFishTts({ text: 'x', creds: CREDS }),
+      (e: unknown) => e instanceof HtmlVideoError && /402|credit|balance/i.test(e.message),
+    );
+  });
+});
+
+test('rejects a zero-byte audio body', async () => {
+  const empty = () =>
+    new Response(new Uint8Array([]), { status: 200, headers: { 'content-type': 'audio/mpeg' } });
+  await withFetch(empty, async () => {
+    await assert.rejects(
+      () => generateFishTts({ text: 'x', creds: CREDS }),
+      (e: unknown) => e instanceof HtmlVideoError,
+    );
+  });
+});
+
+// --- listFishVoices --------------------------------------------------------
+
+const voicesBody = (items: unknown[]) =>
+  new Response(JSON.stringify({ total: items.length, items }), {
+    status: 200,
+    headers: { 'content-type': 'application/json' },
+  });
+
+test('lists own voices via GET /model with self=true + a title query, bearer auth', async () => {
+  const responder = () =>
+    voicesBody([
+      {
+        _id: 'v1',
+        title: '平淡',
+        languages: ['zh'],
+        samples: [{ audio: 'https://cdn.fish.audio/v1.mp3' }],
+      },
+    ]);
+  await withFetch(responder, async (calls) => {
+    const voices = await listFishVoices({ creds: CREDS, query: '平淡' });
+    const u = new URL(calls[0]!.url);
+    assert.equal(u.origin + u.pathname, 'https://api.fish.audio/model');
+    assert.equal(u.searchParams.get('self'), 'true');
+    assert.equal(u.searchParams.get('title'), '平淡');
+    const h = calls[0]!.init.headers as Record<string, string>;
+    assert.equal(h.authorization, 'Bearer k-secret');
+    assert.deepEqual(voices, [
+      { id: 'v1', title: '平淡', languages: ['zh'], sampleUrl: 'https://cdn.fish.audio/v1.mp3' },
+    ]);
+  });
+});
+
+test('omits the title param when no query is given', async () => {
+  await withFetch(
+    () => voicesBody([]),
+    async (calls) => {
+      await listFishVoices({ creds: CREDS });
+      const u = new URL(calls[0]!.url);
+      assert.equal(u.searchParams.has('title'), false);
+      assert.equal(u.searchParams.get('self'), 'true');
+    },
+  );
+});
+
+test('a voice with no samples yields an undefined sampleUrl', async () => {
+  const responder = () => voicesBody([{ _id: 'v2', title: 'bare', languages: [], samples: [] }]);
+  await withFetch(responder, async () => {
+    const voices = await listFishVoices({ creds: CREDS });
+    assert.equal(voices[0]!.sampleUrl, undefined);
+  });
+});
diff --git a/packages/project-studio/public/app.js b/packages/project-studio/public/app.js
index 13785b2..f46c0e6 100644
--- a/packages/project-studio/public/app.js
+++ b/packages/project-studio/public/app.js
@@ -835,6 +835,12 @@ function renderMain() {
                     <select id="st-narration-voice" class="st-voice-select">
                       ${NARRATION_VOICES.map((v) => `<option value="${v.voiceId}">${t('soundtrack.voice_' + v.key)}</option>`).join('')}
                     </select>
+                    <div id="st-fish-voice" class="st-fish-voice" style="display:none">
+                      <input type="text" id="st-fish-voice-search" class="st-fish-search" placeholder="${t('soundtrack.fish_voice_search')}" autocomplete="off" />
+                      <input type="hidden" id="st-fish-voice-id" />
+                      <div id="st-fish-voice-picked" class="st-fish-picked"></div>
+                      <div id="st-fish-voice-results" class="st-fish-results"></div>
+                    </div>
                     <button type="button" class="st-fit" id="btn-st-fit" title="${t('soundtrack.fit_hint')}">${t('soundtrack.fit_durations')}</button>
                   </div>
                   <div class="st-vol-row"><label>${t('soundtrack.narration_volume')} <input type="range" id="st-narration-vol" min="-20" max="6" value="0" /><b id="st-narration-vol-val">0 dB</b></label></div>
@@ -1103,8 +1109,13 @@ function wireSoundtrackPanel() {
         .filter((s) => s.length > 0).join('\n');
       const nt = stitched || narrationText.value.trim();
       if (!nt) { if (statusEl) statusEl.textContent = t('soundtrack.empty_narration'); return; }
-      const voiceSel = document.getElementById('st-narration-voice');
-      payload.narration = { text: nt, volumeDb: Number(narrationVol.value), byFrame: state._narrationByFrame, ...(voiceSel?.value && { voiceId: voiceSel.value }) };
+      // voiceId comes from whichever provider's control is active: the MiniMax
+      // <select> or the FishAudio picker's chosen reference_id.
+      const voiceId =
+        state._narrationProvider === 'fishaudio'
+          ? (document.getElementById('st-fish-voice-id')?.value || '').trim()
+          : (document.getElementById('st-narration-voice')?.value || '');
+      payload.narration = { text: nt, volumeDb: Number(narrationVol.value), byFrame: state._narrationByFrame, ...(voiceId && { voiceId }) };
     }
 
     const label = btn?.textContent;
@@ -1162,6 +1173,99 @@ function wireSoundtrackPanel() {
   }
   if (genMusicBtn) genMusicBtn.onclick = () => runGenerate('music');
   if (genNarrationBtn) genNarrationBtn.onclick = () => runGenerate('narration');
+  wireFishVoicePicker();
+}
+
+/** Wire the FishAudio searchable voice picker in the narration section, and
+ *  show the right voice control for the active narration provider on mount.
+ *
+ *  Searches are debounced, and each one carries a sequence number so a slow
+ *  response for an older query can never overwrite newer results. Only one
+ *  sample preview plays at a time. No-op when the picker isn't in the DOM. */
+function wireFishVoicePicker() {
+  const search = document.getElementById('st-fish-voice-search');
+  const hidden = document.getElementById('st-fish-voice-id');
+  const results = document.getElementById('st-fish-voice-results');
+  const picked = document.getElementById('st-fish-voice-picked');
+  if (!search || !hidden || !results || !picked) return;
+
+  // Reflect the active provider when the panel mounts (which control shows).
+  fetch('/api/config/narration-provider')
+    .then((r) => r.json())
+    .then((d) => applyNarrationProviderToUI(d.provider || 'minimax'))
+    .catch(() => {});
+
+  let timer = null; // pending debounce handle
+  let searchSeq = 0; // bumped per search; a response with an older number is stale
+  let preview = null; // the sample currently playing, if any
+
+  const renderPicked = () => {
+    if (hidden.value) {
+      picked.innerHTML = `${esc(t('soundtrack.fish_voice_picked', { title: picked.dataset.title || hidden.value }))} <a href="#" id="st-fish-voice-clear">${esc(t('soundtrack.fish_voice_clear'))}</a>`;
+      picked.querySelector('#st-fish-voice-clear').onclick = (e) => {
+        e.preventDefault();
+        hidden.value = '';
+        picked.dataset.title = '';
+        renderPicked();
+      };
+    } else {
+      picked.textContent = t('soundtrack.fish_voice_default');
+    }
+  };
+  renderPicked();
+
+  const select = (voice) => {
+    // Drop any pending or in-flight search so it can't reopen the list after a pick.
+    clearTimeout(timer);
+    searchSeq++;
+    hidden.value = voice.id;
+    picked.dataset.title = voice.title;
+    search.value = '';
+    results.innerHTML = '';
+    renderPicked();
+  };
+
+  const doSearch = async () => {
+    const q = search.value.trim();
+    const seq = ++searchSeq;
+    results.innerHTML = `<div class="st-fish-hint">${esc(t('soundtrack.fish_voice_searching'))}</div>`;
+    try {
+      const r = await fetch(`/api/fishaudio/voices?q=${encodeURIComponent(q)}`);
+      if (!r.ok) throw new Error(`HTTP ${r.status}`);
+      const data = await r.json();
+      if (seq !== searchSeq) return; // superseded by a newer search or a pick
+      const voices = data.voices || [];
+      if (!voices.length) { results.innerHTML = `<div class="st-fish-hint">${esc(t('soundtrack.fish_voice_none'))}</div>`; return; }
+      results.innerHTML = '';
+      voices.slice(0, 20).forEach((v) => {
+        const row = document.createElement('div');
+        row.className = 'st-fish-result';
+        const lang = (v.languages || []).join(', ');
+        row.innerHTML = `<span class="st-fish-result-title">${esc(v.title)}</span><span class="st-fish-result-lang">${esc(lang)}</span>`;
+        if (v.sampleUrl) {
+          const play = document.createElement('button');
+          play.type = 'button';
+          play.className = 'st-fish-play';
+          play.textContent = '▶';
+          play.onclick = (e) => {
+            e.stopPropagation(); // previewing must not also select the row
+            if (preview) preview.pause();
+            preview = new Audio(v.sampleUrl);
+            // play() reports failure by rejecting its promise, not by throwing.
+            preview.play()?.catch(() => { /* ignore */ });
+          };
+          row.appendChild(play);
+        }
+        row.onclick = () => select(v);
+        results.appendChild(row);
+      });
+    } catch (e) {
+      if (seq !== searchSeq) return; // don't show a stale search's error
+      results.innerHTML = `<div class="st-fish-hint">${esc(String(e?.message ?? e))}</div>`;
+    }
+  };
+  search.oninput = () => { clearTimeout(timer); timer = setTimeout(doSearch, 300); };
+  search.onfocus = () => { if (!results.children.length) doSearch(); };
 }
 
 function renderSoundtrackPreview(soundtrack) {
@@ -2975,7 +3058,18 @@ async function renderSettingsAudio(panel) {
   panel.innerHTML = `
     <h3>${esc(t('settings.audio.title'))}</h3>
     <div class="panel-sub">${esc(t('settings.audio.subtitle'))}</div>
-    <div class="audio-config" id="audio-config">
+
+    <label class="audio-field" style="margin-top:8px">
+      <span>${esc(t('settings.audio.provider_label'))}</span>
+      <div class="audio-region" id="narration-provider-toggle">
+        <button type="button" class="st-preset" data-provider="minimax">${esc(t('settings.audio.provider_minimax'))}</button>
+        <button type="button" class="st-preset" data-provider="fishaudio">${esc(t('settings.audio.provider_fishaudio'))}</button>
+      </div>
+    </label>
+    <p class="panel-sub" style="font-size:11px;margin:2px 0 8px">${esc(t('settings.audio.provider_note_music'))}</p>
+
+    <!-- MiniMax pane -->
+    <div class="audio-config" id="mm-pane">
       <div class="audio-status" id="audio-status">${esc(t('settings.audio.loading'))}</div>
       <label class="audio-field">
         <span>${esc(t('settings.audio.api_key'))}</span>
@@ -2999,8 +3093,61 @@ async function renderSettingsAudio(panel) {
       </div>
       <p class="panel-sub" style="font-size:11.5px;margin-top:4px">${esc(t('settings.audio.hint'))}</p>
     </div>
+
+    <!-- FishAudio pane (narration only; single global host, model via env) -->
+    <div class="audio-config" id="fa-pane">
+      <div class="audio-status" id="fa-status">${esc(t('settings.audio.loading'))}</div>
+      <label class="audio-field">
+        <span>${esc(t('settings.audio.api_key'))}</span>
+        <input type="password" id="fa-api-key" placeholder="${esc(t('settings.audio.fish_api_key_placeholder'))}" autocomplete="off" />
+      </label>
+      <label class="audio-field">
+        <span>${esc(t('settings.audio.base_url'))}</span>
+        <input type="text" id="fa-base-url" placeholder="https://api.fish.audio" autocomplete="off" />
+      </label>
+      <div class="audio-actions">
+        <button class="audio-save primary-action" id="fa-save" style="background:var(--accent);border-color:var(--accent);color:var(--accent-fg)">${esc(t('settings.audio.save'))}</button>
+        <button class="audio-clear" id="fa-clear">${esc(t('settings.audio.clear'))}</button>
+        <span class="audio-save-state" id="fa-save-state"></span>
+      </div>
+      <p class="panel-sub" style="font-size:11.5px;margin-top:4px">${esc(t('settings.audio.fish_hint'))}</p>
+    </div>
   `;
 
+  const mmPane = panel.querySelector('#mm-pane');
+  const faPane = panel.querySelector('#fa-pane');
+  const toggle = panel.querySelector('#narration-provider-toggle');
+
+  // Reflect a provider in the toggle + which pane shows, and keep the
+  // soundtrack panel's voice control in sync if it's open.
+  const applyProvider = (provider) => {
+    toggle.querySelectorAll('.st-preset').forEach((b) => b.classList.toggle('active', b.dataset.provider === provider));
+    mmPane.style.display = provider === 'fishaudio' ? 'none' : '';
+    faPane.style.display = provider === 'fishaudio' ? '' : 'none';
+    applyNarrationProviderToUI(provider);
+  };
+
+  // Load + persist the active narration provider.
+  let provider = 'minimax';
+  try {
+    provider = (await fetch('/api/config/narration-provider').then((r) => r.json())).provider || 'minimax';
+  } catch { /* default minimax */ }
+  applyProvider(provider);
+  toggle.querySelectorAll('.st-preset').forEach((btn) => {
+    btn.onclick = async () => {
+      const p = btn.dataset.provider;
+      applyProvider(p);
+      try {
+        await fetch('/api/config/narration-provider', {
+          method: 'POST',
+          headers: { 'content-type': 'application/json' },
+          body: JSON.stringify({ provider: p }),
+        });
+      } catch { /* non-fatal; UI already reflects the choice */ }
+    };
+  });
+
+  // --- MiniMax wiring ---
   const statusEl = panel.querySelector('#audio-status');
   const keyInput = panel.querySelector('#mm-api-key');
   const baseInput = panel.querySelector('#mm-base-url');
@@ -3059,6 +3206,66 @@ async function renderSettingsAudio(panel) {
     saveState.textContent = '';
     await refresh();
   };
+
+  // --- FishAudio wiring (no region; key + base URL only) ---
+  const faStatusEl = panel.querySelector('#fa-status');
+  const faKeyInput = panel.querySelector('#fa-api-key');
+  const faBaseInput = panel.querySelector('#fa-base-url');
+  const faSaveState = panel.querySelector('#fa-save-state');
+
+  const faRefresh = async () => {
+    try {
+      const s = await fetch('/api/config/fishaudio').then((r) => r.json());
+      if (s.configured) {
+        const src = s.source === 'env' ? t('settings.audio.source_env') : t('settings.audio.source_config');
+        faStatusEl.innerHTML = `<span class="agent-status-dot ok"></span>${esc(t('settings.audio.configured', { key: s.maskedKey, source: src }))}`;
+        if (s.baseUrl) faBaseInput.value = s.baseUrl;
+      } else {
+        faStatusEl.innerHTML = `<span class="agent-status-dot missing"></span>${esc(t('settings.audio.fish_not_configured'))}`;
+      }
+    } catch {
+      faStatusEl.textContent = t('settings.audio.fish_not_configured');
+    }
+  };
+  await faRefresh();
+
+  panel.querySelector('#fa-save').onclick = async () => {
+    const apiKey = faKeyInput.value.trim();
+    if (!apiKey) { faSaveState.textContent = t('settings.audio.need_key'); return; }
+    faSaveState.textContent = t('settings.audio.saving');
+    try {
+      const r = await fetch('/api/config/fishaudio', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json' },
+        body: JSON.stringify({ apiKey, baseUrl: faBaseInput.value.trim() }),
+      });
+      if (!r.ok) throw new Error(`HTTP ${r.status}`);
+      faKeyInput.value = '';
+      faSaveState.textContent = t('settings.audio.saved');
+      await faRefresh();
+    } catch (e) {
+      faSaveState.textContent = t('settings.audio.save_failed', { message: (e?.message ?? e) });
+    }
+  };
+
+  panel.querySelector('#fa-clear').onclick = async () => {
+    await fetch('/api/config/fishaudio', { method: 'DELETE' });
+    faKeyInput.value = '';
+    faBaseInput.value = '';
+    faSaveState.textContent = '';
+    await faRefresh();
+  };
+}
+
+/** Remember the active narration provider and show its voice control: the FishAudio
+ *  picker for 'fishaudio', else the MiniMax <select>. Elements not in the DOM are skipped. */
+function applyNarrationProviderToUI(provider) {
+  const useFish = provider === 'fishaudio';
+  state._narrationProvider = provider;
+  const minimaxSelect = document.getElementById('st-narration-voice');
+  const fishPicker = document.getElementById('st-fish-voice');
+  if (minimaxSelect) minimaxSelect.style.display = useFish ? 'none' : '';
+  if (fishPicker) fishPicker.style.display = useFish ? '' : 'none';
 }
 
 function renderSettingsAgent(panel) {
diff --git a/packages/project-studio/public/i18n.js b/packages/project-studio/public/i18n.js
index 52b6425..53c5a09 100644
--- a/packages/project-studio/public/i18n.js
+++ b/packages/project-studio/public/i18n.js
@@ -151,6 +151,12 @@ const DICT = {
     'soundtrack.voice_female_anchor': 'Female · Anchor',
     'soundtrack.voice_female_mature': 'Female · Mature',
     'soundtrack.voice_female_sweet': 'Female · Sweet',
+    'soundtrack.fish_voice_search': 'Search your FishAudio voices…',
+    'soundtrack.fish_voice_default': 'Default voice (leave empty)',
+    'soundtrack.fish_voice_searching': 'Searching…',
+    'soundtrack.fish_voice_none': 'No voices found',
+    'soundtrack.fish_voice_picked': 'Voice: {title}',
+    'soundtrack.fish_voice_clear': 'use default',
     'soundtrack.fit_durations': '⇄ Fit timing to narration',
     'soundtrack.fit_hint': 'Re-pace each frame by how much narration it has',
     'soundtrack.fitting': 'Fitting…',
@@ -198,7 +204,7 @@ const DICT = {
     'settings.tab.language': 'Language',
     'settings.tab.about': 'About',
 
-    'settings.audio.title': 'Audio · MiniMax',
+    'settings.audio.title': 'Audio',
     'settings.audio.subtitle': 'API key for soundtrack generation (background music + narration).',
     'settings.audio.loading': 'Checking…',
     'settings.audio.api_key': 'API key',
@@ -218,6 +224,13 @@ const DICT = {
     'settings.audio.save_failed': 'Save failed: {message}',
     'settings.audio.need_key': 'Enter an API key first.',
     'settings.audio.hint': 'Stored locally in .html-video/media-config.json. Pick the region that matches your key — an api.minimax.io (International) key will NOT work against api.minimaxi.com (China), and vice-versa. The old api.minimaxi.chat host is retired.',
+    'settings.audio.provider_label': 'Narration provider',
+    'settings.audio.provider_minimax': 'MiniMax',
+    'settings.audio.provider_fishaudio': 'FishAudio',
+    'settings.audio.provider_note_music': 'Background music is always MiniMax (FishAudio has no music).',
+    'settings.audio.fish_api_key_placeholder': 'Paste your FishAudio API key',
+    'settings.audio.fish_not_configured': 'No FishAudio key configured yet.',
+    'settings.audio.fish_hint': 'Stored locally in .html-video/media-config.json. FishAudio uses a single global host — no region. The speech model (s1 / s2-pro) is controlled by the FISH_AUDIO_MODEL environment variable (default s1).',
 
     'settings.agent.title': 'Agent',
     'settings.agent.subtitle': 'Pick the runtime that turns your chat into HTML.',
@@ -400,6 +413,12 @@ const DICT = {
     'soundtrack.voice_female_anchor': '女声 · 播音',
     'soundtrack.voice_female_mature': '女声 · 御姐',
     'soundtrack.voice_female_sweet': '女声 · 甜美',
+    'soundtrack.fish_voice_search': '搜索你的 FishAudio 音色…',
+    'soundtrack.fish_voice_default': '默认音色（留空）',
+    'soundtrack.fish_voice_searching': '搜索中…',
+    'soundtrack.fish_voice_none': '没有找到音色',
+    'soundtrack.fish_voice_picked': '音色：{title}',
+    'soundtrack.fish_voice_clear': '用默认',
     'soundtrack.fit_durations': '⇄ 时长适配配音',
     'soundtrack.fit_hint': '按每帧旁白长短重新分配各帧时长',
     'soundtrack.fitting': '适配中…',
@@ -447,7 +466,7 @@ const DICT = {
     'settings.tab.language': '界面语言',
     'settings.tab.about': '关于',
 
-    'settings.audio.title': '音频 · MiniMax',
+    'settings.audio.title': '音频',
     'settings.audio.subtitle': '配乐生成（背景音乐 + 旁白）所需的 API key。',
     'settings.audio.loading': '检查中…',
     'settings.audio.api_key': 'API key',
@@ -467,6 +486,13 @@ const DICT = {
     'settings.audio.save_failed': '保存失败：{message}',
     'settings.audio.need_key': '请先填写 API key。',
     'settings.audio.hint': '保存在本地 .html-video/media-config.json。请按你的 key 选对区域——国际版 api.minimax.io 的 key 在国内版 api.minimaxi.com 上无法使用，反之亦然；旧的 api.minimaxi.chat 域名已停用。',
+    'settings.audio.provider_label': '旁白引擎',
+    'settings.audio.provider_minimax': 'MiniMax',
+    'settings.audio.provider_fishaudio': 'FishAudio',
+    'settings.audio.provider_note_music': '背景音乐始终走 MiniMax（FishAudio 不提供音乐生成）。',
+    'settings.audio.fish_api_key_placeholder': '粘贴你的 FishAudio API key',
+    'settings.audio.fish_not_configured': '尚未配置 FishAudio key。',
+    'settings.audio.fish_hint': '保存在本地 .html-video/media-config.json。FishAudio 是单一全球 host，无区域之分。语音模型（s1 / s2-pro）由环境变量 FISH_AUDIO_MODEL 控制（默认 s1）。',
 
     'settings.agent.title': 'Agent',
     'settings.agent.subtitle': '选一个运行时把你的对话翻成 HTML。',
diff --git a/packages/project-studio/public/index.html b/packages/project-studio/public/index.html
index 4ce7fd2..5a5749e 100644
--- a/packages/project-studio/public/index.html
+++ b/packages/project-studio/public/index.html
@@ -822,6 +822,27 @@
   .st-voice-select { font-size: 11px; padding: 3px 8px; border-radius: var(--radius-sm);
     background: var(--bg); border: 1px solid var(--border); color: var(--text); cursor: pointer; }
   .st-voice-select:focus { outline: none; border-color: var(--accent); }
+  /* FishAudio searchable voice picker (shown instead of the <select>) */
+  .st-fish-voice { position: relative; display: flex; flex-direction: column; gap: 4px; min-width: 220px; }
+  .st-fish-search { font-size: 11px; padding: 3px 8px; border-radius: var(--radius-sm);
+    background: var(--bg); border: 1px solid var(--border); color: var(--text); }
+  .st-fish-search:focus { outline: none; border-color: var(--accent); }
+  .st-fish-picked { font-size: 10.5px; color: var(--text-muted); }
+  .st-fish-picked a { color: var(--accent); margin-left: 6px; }
+  .st-fish-results { position: absolute; top: 100%; left: 0; right: 0; z-index: 30; margin-top: 2px;
+    max-height: 200px; overflow-y: auto; background: var(--bg); border: 1px solid var(--border);
+    border-radius: var(--radius-sm); box-shadow: 0 6px 20px rgba(0,0,0,.18); }
+  .st-fish-results:empty { display: none; }
+  .st-fish-result { display: flex; align-items: center; gap: 8px; padding: 5px 9px; cursor: pointer;
+    font-size: 11px; border-bottom: 1px solid var(--border); }
+  .st-fish-result:last-child { border-bottom: none; }
+  .st-fish-result:hover { background: var(--accent); color: var(--accent-fg); }
+  .st-fish-result-title { flex: 1; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
+  .st-fish-result-lang { font-size: 10px; color: var(--text-muted); }
+  .st-fish-result:hover .st-fish-result-lang { color: var(--accent-fg); }
+  .st-fish-play { font-size: 10px; padding: 1px 6px; border-radius: 999px; cursor: pointer;
+    background: transparent; border: 1px solid currentColor; color: inherit; }
+  .st-fish-hint { padding: 6px 9px; font-size: 10.5px; color: var(--text-muted); }
   .st-fit { font-size: 11px; padding: 3px 10px; border-radius: var(--radius-sm); cursor: pointer;
     background: var(--bg); border: 1px solid var(--accent); color: var(--accent); margin-left: auto;
     transition: all .12s; }
diff --git a/research/2026-06-15-spec-10-fishaudio-tts-provider.md b/research/2026-06-15-spec-10-fishaudio-tts-provider.md
new file mode 100644
index 0000000..ebd8bd5
--- /dev/null
+++ b/research/2026-06-15-spec-10-fishaudio-tts-provider.md
@@ -0,0 +1,104 @@
+# RFC-10 · FishAudio TTS provider for narration
+
+- **Date**: 2026-06-15
+- **Status**: Draft (pending review)
+- **Author**: fancy
+- **Scope**: Add FishAudio as a second narration (text-to-speech) provider alongside the existing MiniMax integration, selectable per workspace. Music generation stays MiniMax-only (FishAudio has none).
+
+## 1. Context
+
+Narration today is hard-wired to MiniMax across ~6 files:
+
+| Layer | File | What it does |
+|---|---|---|
+| Provider | `packages/core/src/minimax.ts` | `resolveMinimaxCredentials` + `generateTts` (POST `/t2a_v2`) + `generateMusic` |
+| Config | `packages/cli/src/media-config.ts` | `MediaConfigStore` persists key to `.html-video/media-config.json` |
+| Server | `packages/cli/src/studio-server.ts` | `POST /api/projects/:id/generate-audio` (SSE) + `/api/config/minimax` |
+| Mux | `packages/core/src/project.ts` | stores MP3 asset, muxes into export via ffmpeg |
+| UI | `packages/project-studio/public/app.js` | Settings → Audio panel + Soundtrack panel (6 hard-coded voices) |
+| i18n | `packages/project-studio/public/i18n.js` | `settings.audio.*` strings say "MiniMax" |
+
+There is **no provider abstraction** — the request shape (`voice_setting`/`audio_setting`), the JSON+hex+`base_resp` response envelope, the region-bound keys, and the fixed 6-voice catalog are all MiniMax-specific.
+
+We use FishAudio's TTS heavily and want it as a first-class narration backend.
+
+## 2. Verified FishAudio API behaviour
+
+Tested live against `api.fish.audio` with a real key (raw `curl` + `ffprobe`):
+
+- **TTS**: `POST https://api.fish.audio/v1/tts`, headers `Authorization: Bearer <key>`, `Content-Type: application/json`, `model: <s1|s2-pro>`. Body `{ text, format:"mp3", reference_id? }`. Response is **raw binary audio** (`audio/mpeg`), `Transfer-Encoding: chunked`. No JSON envelope, no hex.
+  - `s1` and `s2-pro` both verified. `reference_id` optional → default voice. `prosody.speed`/`format:wav` verified working.
+  - **`model` header is documented "required" but the live API accepts its absence** (server default). We send it explicitly anyway.
+  - **No duration is returned** by `/v1/tts`. (See §3 — this turns out not to matter.)
+  - Error codes are standard HTTP: 401 unauthorized, 402 no credit, 422 validation.
+- **Voices**: `GET https://api.fish.audio/model?self=true&title=<q>&page_size=N` → `{ total, items:[{ _id, title, languages, visibility, samples:[{ audio }] }] }`. `_id` is the `reference_id`; `samples[].audio` is a preview MP3 URL. The test account holds 6554 own models → a searchable picker is required, not a plain dropdown.
+- **Single global host** — no international/China region split (unlike MiniMax).
+- FishAudio also has ASR and voice-clone creation; **out of scope** here.
+
+## 3. Key finding that simplifies the design
+
+`generateTts` returns `durationSec` (from MiniMax `extra_info.audio_length`), but **nothing downstream consumes it** — it appears only in the cosmetic `providerNote` string. The "Fit timing to narration" feature (`studio-server.ts:1229`) re-paces frames by **narration text character count**, not audio duration. So FishAudio returning no duration is harmless; `durationSec` stays optional and cosmetic.
+
+## 4. Design (provider abstraction)
+
+### 4.1 Core — new `packages/core/src/fishaudio.ts`
+
+Mirrors `minimax.ts`'s narration surface (music intentionally absent):
+
+- `resolveFishAudioCredentials(env)` → `{ apiKey, baseUrl, model } | null`
+  - key: `FISH_AUDIO_API_KEY` → `FISHAUDIO_API_KEY`
+  - base: `FISH_AUDIO_BASE_URL` → default `https://api.fish.audio` (host only; we append `/v1/tts` and `/model`)
+  - model: `FISH_AUDIO_MODEL` → default `s1`
+- `generateFishTts({ text, referenceId?, creds, signal? })` → POST `/v1/tts`, `model` header, `format:"mp3"`, read `arrayBuffer()` → `TtsAudioResult`. Maps 401/402/422 to friendly `HtmlVideoError('render-failed', …)`.
+- `listFishVoices({ creds, query?, pageSize? })` → GET `/model?self=true&title=<q>` → trimmed `[{ id, title, languages, sampleUrl }]`.
+
+Generalise `MinimaxAudioResult` → shared `TtsAudioResult { bytes; ext; providerNote; durationSec? }` (`ext` widened to `string`; **v1 FishAudio always emits `.mp3`** so the export mux assumptions are untouched). Re-export from `core/src/index.ts`.
+
+### 4.2 Config — `packages/cli/src/media-config.ts`
+
+`media-config.json` grows from `{ minimax }` to:
+
+```json
+{
+  "narrationProvider": "minimax" | "fishaudio",
+  "minimax":   { "apiKey": "…", "baseUrl": "…" },
+  "fishaudio": { "apiKey": "…", "baseUrl": "…" }
+}
+```
+
+Add `getFishAudioStatus / setFishAudio / clearFishAudio / resolveFishAudio` (mirroring the MiniMax methods, minus region) and `getNarrationProvider / setNarrationProvider`. Existing MiniMax methods stay (music still uses them). `narrationProvider` defaults to `minimax` for backward compat.
+
+### 4.3 Server — `packages/cli/src/studio-server.ts`
+
+- `generate-audio` handler: **music** branch unchanged (always MiniMax). **narration** branch resolves the active `narrationProvider` and routes to `generateFishTts` or `generateTts`. If the chosen provider has no key → the existing friendly "configure your key" SSE failure, naming the right provider.
+- New endpoints:
+  - `GET/POST/DELETE /api/config/fishaudio` — mirror `/api/config/minimax`.
+  - `GET/POST /api/config/narration-provider` — read/set the active provider.
+  - `GET /api/fishaudio/voices?q=<query>` — proxy `listFishVoices` for the picker (server holds the key; the browser never sees it).
+
+### 4.4 UI — `packages/project-studio/public/app.js` + `i18n.js`
+
+- **Settings → Audio**: a provider toggle (MiniMax / FishAudio) driving `narrationProvider`. FishAudio pane = key + base-URL inputs + a note that the model is controlled by `FISH_AUDIO_MODEL` (default `s1`); **region selector hidden** (MiniMax-only). Panel title becomes provider-neutral (no longer a literal "MiniMax").
+- **Soundtrack → Narration**: when `narrationProvider==='fishaudio'`, the voice control becomes a **searchable picker** — a text box → debounced `GET /api/fishaudio/voices?q=` → result list (title · language · ▶ sample) → selecting stores `reference_id` (empty = default voice). When `minimax`, the existing 6-voice dropdown is shown. Volume slider (post-mix dB) is shared, unchanged.
+- i18n: generalise `settings.audio.*`; add FishAudio strings (en + zh-CN).
+
+### 4.5 Non-goals (v1)
+
+ASR; voice-clone creation; per-request speed / temperature / format UI (matches the current MiniMax narration UI, which exposes only voice + volume); any music via FishAudio.
+
+## 5. Files touched
+
+`core/src/fishaudio.ts` (new), `core/src/minimax.ts` (result-type generalisation only), `core/src/index.ts`, `cli/src/media-config.ts`, `cli/src/studio-server.ts`, `project-studio/public/app.js`, `project-studio/public/i18n.js`. No change to the export/mux path or `project.ts`.
+
+## 6. Verification plan (must be real, not "tsc passes")
+
+1. `pnpm -r build` + `pnpm --filter @html-video/cli smoke` green.
+2. Unit: `resolveFishAudioCredentials` env precedence; `generateFishTts` request shape + binary decode (mock fetch); error-code mapping.
+3. **Real API** (with the provided key, never committed): `generateFishTts` → real MP3, `ffprobe` confirms valid audio; `listFishVoices` returns items.
+4. **End-to-end in studio** (chrome-devtools): configure FishAudio key → switch provider → search + pick a voice → generate narration → asset appears + plays. Capture evidence (screenshot / ffprobe of the generated asset).
+5. Regression: with `narrationProvider=minimax`, the existing flow is byte-for-byte unchanged.
+
+## 7. Open questions
+
+- Should the voice picker default `self=true` (own models) or also allow browsing the public marketplace? v1 = `self=true` only.
+- Persist a small "recently used voices" shortlist later? Deferred.