Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion packages/cli/src/media-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,21 @@

import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { resolveMinimaxCredentials, type MinimaxCredentials } from '@html-video/core';
import {
resolveMinimaxCredentials,
type MinimaxCredentials,
resolveFishAudioCredentials,
type FishAudioCredentials,
} from '@html-video/core';

/**
 * Identifier of the backend that synthesizes narration.
 * Music is always MiniMax (FishAudio has no music generation), so this choice
 * only affects narration. Defaults to 'minimax' for backward compat.
 */
export type NarrationProvider = 'minimax' | 'fishaudio';

/**
 * Media-related settings object. Every field is optional.
 * NOTE(review): presumably the shape MediaConfigStore reads and writes —
 * confirm against the store's read()/write() helpers.
 */
interface MediaConfig {
/** MiniMax API key and optional base URL. */
minimax?: { apiKey?: string; baseUrl?: string };
/** FishAudio API key and optional base URL. */
fishaudio?: { apiKey?: string; baseUrl?: string };
/** Selected narration backend. */
narrationProvider?: NarrationProvider;
}

export class MediaConfigStore {
Expand Down Expand Up @@ -86,6 +97,67 @@ export class MediaConfigStore {
}
return resolveMinimaxCredentials();
}

// --- FishAudio (narration only; no region — single global host) ----------

/**
 * Summarize the FishAudio configuration for the Settings UI.
 *
 * A key stored in the config file takes precedence over one resolved from the
 * environment. The raw key is never returned — only its masked form.
 *
 * @returns whether a key is available, where it came from ('config', 'env' or
 *   'none'), the masked key, and the base URL ('' when the config entry has
 *   none or nothing is configured).
 */
getFishAudioStatus(): { configured: boolean; source: 'config' | 'env' | 'none'; maskedKey: string; baseUrl: string } {
  const stored = this.read().fishaudio;

  // No usable key in the config file: report whatever the environment offers.
  if (!stored?.apiKey) {
    const fromEnv = resolveFishAudioCredentials();
    return fromEnv
      ? { configured: true, source: 'env', maskedKey: mask(fromEnv.apiKey), baseUrl: fromEnv.baseUrl }
      : { configured: false, source: 'none', maskedKey: '', baseUrl: '' };
  }

  return {
    configured: true,
    source: 'config',
    maskedKey: mask(stored.apiKey),
    baseUrl: stored.baseUrl ?? '',
  };
}

/**
 * Store a FishAudio key entered in the UI, replacing any previous FishAudio
 * entry in the config.
 *
 * @param apiKey  Key to persist; surrounding whitespace is trimmed.
 * @param baseUrl Optional base URL; persisted only when non-empty after trimming.
 */
setFishAudio(apiKey: string, baseUrl?: string): void {
  const config = this.read();
  const entry: { apiKey: string; baseUrl?: string } = { apiKey: apiKey.trim() };
  const trimmedBase = baseUrl?.trim() ?? '';
  if (trimmedBase.length > 0) {
    entry.baseUrl = trimmedBase;
  }
  config.fishaudio = entry;
  this.write(config);
}

/**
 * Remove the FishAudio entry from the config and write the config back.
 * A key supplied through the environment, if any, still applies afterwards.
 */
clearFishAudio(): void {
  const config = this.read();
  // Drop the whole entry (key and base URL together) in place.
  delete config.fishaudio;
  this.write(config);
}

/**
 * Resolve usable FishAudio credentials: config file first, then env.
 *
 * The model is always env-controlled (FISH_AUDIO_MODEL); the core resolver is
 * reused so the model + base-URL defaults stay in one place.
 *
 * @returns credentials built from the stored key when the core resolver
 *   accepts it; otherwise whatever the environment alone resolves to, which
 *   is null when nothing is configured.
 */
resolveFishAudio(): FishAudioCredentials | null {
  const stored = this.read().fishaudio;
  if (stored?.apiKey) {
    // Inject the stored key into the env resolver so model + base defaults
    // are computed identically to the env-only path.
    const ref = resolveFishAudioCredentials({ ...process.env, FISH_AUDIO_API_KEY: stored.apiKey });
    // The resolver's return type is nullable. If it rejects the stored key,
    // treat the key as absent and fall through to the env-only path below
    // instead of dereferencing null.
    if (ref) {
      // A config base URL wins when present; one trailing slash is stripped.
      const baseUrl = (stored.baseUrl || '').trim().replace(/\/$/, '') || ref.baseUrl;
      return { apiKey: stored.apiKey, baseUrl, model: ref.model };
    }
  }
  return resolveFishAudioCredentials();
}

// --- Active narration provider -------------------------------------------

/**
 * Read the active narration provider from the config.
 *
 * @returns 'fishaudio' only when the config holds exactly that value; any
 *   other or missing value yields 'minimax'.
 */
getNarrationProvider(): NarrationProvider {
  const { narrationProvider } = this.read();
  if (narrationProvider === 'fishaudio') {
    return 'fishaudio';
  }
  return 'minimax';
}

/**
 * Save the active narration provider to the config.
 *
 * @param provider Requested provider; anything other than 'fishaudio' is
 *   stored as 'minimax'.
 */
setNarrationProvider(provider: NarrationProvider): void {
  const config = this.read();
  if (provider === 'fishaudio') {
    config.narrationProvider = 'fishaudio';
  } else {
    config.narrationProvider = 'minimax';
  }
  this.write(config);
}
}

function mask(key: string): string {
Expand Down
113 changes: 94 additions & 19 deletions packages/cli/src/studio-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,15 @@ import { randomUUID } from 'node:crypto';
import { fileURLToPath } from 'node:url';
import { tmpdir } from 'node:os';
import type { CliContext } from './context.js';
import { AssetStore, generateTts, generateMusic } from '@html-video/core';
import {
AssetStore,
generateTts,
generateMusic,
generateFishTts,
listFishVoices,
type MinimaxCredentials,
type FishAudioCredentials,
} from '@html-video/core';
import { extractUrls, fetchSource } from './fetch-source.js';
import { detectAll, findAgent, spawnAgent } from '@html-video/runtime';

Expand Down Expand Up @@ -433,23 +441,42 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
};
try {
sse({ type: 'audio_started' });
const creds = ctx.mediaConfig.resolveMinimax();
if (!creds) {
const project = await ctx.orchestrator.load(projectId);
const soundtrack = { ...(project.soundtrack ?? {}) };
const wantMusic = !!body.music?.prompt?.trim();
const wantNarration = !!body.narration?.text?.trim();
if (!wantMusic && !wantNarration) {
sse({ type: 'audio_failed', message: 'Nothing to generate — provide a music prompt and/or narration text.' });
res.end();
return;
}

// Music is always MiniMax (FishAudio has no music generation).
const musicCreds = wantMusic ? ctx.mediaConfig.resolveMinimax() : null;
if (wantMusic && !musicCreds) {
sse({
type: 'audio_failed',
message:
'MiniMax API key not configured — add it in Settings → Audio (or set OD_MINIMAX_API_KEY).',
message: 'MiniMax API key not configured — add it in Settings → Audio (or set OD_MINIMAX_API_KEY).',
});
res.end();
return;
}

const project = await ctx.orchestrator.load(projectId);
const soundtrack = { ...(project.soundtrack ?? {}) };
const wantMusic = !!body.music?.prompt?.trim();
const wantNarration = !!body.narration?.text?.trim();
if (!wantMusic && !wantNarration) {
sse({ type: 'audio_failed', message: 'Nothing to generate — provide a music prompt and/or narration text.' });
// Narration provider is user-selectable (MiniMax or FishAudio).
const narrationProvider = ctx.mediaConfig.getNarrationProvider();
const narrationCreds = wantNarration
? narrationProvider === 'fishaudio'
? ctx.mediaConfig.resolveFishAudio()
: ctx.mediaConfig.resolveMinimax()
: null;
if (wantNarration && !narrationCreds) {
sse({
type: 'audio_failed',
message:
narrationProvider === 'fishaudio'
? 'FishAudio API key not configured — add it in Settings → Audio (or set FISH_AUDIO_API_KEY).'
: 'MiniMax API key not configured — add it in Settings → Audio (or set OD_MINIMAX_API_KEY).',
});
res.end();
return;
}
Expand All @@ -459,7 +486,7 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
const music = await generateMusic({
prompt: body.music!.prompt!.trim(),
instrumental: body.music!.instrumental ?? true,
creds,
creds: musicCreds!,
});
const { asset } = await ctx.orchestrator.addBufferAsset(
projectId,
Expand All @@ -474,13 +501,20 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
}

if (wantNarration) {
sse({ type: 'audio_progress', stage: 'narration', message: 'generating narration…' });
const nar = await generateTts({
text: body.narration!.text!.trim(),
...(body.narration!.voiceId !== undefined && { voiceId: body.narration!.voiceId }),
...(body.narration!.languageBoost !== undefined && { languageBoost: body.narration!.languageBoost }),
creds,
});
sse({ type: 'audio_progress', stage: 'narration', message: `generating narration (${narrationProvider})…` });
const nar =
narrationProvider === 'fishaudio'
? await generateFishTts({
text: body.narration!.text!.trim(),
...(body.narration!.voiceId ? { referenceId: body.narration!.voiceId } : {}),
creds: narrationCreds as FishAudioCredentials,
})
: await generateTts({
text: body.narration!.text!.trim(),
...(body.narration!.voiceId !== undefined && { voiceId: body.narration!.voiceId }),
...(body.narration!.languageBoost !== undefined && { languageBoost: body.narration!.languageBoost }),
creds: narrationCreds as MinimaxCredentials,
});
const { asset } = await ctx.orchestrator.addBufferAsset(
projectId,
nar.bytes,
Expand Down Expand Up @@ -632,6 +666,47 @@ export async function startStudioServer(ctx: CliContext, port: number): Promise<
return json(res, 200, ctx.mediaConfig.getMinimaxStatus());
}

// FishAudio audio API config — mirrors MiniMax (no region; single host).
if (url.pathname === '/api/config/fishaudio' && m === 'GET') {
return json(res, 200, ctx.mediaConfig.getFishAudioStatus());
}
if (url.pathname === '/api/config/fishaudio' && m === 'POST') {
const body = (await readBody(req)) as { apiKey?: string; baseUrl?: string };
const key = (body.apiKey ?? '').trim();
if (!key) return json(res, 400, { error: 'apiKey is required' });
ctx.mediaConfig.setFishAudio(key, body.baseUrl);
return json(res, 200, ctx.mediaConfig.getFishAudioStatus());
}
if (url.pathname === '/api/config/fishaudio' && m === 'DELETE') {
ctx.mediaConfig.clearFishAudio();
return json(res, 200, ctx.mediaConfig.getFishAudioStatus());
}

// Active narration provider (which backend synthesizes voiceover).
if (url.pathname === '/api/config/narration-provider' && m === 'GET') {
return json(res, 200, { provider: ctx.mediaConfig.getNarrationProvider() });
}
if (url.pathname === '/api/config/narration-provider' && m === 'POST') {
const body = (await readBody(req)) as { provider?: string };
const provider = body.provider === 'fishaudio' ? 'fishaudio' : 'minimax';
ctx.mediaConfig.setNarrationProvider(provider);
return json(res, 200, { provider: ctx.mediaConfig.getNarrationProvider() });
}

// FishAudio voice search — proxies the account's own models server-side so
// the browser never sees the key. Returns a trimmed list for the picker.
if (url.pathname === '/api/fishaudio/voices' && m === 'GET') {
const creds = ctx.mediaConfig.resolveFishAudio();
if (!creds) return json(res, 400, { error: 'FishAudio API key not configured' });
try {
const voices = await listFishVoices({ creds, query: url.searchParams.get('q') ?? '' });
return json(res, 200, { voices });
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
return json(res, 502, { error: msg });
}
}

// Agents (detected on each call; cheap thanks to the in-process cache)
if (url.pathname === '/api/agents' && m === 'GET') {
const force = url.searchParams.get('force') === '1';
Expand Down
2 changes: 1 addition & 1 deletion packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"scripts": {
"build": "tsc -p tsconfig.json",
"typecheck": "tsc -p tsconfig.json --noEmit",
"test": "node --test test/"
"test": "npm run build && node --test --experimental-strip-types \"test/**/*.test.ts\""
},
"dependencies": {
"@html-video/content-graph": "workspace:*",
Expand Down
Loading