diff --git a/packages/core/src/elevenlabs/index.ts b/packages/core/src/elevenlabs/index.ts
index 977740358..c73aba9b7 100644
--- a/packages/core/src/elevenlabs/index.ts
+++ b/packages/core/src/elevenlabs/index.ts
@@ -17,3 +17,12 @@ export {
 export type { ElevenLabsVoice, SynthesizeOptions } from "./client.js";
 export { generateSoundEffect, clampSfxDuration, SFX_BOUNDS } from "./sfx.js";
 export type { GenerateSfxOptions, GenerateSfxResult } from "./sfx.js";
+export {
+  generateMusic,
+  generateMusicAndWait,
+  getMusicJob,
+  downloadMusic,
+  clampMusicDuration,
+  MUSIC_BOUNDS,
+} from "./music.js";
+export type { GenerateMusicOptions, MusicJob, MusicJobStatus } from "./music.js";
diff --git a/packages/core/src/elevenlabs/music.ts b/packages/core/src/elevenlabs/music.ts
new file mode 100644
index 000000000..269298e8e
--- /dev/null
+++ b/packages/core/src/elevenlabs/music.ts
@@ -0,0 +1,184 @@
+/**
+ * ElevenLabs Music client (Eleven v3 Music). Unlike SFX, which is synchronous
+ * (request → mp3 in one round-trip), music is an async job:
+ *
+ *   POST /v1/music     → { music_id, status: "processing" }
+ *   GET  /v1/music/:id → { status: "completed", audio_url }
+ *   GET  audio_url     → mp3 bytes
+ *
+ * Callers want either the bytes (most common) or the job id (for progress
+ * UIs). `generateMusicAndWait` does the full poll-then-download flow with
+ * geometric backoff. The studio route exposes the job id immediately so the
+ * UI can show "generating…" without freezing the request.
+ */
+
+import { ElevenLabsError } from "./client.js";
+
+const API_BASE = "https://api.elevenlabs.io/v1";
+
+/** Music length bounds. The API supports up to ~5min in practice; we cap
+ * shorter to avoid runaway generations on user typos. */
+export const MUSIC_BOUNDS = {
+  durationMin: 10,
+  durationMax: 300,
+  promptMaxChars: 1500,
+} as const;
+
+export type MusicJobStatus = "processing" | "completed" | "failed";
+
+export interface MusicJob {
+  id: string;
+  status: MusicJobStatus;
+  /** Set once status === "completed". Direct-download URL for the mp3. */
+  audioUrl?: string;
+  /** Reported by the API on failed jobs. */
+  errorMessage?: string;
+}
+
+export interface GenerateMusicOptions {
+  durationMs?: number;
+  outputFormat?: "mp3_44100_128" | "mp3_44100_192";
+}
+
+export function clampMusicDuration(durationSeconds: number): number {
+  if (!Number.isFinite(durationSeconds)) return 60;
+  return Math.max(MUSIC_BOUNDS.durationMin, Math.min(MUSIC_BOUNDS.durationMax, durationSeconds));
+}
+
+/**
+ * Submit a music-generation request. Returns the job id immediately so the
+ * caller can show progress. Caller must poll `getMusicJob` (or use
+ * `generateMusicAndWait`) to retrieve the audio.
+ */
+export async function generateMusic(
+  apiKey: string,
+  prompt: string,
+  opts: GenerateMusicOptions = {},
+): Promise<MusicJob> {
+  if (!prompt || !prompt.trim()) {
+    throw new ElevenLabsError("generateMusic: prompt is required");
+  }
+  const trimmed = prompt.trim();
+  if (trimmed.length > MUSIC_BOUNDS.promptMaxChars) {
+    throw new ElevenLabsError(
+      `generateMusic: prompt too long (max ${MUSIC_BOUNDS.promptMaxChars} chars)`,
+    );
+  }
+  const body: Record<string, unknown> = { prompt: trimmed };
+  if (typeof opts.durationMs === "number") {
+    body.music_length_ms = Math.round(
+      Math.max(
+        MUSIC_BOUNDS.durationMin * 1000,
+        Math.min(MUSIC_BOUNDS.durationMax * 1000, opts.durationMs),
+      ),
+    );
+  }
+  if (opts.outputFormat) body.output_format = opts.outputFormat;
+
+  const res = await fetch(`${API_BASE}/music`, {
+    method: "POST",
+    headers: {
+      "xi-api-key": apiKey,
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    },
+    body: JSON.stringify(body),
+  });
+  if (!res.ok) {
+    let detail = "";
+    try {
+      const text = await res.text();
+      detail = text.length > 500 ? text.slice(0, 500) + "…" : text;
+    } catch {
+      /* ignore */
+    }
+    throw new ElevenLabsError(
+      `generateMusic: ${res.status} ${res.statusText}${detail ? ` — ${detail}` : ""}`,
+      res.status,
+    );
+  }
+  const json = (await res.json()) as {
+    music_id?: string;
+    status?: MusicJobStatus;
+    audio_url?: string;
+  };
+  if (!json.music_id) {
+    throw new ElevenLabsError("generateMusic: response missing music_id");
+  }
+  return {
+    id: json.music_id,
+    status: json.status ?? "processing",
+    ...(json.audio_url ? { audioUrl: json.audio_url } : {}),
+  };
+}
+
+export async function getMusicJob(apiKey: string, jobId: string): Promise<MusicJob> {
+  const res = await fetch(`${API_BASE}/music/${encodeURIComponent(jobId)}`, {
+    headers: { "xi-api-key": apiKey, Accept: "application/json" },
+  });
+  if (!res.ok) {
+    throw new ElevenLabsError(`getMusicJob: ${res.status} ${res.statusText}`, res.status);
+  }
+  const json = (await res.json()) as {
+    music_id?: string;
+    status?: MusicJobStatus;
+    audio_url?: string;
+    error_message?: string;
+  };
+  return {
+    id: json.music_id ?? jobId,
+    status: json.status ?? "processing",
+    ...(json.audio_url ? { audioUrl: json.audio_url } : {}),
+    ...(json.error_message ? { errorMessage: json.error_message } : {}),
+  };
+}
+
+/**
+ * Submit + poll until completed. Geometric backoff: 2s, 4s, 8s, max 30s.
+ * Total wait capped at `maxWaitMs` (default 5min). Throws ElevenLabsError on
+ * failure or timeout.
+ */
+export async function generateMusicAndWait(
+  apiKey: string,
+  prompt: string,
+  opts: GenerateMusicOptions & { maxWaitMs?: number; onProgress?: (job: MusicJob) => void } = {},
+): Promise<{ jobId: string; audioUrl: string }> {
+  const job = await generateMusic(apiKey, prompt, opts);
+  if (job.status === "completed" && job.audioUrl) {
+    return { jobId: job.id, audioUrl: job.audioUrl };
+  }
+  if (job.status === "failed") {
+    throw new ElevenLabsError(`generateMusic: job failed — ${job.errorMessage ?? "no detail"}`);
+  }
+  const maxWait = opts.maxWaitMs ?? 300_000;
+  const start = Date.now();
+  let delay = 2000;
+  while (Date.now() - start < maxWait) {
+    await new Promise((r) => setTimeout(r, delay));
+    delay = Math.min(delay * 2, 30_000);
+    const next = await getMusicJob(apiKey, job.id);
+    opts.onProgress?.(next);
+    if (next.status === "completed" && next.audioUrl) {
+      return { jobId: next.id, audioUrl: next.audioUrl };
+    }
+    if (next.status === "failed") {
+      throw new ElevenLabsError(`generateMusic: job failed — ${next.errorMessage ?? "no detail"}`);
+    }
+  }
+  throw new ElevenLabsError(
+    `generateMusic: timed out after ${maxWait}ms waiting for job ${job.id}`,
+  );
+}
+
+/**
+ * Download a completed job's mp3 bytes. The audio_url is signed and short-
+ * lived — call this immediately after `generateMusicAndWait` returns.
+ */
+export async function downloadMusic(audioUrl: string): Promise<Uint8Array> {
+  const res = await fetch(audioUrl);
+  if (!res.ok) {
+    throw new ElevenLabsError(`downloadMusic: ${res.status} ${res.statusText}`, res.status);
+  }
+  const bytes = new Uint8Array(await res.arrayBuffer());
+  return bytes;
+}
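
End-to-end usage of this client looks like the following sketch. The import path, prompt, and output location are illustrative assumptions, not part of the diff:

```ts
import { writeFileSync } from "node:fs";
// Assumed relative import; in-repo callers go through the package's index re-exports.
import { generateMusicAndWait, downloadMusic } from "./music.js";

async function makeUnderscore(apiKey: string): Promise<string> {
  const { jobId, audioUrl } = await generateMusicAndWait(
    apiKey,
    "Slow investigative underscore, muted strings, sparse piano",
    {
      durationMs: 45_000, // generateMusic clamps this into MUSIC_BOUNDS
      onProgress: (job) => console.log(`music ${job.id}: ${job.status}`),
    },
  );
  // The audio_url is signed and short-lived, so download before doing anything else.
  const bytes = await downloadMusic(audioUrl);
  const outPath = `assets/music/${jobId}.mp3`; // illustrative location
  writeFileSync(outPath, bytes);
  return outPath;
}
```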
diff --git a/packages/core/src/gemini/client.ts b/packages/core/src/gemini/client.ts
new file mode 100644
index 000000000..a595496c7
--- /dev/null
+++ b/packages/core/src/gemini/client.ts
@@ -0,0 +1,283 @@
+/**
+ * Gemini REST client — narrow surface for the two operations we actually use:
+ *   1. uploadFile(): push an mp4 / image to Gemini's Files API
+ *   2. generateStructured(): call generateContent with a function tool
+ *      and return the parsed input
+ *
+ * Mirrors the Anthropic client.ts shape so the studio routes have a uniform
+ * "callStructuredTool / callMultimodalStructured" surface for both providers.
+ *
+ * Why direct REST (no @google/genai SDK):
+ *  - The SDK pulls in a lot of code we don't need (gRPC fallbacks, Vertex AI
+ *    auth, batch APIs). For the two endpoints we use, fetch+JSON is ~80 lines.
+ *  - Lockstep version compatibility with Anthropic's pattern matters more
+ *    than SDK affordances we won't use.
+ */
+
+import { readFileSync, statSync } from "node:fs";
+import { basename } from "node:path";
+
+const API_BASE = "https://generativelanguage.googleapis.com/v1beta";
+const UPLOAD_BASE = "https://generativelanguage.googleapis.com/upload/v1beta";
+
+/** Default model for vision/video work. Flash is sufficient for retention
+ * review, scroll-test, and image analysis at our scope. Pro is reserved
+ * for multi-video comparison work that doesn't ship in this PR. */
+export const DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
+
+export class GeminiError extends Error {
+  status?: number;
+  constructor(message: string, status?: number) {
+    super(message);
+    this.name = "GeminiError";
+    this.status = status;
+  }
+}
+
+async function ensureOk(res: Response, label: string): Promise<void> {
+  if (res.ok) return;
+  let detail = "";
+  try {
+    const text = await res.text();
+    detail = text.length > 800 ? text.slice(0, 800) + "…" : text;
+  } catch {
+    /* ignore */
+  }
+  throw new GeminiError(
+    `${label}: ${res.status} ${res.statusText}${detail ? ` — ${detail}` : ""}`,
+    res.status,
+  );
+}
+
+// ── File upload ──────────────────────────────────────────────────────────────
+
+export interface UploadedFile {
+  /** "files/abc123" — used as `file_uri` in subsequent generateContent calls. */
+  uri: string;
+  /** Polled until "ACTIVE" before the file is usable in prompts. */
+  state: "PROCESSING" | "ACTIVE" | "FAILED";
+  mimeType: string;
+  name: string;
+  sizeBytes: number;
+}
+
+interface FilesApiResponse {
+  file?: {
+    name?: string;
+    uri?: string;
+    state?: "PROCESSING" | "ACTIVE" | "FAILED";
+    mimeType?: string;
+    sizeBytes?: string | number;
+  };
+}
+
+/**
+ * Upload a local file (mp4 / image) to Gemini's Files API. Uses the resumable
+ * protocol: one init request to negotiate, then one body POST. The studio
+ * server is on the trusted side of the project boundary, so we read the file
+ * synchronously — saves stream wiring we don't need.
+ *
+ * Caller must poll `getFile()` until state === "ACTIVE" before using the
+ * URI in a generateContent call. `uploadAndWait()` does that wait inline.
+ */
+export async function uploadFile(
+  apiKey: string,
+  filePath: string,
+  mimeType: string,
+): Promise<UploadedFile> {
+  const stat = statSync(filePath);
+  const sizeBytes = stat.size;
+  const displayName = basename(filePath);
+
+  // Step 1: init — get the upload URL.
+  const initRes = await fetch(`${UPLOAD_BASE}/files?key=${encodeURIComponent(apiKey)}`, {
+    method: "POST",
+    headers: {
+      "X-Goog-Upload-Protocol": "resumable",
+      "X-Goog-Upload-Command": "start",
+      "X-Goog-Upload-Header-Content-Length": String(sizeBytes),
+      "X-Goog-Upload-Header-Content-Type": mimeType,
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({ file: { display_name: displayName } }),
+  });
+  await ensureOk(initRes, "uploadFile: init");
+  const uploadUrl = initRes.headers.get("X-Goog-Upload-URL");
+  if (!uploadUrl) {
+    throw new GeminiError("uploadFile: server did not return X-Goog-Upload-URL header");
+  }
+
+  // Step 2: upload the bytes. We POST the whole file at once — fine for the
+  // sub-100MB videos the studio renders.
+  const bytes = readFileSync(filePath);
+  const putRes = await fetch(uploadUrl, {
+    method: "POST",
+    headers: {
+      "Content-Length": String(sizeBytes),
+      "X-Goog-Upload-Offset": "0",
+      "X-Goog-Upload-Command": "upload, finalize",
+    },
+    body: new Uint8Array(bytes),
+  });
+  await ensureOk(putRes, "uploadFile: upload");
+  const json = (await putRes.json()) as FilesApiResponse;
+  if (!json.file?.uri) {
+    throw new GeminiError("uploadFile: response missing file.uri");
+  }
+  return {
+    uri: json.file.uri,
+    state: json.file.state ?? "PROCESSING",
+    mimeType: json.file.mimeType ?? mimeType,
+    name: json.file.name ?? "",
+    sizeBytes:
+      typeof json.file.sizeBytes === "number"
+        ? json.file.sizeBytes
+        : Number.parseInt(String(json.file.sizeBytes ?? sizeBytes), 10),
+  };
+}
+ */ +export async function uploadAndWait( + apiKey: string, + filePath: string, + mimeType: string, + opts: { maxWaitMs?: number } = {}, +): Promise { + const initial = await uploadFile(apiKey, filePath, mimeType); + if (initial.state === "ACTIVE") return initial; + if (initial.state === "FAILED") { + throw new GeminiError(`uploadFile: server reported FAILED for ${initial.name}`); + } + const maxWait = opts.maxWaitMs ?? 60_000; + const start = Date.now(); + let delay = 1000; + let last = initial; + while (Date.now() - start < maxWait) { + await new Promise((r) => setTimeout(r, delay)); + delay = Math.min(delay * 2, 30_000); + last = await getFile(apiKey, last.name); + if (last.state === "ACTIVE") return last; + if (last.state === "FAILED") { + throw new GeminiError(`uploadFile: server reported FAILED for ${last.name}`); + } + } + throw new GeminiError( + `uploadFile: timed out after ${maxWait}ms waiting for ${last.name} to reach ACTIVE`, + ); +} + +// ── Structured tool call ───────────────────────────────────────────────────── + +export interface ToolFunctionDeclaration { + name: string; + description: string; + parameters: Record; +} + +export interface GenerateContentUsage { + promptTokenCount?: number; + candidatesTokenCount?: number; + totalTokenCount?: number; +} + +interface GenerateContentResponse { + candidates?: Array<{ + content?: { + parts?: Array<{ + functionCall?: { + name: string; + args: unknown; + }; + text?: string; + }>; + }; + finishReason?: string; + }>; + usageMetadata?: GenerateContentUsage; +} + +export interface GeminiPart { + text?: string; + fileData?: { fileUri: string; mimeType: string }; + inlineData?: { mimeType: string; data: string }; // base64 +} + +/** + * Call Gemini's generateContent endpoint with a single function declaration. + * The model is forced to call the function; we parse and return its args. + * + * Mirrors callStructuredTool() from anthropic/client.ts so the storyline + * routes can swap providers per task without learning two prompt shapes. + */ +export async function generateStructured( + apiKey: string, + opts: { + model?: string; + parts: GeminiPart[]; + systemInstruction?: string; + tool: ToolFunctionDeclaration; + temperature?: number; + maxOutputTokens?: number; + }, +): Promise<{ result: T; usage: GenerateContentUsage }> { + const model = opts.model ?? DEFAULT_GEMINI_MODEL; + const url = `${API_BASE}/models/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(apiKey)}`; + const body: Record = { + contents: [{ role: "user", parts: opts.parts }], + tools: [{ functionDeclarations: [opts.tool] }], + toolConfig: { + functionCallingConfig: { mode: "ANY", allowedFunctionNames: [opts.tool.name] }, + }, + generationConfig: { + temperature: opts.temperature ?? 0.4, + maxOutputTokens: opts.maxOutputTokens ?? 
+  if (opts.systemInstruction && opts.systemInstruction.trim().length > 0) {
+    body.systemInstruction = { parts: [{ text: opts.systemInstruction }] };
+  }
+
+  const res = await fetch(url, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(body),
+  });
+  await ensureOk(res, "generateStructured");
+  const json = (await res.json()) as GenerateContentResponse;
+  const candidate = json.candidates?.[0];
+  const fnCall = candidate?.content?.parts?.find((p) => p.functionCall);
+  if (!fnCall?.functionCall || fnCall.functionCall.name !== opts.tool.name) {
+    throw new GeminiError(
+      `generateStructured: model did not return a "${opts.tool.name}" function call; finishReason=${candidate?.finishReason ?? "unknown"}`,
+    );
+  }
+  return {
+    result: fnCall.functionCall.args as T,
+    usage: json.usageMetadata ?? {},
+  };
+}
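
The two halves compose like this in a caller. A sketch only: the tool schema, prompt, and result shape below are illustrative, not part of the diff:

```ts
import { uploadAndWait, generateStructured } from "./client.js";

// Hypothetical result shape; the real review tools live in the studio routes.
interface RetentionReport {
  score: number;
  notes: string;
}

async function reviewRetention(apiKey: string, videoPath: string): Promise<RetentionReport> {
  // Blocks until the file reaches ACTIVE (throws on FAILED or timeout).
  const file = await uploadAndWait(apiKey, videoPath, "video/mp4");

  const { result, usage } = await generateStructured<RetentionReport>(apiKey, {
    parts: [
      { fileData: { fileUri: file.uri, mimeType: file.mimeType } },
      { text: "Review this video for retention. Where do viewers likely drop off?" },
    ],
    tool: {
      name: "report_retention",
      description: "Report a retention score and reviewer notes for the video.",
      parameters: {
        type: "object",
        properties: {
          score: { type: "number", description: "0-100 retention score" },
          notes: { type: "string" },
        },
        required: ["score", "notes"],
      },
    },
  });
  console.log(`retention review used ${usage.totalTokenCount ?? "?"} tokens`);
  return result;
}
```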
diff --git a/packages/core/src/gemini/env.ts b/packages/core/src/gemini/env.ts
new file mode 100644
index 000000000..cf7473e15
--- /dev/null
+++ b/packages/core/src/gemini/env.ts
@@ -0,0 +1,40 @@
+import {
+  loadKey,
+  getKeyStatus,
+  writeKeyToEnvFile,
+  type KeySource,
+  type KeyStatus,
+} from "../secrets/envKey.js";
+
+/**
+ * Gemini API key loader. Mirrors the Anthropic env loader so the studio
+ * key panel can present both keys with the same UX.
+ *
+ * Looked-up names in priority order:
+ *   1. process.env.GEMINI_API_KEY
+ *   2. <projectDir>/.env GEMINI_API_KEY
+ *   3. ~/.config/hyperframes/global.env GEMINI_API_KEY
+ *
+ * Google's docs sometimes use GOOGLE_API_KEY for the same key — we accept
+ * the canonical GEMINI_API_KEY here and let users alias if needed. Keeping
+ * to one name avoids the "I set the wrong env var" debugging trap.
+ */
+
+const KEY_NAME = "GEMINI_API_KEY";
+
+export type GeminiKeySource = KeySource;
+export type GeminiKeyStatus = KeyStatus;
+
+export function loadGeminiKey(projectDir?: string): string | null {
+  return loadKey(KEY_NAME, projectDir);
+}
+
+export function getGeminiKeyStatus(projectDir?: string): GeminiKeyStatus {
+  return getKeyStatus(KEY_NAME, projectDir);
+}
+
+export function writeGeminiKeyToEnvFile(envPath: string, value: string | null): void {
+  writeKeyToEnvFile(envPath, KEY_NAME, value);
+}
+
+export const GEMINI_KEY_NAME = KEY_NAME;
diff --git a/packages/core/src/gemini/index.ts b/packages/core/src/gemini/index.ts
new file mode 100644
index 000000000..46cf22901
--- /dev/null
+++ b/packages/core/src/gemini/index.ts
@@ -0,0 +1,21 @@
+export {
+  loadGeminiKey,
+  getGeminiKeyStatus,
+  writeGeminiKeyToEnvFile,
+  GEMINI_KEY_NAME,
+} from "./env.js";
+export type { GeminiKeySource, GeminiKeyStatus } from "./env.js";
+export {
+  uploadFile,
+  uploadAndWait,
+  getFile,
+  generateStructured,
+  GeminiError,
+  DEFAULT_GEMINI_MODEL,
+} from "./client.js";
+export type {
+  UploadedFile,
+  ToolFunctionDeclaration,
+  GenerateContentUsage,
+  GeminiPart,
+} from "./client.js";
diff --git a/packages/core/src/script/assemble.ts b/packages/core/src/script/assemble.ts
index 8412b9731..42bccb752 100644
--- a/packages/core/src/script/assemble.ts
+++ b/packages/core/src/script/assemble.ts
@@ -9,6 +9,7 @@ import type { PlannedScene, PlannedScript, SceneTransition } from "./types.js";
 import type { ImageEntry, ImageManifest } from "../images/index.js";
 import type { VisualDirectionPlan } from "./visualDirector.js";
 import { readSfxManifest, resolveSfxStartForScene, type SfxEntry } from "./sfx/manifest.js";
+import { readMusicManifest, resolveMusicSpan, type SceneSpan } from "./music/manifest.js";
 
 export interface AssembleOptions {
   projectDir: string;
@@ -87,6 +88,12 @@ export function assembleMaster(planned: PlannedScript, opts: AssembleOptions): A
     list.push(entry);
     sfxBySceneId.set(entry.sceneId, list);
   }
+
+  // Music manifest: read here, emit AFTER the scene loop so we know each
+  // scene's absolute cursor position. Music tracks span multiple scenes so
+  // they need the full scene-span table to compute start + declared duration.
+  const musicManifest = readMusicManifest(opts.projectDir);
+  const sceneSpansForMusic: SceneSpan[] = [];
   const sceneVisibility: Array<{
     id: string;
     start: number;
@@ -223,10 +230,28 @@ export function assembleMaster(planned: PlannedScript, opts: AssembleOptions): A
       transitionIn,
       transitionInMs,
     });
+    sceneSpansForMusic.push({ id: scene.id, start: cursor, duration: sceneTotal });
     cursor += sceneTotal;
   }
 
   const total = cursor;
+
+  // Music tracks land on track 2. Each track's window comes from
+  // resolveMusicSpan (start = first covered scene, duration = audio length
+  // capped to covered span). Volume + duck attributes go to the producer's
+  // audio mixer at render time.
+  for (const entry of musicManifest.entries) {
+    const span = resolveMusicSpan(entry, sceneSpansForMusic, total);
+    if (span.declaredDuration <= 0) continue;
+    const labelAttr = entry.label ? ` data-timeline-label="${escapeAttr(entry.label)}"` : "";
+    const volumeAttr =
+      typeof entry.volumeDb === "number" ? ` data-volume-db="${entry.volumeDb.toFixed(1)}"` : "";
+    const duckAttr =
+      typeof entry.duckDb === "number" ? ` data-music-duck-db="${entry.duckDb.toFixed(1)}"` : "";
+    audioTags.push(
+      `<audio data-track="2" src="${escapeAttr(entry.path)}" data-start="${span.start.toFixed(2)}" data-duration="${span.declaredDuration.toFixed(2)}"${labelAttr}${volumeAttr}${duckAttr}></audio>`,
+    );
+  }
 
   const title = planned.meta.title ? escapeText(planned.meta.title) : "HyperFrames Video";
   // The hyperframes runtime composes its own master from <audio> elements with
diff --git a/packages/core/src/script/music/manifest.test.ts b/packages/core/src/script/music/manifest.test.ts
new file mode 100644
index 000000000..fcbf2d117
--- /dev/null
+++ b/packages/core/src/script/music/manifest.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect } from "vitest";
+import { resolveMusicSpan, type MusicEntry, type SceneSpan } from "./manifest";
+
+const baseEntry: Pick<MusicEntry, "scenesCovered" | "durationSeconds"> = {
+  scenesCovered: [],
+  durationSeconds: 60,
+};
+
+const SCENES: SceneSpan[] = [
+  { id: "s01", start: 0, duration: 5 },
+  { id: "s02", start: 5, duration: 8 },
+  { id: "s03", start: 13, duration: 6 },
+  { id: "s04", start: 19, duration: 4 },
+];
+const TOTAL = 23;
+
+describe("resolveMusicSpan", () => {
+  it("empty scenesCovered = full-video span, capped at video length", () => {
+    expect(resolveMusicSpan({ scenesCovered: [], durationSeconds: 30 }, SCENES, TOTAL)).toEqual({
+      start: 0,
+      declaredDuration: 23,
+    });
+  });
+
+  it("empty scenesCovered with audio shorter than total stays at audio length", () => {
+    expect(resolveMusicSpan({ scenesCovered: [], durationSeconds: 10 }, SCENES, TOTAL)).toEqual({
+      start: 0,
+      declaredDuration: 10,
+    });
+  });
+
+  it("single-scene cover spans that scene only", () => {
+    expect(resolveMusicSpan({ ...baseEntry, scenesCovered: ["s02"] }, SCENES, TOTAL)).toEqual({
+      start: 5,
+      declaredDuration: 8,
+    });
+  });
+
+  it("contiguous multi-scene cover spans first start to last end", () => {
+    // s02 starts at 5, s03 ends at 19 → start=5, duration=14
+    expect(
+      resolveMusicSpan({ scenesCovered: ["s02", "s03"], durationSeconds: 60 }, SCENES, TOTAL),
+    ).toEqual({ start: 5, declaredDuration: 14 });
+  });
+
+  it("non-contiguous cover (s01 + s04) spans the outer hull", () => {
+    // s01 starts at 0, s04 ends at 23 → start=0, duration=23
+    expect(
+      resolveMusicSpan({ scenesCovered: ["s01", "s04"], durationSeconds: 60 }, SCENES, TOTAL),
+    ).toEqual({ start: 0, declaredDuration: 23 });
+  });
+
+  it("scenesCovered referencing unknown ids falls back to full video", () => {
+    expect(
+      resolveMusicSpan({ scenesCovered: ["doesnt-exist"], durationSeconds: 30 }, SCENES, TOTAL),
+    ).toEqual({ start: 0, declaredDuration: 23 });
+  });
+
+  it("audio longer than covered scenes clips to covered duration", () => {
+    expect(
+      resolveMusicSpan({ scenesCovered: ["s01"], durationSeconds: 60 }, SCENES, TOTAL),
+    ).toEqual({ start: 0, declaredDuration: 5 });
+  });
+
+  it("audio shorter than covered scenes uses audio length", () => {
+    expect(
+      resolveMusicSpan({ scenesCovered: ["s01", "s02", "s03"], durationSeconds: 7 }, SCENES, TOTAL),
+    ).toEqual({ start: 0, declaredDuration: 7 });
+  });
+
+  it("scenesCovered out of order resolves correctly (uses min/max not first/last)", () => {
+    expect(
+      resolveMusicSpan({ scenesCovered: ["s04", "s02"], durationSeconds: 60 }, SCENES, TOTAL),
+    ).toEqual({ start: 5, declaredDuration: 18 });
+  });
+});
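
For reference, here is what a populated `music.manifest.json` looks like on disk, expressed as a typed literal rather than raw JSON. All values are made up:

```ts
import type { MusicManifest } from "./manifest.js";

// Illustrative manifest contents; ids, prompts, and levels are invented.
const exampleManifest: MusicManifest = {
  version: 1,
  entries: [
    {
      id: "music-0001",
      prompt: "Slow investigative underscore, muted strings, sparse piano",
      path: "assets/music/music-0001.mp3",
      durationSeconds: 42.5,
      scenesCovered: ["s02", "s03"],
      role: "underscore",
      label: "investigative bed",
      volumeDb: -6,
      duckDb: -12,
      createdAt: "2025-01-15T12:00:00.000Z",
    },
  ],
};
```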
diff --git a/packages/core/src/script/music/manifest.ts b/packages/core/src/script/music/manifest.ts
new file mode 100644
index 000000000..937f040db
--- /dev/null
+++ b/packages/core/src/script/music/manifest.ts
@@ -0,0 +1,157 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+
+/**
+ * Music manifest — lives at `<projectDir>/assets/music/music.manifest.json`.
+ * Mirrors the SFX manifest pattern with one structural difference: a music
+ * track spans MULTIPLE scenes (an underscore covers a whole act, a stinger
+ * covers a single transition). Anchoring is therefore a `scenesCovered: []`
+ * array instead of a single anchor.
+ *
+ * The assembler computes the absolute start/end on the master timeline by
+ * looking up the first and last scene's cumulative offsets. Each track's
+ * audio file lives at `assets/music/<id>.mp3` next to the manifest.
+ */
+
+export const MUSIC_DIR = "assets/music";
+export const MUSIC_MANIFEST = "music.manifest.json";
+export const MUSIC_MANIFEST_VERSION = 1;
+
+export type MusicRole = "underscore" | "stinger" | "intro" | "outro";
+
+export interface MusicEntry {
+  id: string;
+  /** Text prompt sent to ElevenLabs. Surfaced in the studio for context. */
+  prompt: string;
+  /** Path under the project root: `assets/music/<id>.mp3`. */
+  path: string;
+  /** Measured length of the file (seconds). The assembler uses this directly
+   * as `data-duration` so the timeline shows the real footprint. */
+  durationSeconds: number;
+  /** Ordered scene ids the track plays under. Empty list = whole video. */
+  scenesCovered: string[];
+  /** Loose tag for UI grouping. Doesn't affect playback. */
+  role: MusicRole;
+  /** Optional human label (e.g. "investigative bed"). Defaults to a prompt
+   * truncation when not supplied. */
+  label?: string;
+  /** Volume in dB. Default 0 (full). The producer's audio mixer reads
+   * data-volume-db to apply this at render time. */
+  volumeDb?: number;
+  /** Sidechain duck depth in dB applied during voiceover windows. -12 is the
+   * default for cinematic reels — voiceover stays clear without making the
+   * music feel cut. */
+  duckDb?: number;
+  /** ISO timestamp the entry was created. */
+  createdAt: string;
+}
+
+export interface MusicManifest {
+  version: number;
+  entries: MusicEntry[];
+}
+
+export function emptyManifest(): MusicManifest {
+  return { version: MUSIC_MANIFEST_VERSION, entries: [] };
+}
+
+export function readMusicManifest(projectDir: string): MusicManifest {
+  const path = join(projectDir, MUSIC_DIR, MUSIC_MANIFEST);
+  if (!existsSync(path)) return emptyManifest();
+  try {
+    const raw = JSON.parse(readFileSync(path, "utf-8")) as Partial<MusicManifest>;
+    if (!raw || typeof raw !== "object") return emptyManifest();
+    const entries = Array.isArray(raw.entries) ? raw.entries.filter(isValidEntry) : [];
+    return { version: MUSIC_MANIFEST_VERSION, entries };
+  } catch {
+    return emptyManifest();
+  }
+}
+
+export function writeMusicManifest(projectDir: string, manifest: MusicManifest): void {
+  const path = join(projectDir, MUSIC_DIR, MUSIC_MANIFEST);
+  mkdirSync(dirname(path), { recursive: true });
+  writeFileSync(path, JSON.stringify(manifest, null, 2) + "\n");
+}
+
+export function appendMusicEntry(projectDir: string, entry: MusicEntry): MusicManifest {
+  const manifest = readMusicManifest(projectDir);
+  manifest.entries.push(entry);
+  writeMusicManifest(projectDir, manifest);
+  return manifest;
+}
+
+export function removeMusicEntry(projectDir: string, entryId: string): MusicManifest {
+  const manifest = readMusicManifest(projectDir);
+  manifest.entries = manifest.entries.filter((e) => e.id !== entryId);
+  writeMusicManifest(projectDir, manifest);
+  return manifest;
+}
+
+function isValidEntry(value: unknown): value is MusicEntry {
+  if (!value || typeof value !== "object") return false;
+  const e = value as Record<string, unknown>;
+  return (
+    typeof e.id === "string" &&
+    typeof e.prompt === "string" &&
+    typeof e.path === "string" &&
+    typeof e.durationSeconds === "number" &&
+    Array.isArray(e.scenesCovered) &&
+    e.scenesCovered.every((s: unknown) => typeof s === "string") &&
+    typeof e.role === "string" &&
+    typeof e.createdAt === "string"
+  );
+}
+
+/**
+ * Compute a music track's absolute start time + the duration the assembler
+ * should declare on its `<audio>` tag.
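
The diff is cut off above mid-docstring: `resolveMusicSpan` itself and the `SceneSpan` interface it consumes are missing from this excerpt. Their behavior is fully pinned down by `manifest.test.ts`; below is a sketch that satisfies those tests, though the shipped implementation may differ in detail:

```ts
// Reconstructed sketch, not the shipped code; derived from manifest.test.ts.
export interface SceneSpan {
  id: string;
  start: number; // absolute seconds on the master timeline
  duration: number; // seconds
}

export function resolveMusicSpan(
  entry: Pick<MusicEntry, "scenesCovered" | "durationSeconds">,
  scenes: SceneSpan[],
  totalDuration: number,
): { start: number; declaredDuration: number } {
  const covered = scenes.filter((s) => entry.scenesCovered.includes(s.id));
  // Empty scenesCovered, or ids that match nothing, falls back to the whole video.
  if (covered.length === 0) {
    return { start: 0, declaredDuration: Math.min(entry.durationSeconds, totalDuration) };
  }
  // Outer hull of the covered scenes: min start to max end, so out-of-order
  // and non-contiguous lists resolve the same way.
  const start = Math.min(...covered.map((s) => s.start));
  const end = Math.max(...covered.map((s) => s.start + s.duration));
  return { start, declaredDuration: Math.min(entry.durationSeconds, end - start) };
}
```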