Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions app/api/transcribe/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { NextRequest, NextResponse } from "next/server";
import { processAudioTranscription } from "@/lib/transcribe/processAudioTranscription";
import { formatTranscriptionError } from "@/lib/transcribe/types";

/**
 * POST handler for the transcribe route.
 *
 * Reads `audio_url`, `account_id`, `artist_account_id`, `title`, and `include_timestamps`
 * from the JSON body, answers 400 when any of the first three is falsy, and otherwise
 * delegates the work to `processAudioTranscription`.
 *
 * @param req - Incoming request carrying the JSON body described above
 * @returns On success, JSON with `success`, `audioFile`, `transcriptFile`, `text`, and
 *   `language`; on failure, `{ error }` with the status chosen by `formatTranscriptionError`
 */
export async function POST(req: NextRequest) {
try {
// A body that fails to parse throws here and is reported by the catch block below.
const body = await req.json();
const { audio_url, account_id, artist_account_id, title, include_timestamps } = body;

// Required-field checks: each one short-circuits with its own 400 message.
if (!audio_url) {
return NextResponse.json({ error: "Missing required field: audio_url" }, { status: 400 });
}
if (!account_id) {
return NextResponse.json({ error: "Missing required field: account_id" }, { status: 400 });
}
if (!artist_account_id) {
return NextResponse.json(
{ error: "Missing required field: artist_account_id" },
{ status: 400 },
);
}
Comment on lines +8 to +21
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SRP - validate function

  • actual: API body validation is handled directly in the API route
  • required: API body validation has a standalone lib. See other API routes for an example.


// Map the snake_case request fields onto the camelCase params the library expects.
const result = await processAudioTranscription({
audioUrl: audio_url,
ownerAccountId: account_id,
artistAccountId: artist_account_id,
title,
includeTimestamps: include_timestamps,
});

return NextResponse.json({
success: true,
audioFile: result.audioFile,
transcriptFile: result.transcriptFile,
text: result.text,
language: result.language,
});
} catch (error) {
// formatTranscriptionError supplies both the client-facing message and the HTTP status.
const { message, status } = formatTranscriptionError(error);
return NextResponse.json({ error: message }, { status });
}
}

473 changes: 473 additions & 0 deletions features/feature-email-client.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions lib/consts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
* Shared constants for Recoup-API
*/

/** Name of the Supabase storage bucket that `uploadFileByKey` uploads into. */
export const SUPABASE_STORAGE_BUCKET = "user-files";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DRY principle

  • actual: creating new consts file
  • required: use the existing recoup-api/lib/const.ts file


2 changes: 2 additions & 0 deletions lib/mcp/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { registerSearchWebTool } from "./registerSearchWebTool";
import { registerAllFileTools } from "./files";
import { registerCreateSegmentsTool } from "./registerCreateSegmentsTool";
import { registerAllYouTubeTools } from "./youtube";
import { registerTranscribeTools } from "./transcribe";

/**
* Registers all MCP tools on the server.
Expand All @@ -27,6 +28,7 @@ export const registerAllTools = (server: McpServer): void => {
registerAllSora2Tools(server);
registerAllSpotifyTools(server);
registerAllTaskTools(server);
registerTranscribeTools(server);
registerContactTeamTool(server);
registerGetLocalTimeTool(server);
registerSearchWebTool(server);
Expand Down
12 changes: 12 additions & 0 deletions lib/mcp/tools/transcribe/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { registerTranscribeAudioTool } from "./registerTranscribeAudioTool";

/**
 * Registers all transcribe-related MCP tools.
 *
 * At present this is a single call to `registerTranscribeAudioTool`; it is the hook
 * that `registerAllTools` invokes for this tool group.
 *
 * @param server - The MCP server instance
 */
export function registerTranscribeTools(server: McpServer): void {
registerTranscribeAudioTool(server);
}

51 changes: 51 additions & 0 deletions lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import { processAudioTranscription } from "@/lib/transcribe/processAudioTranscription";
import { formatTranscriptionError } from "@/lib/transcribe/types";
import { getToolResultSuccess } from "@/lib/mcp/getToolResultSuccess";
import { getToolResultError } from "@/lib/mcp/getToolResultError";

/**
 * Input schema for the `transcribe_audio` tool.
 *
 * `audio_url` must be a URL and both account ids must be UUIDs; `title` and
 * `include_timestamps` are optional. The `.describe()` strings are attached to each field.
 */
const transcribeAudioSchema = z.object({
audio_url: z.string().url().describe("URL to the audio file (mp3, wav, m4a, webm)"),
account_id: z.string().uuid().describe("Owner account ID"),
artist_account_id: z.string().uuid().describe("Artist account ID for file storage"),
title: z.string().optional().describe("Title for the transcription (used in filename)"),
include_timestamps: z.boolean().optional().describe("Include timestamps in the transcript"),
});

/** Argument type of the tool handler, inferred from `transcribeAudioSchema`. */
type TranscribeAudioArgs = z.infer<typeof transcribeAudioSchema>;

/**
 * Registers the `transcribe_audio` tool on the given MCP server.
 *
 * The handler forwards its arguments to `processAudioTranscription` and wraps the
 * outcome with `getToolResultSuccess`; any thrown error is converted into a
 * `getToolResultError` whose text starts with "Failed to transcribe audio.".
 *
 * @param server - The MCP server instance to register the tool on
 */
export function registerTranscribeAudioTool(server: McpServer): void {
server.registerTool(
"transcribe_audio",
{
description:
"Transcribe audio (music, podcast, voice memo) using OpenAI Whisper. Saves both the original audio file and the transcript markdown to the customer's files.",
inputSchema: transcribeAudioSchema,
},
async (args: TranscribeAudioArgs) => {
try {
// Map the snake_case tool arguments onto the camelCase params the library expects.
const result = await processAudioTranscription({
audioUrl: args.audio_url,
ownerAccountId: args.account_id,
artistAccountId: args.artist_account_id,
title: args.title,
includeTimestamps: args.include_timestamps,
});

return getToolResultSuccess({
success: true,
message: `Saved "${result.audioFile.fileName}" and "${result.transcriptFile.fileName}"`,
audioFile: result.audioFile,
transcriptFile: result.transcriptFile,
text: result.text,
language: result.language,
});
Comment on lines +36 to +43
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

KISS principle

  • actual: manually destructuring result in call to getToolResultSuccess
  • required: pass raw response to getToolResult. see other tool files for reference.

} catch (error) {
// Only the message is used here; the status from formatTranscriptionError is dropped.
const { message } = formatTranscriptionError(error);
return getToolResultError(`Failed to transcribe audio. ${message}`);
}
},
);
}

64 changes: 64 additions & 0 deletions lib/supabase/files/createFileRecord.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import supabase from "@/lib/supabase/serverClient";

/**
 * Shape of the row that `createFileRecord` returns after inserting into the `files` table.
 *
 * NOTE(review): presumably mirrors the `files` table columns — confirm against the
 * Supabase schema types.
 */
export interface FileRecord {
id: string;
owner_account_id: string;
artist_account_id: string;
// Key the file is stored under; saveAudioToFiles passes the same key it uploaded with.
storage_key: string;
file_name: string;
// The three nullable fields below are written as null when the caller omits them.
mime_type: string | null;
size_bytes: number | null;
description: string | null;
tags: string[];
}

/**
 * Arguments accepted by `createFileRecord`.
 *
 * The four required fields are written to the row unchanged. The optional ones are
 * normalized on insert: `mimeType`, `sizeBytes`, and `description` fall back to `null`,
 * and a `tags` value that is not an array becomes `[]`.
 */
export interface CreateFileRecordParams {
ownerAccountId: string;
artistAccountId: string;
storageKey: string;
fileName: string;
mimeType?: string | null;
sizeBytes?: number | null;
description?: string | null;
tags?: string[];
}
Comment on lines +3 to +24
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DRY Principle

  • actual: redeclaring supabase types.
  • required: delete these new types and directly reference the supabase schema.


/**
 * Inserts one row into the `files` table and returns the inserted row.
 *
 * Optional metadata is normalized before the insert: a missing `mimeType`,
 * `sizeBytes`, or `description` is stored as `null`, and anything other than an
 * array for `tags` is stored as an empty array.
 *
 * @param params - Owner/artist ids, storage key, file name, and optional metadata
 * @returns The row produced by the insert
 * @throws Error prefixed with "Failed to create file record:" when the insert reports an error
 */
export async function createFileRecord(
  params: CreateFileRecordParams
): Promise<FileRecord> {
  // Translate the camelCase params into the snake_case column names.
  const row = {
    owner_account_id: params.ownerAccountId,
    artist_account_id: params.artistAccountId,
    storage_key: params.storageKey,
    file_name: params.fileName,
    mime_type: params.mimeType ?? null,
    size_bytes: params.sizeBytes ?? null,
    description: params.description ?? null,
    tags: Array.isArray(params.tags) ? params.tags : [],
  };

  // select().single() asks for the inserted row back as a single object.
  const { data, error } = await supabase.from("files").insert(row).select().single();

  if (error) {
    throw new Error(`Failed to create file record: ${error.message}`);
  }

  return data;
}

26 changes: 26 additions & 0 deletions lib/supabase/storage/uploadFileByKey.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import supabase from "@/lib/supabase/serverClient";
import { SUPABASE_STORAGE_BUCKET } from "@/lib/consts";

/**
 * Uploads a file or blob to the shared Supabase storage bucket under the given key.
 *
 * @param key - Object key (path) inside the bucket
 * @param file - The content to upload
 * @param options - `contentType` defaults to "application/octet-stream" when empty or
 *   missing; `upsert` defaults to `false`
 * @throws Error prefixed with "Failed to upload file:" when storage reports an error
 */
export async function uploadFileByKey(
  key: string,
  file: File | Blob,
  options: {
    contentType?: string;
    upsert?: boolean;
  } = {}
): Promise<void> {
  const bucket = supabase.storage.from(SUPABASE_STORAGE_BUCKET);

  const uploadOptions = {
    contentType: options.contentType || "application/octet-stream",
    upsert: options.upsert ?? false,
  };

  const { error } = await bucket.upload(key, file, uploadOptions);
  if (!error) {
    return;
  }

  throw new Error(`Failed to upload file: ${error.message}`);
}

36 changes: 36 additions & 0 deletions lib/transcribe/formatTranscriptMd.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { TranscriptionResult, TranscriptMdOptions } from "./types";

/**
 * Renders a transcription as a markdown document.
 *
 * The document always starts with a `# title` heading followed by a horizontal rule.
 * When timestamps are requested and the transcription carries at least one chunk, each
 * chunk becomes its own paragraph prefixed with a bold `[m:ss]` start time; otherwise
 * the plain transcription text is appended.
 *
 * @param transcription - The transcription result (text plus optional chunks)
 * @param options - `title` (defaults to "Transcription") and `includeTimestamps` (defaults to false)
 * @returns The markdown string
 */
export function formatTranscriptMd(
  transcription: TranscriptionResult,
  options: TranscriptMdOptions = {},
): string {
  const { title: heading = "Transcription", includeTimestamps: withTimestamps = false } = options;
  const header = `# ${heading}\n\n` + `---\n\n`;

  // Chunks are only consulted when timestamps were asked for.
  const chunks = withTimestamps ? transcription.chunks : undefined;
  if (!chunks || !(chunks.length > 0)) {
    return header + transcription.text;
  }

  const paragraphs = chunks.map((chunk) => {
    const startSeconds = chunk.timestamp[0];
    const minutes = Math.floor(startSeconds / 60);
    // Seconds are zero-padded to two digits; minutes are left unpadded.
    const seconds = Math.floor(startSeconds % 60)
      .toString()
      .padStart(2, "0");
    return `**[${minutes}:${seconds}]** ${chunk.text.trim()}\n\n`;
  });

  return header + paragraphs.join("");
}

14 changes: 14 additions & 0 deletions lib/transcribe/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/**
* Audio Transcription Module
*
* Transcribes audio files using OpenAI Whisper and saves both the original
* audio and transcript to customer files.
*/

export { transcribeAudio } from "./transcribeAudio";
export { formatTranscriptMd } from "./formatTranscriptMd";
export { saveAudioToFiles } from "./saveAudioToFiles";
export { saveTranscriptToFiles } from "./saveTranscriptToFiles";
export { processAudioTranscription } from "./processAudioTranscription";
export * from "./types";

73 changes: 73 additions & 0 deletions lib/transcribe/processAudioTranscription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import { transcribeAudio } from "./transcribeAudio";
import { formatTranscriptMd } from "./formatTranscriptMd";
import { saveAudioToFiles } from "./saveAudioToFiles";
import { saveTranscriptToFiles } from "./saveTranscriptToFiles";
import { ProcessTranscriptionParams, ProcessTranscriptionResult } from "./types";

/**
 * Fetches audio from a URL, transcribes it with OpenAI Whisper, and saves both the
 * original audio and the transcript markdown to the customer's files.
 *
 * Steps, in order:
 *  1. Download the audio and derive a file extension from its Content-Type header
 *     (falling back to "audio/mpeg" when the header is absent).
 *  2. Build a unique, filesystem-safe base name from the title plus the current time.
 *  3. Save the audio, transcribe it, render the transcript as markdown, save the transcript.
 *
 * Note that the audio is saved before transcription runs, so a transcription failure
 * leaves the audio file record in place.
 *
 * @param params - Audio URL, owner/artist account ids, optional title, optional timestamp flag
 * @returns Ids, file names, and storage keys of both saved files, plus the transcript text and language
 * @throws Error prefixed with "Failed to fetch audio:" when the download response is not OK;
 *   errors from the save/transcribe helpers propagate unchanged
 */
export async function processAudioTranscription(
  params: ProcessTranscriptionParams,
): Promise<ProcessTranscriptionResult> {
  const { audioUrl, ownerAccountId, artistAccountId, title, includeTimestamps } = params;

  const response = await fetch(audioUrl);
  if (!response.ok) {
    // statusText is often empty (for example over HTTP/2), so always report the numeric
    // status as well; otherwise the message would end right after the colon.
    const statusLine = `${response.status} ${response.statusText}`.trim();
    throw new Error(`Failed to fetch audio: ${statusLine}`);
  }

  const audioBlob = await response.blob();
  const contentType = response.headers.get("content-type") || "audio/mpeg";
  const ext = getExtensionFromContentType(contentType);

  // The timestamp suffix keeps repeated uploads with the same title from colliding.
  const timestamp = Date.now();
  const safeTitle = (title || "audio").replace(/[^a-zA-Z0-9._-]/g, "_");
  const uniqueTitle = `${safeTitle}-${timestamp}`;
  const fileName = `${uniqueTitle}.${ext}`;

  const audioFileRecord = await saveAudioToFiles({
    audioBlob,
    contentType,
    fileName,
    ownerAccountId,
    artistAccountId,
    title: uniqueTitle,
    tags: ["audio", "original"],
  });

  const transcription = await transcribeAudio(audioBlob, fileName);

  // The markdown heading uses the caller's original title, not the sanitized unique one.
  const markdown = formatTranscriptMd(transcription, { title, includeTimestamps });

  const transcriptFileRecord = await saveTranscriptToFiles({
    markdown,
    ownerAccountId,
    artistAccountId,
    title: uniqueTitle,
    tags: ["transcription", "generated"],
  });

  return {
    audioFile: {
      id: audioFileRecord.id,
      fileName: audioFileRecord.file_name,
      storageKey: audioFileRecord.storage_key,
    },
    transcriptFile: {
      id: transcriptFileRecord.id,
      fileName: transcriptFileRecord.file_name,
      storageKey: transcriptFileRecord.storage_key,
    },
    text: transcription.text,
    language: transcription.language,
  };
}

/**
 * Picks a file extension for an audio Content-Type header value.
 *
 * Matching is by substring on the lower-cased value, so parameters such as
 * `; codecs=opus` and vendor prefixes such as `x-wav` are tolerated. Container types
 * are checked before codec-like names so that `audio/ogg; codecs=flac` maps to "ogg".
 *
 * @param contentType - Raw Content-Type header value
 * @returns "wav", "m4a", "webm", "ogg", or "flac" when recognized; "mp3" otherwise
 */
function getExtensionFromContentType(contentType: string): string {
  // Media types are case-insensitive, so normalize before matching.
  const mime = contentType.toLowerCase();

  if (mime.includes("wav")) return "wav";
  if (mime.includes("m4a") || mime.includes("mp4")) return "m4a";
  if (mime.includes("webm")) return "webm";
  // Previously ogg and flac fell through to "mp3", mislabelling the saved file.
  if (mime.includes("ogg")) return "ogg";
  if (mime.includes("flac")) return "flac";
  return "mp3";
}
Comment on lines +67 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single Responsibility Principle

  • actual: multiple functions defined in a single file
  • required: new function file for getExtensionFromContentType


27 changes: 27 additions & 0 deletions lib/transcribe/saveAudioToFiles.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { uploadFileByKey } from "@/lib/supabase/storage/uploadFileByKey";
import { createFileRecord } from "@/lib/supabase/files/createFileRecord";
import { SaveAudioParams, FileRecord } from "./types";

/**
 * Uploads an audio blob to storage and records it in the files table.
 *
 * The file name is sanitized (anything outside `a-z A-Z 0-9 . _ -` becomes `_`) and the
 * object is stored under `files/<ownerAccountId>/<artistAccountId>/<sanitized name>`.
 * The upload never overwrites an existing object.
 *
 * @param params - Blob, content type, file name, account ids, and optional title/tags
 *   (`title` defaults to "Audio"; falsy `tags` default to `["audio"]`)
 * @returns The file record created for the uploaded audio
 */
export async function saveAudioToFiles(params: SaveAudioParams): Promise<FileRecord> {
  const {
    audioBlob: blob,
    contentType: mimeType,
    fileName: rawName,
    ownerAccountId: ownerId,
    artistAccountId: artistId,
    title: label = "Audio",
  } = params;

  const cleanName = rawName.replace(/[^a-zA-Z0-9._-]/g, "_");
  const objectKey = `files/${ownerId}/${artistId}/${cleanName}`;

  // Upload first so a record is only created for content that actually landed in storage.
  await uploadFileByKey(objectKey, blob, { contentType: mimeType, upsert: false });

  const record = createFileRecord({
    ownerAccountId: ownerId,
    artistAccountId: artistId,
    storageKey: objectKey,
    fileName: cleanName,
    mimeType,
    sizeBytes: blob.size,
    description: `Audio file: "${label}"`,
    tags: params.tags || ["audio"],
  });
  return record;
}
Loading