From 06a17e2cdacbb176983deb8903f4c90107d1fc66 Mon Sep 17 00:00:00 2001 From: Sidney Swift <158200036+sidneyswift@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:29:28 -0500 Subject: [PATCH 1/5] feat: add audio transcription with OpenAI Whisper - Add lib/transcribe/ module with core transcription logic - Add MCP tool 'transcribe_audio' for chat integration - Add POST /api/transcribe endpoint - Saves both original audio and transcript to customer files --- app/api/transcribe/route.ts | 72 +++ features/feature-email-client.md | 473 ++++++++++++++++++ lib/mcp/tools/index.ts | 2 + lib/mcp/tools/transcribe/index.ts | 12 + .../transcribe/registerTranscribeAudioTool.ts | 73 +++ lib/transcribe/formatTranscriptMd.ts | 36 ++ lib/transcribe/index.ts | 14 + lib/transcribe/processAudioTranscription.ts | 80 +++ lib/transcribe/saveAudioToFiles.ts | 59 +++ lib/transcribe/saveTranscriptToFiles.ts | 60 +++ lib/transcribe/transcribeAudio.ts | 70 +++ lib/transcribe/types.ts | 54 ++ 12 files changed, 1005 insertions(+) create mode 100644 app/api/transcribe/route.ts create mode 100644 features/feature-email-client.md create mode 100644 lib/mcp/tools/transcribe/index.ts create mode 100644 lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts create mode 100644 lib/transcribe/formatTranscriptMd.ts create mode 100644 lib/transcribe/index.ts create mode 100644 lib/transcribe/processAudioTranscription.ts create mode 100644 lib/transcribe/saveAudioToFiles.ts create mode 100644 lib/transcribe/saveTranscriptToFiles.ts create mode 100644 lib/transcribe/transcribeAudio.ts create mode 100644 lib/transcribe/types.ts diff --git a/app/api/transcribe/route.ts b/app/api/transcribe/route.ts new file mode 100644 index 00000000..c9501594 --- /dev/null +++ b/app/api/transcribe/route.ts @@ -0,0 +1,72 @@ +import { NextRequest, NextResponse } from "next/server"; +import { processAudioTranscription } from "@/lib/transcribe/processAudioTranscription"; + +/** + * POST /api/transcribe + * + * Transcribes 
audio using OpenAI Whisper and saves both the original audio + * and transcript markdown to the customer's files. + * + * Request body: + * - audio_url: URL to the audio file (required) + * - account_id: Owner account ID (required) + * - artist_account_id: Artist account ID for file storage (required) + * - title: Title for the transcription (optional) + * - include_timestamps: Include timestamps in transcript (optional) + */ +export async function POST(req: NextRequest) { + try { + const body = await req.json(); + const { audio_url, account_id, artist_account_id, title, include_timestamps } = body; + + // Validate required fields + if (!audio_url) { + return NextResponse.json({ error: "Missing required field: audio_url" }, { status: 400 }); + } + if (!account_id) { + return NextResponse.json({ error: "Missing required field: account_id" }, { status: 400 }); + } + if (!artist_account_id) { + return NextResponse.json( + { error: "Missing required field: artist_account_id" }, + { status: 400 }, + ); + } + + const result = await processAudioTranscription({ + audioUrl: audio_url, + ownerAccountId: account_id, + artistAccountId: artist_account_id, + title, + includeTimestamps: include_timestamps, + }); + + return NextResponse.json({ + success: true, + audioFile: result.audioFile, + transcriptFile: result.transcriptFile, + text: result.text, + language: result.language, + }); + } catch (error) { + console.error("Transcription error:", error); + + let errorMessage = error instanceof Error ? error.message : "Transcription failed"; + let status = 500; + + // Handle specific error cases + if (errorMessage.includes("OPENAI_API_KEY")) { + errorMessage = "OpenAI API key is not configured"; + status = 500; + } else if (errorMessage.includes("fetch audio")) { + errorMessage = "Could not fetch the audio file. 
Please check the URL is accessible."; + status = 400; + } else if (errorMessage.includes("25 MB") || errorMessage.includes("file size")) { + errorMessage = "Audio file exceeds the 25MB limit"; + status = 413; + } + + return NextResponse.json({ error: errorMessage }, { status }); + } +} + diff --git a/features/feature-email-client.md b/features/feature-email-client.md new file mode 100644 index 00000000..e8085c18 --- /dev/null +++ b/features/feature-email-client.md @@ -0,0 +1,473 @@ +# Feature: Inbound Email Client + +## Overview + +The inbound email feature allows users to interact with the Recoup AI via email. When someone sends an email to a Recoup email address (e.g., `support@mail.recoupable.com`), the system: + +1. Receives the email via a Resend webhook +2. Processes it through the AI agent +3. Sends back an AI-generated reply in the same email thread +4. Stores the conversation in the database + +This creates a seamless email-based chat experience with full conversation continuity between email and web. 
+ +--- + +## External Service: Resend + +The feature uses **[Resend](https://resend.com)** for: +- **Receiving emails** via webhooks (`email.received` events) +- **Fetching email content** (webhooks only send metadata, not the body) +- **Sending reply emails** with proper threading (`In-Reply-To` header) + +**Required Environment Variable:** +``` +RESEND_API_KEY=your_resend_api_key +``` + +--- + +## File Structure + +### Entry Point +``` +app/api/emails/inbound/route.ts ← Webhook endpoint (POST) +``` + +### Core Email Logic +``` +lib/emails/ +├── client.ts ← Resend client factory +├── sendEmail.ts ← sendEmailWithResend() +├── validateInboundEmailEvent.ts ← Zod schema validation +├── isTestEmail.ts ← Test email detection +└── inbound/ + ├── handleInboundEmail.ts ← Main orchestrator + ├── respondToInboundEmail.ts ← Response coordination + ├── validateNewEmailMemory.ts ← Room creation & dedup + ├── generateEmailResponse.ts ← AI response generation + ├── getEmailContent.ts ← Fetch email body from Resend + ├── getEmailRoomId.ts ← Thread → Room mapping + ├── getEmailRoomMessages.ts ← Conversation history + ├── getFromWithName.ts ← Format "from" address + └── trimRepliedContext.ts ← Strip quoted replies +``` + +### AI & Prompts +``` +lib/agents/generalAgent/ +└── getGeneralAgent.ts ← Creates the AI agent + +lib/chat/ +├── const.ts ← SYSTEM_PROMPT (main prompt) +├── buildSystemPromptWithImages.ts ← Add image context +├── createNewRoom.ts ← Room creation + notifications +├── generateChatTitle.ts ← AI-generated title +├── setupToolsForRequest.ts ← Load MCP tools +├── validateChatRequest.ts ← Request body schema +├── filterExcludedTools.ts ← Tool filtering +└── types.ts ← RoutingDecision type + +lib/prompts/ +└── getSystemPrompt.ts ← Dynamic prompt assembly +``` + +### Database Operations +``` +lib/supabase/ +├── account_emails/ +│ └── selectAccountEmails.ts ← Email → Account lookup +├── memories/ +│ ├── insertMemories.ts ← Store messages +│ └── selectMemories.ts ← Retrieve 
conversation +├── memory_emails/ +│ ├── insertMemoryEmail.ts ← Link email to memory +│ └── selectMemoryEmails.ts ← Find existing threads +└── rooms/ + └── insertRoom.ts ← Create conversation room +``` + +### Utilities +``` +lib/messages/ +├── getMessages.ts ← Convert text to UIMessage +├── filterMessageContentForMemories.ts ← Format for storage +├── extractImageUrlsFromMessages.ts ← Image handling +└── validateMessages.ts ← Message validation + +lib/uuid/ +└── generateUUID.ts ← Generate unique IDs + +lib/telegram/ +└── sendNewConversationNotification.ts ← Team notifications +``` + +--- + +## Database Tables + +| Table | Purpose | +|-------|---------| +| `account_emails` | Links email addresses to account IDs | +| `memories` | Stores conversation messages (user and assistant) | +| `memory_emails` | Links emails to memory records for thread tracking | +| `rooms` | Represents conversation rooms | + +--- + +## Key Functions Reference + +| Function | File | Purpose | +|----------|------|---------| +| `handleInboundEmail()` | `lib/emails/inbound/handleInboundEmail.ts` | Main orchestrator for webhook | +| `validateInboundEmailEvent()` | `lib/emails/validateInboundEmailEvent.ts` | Zod validation of webhook | +| `respondToInboundEmail()` | `lib/emails/inbound/respondToInboundEmail.ts` | Coordinates response flow | +| `validateNewEmailMemory()` | `lib/emails/inbound/validateNewEmailMemory.ts` | Room creation & duplicate detection | +| `generateEmailResponse()` | `lib/emails/inbound/generateEmailResponse.ts` | AI response generation | +| `getEmailContent()` | `lib/emails/inbound/getEmailContent.ts` | Fetches email body from Resend | +| `getEmailRoomId()` | `lib/emails/inbound/getEmailRoomId.ts` | Maps email thread to room | +| `getEmailRoomMessages()` | `lib/emails/inbound/getEmailRoomMessages.ts` | Gets conversation history | +| `trimRepliedContext()` | `lib/emails/inbound/trimRepliedContext.ts` | Strips quoted replies | +| `getFromWithName()` | 
`lib/emails/inbound/getFromWithName.ts` | Formats "from" address | +| `sendEmailWithResend()` | `lib/emails/sendEmail.ts` | Sends email via Resend | +| `getGeneralAgent()` | `lib/agents/generalAgent/getGeneralAgent.ts` | Creates AI agent | +| `getSystemPrompt()` | `lib/prompts/getSystemPrompt.ts` | Builds dynamic system prompt | +| `createNewRoom()` | `lib/chat/createNewRoom.ts` | Creates room + sends notifications | +| `generateChatTitle()` | `lib/chat/generateChatTitle.ts` | AI-generates conversation title | + +--- + +## User Journey 1: First Email (New Conversation) + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER SENDS FIRST EMAIL │ +│ │ +│ From: manager@label.com │ +│ To: support@mail.recoupable.com │ +│ Subject: Help with TikTok strategy │ +│ Body: "How can I grow my artist's TikTok following?" │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 1. RESEND WEBHOOK → POST /api/emails/inbound │ +│ File: app/api/emails/inbound/route.ts │ +│ Function: POST() → handleInboundEmail() │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 2. VALIDATE WEBHOOK PAYLOAD │ +│ File: lib/emails/validateInboundEmailEvent.ts │ +│ Function: validateInboundEmailEvent(body) │ +│ │ +│ Checks: type="email.received", email_id, from, to, message_id, etc. │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 3. 
START RESPONSE FLOW │ +│ File: lib/emails/inbound/respondToInboundEmail.ts │ +│ Function: respondToInboundEmail(event) │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 4. VALIDATE & CREATE MEMORY │ +│ File: lib/emails/inbound/validateNewEmailMemory.ts │ +│ Function: validateNewEmailMemory(event) │ +│ │ +│ Step A: Look up sender in account_emails table │ +│ → Found: manager@label.com → account_id: "abc-123" │ +│ │ +│ Step B: Fetch full email body from Resend │ +│ File: lib/emails/inbound/getEmailContent.ts │ +│ │ +│ Step C: Strip quoted reply text │ +│ File: lib/emails/inbound/trimRepliedContext.ts │ +│ │ +│ Step D: Check for existing room (via references header) │ +│ File: lib/emails/inbound/getEmailRoomId.ts │ +│ → No references → NEW CONVERSATION │ +│ │ +│ Step E: Generate new room ID │ +│ → roomId: "room-xyz-789" │ +│ │ +│ Step F: Create new room │ +│ File: lib/chat/createNewRoom.ts │ +│ ├─► Generate title: "TikTok Strategy" │ +│ ├─► Insert room in database │ +│ └─► Send Telegram notification to team │ +│ │ +│ Step G: Store user message as memory (id = email_id for dedup) │ +│ │ +│ Step H: Link email to memory in memory_emails table │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 5. 
GENERATE AI RESPONSE │ +│ File: lib/emails/inbound/generateEmailResponse.ts │ +│ │ +│ Step A: Create AI Agent with system prompt │ +│ File: lib/agents/generalAgent/getGeneralAgent.ts │ +│ Uses: lib/chat/const.ts (SYSTEM_PROMPT) │ +│ Uses: lib/prompts/getSystemPrompt.ts (dynamic context) │ +│ │ +│ Step B: Get conversation history (empty for new convo) │ +│ File: lib/emails/inbound/getEmailRoomMessages.ts │ +│ │ +│ Step C: Generate response via agent.generate() │ +│ │ +│ Step D: Add email footer with web link │ +│ → "Continue on Recoup: chat.recoupable.com/chat/room-xyz-789" │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 6. SEND REPLY EMAIL │ +│ File: lib/emails/sendEmail.ts │ +│ │ +│ Payload: │ +│ from: "Support <support@mail.recoupable.com>" │ +│ to: ["manager@label.com"] │ +│ subject: "Re: Help with TikTok strategy" │ +│ html: AI response + footer │ +│ headers: { "In-Reply-To": original_message_id } │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 7. STORE ASSISTANT RESPONSE │ +│ File: lib/supabase/memories/insertMemories.ts │ +│ │ +│ Saves AI response to same room for conversation continuity │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER RECEIVES REPLY │ +│ │ +│ From: Support <support@mail.recoupable.com> │ +│ Subject: Re: Help with TikTok strategy │ +│ Body: "Great question! Here are 5 strategies..." │ +│ ───────────────────────────────────── │ +│ Note: you can reply directly to this email. 
│ +│ Or continue on Recoup: chat.recoupable.com/chat/room-xyz-789 │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## User Journey 2: Reply Email (Existing Thread) + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER REPLIES TO EMAIL │ +│ │ +│ From: manager@label.com │ +│ To: support@mail.recoupable.com │ +│ Subject: Re: Help with TikTok strategy │ +│ References: <original-message-id> ← Key for threading! │ +│ Body: "Can you give me specific hashtag suggestions?" │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ + Steps 1-3: Same as Journey 1 + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 4. FIND EXISTING ROOM │ +│ File: lib/emails/inbound/getEmailRoomId.ts │ +│ │ +│ ├─► Parse references header │ +│ │ → ["<original-message-id>"] │ +│ │ │ +│ ├─► Look up in memory_emails table │ +│ │ File: lib/supabase/memory_emails/selectMemoryEmails.ts │ +│ │ │ +│ └─► Found! Returns room_id: "room-xyz-789" │ +│ │ +│ → SKIP room creation (room already exists) │ +│ → Store user message in EXISTING room │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ 5. GENERATE AI RESPONSE WITH FULL CONTEXT │ +│ File: lib/emails/inbound/getEmailRoomMessages.ts │ +│ │ +│ Retrieves from memories table: │ +│ [ │ +│ { role: "user", content: "How can I grow TikTok..." }, │ +│ { role: "assistant", content: "Great question! Here are 5..." }, │ +│ { role: "user", content: "Can you give me hashtag..." 
} │ +│ ] │ +│ │ +│ AI has FULL CONTEXT → generates contextual response │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ + Steps 6-7: Same as Journey 1 +``` + +--- + +## User Journey 3: Email to Web Transition + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER CLICKS LINK IN EMAIL FOOTER │ +│ │ +│ "Or continue on Recoup: chat.recoupable.com/chat/room-xyz-789" │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ RECOUP-CHAT LOADS CONVERSATION │ +│ │ +│ The web app queries the same memories table using room_id │ +│ → User sees ENTIRE conversation history (email + web messages) │ +│ │ +│ Users can continue chatting on web, and if they email again later, │ +│ those web messages will be included in the AI's context. │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## User Journey 4: Duplicate Prevention (Idempotency) + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ RESEND WEBHOOK SENDS SAME EMAIL TWICE │ +│ (network retry, webhook failure, etc.) │ +│ │ +│ Same email_id: "email-abc-123" │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ FIRST REQUEST: Processed normally │ +│ → Memory inserted with id: "email-abc-123" │ +│ → Reply sent to user │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ SECOND REQUEST: Duplicate detection │ +│ File: lib/emails/inbound/validateNewEmailMemory.ts │ +│ │ +│ try { │ +│ await insertMemories({ id: emailId, ... 
}); │ +│ } catch (error) { │ +│ if (error.code === "23505") { ← PostgreSQL unique constraint │ +│ return { message: "Email already processed" }; │ +│ } │ +│ } │ +│ │ +│ → NO duplicate reply sent │ +│ → User doesn't get spammed │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Prompts + +### Main System Prompt +**Location:** `lib/chat/const.ts` + +The `SYSTEM_PROMPT` constant defines Recoup's core personality and capabilities: +- Music industry AI assistant +- Artist management, fan analysis, marketing funnels +- Platform-specific social media strategy +- Actionable, data-informed insights + +### Dynamic Prompt Assembly +**Location:** `lib/prompts/getSystemPrompt.ts` + +Enhances the base prompt with runtime context: +- `account_id`, `artist_account_id`, `active_account_email` +- `active_conversation_id`, `active_conversation_name` +- Image editing instructions +- User context (name, job title, company, custom instructions) +- Artist/workspace context (artist-specific instructions) +- Knowledge base content (uploaded files) + +### Email Footer +**Location:** `lib/emails/inbound/generateEmailResponse.ts` + +Appended to every email response: +```html +
+<hr />
+<p>Note: you can reply directly to this email to continue the conversation.</p>
+<p>
+  Or continue the conversation on Recoup:
+  <a href="https://chat.recoupable.com/chat/{roomId}">link</a>
+</p>
+``` + +--- + +## Key Design Decisions + +### 1. Idempotency +Uses `email_id` as the memory ID to prevent duplicate processing. If the same webhook fires twice, the database insert fails with unique constraint error (code `23505`). + +### 2. Thread Continuity +Uses the email `References` header to find existing rooms. This links email replies to existing conversations. + +### 3. Cross-Platform Seamlessness +Users can switch between email and web chat. Both use the same `memories` table with the same `room_id`. + +### 4. Clean Input Processing +The `trimRepliedContext()` function strips quoted reply content from emails (Gmail, Outlook, Apple Mail formats) so the AI only sees new content. + +### 5. Reply Threading +Uses `In-Reply-To` header in outbound emails to maintain proper threading in user's inbox. + +--- + +## Current Limitations + +1. **CC recipients do NOT receive responses** - Reply only goes to `original.from` +2. **Attachments not processed** - Webhook receives attachment metadata but they're ignored +3. **Plain text responses** - No HTML email styling (just text + footer) +4. **No rate limiting** - No protection against email spam +5. **Silent failures** - When processing fails, user gets no notification + +--- + +## Modification Guide + +| Want to change... 
| Modify this file | +|-------------------|------------------| +| AI personality/behavior | `lib/chat/const.ts` | +| Dynamic user context in prompt | `lib/prompts/getSystemPrompt.ts` | +| Email footer/links | `lib/emails/inbound/generateEmailResponse.ts` | +| How replies are stripped | `lib/emails/inbound/trimRepliedContext.ts` | +| Thread detection logic | `lib/emails/inbound/getEmailRoomId.ts` | +| "From" address formatting | `lib/emails/inbound/getFromWithName.ts` | +| Room creation behavior | `lib/chat/createNewRoom.ts` | +| Title generation | `lib/chat/generateChatTitle.ts` | +| Available AI tools | `lib/chat/setupToolsForRequest.ts` | +| Team notifications | `lib/telegram/sendNewConversationNotification.ts` | +| Webhook validation | `lib/emails/validateInboundEmailEvent.ts` | +| Message storage format | `lib/messages/filterMessageContentForMemories.ts` | + +--- + +## Potential Improvements + +1. **Include CC recipients in reply** - Add `cc: original.cc` to email payload +2. **Email-specific prompt section** - Add context like "responding via email, keep concise" +3. **Attachment handling** - Process and include attachments in AI context +4. **HTML email styling** - Professional email template with branding +5. **Rate limiting** - Prevent abuse from email spam +6. **Error notification emails** - Notify user when processing fails +7. **Multi-recipient handling** - Different responses for different recipients + + + + diff --git a/lib/mcp/tools/index.ts b/lib/mcp/tools/index.ts index e4581b5a..033b80a6 100644 --- a/lib/mcp/tools/index.ts +++ b/lib/mcp/tools/index.ts @@ -12,6 +12,7 @@ import { registerSearchWebTool } from "./registerSearchWebTool"; import { registerAllFileTools } from "./files"; import { registerCreateSegmentsTool } from "./registerCreateSegmentsTool"; import { registerAllYouTubeTools } from "./youtube"; +import { registerTranscribeTools } from "./transcribe"; /** * Registers all MCP tools on the server. 
@@ -27,6 +28,7 @@ export const registerAllTools = (server: McpServer): void => { registerAllSora2Tools(server); registerAllSpotifyTools(server); registerAllTaskTools(server); + registerTranscribeTools(server); registerContactTeamTool(server); registerGetLocalTimeTool(server); registerSearchWebTool(server); diff --git a/lib/mcp/tools/transcribe/index.ts b/lib/mcp/tools/transcribe/index.ts new file mode 100644 index 00000000..01ff8e19 --- /dev/null +++ b/lib/mcp/tools/transcribe/index.ts @@ -0,0 +1,12 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { registerTranscribeAudioTool } from "./registerTranscribeAudioTool"; + +/** + * Registers all transcribe-related MCP tools. + * + * @param server - The MCP server instance + */ +export function registerTranscribeTools(server: McpServer): void { + registerTranscribeAudioTool(server); +} + diff --git a/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts b/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts new file mode 100644 index 00000000..b1846026 --- /dev/null +++ b/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts @@ -0,0 +1,73 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { z } from "zod"; +import { processAudioTranscription } from "@/lib/transcribe/processAudioTranscription"; +import { getToolResultSuccess } from "@/lib/mcp/getToolResultSuccess"; +import { getToolResultError } from "@/lib/mcp/getToolResultError"; + +const transcribeAudioSchema = z.object({ + audio_url: z.string().url().describe("URL to the audio file (mp3, wav, m4a, webm)"), + account_id: z.string().uuid().describe("Owner account ID"), + artist_account_id: z.string().uuid().describe("Artist account ID for file storage"), + title: z.string().optional().describe("Title for the transcription (used in filename)"), + include_timestamps: z.boolean().optional().describe("Include timestamps in the transcript"), +}); + +type TranscribeAudioArgs = z.infer; + +/** + * Registers 
the "transcribe_audio" tool on the MCP server. + * Transcribes audio using OpenAI Whisper and saves both the audio and transcript + * to the customer's files. + * + * @param server - The MCP server instance to register the tool on. + */ +export function registerTranscribeAudioTool(server: McpServer): void { + server.registerTool( + "transcribe_audio", + { + description: + "Transcribe audio (music, podcast, voice memo) using OpenAI Whisper. Saves both the original audio file and the transcript markdown to the customer's files.", + inputSchema: transcribeAudioSchema, + }, + async (args: TranscribeAudioArgs) => { + try { + const result = await processAudioTranscription({ + audioUrl: args.audio_url, + ownerAccountId: args.account_id, + artistAccountId: args.artist_account_id, + title: args.title, + includeTimestamps: args.include_timestamps, + }); + + const response = { + success: true, + message: `Saved "${result.audioFile.fileName}" and "${result.transcriptFile.fileName}"`, + audioFile: result.audioFile, + transcriptFile: result.transcriptFile, + text: result.text, + language: result.language, + }; + + return getToolResultSuccess(response); + } catch (error) { + console.error("Error transcribing audio:", error); + + let errorMessage = error instanceof Error ? error.message : "An unexpected error occurred"; + + // Format helpful error messages + if (errorMessage.includes("OPENAI_API_KEY")) { + errorMessage = "OpenAI API key is missing. Please check environment variables."; + } else if (errorMessage.includes("rate limit")) { + errorMessage = "Rate limit exceeded. Please try again later."; + } else if (errorMessage.includes("fetch audio")) { + errorMessage = "Could not fetch the audio file. Please check the URL is accessible."; + } else if (errorMessage.includes("25 MB") || errorMessage.includes("file size")) { + errorMessage = "Audio file is too large. OpenAI Whisper has a 25MB limit."; + } + + return getToolResultError(`Failed to transcribe audio. 
${errorMessage}`); + } + }, + ); +} + diff --git a/lib/transcribe/formatTranscriptMd.ts b/lib/transcribe/formatTranscriptMd.ts new file mode 100644 index 00000000..dc052de1 --- /dev/null +++ b/lib/transcribe/formatTranscriptMd.ts @@ -0,0 +1,36 @@ +import { TranscriptionResult, TranscriptMdOptions } from "./types"; + +/** + * Formats a transcription result as a markdown document. + * + * @param transcription - The transcription result from OpenAI Whisper + * @param options - Formatting options (title, timestamps) + * @returns Formatted markdown string + */ +export function formatTranscriptMd( + transcription: TranscriptionResult, + options: TranscriptMdOptions = {}, +): string { + const { title = "Transcription", includeTimestamps = false } = options; + + let md = `# ${title}\n\n`; + md += `---\n\n`; + + if (includeTimestamps && transcription.chunks && transcription.chunks.length > 0) { + // Format with timestamps + for (const chunk of transcription.chunks) { + const [start] = chunk.timestamp; + const mins = Math.floor(start / 60); + const secs = Math.floor(start % 60) + .toString() + .padStart(2, "0"); + md += `**[${mins}:${secs}]** ${chunk.text.trim()}\n\n`; + } + } else { + // Plain text without timestamps + md += transcription.text; + } + + return md; +} + diff --git a/lib/transcribe/index.ts b/lib/transcribe/index.ts new file mode 100644 index 00000000..38430b56 --- /dev/null +++ b/lib/transcribe/index.ts @@ -0,0 +1,14 @@ +/** + * Audio Transcription Module + * + * Transcribes audio files using OpenAI Whisper and saves both the original + * audio and transcript to customer files. 
+ */ + +export { transcribeAudio } from "./transcribeAudio"; +export { formatTranscriptMd } from "./formatTranscriptMd"; +export { saveAudioToFiles } from "./saveAudioToFiles"; +export { saveTranscriptToFiles } from "./saveTranscriptToFiles"; +export { processAudioTranscription } from "./processAudioTranscription"; +export * from "./types"; + diff --git a/lib/transcribe/processAudioTranscription.ts b/lib/transcribe/processAudioTranscription.ts new file mode 100644 index 00000000..eccfb2d2 --- /dev/null +++ b/lib/transcribe/processAudioTranscription.ts @@ -0,0 +1,80 @@ +import { transcribeAudio } from "./transcribeAudio"; +import { formatTranscriptMd } from "./formatTranscriptMd"; +import { saveAudioToFiles } from "./saveAudioToFiles"; +import { saveTranscriptToFiles } from "./saveTranscriptToFiles"; +import { ProcessTranscriptionParams, ProcessTranscriptionResult } from "./types"; + +/** + * Main orchestrator: fetches audio, saves it, transcribes, and saves transcript. + * Both files are saved to the customer's files. + * + * @param params - Audio URL and account information + * @returns Result with both file records and transcription text + */ +export async function processAudioTranscription( + params: ProcessTranscriptionParams, +): Promise { + const { audioUrl, ownerAccountId, artistAccountId, title, includeTimestamps } = params; + + // 1. 
Fetch the audio file from URL + const response = await fetch(audioUrl); + if (!response.ok) { + throw new Error(`Failed to fetch audio: ${response.statusText}`); + } + const audioBlob = await response.blob(); + const contentType = response.headers.get("content-type") || "audio/mpeg"; + + // Determine file extension from content type + let ext = "mp3"; + if (contentType.includes("wav")) ext = "wav"; + else if (contentType.includes("m4a") || contentType.includes("mp4")) ext = "m4a"; + else if (contentType.includes("webm")) ext = "webm"; + + const safeTitle = (title || "audio").replace(/[^a-zA-Z0-9._-]/g, "_"); + const fileName = `${safeTitle}.${ext}`; + + // 2. Save the original audio file + const audioFileRecord = await saveAudioToFiles({ + audioBlob, + contentType, + fileName, + ownerAccountId, + artistAccountId, + title, + tags: ["audio", "original"], + }); + + // 3. Transcribe using OpenAI Whisper + const transcription = await transcribeAudio(audioBlob, fileName); + + // 4. Format as markdown + const markdown = formatTranscriptMd(transcription, { + title, + includeTimestamps, + }); + + // 5. 
Save the transcript + const transcriptFileRecord = await saveTranscriptToFiles({ + markdown, + ownerAccountId, + artistAccountId, + title, + tags: ["transcription", "generated"], + }); + + return { + audioFile: { + id: audioFileRecord.id, + fileName: audioFileRecord.file_name, + storageKey: audioFileRecord.storage_key, + }, + transcriptFile: { + id: transcriptFileRecord.id, + fileName: transcriptFileRecord.file_name, + storageKey: transcriptFileRecord.storage_key, + }, + text: transcription.text, + language: transcription.language, + }; +} + diff --git a/lib/transcribe/saveAudioToFiles.ts b/lib/transcribe/saveAudioToFiles.ts new file mode 100644 index 00000000..4697c93c --- /dev/null +++ b/lib/transcribe/saveAudioToFiles.ts @@ -0,0 +1,59 @@ +import supabase from "@/lib/supabase/serverClient"; +import { SaveAudioParams } from "./types"; + +const SUPABASE_STORAGE_BUCKET = "user-files"; + +interface FileRecord { + id: string; + file_name: string; + storage_key: string; +} + +/** + * Saves audio blob to customer files (storage + database record). + * + * @param params - Audio file and account information + * @returns The created file record + */ +export async function saveAudioToFiles(params: SaveAudioParams): Promise { + const { audioBlob, contentType, fileName, ownerAccountId, artistAccountId, title = "Audio" } = + params; + + const safeFileName = fileName.replace(/[^a-zA-Z0-9._-]/g, "_"); + const storageKey = `files/${ownerAccountId}/${artistAccountId}/${safeFileName}`; + + // 1. Upload to Supabase Storage + const { error: uploadError } = await supabase.storage + .from(SUPABASE_STORAGE_BUCKET) + .upload(storageKey, audioBlob, { + contentType, + upsert: false, + }); + + if (uploadError) { + throw new Error(`Failed to upload audio: ${uploadError.message}`); + } + + // 2. 
Create database record + const { data, error: insertError } = await supabase + .from("files") + .insert({ + owner_account_id: ownerAccountId, + artist_account_id: artistAccountId, + storage_key: storageKey, + file_name: safeFileName, + mime_type: contentType, + size_bytes: audioBlob.size, + description: `Audio file: "${title}"`, + tags: params.tags || ["audio"], + }) + .select() + .single(); + + if (insertError) { + throw new Error(`Failed to create file record: ${insertError.message}`); + } + + return data; +} + diff --git a/lib/transcribe/saveTranscriptToFiles.ts b/lib/transcribe/saveTranscriptToFiles.ts new file mode 100644 index 00000000..f3890ae3 --- /dev/null +++ b/lib/transcribe/saveTranscriptToFiles.ts @@ -0,0 +1,60 @@ +import supabase from "@/lib/supabase/serverClient"; +import { SaveTranscriptParams } from "./types"; + +const SUPABASE_STORAGE_BUCKET = "user-files"; + +interface FileRecord { + id: string; + file_name: string; + storage_key: string; +} + +/** + * Saves transcript markdown to customer files (storage + database record). + * + * @param params - Markdown content and account information + * @returns The created file record + */ +export async function saveTranscriptToFiles(params: SaveTranscriptParams): Promise { + const { markdown, ownerAccountId, artistAccountId, title = "Transcription" } = params; + + const safeTitle = title.replace(/[^a-zA-Z0-9._-]/g, "_"); + const fileName = `${safeTitle}-transcript.md`; + const storageKey = `files/${ownerAccountId}/${artistAccountId}/${fileName}`; + + // 1. Upload to Supabase Storage + const markdownBlob = new Blob([markdown], { type: "text/markdown" }); + const { error: uploadError } = await supabase.storage + .from(SUPABASE_STORAGE_BUCKET) + .upload(storageKey, markdownBlob, { + contentType: "text/markdown", + upsert: false, + }); + + if (uploadError) { + throw new Error(`Failed to upload transcript: ${uploadError.message}`); + } + + // 2. 
Create database record + const { data, error: insertError } = await supabase + .from("files") + .insert({ + owner_account_id: ownerAccountId, + artist_account_id: artistAccountId, + storage_key: storageKey, + file_name: fileName, + mime_type: "text/markdown", + size_bytes: new TextEncoder().encode(markdown).length, + description: `Transcript for "${title}"`, + tags: params.tags || ["transcription"], + }) + .select() + .single(); + + if (insertError) { + throw new Error(`Failed to create file record: ${insertError.message}`); + } + + return data; +} + diff --git a/lib/transcribe/transcribeAudio.ts b/lib/transcribe/transcribeAudio.ts new file mode 100644 index 00000000..2dcf31b6 --- /dev/null +++ b/lib/transcribe/transcribeAudio.ts @@ -0,0 +1,70 @@ +import { TranscriptionResult } from "./types"; + +/** + * OpenAI Whisper transcription response with verbose_json format. + */ +interface WhisperVerboseResponse { + text: string; + language: string; + segments?: { + start: number; + end: number; + text: string; + }[]; +} + +/** + * Transcribes audio to text using OpenAI Whisper API. 
+ * + * @param audioBlob - The audio file as a Blob + * @param fileName - Original filename (needed for OpenAI API) + * @returns Transcription result with full text and optional timestamps + */ +export async function transcribeAudio( + audioBlob: Blob, + fileName: string, +): Promise { + const apiKey = process.env.OPENAI_API_KEY; + + if (!apiKey) { + throw new Error("OPENAI_API_KEY environment variable is not set"); + } + + // OpenAI expects a File with a name property + const file = new File([audioBlob], fileName, { type: audioBlob.type }); + + const formData = new FormData(); + formData.append("file", file); + formData.append("model", "whisper-1"); + formData.append("response_format", "verbose_json"); + + const response = await fetch("https://api.openai.com/v1/audio/transcriptions", { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + }, + body: formData, + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error( + errorData.error?.message || `Transcription failed with status ${response.status}`, + ); + } + + const data: WhisperVerboseResponse = await response.json(); + + // Map OpenAI segments to our chunk format + const chunks = data.segments?.map((seg) => ({ + timestamp: [seg.start, seg.end] as [number, number], + text: seg.text, + })); + + return { + text: data.text, + chunks, + language: data.language, + }; +} + diff --git a/lib/transcribe/types.ts b/lib/transcribe/types.ts new file mode 100644 index 00000000..f54adb92 --- /dev/null +++ b/lib/transcribe/types.ts @@ -0,0 +1,54 @@ +/** + * Types for the audio transcription feature. + * Used by transcribeAudio, formatTranscriptMd, and related functions. 
+ */ + +export interface TranscriptionResult { + text: string; + chunks?: { timestamp: [number, number]; text: string }[]; + language?: string; +} + +export interface TranscriptMdOptions { + title?: string; + includeTimestamps?: boolean; +} + +export interface SaveFileParams { + ownerAccountId: string; + artistAccountId: string; + title?: string; + tags?: string[]; +} + +export interface SaveAudioParams extends SaveFileParams { + audioBlob: Blob; + contentType: string; + fileName: string; +} + +export interface SaveTranscriptParams extends SaveFileParams { + markdown: string; +} + +export interface ProcessTranscriptionParams { + audioUrl: string; + ownerAccountId: string; + artistAccountId: string; + title?: string; + includeTimestamps?: boolean; +} + +export interface FileInfo { + id: string; + fileName: string; + storageKey: string; +} + +export interface ProcessTranscriptionResult { + audioFile: FileInfo; + transcriptFile: FileInfo; + text: string; + language?: string; +} + From d3bde302b98eb5c27182ff7052a858bb6f367537 Mon Sep 17 00:00:00 2001 From: Sidney Swift <158200036+sidneyswift@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:51:00 -0500 Subject: [PATCH 2/5] refactor: apply code review principles - DRY: Extract shared FileRecord type and STORAGE_BUCKET constant - DRY: Centralize error formatting in formatTranscriptionError() - Remove console.error statements (production code) - Remove redundant 'what' comments, keep 'why' context - Extract getExtensionFromContentType helper function --- app/api/transcribe/route.ts | 35 ++----------------- .../transcribe/registerTranscribeAudioTool.ts | 32 +++-------------- lib/transcribe/processAudioTranscription.ts | 33 +++++++---------- lib/transcribe/saveAudioToFiles.ts | 20 ++--------- lib/transcribe/saveTranscriptToFiles.ts | 20 ++--------- lib/transcribe/types.ts | 34 ++++++++++++++++-- 6 files changed, 56 insertions(+), 118 deletions(-) diff --git a/app/api/transcribe/route.ts b/app/api/transcribe/route.ts index 
c9501594..5cf0b9a5 100644 --- a/app/api/transcribe/route.ts +++ b/app/api/transcribe/route.ts @@ -1,25 +1,12 @@ import { NextRequest, NextResponse } from "next/server"; import { processAudioTranscription } from "@/lib/transcribe/processAudioTranscription"; +import { formatTranscriptionError } from "@/lib/transcribe/types"; -/** - * POST /api/transcribe - * - * Transcribes audio using OpenAI Whisper and saves both the original audio - * and transcript markdown to the customer's files. - * - * Request body: - * - audio_url: URL to the audio file (required) - * - account_id: Owner account ID (required) - * - artist_account_id: Artist account ID for file storage (required) - * - title: Title for the transcription (optional) - * - include_timestamps: Include timestamps in transcript (optional) - */ export async function POST(req: NextRequest) { try { const body = await req.json(); const { audio_url, account_id, artist_account_id, title, include_timestamps } = body; - // Validate required fields if (!audio_url) { return NextResponse.json({ error: "Missing required field: audio_url" }, { status: 400 }); } @@ -49,24 +36,8 @@ export async function POST(req: NextRequest) { language: result.language, }); } catch (error) { - console.error("Transcription error:", error); - - let errorMessage = error instanceof Error ? error.message : "Transcription failed"; - let status = 500; - - // Handle specific error cases - if (errorMessage.includes("OPENAI_API_KEY")) { - errorMessage = "OpenAI API key is not configured"; - status = 500; - } else if (errorMessage.includes("fetch audio")) { - errorMessage = "Could not fetch the audio file. 
Please check the URL is accessible."; - status = 400; - } else if (errorMessage.includes("25 MB") || errorMessage.includes("file size")) { - errorMessage = "Audio file exceeds the 25MB limit"; - status = 413; - } - - return NextResponse.json({ error: errorMessage }, { status }); + const { message, status } = formatTranscriptionError(error); + return NextResponse.json({ error: message }, { status }); } } diff --git a/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts b/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts index b1846026..0d781822 100644 --- a/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts +++ b/lib/mcp/tools/transcribe/registerTranscribeAudioTool.ts @@ -1,6 +1,7 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { processAudioTranscription } from "@/lib/transcribe/processAudioTranscription"; +import { formatTranscriptionError } from "@/lib/transcribe/types"; import { getToolResultSuccess } from "@/lib/mcp/getToolResultSuccess"; import { getToolResultError } from "@/lib/mcp/getToolResultError"; @@ -14,13 +15,6 @@ const transcribeAudioSchema = z.object({ type TranscribeAudioArgs = z.infer; -/** - * Registers the "transcribe_audio" tool on the MCP server. - * Transcribes audio using OpenAI Whisper and saves both the audio and transcript - * to the customer's files. - * - * @param server - The MCP server instance to register the tool on. 
- */ export function registerTranscribeAudioTool(server: McpServer): void { server.registerTool( "transcribe_audio", @@ -39,33 +33,17 @@ export function registerTranscribeAudioTool(server: McpServer): void { includeTimestamps: args.include_timestamps, }); - const response = { + return getToolResultSuccess({ success: true, message: `Saved "${result.audioFile.fileName}" and "${result.transcriptFile.fileName}"`, audioFile: result.audioFile, transcriptFile: result.transcriptFile, text: result.text, language: result.language, - }; - - return getToolResultSuccess(response); + }); } catch (error) { - console.error("Error transcribing audio:", error); - - let errorMessage = error instanceof Error ? error.message : "An unexpected error occurred"; - - // Format helpful error messages - if (errorMessage.includes("OPENAI_API_KEY")) { - errorMessage = "OpenAI API key is missing. Please check environment variables."; - } else if (errorMessage.includes("rate limit")) { - errorMessage = "Rate limit exceeded. Please try again later."; - } else if (errorMessage.includes("fetch audio")) { - errorMessage = "Could not fetch the audio file. Please check the URL is accessible."; - } else if (errorMessage.includes("25 MB") || errorMessage.includes("file size")) { - errorMessage = "Audio file is too large. OpenAI Whisper has a 25MB limit."; - } - - return getToolResultError(`Failed to transcribe audio. ${errorMessage}`); + const { message } = formatTranscriptionError(error); + return getToolResultError(`Failed to transcribe audio. 
${message}`); } }, ); diff --git a/lib/transcribe/processAudioTranscription.ts b/lib/transcribe/processAudioTranscription.ts index eccfb2d2..622f9837 100644 --- a/lib/transcribe/processAudioTranscription.ts +++ b/lib/transcribe/processAudioTranscription.ts @@ -5,35 +5,25 @@ import { saveTranscriptToFiles } from "./saveTranscriptToFiles"; import { ProcessTranscriptionParams, ProcessTranscriptionResult } from "./types"; /** - * Main orchestrator: fetches audio, saves it, transcribes, and saves transcript. - * Both files are saved to the customer's files. - * - * @param params - Audio URL and account information - * @returns Result with both file records and transcription text + * Fetches audio from URL, transcribes it with OpenAI Whisper, and saves both + * the original audio and transcript markdown to the customer's files. */ export async function processAudioTranscription( params: ProcessTranscriptionParams, ): Promise { const { audioUrl, ownerAccountId, artistAccountId, title, includeTimestamps } = params; - // 1. Fetch the audio file from URL const response = await fetch(audioUrl); if (!response.ok) { throw new Error(`Failed to fetch audio: ${response.statusText}`); } + const audioBlob = await response.blob(); const contentType = response.headers.get("content-type") || "audio/mpeg"; - - // Determine file extension from content type - let ext = "mp3"; - if (contentType.includes("wav")) ext = "wav"; - else if (contentType.includes("m4a") || contentType.includes("mp4")) ext = "m4a"; - else if (contentType.includes("webm")) ext = "webm"; - + const ext = getExtensionFromContentType(contentType); const safeTitle = (title || "audio").replace(/[^a-zA-Z0-9._-]/g, "_"); const fileName = `${safeTitle}.${ext}`; - // 2. Save the original audio file const audioFileRecord = await saveAudioToFiles({ audioBlob, contentType, @@ -44,16 +34,10 @@ export async function processAudioTranscription( tags: ["audio", "original"], }); - // 3. 
Transcribe using OpenAI Whisper const transcription = await transcribeAudio(audioBlob, fileName); - // 4. Format as markdown - const markdown = formatTranscriptMd(transcription, { - title, - includeTimestamps, - }); + const markdown = formatTranscriptMd(transcription, { title, includeTimestamps }); - // 5. Save the transcript const transcriptFileRecord = await saveTranscriptToFiles({ markdown, ownerAccountId, @@ -78,3 +62,10 @@ export async function processAudioTranscription( }; } +function getExtensionFromContentType(contentType: string): string { + if (contentType.includes("wav")) return "wav"; + if (contentType.includes("m4a") || contentType.includes("mp4")) return "m4a"; + if (contentType.includes("webm")) return "webm"; + return "mp3"; +} + diff --git a/lib/transcribe/saveAudioToFiles.ts b/lib/transcribe/saveAudioToFiles.ts index 4697c93c..b38a96cb 100644 --- a/lib/transcribe/saveAudioToFiles.ts +++ b/lib/transcribe/saveAudioToFiles.ts @@ -1,20 +1,6 @@ import supabase from "@/lib/supabase/serverClient"; -import { SaveAudioParams } from "./types"; +import { SaveAudioParams, FileRecord, STORAGE_BUCKET } from "./types"; -const SUPABASE_STORAGE_BUCKET = "user-files"; - -interface FileRecord { - id: string; - file_name: string; - storage_key: string; -} - -/** - * Saves audio blob to customer files (storage + database record). 
- * - * @param params - Audio file and account information - * @returns The created file record - */ export async function saveAudioToFiles(params: SaveAudioParams): Promise { const { audioBlob, contentType, fileName, ownerAccountId, artistAccountId, title = "Audio" } = params; @@ -22,9 +8,8 @@ export async function saveAudioToFiles(params: SaveAudioParams): Promise { const { markdown, ownerAccountId, artistAccountId, title = "Transcription" } = params; @@ -22,10 +8,9 @@ export async function saveTranscriptToFiles(params: SaveTranscriptParams): Promi const fileName = `${safeTitle}-transcript.md`; const storageKey = `files/${ownerAccountId}/${artistAccountId}/${fileName}`; - // 1. Upload to Supabase Storage const markdownBlob = new Blob([markdown], { type: "text/markdown" }); const { error: uploadError } = await supabase.storage - .from(SUPABASE_STORAGE_BUCKET) + .from(STORAGE_BUCKET) .upload(storageKey, markdownBlob, { contentType: "text/markdown", upsert: false, @@ -35,7 +20,6 @@ export async function saveTranscriptToFiles(params: SaveTranscriptParams): Promi throw new Error(`Failed to upload transcript: ${uploadError.message}`); } - // 2. Create database record const { data, error: insertError } = await supabase .from("files") .insert({ diff --git a/lib/transcribe/types.ts b/lib/transcribe/types.ts index f54adb92..e075e638 100644 --- a/lib/transcribe/types.ts +++ b/lib/transcribe/types.ts @@ -1,8 +1,15 @@ /** - * Types for the audio transcription feature. - * Used by transcribeAudio, formatTranscriptMd, and related functions. + * Shared types and constants for the audio transcription feature. 
*/ +export const STORAGE_BUCKET = "user-files"; + +export interface FileRecord { + id: string; + file_name: string; + storage_key: string; +} + export interface TranscriptionResult { text: string; chunks?: { timestamp: [number, number]; text: string }[]; @@ -52,3 +59,26 @@ export interface ProcessTranscriptionResult { language?: string; } +/** + * Formats transcription errors into user-friendly messages. + * Centralizes error message logic to avoid duplication. + */ +export function formatTranscriptionError(error: unknown): { message: string; status: number } { + const rawMessage = error instanceof Error ? error.message : "Transcription failed"; + + if (rawMessage.includes("OPENAI_API_KEY")) { + return { message: "OpenAI API key is not configured", status: 500 }; + } + if (rawMessage.includes("fetch audio") || rawMessage.includes("Failed to fetch")) { + return { message: "Could not fetch the audio file. Please check the URL is accessible.", status: 400 }; + } + if (rawMessage.includes("25 MB") || rawMessage.includes("file size")) { + return { message: "Audio file exceeds the 25MB limit", status: 413 }; + } + if (rawMessage.includes("rate limit")) { + return { message: "Rate limit exceeded. 
Please try again later.", status: 429 }; + } + + return { message: rawMessage, status: 500 }; +} + From 0f3e75a671a0491ff46eb8dfee968ba0e6920f11 Mon Sep 17 00:00:00 2001 From: Sidney Swift <158200036+sidneyswift@users.noreply.github.com> Date: Tue, 6 Jan 2026 13:06:45 -0500 Subject: [PATCH 3/5] refactor: extract shared Supabase utilities matching Chat patterns - Add lib/consts.ts with SUPABASE_STORAGE_BUCKET - Add lib/supabase/storage/uploadFileByKey.ts (mirrors Chat) - Add lib/supabase/files/createFileRecord.ts (mirrors Chat) - Refactor transcribe files to use shared utilities - Enables future extraction to shared package --- lib/consts.ts | 6 +++ lib/supabase/files/createFileRecord.ts | 64 +++++++++++++++++++++++++ lib/supabase/storage/uploadFileByKey.ts | 26 ++++++++++ lib/transcribe/saveAudioToFiles.ts | 58 ++++++++++------------ lib/transcribe/saveTranscriptToFiles.ts | 57 ++++++++++------------ lib/transcribe/types.ts | 4 +- 6 files changed, 145 insertions(+), 70 deletions(-) create mode 100644 lib/consts.ts create mode 100644 lib/supabase/files/createFileRecord.ts create mode 100644 lib/supabase/storage/uploadFileByKey.ts diff --git a/lib/consts.ts b/lib/consts.ts new file mode 100644 index 00000000..dc1ee155 --- /dev/null +++ b/lib/consts.ts @@ -0,0 +1,6 @@ +/** + * Shared constants for Recoup-API + */ + +export const SUPABASE_STORAGE_BUCKET = "user-files"; + diff --git a/lib/supabase/files/createFileRecord.ts b/lib/supabase/files/createFileRecord.ts new file mode 100644 index 00000000..eccb4040 --- /dev/null +++ b/lib/supabase/files/createFileRecord.ts @@ -0,0 +1,64 @@ +import supabase from "@/lib/supabase/serverClient"; + +interface FileRecord { + id: string; + owner_account_id: string; + artist_account_id: string; + storage_key: string; + file_name: string; + mime_type: string | null; + size_bytes: number | null; + description: string | null; + tags: string[]; +} + +interface CreateFileRecordParams { + ownerAccountId: string; + artistAccountId: string; + 
storageKey: string; + fileName: string; + mimeType?: string | null; + sizeBytes?: number | null; + description?: string | null; + tags?: string[]; +} + +/** + * Create a file record in the database + */ +export async function createFileRecord( + params: CreateFileRecordParams +): Promise { + const { + ownerAccountId, + artistAccountId, + storageKey, + fileName, + mimeType, + sizeBytes, + description, + tags, + } = params; + + const { data, error } = await supabase + .from("files") + .insert({ + owner_account_id: ownerAccountId, + artist_account_id: artistAccountId, + storage_key: storageKey, + file_name: fileName, + mime_type: mimeType ?? null, + size_bytes: sizeBytes ?? null, + description: description ?? null, + tags: Array.isArray(tags) ? tags : [], + }) + .select() + .single(); + + if (error) { + throw new Error(`Failed to create file record: ${error.message}`); + } + + return data; +} + diff --git a/lib/supabase/storage/uploadFileByKey.ts b/lib/supabase/storage/uploadFileByKey.ts new file mode 100644 index 00000000..9c1bf579 --- /dev/null +++ b/lib/supabase/storage/uploadFileByKey.ts @@ -0,0 +1,26 @@ +import supabase from "@/lib/supabase/serverClient"; +import { SUPABASE_STORAGE_BUCKET } from "@/lib/consts"; + +/** + * Upload file to Supabase storage by key + */ +export async function uploadFileByKey( + key: string, + file: File | Blob, + options: { + contentType?: string; + upsert?: boolean; + } = {} +): Promise { + const { error } = await supabase.storage + .from(SUPABASE_STORAGE_BUCKET) + .upload(key, file, { + contentType: options.contentType || "application/octet-stream", + upsert: options.upsert ?? 
false, + }); + + if (error) { + throw new Error(`Failed to upload file: ${error.message}`); + } +} + diff --git a/lib/transcribe/saveAudioToFiles.ts b/lib/transcribe/saveAudioToFiles.ts index b38a96cb..3dab3462 100644 --- a/lib/transcribe/saveAudioToFiles.ts +++ b/lib/transcribe/saveAudioToFiles.ts @@ -1,5 +1,6 @@ -import supabase from "@/lib/supabase/serverClient"; -import { SaveAudioParams, FileRecord, STORAGE_BUCKET } from "./types"; +import { uploadFileByKey } from "@/lib/supabase/storage/uploadFileByKey"; +import { createFileRecord } from "@/lib/supabase/files/createFileRecord"; +import { SaveAudioParams, FileRecord } from "./types"; export async function saveAudioToFiles(params: SaveAudioParams): Promise { const { audioBlob, contentType, fileName, ownerAccountId, artistAccountId, title = "Audio" } = @@ -8,36 +9,25 @@ export async function saveAudioToFiles(params: SaveAudioParams): Promise { const { markdown, ownerAccountId, artistAccountId, title = "Transcription" } = params; @@ -9,36 +10,26 @@ export async function saveTranscriptToFiles(params: SaveTranscriptParams): Promi const storageKey = `files/${ownerAccountId}/${artistAccountId}/${fileName}`; const markdownBlob = new Blob([markdown], { type: "text/markdown" }); - const { error: uploadError } = await supabase.storage - .from(STORAGE_BUCKET) - .upload(storageKey, markdownBlob, { - contentType: "text/markdown", - upsert: false, - }); - if (uploadError) { - throw new Error(`Failed to upload transcript: ${uploadError.message}`); - } - - const { data, error: insertError } = await supabase - .from("files") - .insert({ - owner_account_id: ownerAccountId, - artist_account_id: artistAccountId, - storage_key: storageKey, - file_name: fileName, - mime_type: "text/markdown", - size_bytes: new TextEncoder().encode(markdown).length, - description: `Transcript for "${title}"`, - tags: params.tags || ["transcription"], - }) - .select() - .single(); - - if (insertError) { - throw new Error(`Failed to create file record: 
${insertError.message}`); - } - - return data; + await uploadFileByKey(storageKey, markdownBlob, { + contentType: "text/markdown", + upsert: false, + }); + + const data = await createFileRecord({ + ownerAccountId, + artistAccountId, + storageKey, + fileName, + mimeType: "text/markdown", + sizeBytes: new TextEncoder().encode(markdown).length, + description: `Transcript for "${title}"`, + tags: params.tags || ["transcription"], + }); + + return { + id: data.id, + file_name: data.file_name, + storage_key: data.storage_key, + }; } - diff --git a/lib/transcribe/types.ts b/lib/transcribe/types.ts index e075e638..4e2fbacd 100644 --- a/lib/transcribe/types.ts +++ b/lib/transcribe/types.ts @@ -1,9 +1,7 @@ /** - * Shared types and constants for the audio transcription feature. + * Types for the audio transcription feature. */ -export const STORAGE_BUCKET = "user-files"; - export interface FileRecord { id: string; file_name: string; From f6b272ca17b1f6d50690457b48ad08b667e0c178 Mon Sep 17 00:00:00 2001 From: Sidney Swift <158200036+sidneyswift@users.noreply.github.com> Date: Tue, 6 Jan 2026 13:15:27 -0500 Subject: [PATCH 4/5] fix: consolidate FileRecord type definitions - Export FileRecord from createFileRecord.ts (single source of truth) - Re-export from lib/transcribe/types.ts - Simplify save functions to return full FileRecord directly --- lib/supabase/files/createFileRecord.ts | 4 ++-- lib/transcribe/saveAudioToFiles.ts | 8 +------- lib/transcribe/saveTranscriptToFiles.ts | 8 +------- lib/transcribe/types.ts | 6 +----- 4 files changed, 5 insertions(+), 21 deletions(-) diff --git a/lib/supabase/files/createFileRecord.ts b/lib/supabase/files/createFileRecord.ts index eccb4040..6b06744b 100644 --- a/lib/supabase/files/createFileRecord.ts +++ b/lib/supabase/files/createFileRecord.ts @@ -1,6 +1,6 @@ import supabase from "@/lib/supabase/serverClient"; -interface FileRecord { +export interface FileRecord { id: string; owner_account_id: string; artist_account_id: string; @@ -12,7 
+12,7 @@ interface FileRecord { tags: string[]; } -interface CreateFileRecordParams { +export interface CreateFileRecordParams { ownerAccountId: string; artistAccountId: string; storageKey: string; diff --git a/lib/transcribe/saveAudioToFiles.ts b/lib/transcribe/saveAudioToFiles.ts index 3dab3462..4c082e61 100644 --- a/lib/transcribe/saveAudioToFiles.ts +++ b/lib/transcribe/saveAudioToFiles.ts @@ -14,7 +14,7 @@ export async function saveAudioToFiles(params: SaveAudioParams): Promise Date: Tue, 6 Jan 2026 13:23:25 -0500 Subject: [PATCH 5/5] fix: add timestamp suffix to prevent filename collisions - Orchestrator generates timestamp once for both audio and transcript - Ensures matching timestamps between related files - Prevents 'resource already exists' errors on duplicate titles --- lib/transcribe/processAudioTranscription.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/transcribe/processAudioTranscription.ts b/lib/transcribe/processAudioTranscription.ts index 622f9837..5663be02 100644 --- a/lib/transcribe/processAudioTranscription.ts +++ b/lib/transcribe/processAudioTranscription.ts @@ -21,8 +21,10 @@ export async function processAudioTranscription( const audioBlob = await response.blob(); const contentType = response.headers.get("content-type") || "audio/mpeg"; const ext = getExtensionFromContentType(contentType); + const timestamp = Date.now(); const safeTitle = (title || "audio").replace(/[^a-zA-Z0-9._-]/g, "_"); - const fileName = `${safeTitle}.${ext}`; + const uniqueTitle = `${safeTitle}-${timestamp}`; + const fileName = `${uniqueTitle}.${ext}`; const audioFileRecord = await saveAudioToFiles({ audioBlob, @@ -30,7 +32,7 @@ export async function processAudioTranscription( fileName, ownerAccountId, artistAccountId, - title, + title: uniqueTitle, tags: ["audio", "original"], }); @@ -42,7 +44,7 @@ export async function processAudioTranscription( markdown, ownerAccountId, artistAccountId, - title, + title: uniqueTitle, tags: 
["transcription", "generated"], });