From caca20ce02291cfb50c91e9ab0bc3e9197ccac4a Mon Sep 17 00:00:00 2001 From: olav Date: Sat, 31 Jan 2026 15:04:43 -0800 Subject: [PATCH] feat: Add inbound call support with hook-based notifications - Add `logging` capability to MCP server to enable notifications - Implement full inbound call flow: answer, greet, capture speech, notify Claude - Add Stop and UserPromptSubmit hooks to alert Claude about pending calls - Add configurable TTS model support (CALLME_TTS_MODEL env var) - Add customizable inbound greeting (CALLME_INBOUND_GREETING env var) - Add CALLME_PHONE_PROVIDER and CALLME_NGROK_DOMAIN to plugin env passthrough The inbound call flow: 1. User calls the configured phone number 2. Server answers and plays greeting 3. STT captures user's speech 4. Server plays hold message and writes pending call info 5. Hooks detect the pending call and inject prompt to Claude 6. Claude uses continue_call to respond to the caller Co-Authored-By: Claude Opus 4.5 --- .claude-plugin/plugin.json | 20 +++- hooks/check-pending-call.sh | 41 +++++++ hooks/inbound-call-hook.sh | 42 +++++++ server/src/index.ts | 19 +++- server/src/phone-call.ts | 206 ++++++++++++++++++++++++++++++++++ server/src/providers/index.ts | 3 + 6 files changed, 327 insertions(+), 4 deletions(-) create mode 100755 hooks/check-pending-call.sh create mode 100755 hooks/inbound-call-hook.sh diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 54be3e7..fddfd13 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -15,16 +15,20 @@ "command": "bun", "args": ["run", "--cwd", "${CLAUDE_PLUGIN_ROOT}/server", "start"], "env": { + "CALLME_PHONE_PROVIDER": "${CALLME_PHONE_PROVIDER:-telnyx}", "CALLME_PHONE_ACCOUNT_SID": "${CALLME_PHONE_ACCOUNT_SID}", "CALLME_PHONE_AUTH_TOKEN": "${CALLME_PHONE_AUTH_TOKEN}", "CALLME_PHONE_NUMBER": "${CALLME_PHONE_NUMBER}", "CALLME_USER_PHONE_NUMBER": "${CALLME_USER_PHONE_NUMBER}", "CALLME_OPENAI_API_KEY": "${CALLME_OPENAI_API_KEY}", + "CALLME_TTS_MODEL": "${CALLME_TTS_MODEL:-gpt-4o-mini-tts}", "CALLME_TTS_VOICE": "${CALLME_TTS_VOICE:-onyx}", "CALLME_NGROK_AUTHTOKEN": "${CALLME_NGROK_AUTHTOKEN}", + "CALLME_NGROK_DOMAIN": "${CALLME_NGROK_DOMAIN}", "CALLME_PORT": "${CALLME_PORT:-3333}", "CALLME_TRANSCRIPT_TIMEOUT_MS": "${CALLME_TRANSCRIPT_TIMEOUT_MS:-180000}", - "CALLME_STT_SILENCE_DURATION_MS": "${CALLME_STT_SILENCE_DURATION_MS:-800}" + "CALLME_STT_SILENCE_DURATION_MS": "${CALLME_STT_SILENCE_DURATION_MS:-800}", + "CALLME_INBOUND_GREETING": "${CALLME_INBOUND_GREETING:-Hello, this is Claude. How can I help you?}" } } }, @@ -33,8 +37,18 @@ { "hooks": [ { - "type": "prompt", - "prompt": "SILENTLY evaluate if you should call the user. ONLY use initiate_call if you completed significant work and need to discuss next steps, or are genuinely blocked. Do NOT output any text - either call or do nothing." + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/inbound-call-hook.sh" + } + ] + } + ], + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/check-pending-call.sh" } ] } diff --git a/hooks/check-pending-call.sh b/hooks/check-pending-call.sh new file mode 100755 index 0000000..14ae724 --- /dev/null +++ b/hooks/check-pending-call.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Check for pending inbound calls on user prompt submit +# Injects a notice if there's an active call waiting + +PENDING_FILE="/tmp/callme-pending-inbound.json" + +# Check if pending call file exists +if [[ ! -f "$PENDING_FILE" ]]; then + exit 0 +fi + +# Read the pending call info +CALL_INFO=$(cat "$PENDING_FILE") +CALL_ID=$(echo "$CALL_INFO" | jq -r '.callId') +FROM=$(echo "$CALL_INFO" | jq -r '.from') +TRANSCRIPT=$(echo "$CALL_INFO" | jq -r '.transcript') +TIMESTAMP=$(echo "$CALL_INFO" | jq -r '.timestamp') + +# Check if the call is stale (older than 5 minutes = 300000ms) +NOW=$(date +%s) +NOW_MS=$((NOW * 1000)) +AGE=$((NOW_MS - TIMESTAMP)) +if [[ $AGE -gt 300000 ]]; then + rm -f "$PENDING_FILE" + exit 0 +fi + +# Don't remove the file - let Stop hook handle cleanup after Claude responds +# Just prepend context to the user's message + +cat << EOF +[URGENT - ACTIVE PHONE CALL] +Someone is on the phone waiting for you to respond! + +Call ID: $CALL_ID +Caller: $FROM +They said: "$TRANSCRIPT" + +IMPORTANT: Before doing anything else, use continue_call with call_id="$CALL_ID" to respond to them! +--- +EOF diff --git a/hooks/inbound-call-hook.sh b/hooks/inbound-call-hook.sh new file mode 100755 index 0000000..8f8cc7f --- /dev/null +++ b/hooks/inbound-call-hook.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Inbound Call Stop Hook +# Checks for pending inbound calls and prompts Claude to respond + +PENDING_FILE="/tmp/callme-pending-inbound.json" + +# Check if pending call file exists +if [[ ! -f "$PENDING_FILE" ]]; then + exit 0 +fi + +# Read the pending call info +CALL_INFO=$(cat "$PENDING_FILE") +CALL_ID=$(echo "$CALL_INFO" | jq -r '.callId') +FROM=$(echo "$CALL_INFO" | jq -r '.from') +TRANSCRIPT=$(echo "$CALL_INFO" | jq -r '.transcript') +TIMESTAMP=$(echo "$CALL_INFO" | jq -r '.timestamp') + +# Check if the call is stale (older than 5 minutes) +NOW=$(date +%s) +NOW_MS=$((NOW * 1000)) +AGE=$((NOW_MS - TIMESTAMP)) +if [[ $AGE -gt 300000 ]]; then + # Call is stale, remove the file + rm -f "$PENDING_FILE" + exit 0 +fi + +# Remove the pending file so we don't prompt again +rm -f "$PENDING_FILE" + +# Build the reason message +REASON="URGENT: There is an active inbound phone call waiting for your response! + +Call ID: ${CALL_ID} +Caller: ${FROM} +They said: \"${TRANSCRIPT}\" + +Use continue_call with call_id=\"${CALL_ID}\" to respond to them, or end_call to hang up." + +# Output JSON to inject a prompt about the inbound call +jq -n --arg reason "$REASON" '{"decision": "block", "reason": $reason}' diff --git a/server/src/index.ts b/server/src/index.ts index 0d6addb..65b1989 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -45,9 +45,26 @@ async function main() { // Create stdio MCP server const mcpServer = new Server( { name: 'callme', version: '3.0.0' }, - { capabilities: { tools: {} } } + { capabilities: { tools: {}, logging: {} } } ); + // Wire up inbound call notifications to MCP + callManager.setInboundCallHandler((callId, from, transcript) => { + mcpServer.notification({ + method: 'notifications/message', + params: { + level: 'info', + data: { + type: 'inbound_call', + call_id: callId, + from: from, + transcript: transcript + }, + message: `Incoming call from ${from}!\n\nUser said: "${transcript}"\n\nUse continue_call with call_id="${callId}" to respond, or end_call to hang up.` + } + }); + }); + // List available tools mcpServer.setRequestHandler(ListToolsRequestSchema, async () => { return { diff --git a/server/src/phone-call.ts b/server/src/phone-call.ts index 781ed14..3970225 100644 --- a/server/src/phone-call.ts +++ b/server/src/phone-call.ts @@ -1,5 +1,13 @@ import WebSocket, { WebSocketServer } from 'ws'; import { createServer, IncomingMessage, ServerResponse } from 'http'; +import { appendFileSync } from 'fs'; + +// Debug logging to file +const DEBUG_LOG = '/tmp/callme-debug.log'; +function debugLog(msg: string) { + const ts = new Date().toISOString(); + appendFileSync(DEBUG_LOG, `[${ts}] ${msg}\n`); +} import { loadProviderConfig, createProviders, @@ -27,6 +35,7 @@ interface CallState { startTime: number; hungUp: boolean; sttSession: RealtimeSTTSession | null; + isInbound?: boolean; // True for incoming calls } export interface ServerConfig { @@ -37,6 +46,7 @@ export interface ServerConfig { providers: ProviderRegistry; providerConfig: ProviderConfig; // For webhook signature verification transcriptTimeoutMs: number; + inboundGreeting: string; // Greeting for incoming calls } export function loadServerConfig(publicUrl: string): ServerConfig { @@ -56,6 +66,10 @@ export function loadServerConfig(publicUrl: string): ServerConfig { // Default 3 minutes for transcript timeout const transcriptTimeoutMs = parseInt(process.env.CALLME_TRANSCRIPT_TIMEOUT_MS || '180000', 10); + // Default greeting for inbound calls + const inboundGreeting = process.env.CALLME_INBOUND_GREETING || + "Hello, this is Claude. How can I help you?"; + return { publicUrl, port: parseInt(process.env.CALLME_PORT || '3333', 10), @@ -64,6 +78,7 @@ export function loadServerConfig(publicUrl: string): ServerConfig { providers, providerConfig, transcriptTimeoutMs, + inboundGreeting, }; } @@ -75,11 +90,26 @@ export class CallManager { private wss: WebSocketServer | null = null; private config: ServerConfig; private currentCallId = 0; + private onInboundCall?: (callId: string, from: string, transcript: string) => void; constructor(config: ServerConfig) { this.config = config; } + /** + * Set handler for inbound call notifications + * Called when an incoming call is answered, greeted, and user speaks + */ + setInboundCallHandler(handler: (callId: string, from: string, transcript: string) => void): void { + this.onInboundCall = handler; + } + + private emitInboundCallNotification(callId: string, from: string, transcript: string): void { + if (this.onInboundCall) { + this.onInboundCall(callId, from, transcript); + } + } + startServer(): void { this.httpServer = createServer((req, res) => { const url = new URL(req.url!, `http://${req.headers.host}`); @@ -359,6 +389,8 @@ export class CallManager { const eventType = event.data?.event_type; const callControlId = event.data?.payload?.call_control_id; + debugLog(`Telnyx webhook: ${eventType}, callControlId: ${callControlId}`); + console.error(`[DEBUG] Telnyx webhook received: ${eventType}, callControlId: ${callControlId}`); console.error(`Phone webhook: ${eventType}`); // Always respond 200 OK immediately @@ -370,6 +402,14 @@ export class CallManager { try { switch (eventType) { case 'call.initiated': + // Check if this is an inbound call + const direction = event.data?.payload?.direction; + console.error(`[Webhook] call.initiated - direction: ${direction}`); + if (direction === 'incoming') { + this.handleInboundCall(event.data.payload).catch(err => { + console.error('[Inbound] Failed to handle inbound call:', err); + }); + } break; case 'call.answered': @@ -422,6 +462,172 @@ export class CallManager { } } + /** + * Handle an incoming call - answer, greet, listen, then notify Claude + */ + private async handleInboundCall(payload: any): Promise { + const callControlId = payload.call_control_id; + const from = payload.from; // Caller's phone number + + debugLog(`handleInboundCall started: from=${from}, callControlId=${callControlId}`); + console.error(`[Inbound] Incoming call from ${from}, callControlId: ${callControlId}`); + + // Create call state for the inbound call + const callId = `inbound-${++this.currentCallId}-${Date.now()}`; + + // Create realtime transcription session + console.error(`[${callId}] Creating STT session...`); + const sttSession = this.config.providers.stt.createSession(); + await sttSession.connect(); + console.error(`[${callId}] STT session connected`); + + // Generate secure token for WebSocket authentication + const wsToken = generateWebSocketToken(); + + const state: CallState = { + callId, + callControlId, + userPhoneNumber: from, + ws: null, + streamSid: null, + streamingReady: false, + wsToken, + conversationHistory: [], + startTime: Date.now(), + hungUp: false, + sttSession, + isInbound: true, + }; + + this.activeCalls.set(callId, state); + this.callControlIdToCallId.set(callControlId, callId); + this.wsTokenToCallId.set(wsToken, callId); + + try { + // Answer the call immediately + console.error(`[${callId}] Answering inbound call via Telnyx API...`); + await this.config.providers.phone.answerCall(callControlId); + console.error(`[${callId}] Answer API call succeeded`); + + // Wait for WebSocket connection and streaming to be ready + console.error(`[${callId}] Waiting for WebSocket connection...`); + await this.waitForConnection(callId, 15000); + console.error(`[${callId}] WebSocket connection ready`); + + // Check if user hung up while we were connecting + if (state.hungUp) { + console.error(`[${callId}] User hung up during connection`); + this.cleanupCall(callId); + return; + } + + // Play greeting + const greeting = this.config.inboundGreeting; + console.error(`[${callId}] Generating TTS for greeting: ${greeting}`); + const audioData = await this.generateTTSAudio(greeting); + console.error(`[${callId}] Sending greeting audio...`); + await this.sendPreGeneratedAudio(state, audioData); + console.error(`[${callId}] Greeting audio sent`); + + // Check again if user hung up during greeting + if (state.hungUp) { + console.error(`[${callId}] User hung up during greeting`); + this.cleanupCall(callId); + return; + } + + // Listen for user's response (with 30 second timeout for inbound) + console.error(`[${callId}] Listening for user response...`); + const transcript = await this.listenWithTimeout(state, 30000); + + state.conversationHistory.push({ speaker: 'claude', message: greeting }); + state.conversationHistory.push({ speaker: 'user', message: transcript }); + + console.error(`[${callId}] User said: ${transcript}`); + + // Write pending call info to file for Stop hook to detect + const pendingCallInfo = { + callId, + from, + transcript, + timestamp: Date.now() + }; + const fs = await import('fs'); + fs.writeFileSync('/tmp/callme-pending-inbound.json', JSON.stringify(pendingCallInfo)); + debugLog(`Wrote pending call info to /tmp/callme-pending-inbound.json`); + + // Notify Claude via MCP notification + this.emitInboundCallNotification(callId, from, transcript); + + // Play hold message while waiting for Claude to respond + const holdMessage = "One moment please, I'm connecting you to Claude."; + console.error(`[${callId}] Playing hold message...`); + const holdAudio = await this.generateTTSAudio(holdMessage); + await this.sendPreGeneratedAudio(state, holdAudio); + + } catch (error) { + debugLog(`Inbound call ERROR: ${error instanceof Error ? error.message : error}`); + console.error(`[${callId}] Inbound call handling error:`, error instanceof Error ? error.message : error); + console.error(`[${callId}] Full error:`, error); + // Try to hang up if something went wrong + try { + await this.hangUp(callId); + } catch (hangupErr) { + console.error(`[${callId}] Failed to hang up:`, hangupErr); + } + } + } + + /** + * Listen with a specific timeout (used for inbound calls) + */ + private async listenWithTimeout(state: CallState, timeoutMs: number): Promise { + if (!state.sttSession) { + throw new Error('STT session not available'); + } + + const transcript = await Promise.race([ + state.sttSession.waitForTranscript(timeoutMs), + this.waitForHangup(state), + ]); + + if (state.hungUp) { + throw new Error('Call was hung up by user'); + } + + return transcript; + } + + /** + * Clean up call state and mappings + */ + private cleanupCall(callId: string): void { + const state = this.activeCalls.get(callId); + if (state) { + state.sttSession?.close(); + state.ws?.close(); + this.wsTokenToCallId.delete(state.wsToken); + if (state.callControlId) { + this.callControlIdToCallId.delete(state.callControlId); + } + this.activeCalls.delete(callId); + } + } + + /** + * Hang up a call and clean up + */ + private async hangUp(callId: string): Promise { + const state = this.activeCalls.get(callId); + if (!state) return; + + if (state.callControlId) { + await this.config.providers.phone.hangup(state.callControlId); + } + state.hungUp = true; + this.cleanupCall(callId); + } + async initiateCall(message: string): Promise<{ callId: string; response: string }> { const callId = `call-${++this.currentCallId}-${Date.now()}`; diff --git a/server/src/providers/index.ts b/server/src/providers/index.ts index 2e73616..e792c23 100644 --- a/server/src/providers/index.ts +++ b/server/src/providers/index.ts @@ -33,6 +33,7 @@ export interface ProviderConfig { // OpenAI (TTS + STT) openaiApiKey: string; ttsVoice?: string; + ttsModel?: string; sttModel?: string; sttSilenceDurationMs?: number; } @@ -53,6 +54,7 @@ export function loadProviderConfig(): ProviderConfig { telnyxPublicKey: process.env.CALLME_TELNYX_PUBLIC_KEY, openaiApiKey: process.env.CALLME_OPENAI_API_KEY || '', ttsVoice: process.env.CALLME_TTS_VOICE || 'onyx', + ttsModel: process.env.CALLME_TTS_MODEL || 'tts-1', sttModel: process.env.CALLME_STT_MODEL || 'gpt-4o-transcribe', sttSilenceDurationMs, }; @@ -81,6 +83,7 @@ export function createTTSProvider(config: ProviderConfig): TTSProvider { provider.initialize({ apiKey: config.openaiApiKey, voice: config.ttsVoice, + model: config.ttsModel, }); return provider; }