diff --git a/COORDINATION_PATTERNS.md b/COORDINATION_PATTERNS.md new file mode 100644 index 0000000..02088de --- /dev/null +++ b/COORDINATION_PATTERNS.md @@ -0,0 +1,233 @@ +# Coordination Patterns for Music Playback + +> **Status**: Implemented (Broadcast Architecture) +> **Last Updated**: December 2025 + +## Overview + +This document describes the coordination patterns used in the music playback system, based on the broadcast architecture with `IAudioBroadcast` and `IAudioSink` contracts. + +## Architecture Summary + +``` +┌─────────────────────────────────────────────────────────┐ +│ plugin-music-player │ +│ MusicQueue → Broadcast (IAudioBroadcast) → Multiplex │ +└─────────────────────────────────────────────────────────┘ + │ + Events: 'track:started', 'track:finished' + Method: subscribe() / unsubscribe() + │ +┌─────────────────────────▼───────────────────────────────┐ +│ plugin-discord │ +│ DiscordAudioSink (IAudioSink) ← feed(stream) │ +│ Events: 'statusChange' │ +└─────────────────────────────────────────────────────────┘ +``` + +## Pattern 1: Contract-Based Decoupling + +### Problem +Plugins need to communicate without tight coupling. + +### Solution +Define contracts (interfaces) that each plugin owns: + +```typescript +// plugin-music-player owns IAudioBroadcast +interface IAudioBroadcast { + subscribe(consumerId: string): AudioSubscription; + unsubscribe(consumerId: string): void; + feedAudio(stream: Readable, metadata?: AudioBroadcastMetadata): Promise<void>; + // ... +} + +// plugin-discord owns IAudioSink +interface IAudioSink { + feed(stream: Readable): Promise<void>; + connect(channelId: string): Promise<void>; + disconnect(): Promise<void>; + // ... +} +``` + +### Benefits +- Plugins only depend on contracts, not implementations +- Either plugin can be replaced or upgraded independently +- Clear ownership boundaries + +## Pattern 2: Event-Based Coordination + +### Problem +External plugins (radio, DJ) need to know when tracks start/finish. 
+ +### Solution +Use EventEmitter for state change notifications: + +```typescript +// IAudioBroadcast emits events +broadcast.on('track:started', (metadata) => { + console.log(`Now playing: ${metadata.title}`); +}); + +broadcast.on('track:finished', (metadata) => { + // Trigger next action +}); + +broadcast.on('silence:started', () => { + // Queue is empty +}); + +// IAudioSink emits status changes +sink.on('statusChange', (status) => { + // 'connected' | 'disconnected' | 'connecting' | 'error' +}); +``` + +### Use Cases +- Radio plugin listening for tracks to announce +- DJ plugin waiting for track finish to add commentary +- Web UI updating now-playing display + +## Pattern 3: Auto-Wiring via Service Discovery + +### Problem +Plugins need to connect to each other at runtime. + +### Solution +Use `runtime.getService()` for discovery and wire automatically: + +```typescript +// In MusicService (plugin-music-player) +async autoSubscribeDiscord(guildId: string, broadcast: IAudioBroadcast) { + const discordService = this.runtime.getService('discord'); + if (!discordService) return; // Graceful degradation + + const sink = discordService.getAudioSink(guildId); + if (!sink) return; + + // Auto-wire on connection + sink.on('statusChange', async (status) => { + if (status === 'connected') { + const subscription = broadcast.subscribe(`discord-${guildId}`); + await sink.feed(subscription.stream); + } + }); +} +``` + +### Benefits +- Zero manual configuration +- Works when both plugins loaded +- Gracefully degrades when one is missing + +## Pattern 4: Reconnection Handling + +### Problem +Network hiccups cause Discord disconnections; playback should resume. 
+ +### Solution +Use `statusChange` events for automatic recovery: + +```typescript +sink.on('statusChange', async (status) => { + if (status === 'connected') { + // Re-subscribe to get fresh stream from live point + const subscription = broadcast.subscribe(`discord-${guildId}`); + await sink.feed(subscription.stream); + logger.info('Discord reconnected, re-subscribed to broadcast'); + } +}); +``` + +### Key Insight +Re-subscribing gets the current position in the broadcast, not the beginning. This is because the broadcast is always "live" - like tuning into a radio station. + +## Pattern 5: Non-Blocking Multiplexing + +### Problem +Slow consumers (laggy web clients) could block the main stream. + +### Solution +Each consumer gets an independent `PassThrough` stream with backpressure handling: + +```typescript +// In StreamMultiplexer +source.on('data', (chunk) => { + for (const [id, consumer] of consumers) { + if (!consumer.write(chunk)) { + // Consumer buffer full - drop frame for this consumer + logger.debug(`Backpressure on ${id}, dropping chunk`); + } + } +}); +``` + +### Result +- Discord playback unaffected by web client performance +- Each consumer independent +- No blocking the source stream + +## Pattern 6: Silence Injection + +### Problem +Empty queue causes Discord voice connection timeout. 
+ +### Solution +`StreamCore` injects silence frames when no audio is being fed: + +```typescript +// In StreamCore +startSilence() { + this.silenceInterval = setInterval(() => { + this.output.write(OPUS_SILENCE_FRAME); // 10ms of silence + }, 10); +} + +feed(stream: Readable) { + this.stopSilence(); // Real audio coming + stream.pipe(this.output, { end: false }); + stream.on('end', () => this.startSilence()); +} +``` + +### Benefits +- Voice connection stays alive indefinitely +- Seamless transition when new tracks added +- No manual connection management needed + +## Pattern Summary + +| Pattern | Use Case | Key Mechanism | +|---------|----------|---------------| +| **Contracts** | Plugin decoupling | IAudioBroadcast / IAudioSink | +| **Events** | State notifications | EventEmitter | +| **Auto-Wiring** | Runtime connection | runtime.getService() | +| **Reconnection** | Resilience | statusChange event | +| **Multiplexing** | Multiple consumers | PassThrough + backpressure | +| **Silence** | Connection keep-alive | Interval-based frame injection | + +## Implementation Locations + +### This Package (plugin-discord) + +| Pattern | File | +|---------|------| +| IAudioSink | `src/contracts.ts` | +| Discord sink | `src/sinks/discordAudioSink.ts` | + +### External Package (plugin-music-player) + +> **Note**: The following files are located in `packages/plugin-music-player/` within the monorepo. 
+ +| Pattern | File | +|---------|------| +| IAudioBroadcast | `packages/plugin-music-player/src/contracts.ts` | +| Auto-wiring | `packages/plugin-music-player/src/service.ts` | +| Multiplexing | `packages/plugin-music-player/src/core/streamMultiplexer.ts` | +| Silence injection | `packages/plugin-music-player/src/core/streamCore.ts` | + +## Related Documentation + +- [MUSIC_ARCHITECTURE.md](./MUSIC_ARCHITECTURE.md) - Full architecture overview +- [plugin-music-player README](../plugin-music-player/README.md) - Usage guide diff --git a/MUSIC_ARCHITECTURE.md b/MUSIC_ARCHITECTURE.md new file mode 100644 index 0000000..7bb50f5 --- /dev/null +++ b/MUSIC_ARCHITECTURE.md @@ -0,0 +1,204 @@ +# Music Architecture: Broadcast Model + +> **Status**: Implemented +> **Last Updated**: December 2025 + +## Overview + +The music playback system uses a **broadcast-centric architecture** that decouples audio sources from audio destinations. This enables resilient streaming to multiple consumers while maintaining clean plugin separation. 
+ +## Architecture Diagram + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ PLUGIN-MUSIC-PLAYER │ +│ │ +│ ┌───────────┐ ┌───────────────┐ ┌──────────────────────┐ │ +│ │MusicQueue │───▶│ Broadcast │───▶│ StreamMultiplexer │ │ +│ │ (tracks) │ │ (IAudioBroadcast) │ │ (non-blocking) │ │ +│ └───────────┘ └───────────────┘ └──────────────────────┘ │ +│ │ │ │ +│ ┌──────▼──────┐ │ │ +│ │ StreamCore │ │ │ +│ │(+silence gen)│ │ │ +│ └─────────────┘ │ │ +└─────────────────────────────────────────────────┼───────────────┘ + │ + ┌─────────────────────────┴─────────────┐ + │ subscribe() returns independent │ + │ PassThrough stream per consumer │ + │ │ + ┌─────────▼─────────┐ ┌──────────────▼──┐ + │ Discord Sink │ │ Web Clients │ + │ (IAudioSink) │ │ (/stream API) │ + │ │ │ │ + │ plugin-discord │ │ plugin-music- │ + │ │ │ player routes │ + └───────────────────┘ └─────────────────┘ +``` + +## Key Contracts + +### IAudioBroadcast (plugin-music-player) + +The source contract - exposes a continuous audio stream that consumers can subscribe to. + +```typescript +interface IAudioBroadcast extends EventEmitter { + readonly id: string; + + start(): void; + stop(): void; + + feedAudio(stream: Readable, metadata?: AudioBroadcastMetadata): Promise; + + subscribe(consumerId: string): AudioSubscription; + unsubscribe(consumerId: string): void; + + getCurrentMetadata(): AudioBroadcastMetadata | null; + isPlaying(): boolean; +} +``` + +### IAudioSink (plugin-discord) + +The destination contract - receives and plays audio streams. 
+ +```typescript +interface IAudioSink extends EventEmitter { + readonly id: string; + readonly status: AudioSinkStatus; // 'connected' | 'disconnected' | 'connecting' | 'error' + + feed(stream: Readable): Promise<void>; + stop(): Promise<void>; + + connect(channelId: string): Promise<void>; + disconnect(): Promise<void>; +} +``` + +## Separation of Concerns + +### plugin-music-player owns: +- Queue management +- Broadcast creation and lifecycle +- Stream multiplexing +- Silence injection +- Web streaming routes +- Auto-wiring to discovered sinks + +### plugin-discord owns: +- Voice connection management +- DiscordAudioSink implementation +- Audio player lifecycle +- Connection state handling +- Reconnection logic + +### Neither plugin knows: +- Internal implementation details of the other +- Whether the other plugin is even loaded (graceful degradation) + +## Auto-Wiring + +When both plugins are loaded, `MusicService` automatically: + +1. Discovers Discord service via `runtime.getService('discord')` +2. Obtains `IAudioSink` via `discordService.getAudioSink(guildId)` +3. Subscribes the sink to the broadcast when tracks start +4. Re-subscribes on sink reconnection (via `statusChange` event) + +```typescript +// In MusicService +sink.on('statusChange', async (status) => { + if (status === 'connected') { + const subscription = broadcast.subscribe(`discord-${guildId}`); + await sink.feed(subscription.stream); + } +}); +``` + +## Resilience Features + +### Silence Injection +When the queue is empty, `StreamCore` injects silence frames: +- Keeps Discord voice connection alive +- Prevents timeout disconnects +- Seamless transition when new tracks are added + +### Non-Blocking Multiplexing +`StreamMultiplexer` handles slow consumers: +- Each consumer gets an independent PassThrough stream +- Backpressure on one consumer drops frames for that consumer only +- Other consumers (Discord, other web clients) are unaffected + +### Auto-Reconnection +When Discord disconnects and reconnects: +1. 
`DiscordAudioSink` emits `statusChange: 'connected'` +2. `MusicService` receives event +3. `MusicService` re-subscribes to broadcast +4. Audio resumes from live point (not from beginning) + +## Migration from Old Architecture + +The previous architecture had: +- Direct coupling between `MusicQueue` and `VoiceManager` +- No silence injection (connections dropped when queue empty) +- Single consumer (no web streaming without radio plugin) +- Manual reconnection handling + +The new architecture provides: +- Contract-based decoupling +- Built-in silence injection +- Multiple consumers by default +- Automatic reconnection handling + +## File Locations + +### plugin-music-player +- `src/contracts.ts` - IAudioBroadcast interface +- `src/core/broadcast.ts` - Broadcast implementation +- `src/core/streamCore.ts` - StreamCore (silence injection) +- `src/core/streamMultiplexer.ts` - StreamMultiplexer (fan-out) +- `src/service.ts` - MusicService (auto-wiring) + +### plugin-discord +- `src/contracts.ts` - IAudioSink interface +- `src/sinks/discordAudioSink.ts` - DiscordAudioSink implementation +- `src/service.ts` - DiscordService.getAudioSink() + +## Multi-Channel Audio System + +VoiceManager supports multiple logical audio channels with priority-based mixing: + +```typescript +import { + CHANNEL_TTS, // Priority 100 - Text-to-Speech + CHANNEL_MUSIC, // Priority 50 - Music playback + CHANNEL_SFX, // Priority 30 - Sound effects + CHANNEL_AMBIENT, // Priority 20 - Background ambient +} from '@elizaos/plugin-discord'; +``` + +### Priority Behavior + +- **Interrupt**: Higher priority stops lower priority playback +- **Duck**: Higher priority reduces lower priority volume (when `mix: true`) +- **Restore**: When higher priority finishes, lower priority volume restores + +### DJ Integration + +The radio/DJ plugin can use this for smooth transitions: + +```typescript +// Music playing on CHANNEL_MUSIC +// DJ intro starts on CHANNEL_TTS with mix: true +// → Music ducks to 20% +// → TTS 
plays +// → Music ramps back to 100% +``` + +## Related Documentation + +- [plugin-music-player README](../plugin-music-player/README.md) - Usage and API +- [plugin-discord README](./README.md) - Discord integration +- [plugin-radio README](../plugin-radio/README.md) - Optional DJ features diff --git a/PROGRESSIVE_UPDATES.md b/PROGRESSIVE_UPDATES.md new file mode 100644 index 0000000..b7138c1 --- /dev/null +++ b/PROGRESSIVE_UPDATES.md @@ -0,0 +1,351 @@ +# Progressive Message Updates + +## Overview + +Progressive message updates provide real-time status feedback for long-running operations by editing a single Discord message as the operation progresses, instead of leaving users staring at silence or sending multiple status messages. + +## The Problem + +Traditional bot actions leave users in the dark: +``` +User: "play bohemian rhapsody" +Bot: ... 10 seconds of silence ... +Bot: "Now playing: Bohemian Rhapsody" +``` + +Users think the bot is broken during those 10 seconds. They spam commands, ask "are you working?", or leave frustrated. + +## The Solution + +Progressive updates show what's happening: +``` +User: "play bohemian rhapsody" +Bot: "🔍 Looking up track..." ← Instant feedback + ↓ (same message, edited) + "🔍 Searching for track..." ← 2 seconds later + ↓ (same message, edited) + "✨ Setting up playback..." ← 8 seconds later + ↓ (same message, edited) + "🎵 Now playing: Bohemian Rhapsody" ← Final (10 seconds total) +``` + +## Architecture + +### Core Components + +1. **ProgressiveMessage** (`progressiveMessage.ts`) + - Helper class that actions use to send progressive updates + - Handles debouncing, throttling, and platform detection + - API: `update()`, `complete()`, `fail()` + +2. **MessageManager** (`messages.ts`) + - Discord message handler with progressive update support + - Tracks messages by correlation ID with 60-second TTL + - Edits existing messages when it sees progressive metadata + +3. 
**Message Editing Utility** (`utils.ts`) + - Wraps Discord.js `message.edit()` with error handling + - Truncates content > 2000 chars (Discord limit) + - Returns null on failure for graceful degradation + +### Data Flow + +```mermaid +sequenceDiagram + participant Action as playAudio Action + participant PM as ProgressiveMessage + participant CB as HandlerCallback + participant MM as MessageManager + participant Discord as Discord API + + Action->>PM: new ProgressiveMessage(callback) + Action->>PM: update("Searching...") + Note over PM: Wait 300ms (minDelay) + PM->>CB: callback({ text, metadata: { correlationId, isInterim: true } }) + CB->>MM: Handle content + MM->>Discord: Send new message + Discord-->>MM: Message object + MM->>MM: Track message by correlationId + + Action->>PM: update("Found!") + PM->>CB: callback({ text, metadata: { correlationId, isInterim: true } }) + CB->>MM: Handle content + MM->>MM: Find tracked message + MM->>Discord: Edit message + + Action->>PM: complete("Done!") + PM->>CB: callback({ text, metadata: { correlationId, isInterim: false } }) + CB->>MM: Handle content + MM->>Discord: Edit message (final) + MM->>MM: Clean up tracking + MM->>MM: Create memory (persist to DB) +``` + +## Key Design Decisions + +### 1. Why Correlation IDs? + +**Problem**: Multiple actions might run simultaneously in the same channel. How do we know which message to edit? + +**Solution**: Each `ProgressiveMessage` instance generates a unique correlation ID (`timestamp-random`). All updates from that action include this ID, so MessageManager knows which message to edit. + +**Why not use message ID directly?**: The action doesn't know the Discord message ID until after the first update is sent. Correlation ID is generated upfront and stays constant throughout the action. + +### 2. Why 60-Second TTL Cleanup? + +**Problem**: If an action crashes, throws an exception, or hangs, we'd track its message forever (memory leak). 
+ +**Solution**: Each tracked message has a 60-second timeout that automatically removes its tracking entry (the Discord message itself is left in place). + +**Why 60 seconds?**: Long enough for any legitimate action (most are < 15 seconds), short enough to prevent unbounded growth. The timeout resets with each update, so long-running actions with frequent updates don't expire. + +### 3. Why Debouncing (300ms minDelay)? + +**Problem**: If an operation completes in < 300ms, showing "Searching..." is just noise. + +**Solution**: The first `update()` waits 300ms before sending. If `complete()` is called before that, we skip straight to the final message. + +**Example**: +- Library hit (10ms): User only sees "Now playing!" +- YouTube search (5s): User sees "Searching..." → "Now playing!" + +**Why 300ms specifically?**: Human perception threshold. Operations < 300ms feel instant. Longer feels like waiting. + +### 4. Why Throttling (500ms between updates)? + +**Problem**: Discord rate limits message edits (5 per 5 seconds per channel). Rapid updates trigger rate limits. + +**Solution**: Enforce 500ms minimum between updates. If `update()` is called multiple times rapidly, we debounce and send the last value. + +**Why 500ms?**: Caps each action at ~2 edits/second while still feeling responsive. Note that sustained updates at this rate (10 per 5 seconds) would still exceed the 5-per-5-seconds limit above, so the throttle only keeps us under it when an action sends a few updates (the recommended 2-3 milestones); if Discord rejects an edit, we fall back to sending a new message. + +### 5. Why "Important" Flag for Non-Editing Platforms? + +**Problem**: Web/CLI can't edit messages. Each update creates a new message. Showing all updates floods the UI: +``` +Bot: Looking up track... +Bot: Searching for track... +Bot: Found! Setting up... +Bot: Now playing! +``` + +**Solution**: Non-editing platforms skip transient updates unless marked `important: true`. + +**Example**: +```typescript +progress.update("Looking up..."); // Skipped on web/CLI (fast) +progress.update("Searching...", { important: true }); // Shown (slow) +progress.update("Setting up..."); // Skipped on web/CLI (fast) +progress.complete("Done!"); // Always shown +``` + +**Result on web/CLI**: +``` +Bot: Searching for track... 
+Bot: Now playing! +``` + +**Why not skip all non-editing updates?**: Some operations genuinely take 5-10+ seconds (searching, fetching external data). Without any feedback, users think the bot is broken. The important flag marks these cases. + +### 6. Why No "isFinal" Flag? + +**Original design**: +```typescript +progress.update("Searching...", { isFinal: false }); +progress.complete("Done!", { isFinal: true }); +``` + +**Problem**: If an exception occurs before `complete()`, we never send `isFinal: true`, leaving an orphaned "Searching..." message as the final message in chat. + +**Current design**: The last message naturally becomes final. If an action throws, the last `update()` stays. The `fail()` method explicitly handles errors. + +**Why this is better**: No orphaned messages. TTL cleanup handles crashes. Simpler API (no flag to forget). + +### 7. Why isInterim Flag Instead of isFinal? + +**Reason**: Determines whether to create a memory (persist to DB). + +- `isInterim: true` → Transient status, don't save +- `isInterim: false` → Final message, create memory + +**Why not save all updates?**: Database bloat. Saving "Searching...", "Found!", "Setting up...", "Done!" creates 4 memories for one action. Only the final state matters for conversation history. + +### 8. Why Metadata Convention Instead of Core Changes? + +**Constraint**: "We shouldn't touch core atm" (project requirement) + +**Solution**: Use `Content.metadata` to pass progressive update info. Core is unaware; only Discord plugin knows about it. + +**Benefits**: +- Zero core changes +- Other plugins (Telegram, etc.) 
can adopt the same pattern +- Easy to add/remove without breaking anything +- Purely additive feature + +## Usage Guide + +### Basic Pattern + +```typescript +import { ProgressiveMessage } from '@elizaos/plugin-discord'; + +handler: async (runtime, message, state, options, callback) => { + const progress = new ProgressiveMessage(callback, message.content.source); + + try { + // Transient updates (skipped on non-editing platforms) + progress.update("🔍 Starting..."); + + // ... do fast work (< 1s) ... + + // Important update (shown on all platforms) + progress.update("⏳ This might take a while...", { important: true }); + + // ... do slow work (5-10s) ... + + // Final message (always shown) + return await progress.complete("✅ All done!"); + + } catch (error) { + // Error handling (always shown) + return await progress.fail("❌ Something went wrong"); + } +} +``` + +### Guidelines + +**DO**: +- ✅ Use for operations that take > 2 seconds +- ✅ Mark updates for steps that take > 5 seconds as important +- ✅ Keep updates short and clear +- ✅ Always call `complete()` or `fail()` +- ✅ Use try/catch with `fail()` for errors + +**DON'T**: +- ❌ Use for instant operations (< 500ms) +- ❌ Send updates more often than every 500ms +- ❌ Mark every update as important +- ❌ Forget to call `complete()` or `fail()` +- ❌ Send very long update text (keep < 200 chars) + +### Update Frequency Examples + +```typescript +// Good: 2-3 major milestones +progress.update("Searching..."); // 0s +// ... 5 seconds of work ... +progress.update("Found! Preparing..."); // 5s +// ... 3 seconds of work ... +progress.complete("Done!"); // 8s + +// Bad: Too many updates +progress.update("Starting..."); // 0s +progress.update("Checking cache..."); // 0.1s +progress.update("Cache miss..."); // 0.2s +progress.update("Fetching..."); // 0.5s +progress.update("Parsing..."); // 1s +progress.update("Validating..."); // 1.5s +// Spammy! Hard to read, triggers throttling +``` + +## Platform Behavior Matrix + +| Platform | Edits? 
| Shows Transient | Shows Important | Shows Final | +|----------|--------|----------------|----------------|-------------| +| Discord | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | +| Web/CLI | ❌ No | ❌ No | ✅ Yes | ✅ Yes | +| Future* | 🤔 TBD | 🤔 TBD | ✅ Yes | ✅ Yes | + +*Future platforms (Telegram, Slack, etc.) can opt into progressive updates by returning true from `supportsProgressive()` + +## Error Handling & Edge Cases + +### 1. Edit Fails (Message Deleted) + +If the Discord edit fails (message deleted, permissions changed), we fall back to sending a new message: + +```typescript +const edited = await editMessageContent(existing.message, content.text); +if (!edited) { + // Edit failed - send new message instead + logger.warn('Failed to edit, falling back to new message'); + await sendMessageInChunks(channel, content.text, ...); +} +``` + +### 2. Action Crashes + +TTL cleanup ensures no memory leaks. After 60 seconds, the tracking entry for the message is removed (the Discord message itself stays in the channel): + +```typescript +setTimeout(() => { + this.progressiveMessages.delete(key); +}, 60000); +``` + +### 3. Multiple Actions in Same Channel + +Each action has a unique correlation ID, so they don't interfere: + +```typescript +// User 1: "play song A" → correlationId: 1234567890-abc123 +// User 2: "play song B" → correlationId: 1234567891-def456 +// Both work independently, editing their own messages +``` + +### 4. Rate Limiting + +The 500ms throttle keeps actions that send only a few updates under Discord's 5/5s limit, but sustained updates at that interval (10 per 5 seconds) can still exceed it. If we exceed it, Discord returns an error, and we fall back to new messages. + +## Testing + +Unit tests verify core behavior without Discord: + +```typescript +// Test fast operations skip interim updates +const progress = new ProgressiveMessage(mockCallback, 'discord'); +progress.update('Checking...'); +await progress.complete('Done!'); +// Only 'Done!' sent, 'Checking...' 
suppressed + +// Test important flag on non-editing platforms +const progress = new ProgressiveMessage(mockCallback, 'web'); +progress.update('Fast'); // Skipped +progress.update('Slow', { important: true }); // Sent +await progress.complete('Done!'); // Sent +``` + +## Future Enhancements + +### Potential Improvements + +1. **Telegram Support**: Add `this.source === 'telegram'` to `supportsProgressive()` +2. **Custom Throttle Per Action**: Allow actions to override throttle/minDelay +3. **Progress Bars**: Add numeric progress (1/5, 2/5, etc.) support +4. **Streaming Updates**: Websocket-based live updates for web/CLI +5. **Analytics**: Track how often users see progressive vs instant responses + +### Non-Goals + +- **Full progress bars**: Too complex, text updates are sufficient +- **Sub-second updates**: Would trigger rate limits, not worth it +- **Retroactive editing**: Can't edit messages from previous sessions +- **Cross-channel updates**: Each message is scoped to its channel + +## Metrics & Success Criteria + +How do we know this is working? + +1. **User perception**: "Bot feels faster" (even though it's not) +2. **Reduced spam**: Fewer "is the bot working?" messages +3. **Clean chat**: Discord shows 1 message instead of 3-5 +4. **Graceful degradation**: Web/CLI users still get feedback +5. **No crashes**: TTL cleanup prevents memory leaks + +## Related Documentation + +- [ElizaOS Plugin Architecture](/.cursor/rules/elizaos/elizaos_client_plugins.mdc) +- [Discord Plugin README](/packages/plugin-discord/README.md) +- [Message Handler Callback](/packages/core/src/types/components.ts) + diff --git a/README.md b/README.md index 7d6e3b2..400c2e1 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,30 @@ A Discord plugin implementation for ElizaOS, enabling rich integration with Discord servers for managing interactions, voice, and message handling. +## Key Features + +### Progressive Message Updates + +Long-running actions (music search, data fetching, etc.) 
can show real-time status updates by editing a single Discord message, providing instant feedback without cluttering chat history. + +```typescript +import { ProgressiveMessage } from '@elizaos/plugin-discord'; + +// In your action handler: +const progress = new ProgressiveMessage(callback, message.content.source); +progress.update("🔍 Searching...", { important: true }); +// ... do work ... +return await progress.complete("✅ Done!"); +``` + +**Benefits:** +- Users see what's happening in real-time +- Single message stays clean (edits in place) +- Auto-throttles to respect Discord rate limits +- Gracefully degrades on web/CLI (shows only important updates) + +See [PROGRESSIVE_UPDATES.md](./PROGRESSIVE_UPDATES.md) for detailed documentation. + ## Features - Handle server join events and manage initial configurations @@ -87,6 +111,70 @@ Settings can also be configured in your character file under `settings.discord`: } ``` +## Character-Based Emoji Reactions + +When an agent uses the `REACT_TO_MESSAGE` action, emoji selection follows this priority: + +1. **Extract from response**: If the agent included an emoji in their response text +2. **Character preferences**: Use character's configured emoji preferences +3. **Sentiment detection**: Match emoji to message sentiment (positive, excitement, thanks, etc.) +4. 
**LLM fallback**: Ask the LLM to select an appropriate emoji + +### Configuration + +#### Simple Format (Array) + +```typescript +// In character file +settings: { + // List of preferred emojis - first match by sentiment wins + preferredEmojis: ['🌸', '🍂', '🌿', '🌊', '🌙', '✨'], +} +``` + +#### Advanced Format (Object) + +```typescript +settings: { + emojiPreferences: { + // Emojis the character prefers to use + preferred: ['👋', '💜', '✨', '🙌', '💫'], + // Emojis the character should never use (even if LLM suggests them) + forbidden: ['😢', '😞', '💔', '👎'], + // Default emoji when no sentiment match + fallback: '💜', + }, +} +``` + +#### Style-Based Rules + +Characters can also forbid all emoji reactions via `style.all`: + +```typescript +style: { + all: [ + 'never use hashtags or emojis', // This disables reactions entirely + // ... + ], +} +``` + +### Sentiment Mapping + +When a character has `preferredEmojis`, the action attempts to match by sentiment: + +| Sentiment | Keywords | Default Emojis | +|-----------|----------|----------------| +| `positive` | good, great, nice, cool | 👍 ✅ 💯 🙌 👏 | +| `agreement` | agree, yes, exactly, right | 👍 ✅ 💯 🤝 | +| `excitement` | awesome, excited, hype | 🔥 🚀 ⭐ 💥 🎉 | +| `love` | love, amazing, wonderful | ❤️ 💕 💜 🖤 💙 | +| `thanks` | thanks, appreciate | 🙏 💜 ❤️ | +| `greeting` | hi, hello, welcome | 👋 🙌 | + +If a character's preferred emoji matches a sentiment category, it's used. Otherwise, the fallback is used. + ## Slash Command Permissions The plugin uses a hybrid permission system that combines Discord's native features with ElizaOS-specific controls. 
@@ -253,6 +341,13 @@ The plugin emits the following Discord-specific events: ### DiscordService + - Main service class that extends ElizaOS Service + - Handles authentication and session management + - Manages Discord client connection + - Processes events and interactions + - Provides `IAudioSink` instances for voice integration + + Main service class that extends ElizaOS Service: - Handles authentication and session management - Manages Discord client connection @@ -266,6 +361,17 @@ Main service class that extends ElizaOS Service: - Supports message formatting and templating - Manages conversation context +### DiscordAudioSink +- Implements `IAudioSink` interface for audio playback +- Receives audio streams from external sources (e.g., music-player) +- Handles voice connection state and auto-reconnection +- Abstracts Discord voice complexity from audio sources + +### Attachment Handler +- Downloads and processes Discord attachments +- Supports various media types +- Integrates with media transcription + ### VoiceManager - Manages voice channel interactions @@ -522,6 +628,112 @@ The plugin includes a test suite for validating functionality: bun run test ``` +## Multi-Channel Audio System + +The plugin provides a priority-based audio channel system that allows multiple audio streams to coexist intelligently. 
+ +### Predefined Channels + +```typescript +import { + CHANNEL_TTS, // Channel 0: Text-to-Speech (priority 100) + CHANNEL_MUSIC, // Channel 1: Music playback (priority 50) + CHANNEL_SFX, // Channel 2: Sound effects (priority 30) + CHANNEL_AMBIENT, // Channel 3: Background ambient (priority 20) +} from '@elizaos/plugin-discord'; +``` + +### Priority-Based Behavior + +Higher priority channels can **interrupt** or **duck** lower priority channels: + +| Scenario | Behavior | +|----------|----------| +| TTS starts while music plays | Music ducks to 20% volume | +| TTS finishes | Music ramps back to 100% | +| SFX plays during music | SFX overlays (music continues) | + +### Using Channels + +```typescript +import { CHANNEL_MUSIC, CHANNEL_TTS } from '@elizaos/plugin-discord'; + +const voiceManager = discordService.voiceManager; + +// Play music on the music channel +await voiceManager.playAudio(musicStream, { + guildId: '123456789', + channel: CHANNEL_MUSIC, +}); + +// TTS will automatically duck music +await voiceManager.playAudio(ttsStream, { + guildId: '123456789', + channel: CHANNEL_TTS, + mix: true, // Duck instead of interrupt +}); +``` + +### Custom Channels + +Register custom channels for specialized use cases: + +```typescript +voiceManager.registerChannel({ + channel: 4, + priority: 45, + canPause: true, + interruptible: true, + volume: 0.8, + duckVolume: 0.3, +}); +``` + +## Audio Sink Integration + +The plugin provides an `IAudioSink` interface for audio playback in voice channels. This allows other plugins to send audio to Discord without coupling to Discord-specific APIs. 
+ +### IAudioSink Interface + +```typescript +import type { IAudioSink } from '@elizaos/plugin-discord'; + +const discordService = runtime.getService('discord'); +const sink = discordService.getAudioSink(guildId); + +// Check connection status +console.log(sink.status); // 'connected' | 'disconnected' | 'connecting' | 'error' + +// Connect to voice channel +await sink.connect(channelId); + +// Feed audio stream +await sink.feed(audioStream); + +// Stop playback +await sink.stop(); + +// Disconnect +await sink.disconnect(); +``` + +### Auto-Reconnection + +The `DiscordAudioSink` handles voice connection hiccups automatically: + +- Monitors connection state changes +- Emits `statusChange` events for external listeners +- External consumers (e.g., music-player) can re-subscribe on reconnection + +### Integration with plugin-music-player + +When both plugins are loaded: + +1. Music-player automatically discovers Discord audio sinks +2. Audio streams are wired transparently +3. Network hiccups trigger automatic re-subscription +4. No manual configuration required + ## Notes - Ensure that your `.env` file includes the required `DISCORD_API_TOKEN` diff --git a/VOICE_STATUS_FEATURES.md b/VOICE_STATUS_FEATURES.md new file mode 100644 index 0000000..2f04620 --- /dev/null +++ b/VOICE_STATUS_FEATURES.md @@ -0,0 +1,186 @@ +# Discord Voice Channel Status and Listening Activity Features + +This document describes the new capabilities added to the Discord plugin for setting voice channel status and user "listening to" activity. + +## Features + +### 1. Voice Channel Status + +Set or clear a custom status message that appears at the top of a Discord voice channel. 
+ +#### Service Method + +```typescript +await discordService.setVoiceChannelStatus(channelId: string, status: string): Promise<boolean> +``` + +**Parameters:** +- `channelId`: The Discord ID of the voice channel +- `status`: The status text to display (max 500 characters, empty string or null to clear) + +**Returns:** `Promise<boolean>` - Whether the status was successfully set + +**Example:** +```typescript +const discordService = runtime.getService('discord') as DiscordService; +await discordService.setVoiceChannelStatus('1234567890', 'Weekly team meeting 📅'); +``` + +#### Action + +**Name:** `SET_VOICE_CHANNEL_STATUS` + +**Similes:** `UPDATE_VOICE_STATUS`, `SET_VC_STATUS`, `CHANGE_VOICE_STATUS`, `UPDATE_VOICE_CHANNEL_STATUS`, `SET_VOICE_MESSAGE`, `CLEAR_VOICE_STATUS` + +**Example Usage:** +- "Set the voice channel status to 'Weekly team meeting'" +- "Update the status in general-voice to 'Study session'" +- "Clear the voice channel status" +- "Set vc status to 'Gaming night 🎮'" + +### 2. Listening Activity + +Set the bot's "listening to" activity status that appears under the bot's name in the member list. 
+ +#### Service Methods + +```typescript +await discordService.setListeningActivity(activity: string, url?: string): Promise<boolean> +await discordService.clearActivity(): Promise<boolean> +``` + +**Parameters:** +- `activity`: The activity text to display (e.g., "Spotify", "your commands") +- `url` (optional): URL for streaming activity + +**Returns:** `Promise<boolean>` - Whether the activity was successfully set + +**Example:** +```typescript +const discordService = runtime.getService('discord') as DiscordService; +await discordService.setListeningActivity('lo-fi beats 🎵'); +await discordService.clearActivity(); // Clear the activity +``` + +#### Action + +**Name:** `SET_LISTENING_ACTIVITY` + +**Similes:** `SET_LISTENING_STATUS`, `SET_LISTENING_TO`, `UPDATE_LISTENING_STATUS`, `CHANGE_LISTENING_ACTIVITY`, `SET_NOW_PLAYING`, `CLEAR_LISTENING_STATUS`, `SET_ACTIVITY`, `UPDATE_STATUS`, `SET_PRESENCE` + +**Example Usage:** +- "Set your status to listening to Spotify" +- "Update your listening activity to 'your commands'" +- "Clear your listening status" +- "Set listening to 'lo-fi beats 🎵'" +- "Stop showing your listening activity" + +## Technical Implementation + +### Voice Channel Status + +The voice channel status feature uses the Discord REST API endpoint `PUT /channels/{channel.id}/voice-status`: + +```typescript +await this.client.rest.put( + `/channels/${channelId}/voice-status`, + { + body: { + status: status || null, + }, + } +); +``` + +**Requirements:** +- Bot must have appropriate permissions in the channel +- Channel must be a voice channel (type `GuildVoice`) +- Status text is limited to 500 characters + +### Listening Activity + +The listening activity uses Discord.js's `setActivity` method with type `2` (Listening): + +```typescript +await this.client.user.setActivity(activity, { + type: 2, // ActivityType.Listening + url: url, +}); +``` + +## Permissions + +### Voice Channel Status +- The bot needs appropriate channel permissions to modify voice channel settings +- Typically 
requires `MANAGE_CHANNELS` or similar permissions + +### Listening Activity +- No special permissions required +- Bot can always modify its own presence/activity + +## Limitations + +1. **Voice Channel Status**: + - Discord has historically toggled this feature on and off + - The feature may not be visible in all Discord clients + - Some users have reported the feature being disabled by Discord + +2. **Listening Activity**: + - Only one activity can be displayed at a time + - Activity is visible across all servers where the bot is present + +## Error Handling + +Both features include comprehensive error handling: +- Client readiness checks +- Channel type validation +- Permission checks +- Length validation for status text +- Detailed logging for debugging + +## Example Agent Interaction + +``` +User: "Set the voice channel status to 'Study session - no interruptions please'" +Agent: "I'll set the voice channel status to 'Study session - no interruptions please'." + +User: "Update your listening status to 'lofi hip hop radio'" +Agent: "I've set my status to 'Listening to lofi hip hop radio'." + +User: "Clear the vc status" +Agent: "I've cleared the voice channel status." +``` + +## Testing + +To test these features: + +1. Ensure the Discord bot has proper permissions +2. Join a voice channel with the bot +3. Use the actions through natural language commands +4. Verify the status appears in the Discord UI + +**Note:** If voice channel status doesn't appear, it may be due to Discord temporarily disabling the feature on their end. + +## API Compatibility + +- **discord.js version**: 14.18.0 (pinned) +- **@discordjs/voice**: 0.18.0 +- **Discord API version**: v10 + +**Runtime Requirements** (inherited from `@elizaos/core`): +- **Node.js**: 23.x (as specified in monorepo root) +- **Bun**: 1.2.x (as specified in monorepo root) + +> Note: This plugin follows the monorepo's engine constraints. See the root +> `package.json` for authoritative runtime version requirements. 
+ +## Future Enhancements + +Potential future improvements: +- Support for other activity types (Playing, Watching, Streaming, Competing) +- Scheduled status updates +- Automatic status rotation +- Integration with external services (e.g., real Spotify integration) +- Voice channel status templates + diff --git a/__tests__/environment.test.ts b/__tests__/environment.test.ts index bc30cbe..476deba 100644 --- a/__tests__/environment.test.ts +++ b/__tests__/environment.test.ts @@ -30,8 +30,10 @@ describe('Discord Environment Configuration', () => { }, } as IAgentRuntime; + // Updated to match new early validation error message + // We now check for token before schema validation to provide clearer guidance await expect(validateDiscordConfig(invalidRuntime)).rejects.toThrowError( - 'Discord configuration validation failed:\nDISCORD_API_TOKEN: Invalid input: expected string, received null' + 'Discord bot token not found' ); }); diff --git a/__tests__/messageManager.test.ts b/__tests__/messageManager.test.ts index 55d89b7..7beb22f 100644 --- a/__tests__/messageManager.test.ts +++ b/__tests__/messageManager.test.ts @@ -2,11 +2,12 @@ import type { IAgentRuntime } from '@elizaos/core'; import { ChannelType, Client, Collection } from 'discord.js'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import { MessageManager } from '../src/messages'; +import type { IDiscordService } from '../src/types'; describe('Discord MessageManager', () => { let mockRuntime: IAgentRuntime; let mockClient: Client; - let mockDiscordClient: { client: Client; runtime: IAgentRuntime }; + let mockDiscordService: IDiscordService; let mockMessage: any; let messageManager: MessageManager; @@ -14,6 +15,7 @@ describe('Discord MessageManager', () => { vi.clearAllMocks(); mockRuntime = { + agentId: 'mock-agent-id', character: { name: 'TestBot', templates: {}, @@ -54,8 +56,28 @@ describe('Discord MessageManager', () => { mockClient = new Client({ intents: [] }); mockClient.user = { id: 
'mock-bot-id', username: 'MockBot' } as any; - mockDiscordClient = { client: mockClient, runtime: mockRuntime }; - messageManager = new MessageManager(mockDiscordClient); + mockDiscordService = { + client: mockClient, + character: mockRuntime.character, + getChannelType: vi.fn().mockResolvedValue(ChannelType.GuildText), + buildMemoryFromMessage: vi.fn().mockImplementation((_message, options) => { + return Promise.resolve({ + id: 'mock-memory-id', + entityId: 'mock-entity-id', + agentId: 'mock-agent-id', + roomId: 'mock-room-id', + content: { + text: options?.processedContent || 'Hello, MockBot!', + source: 'discord', + ...(options?.extraContent || {}), + }, + metadata: options?.extraMetadata || {}, + createdAt: Date.now(), + }); + }), + } as unknown as IDiscordService; + + messageManager = new MessageManager(mockDiscordService, mockRuntime as any); (messageManager as any).getChannelType = vi.fn().mockResolvedValue(ChannelType.GuildText); const guild = { @@ -187,7 +209,7 @@ describe('Discord MessageManager', () => { it('should set mentionType=none when no mention', async () => { // Set natural mode to test this (mockRuntime.character.settings!.discord as any).shouldRespondOnlyToMentions = false; - messageManager = new MessageManager(mockDiscordClient); + messageManager = new MessageManager(mockDiscordService, mockRuntime as any); (messageManager as any).getChannelType = vi.fn().mockResolvedValue(ChannelType.GuildText); mockMessage.mentions.users.has = vi.fn().mockReturnValue(false); @@ -244,7 +266,7 @@ describe('Discord MessageManager', () => { it('should always process DMs regardless of strict mode', async () => { // Temporarily disable shouldIgnoreDirectMessages for this test (mockRuntime.character.settings!.discord as any).shouldIgnoreDirectMessages = false; - messageManager = new MessageManager(mockDiscordClient); + messageManager = new MessageManager(mockDiscordService, mockRuntime as any); (messageManager as any).getChannelType = 
vi.fn().mockResolvedValue(ChannelType.DM); mockMessage.channel.type = ChannelType.DM; @@ -262,7 +284,7 @@ describe('Discord MessageManager', () => { describe('natural mode (shouldRespondOnlyToMentions=false)', () => { beforeEach(() => { (mockRuntime.character.settings!.discord as any).shouldRespondOnlyToMentions = false; - messageManager = new MessageManager(mockDiscordClient); + messageManager = new MessageManager(mockDiscordService, mockRuntime as any); (messageManager as any).getChannelType = vi.fn().mockResolvedValue(ChannelType.GuildText); }); @@ -301,11 +323,6 @@ describe('Discord MessageManager', () => { }); it('should process audio attachments', async () => { - vi.spyOn(messageManager, 'processMessage').mockResolvedValue({ - processedContent: '', - attachments: [], - }); - const mockAttachments = new Collection([ [ 'mock-attachment-id', @@ -318,14 +335,26 @@ describe('Discord MessageManager', () => { ]); mockMessage.attachments = mockAttachments; - const processAttachmentsMock = vi.fn().mockResolvedValue([]); + const processAttachmentsMock = vi.fn().mockResolvedValue([ + { + id: 'mock-attachment-id', + url: 'https://www.example.mp3', + title: 'mock-attachment.mp3', + source: 'discord', + contentType: 'audio/mpeg', + }, + ]); + // Set up the mock before calling handleMessage Object.defineProperty(messageManager, 'attachmentManager', { value: { processAttachments: processAttachmentsMock }, writable: true, }); - await messageManager.handleMessage(mockMessage); + // Call processMessage directly to test attachment processing + const result = await messageManager.processMessage(mockMessage); + expect(processAttachmentsMock).toHaveBeenCalledWith(mockAttachments); + expect(result.attachments).toHaveLength(1); }); }); diff --git a/__tests__/progressiveMessage.test.ts b/__tests__/progressiveMessage.test.ts new file mode 100644 index 0000000..9e2ece8 --- /dev/null +++ b/__tests__/progressiveMessage.test.ts @@ -0,0 +1,130 @@ +import { describe, it, expect, beforeEach } 
from 'vitest'; +import { ProgressiveMessage } from '../src/progressiveMessage'; +import type { HandlerCallback, Memory } from '@elizaos/core'; + +describe('ProgressiveMessage', () => { + let mockCallback: HandlerCallback; + let calledWith: any[]; + + beforeEach(() => { + calledWith = []; + mockCallback = async (content) => { + calledWith.push(content); + // Return a mock memory array + return [{ + id: 'test-id', + entityId: 'test-entity', + agentId: 'test-agent', + roomId: 'test-room', + content, + createdAt: Date.now(), + }] as Memory[]; + }; + }); + + describe('Fast operations (< minDelay)', () => { + it('should not send interim updates for fast operations', async () => { + const progress = new ProgressiveMessage(mockCallback, 'discord'); + + // Call update and complete quickly (within 300ms) + progress.update('Checking...'); + await progress.complete('Done!'); + + // Only final message should be sent (no progressive metadata) + expect(calledWith.length).toBe(1); + expect(calledWith[0].text).toBe('Done!'); + expect(calledWith[0].metadata?.progressiveUpdate).toBeUndefined(); + }); + }); + + describe('complete()', () => { + it('should send final message without progressive metadata for fast operations', async () => { + const progress = new ProgressiveMessage(mockCallback, 'discord', { + minDelay: 300, + }); + + // Fast operation - complete immediately + const result = await progress.complete('All done!'); + + // Should have only final message + expect(calledWith.length).toBe(1); + expect(calledWith[0].text).toBe('All done!'); + expect(result).toHaveLength(1); + }); + }); + + describe('fail()', () => { + it('should send failure message', async () => { + const progress = new ProgressiveMessage(mockCallback, 'discord'); + + const result = await progress.fail('Failed!'); + + expect(calledWith.length).toBe(1); + expect(calledWith[0].text).toBe('Failed!'); + expect(result).toHaveLength(1); + }); + }); + + describe('Non-Discord sources', () => { + it('should send messages 
for non-progressive sources', async () => { + const progress = new ProgressiveMessage(mockCallback, 'web'); + + progress.update('Update 1'); + progress.update('Update 2'); + await new Promise(resolve => setTimeout(resolve, 10)); // Small delay + await progress.complete('Done'); + + // For web, messages are sent (no progressive metadata) + expect(calledWith.length).toBeGreaterThan(0); + + // None should have progressive metadata + calledWith.forEach(call => { + expect(call.metadata?.progressiveUpdate).toBeUndefined(); + }); + }); + }); + + describe('Error handling', () => { + it('should handle callback errors gracefully', async () => { + const errorCallback = async () => { + throw new Error('Callback error'); + }; + const progress = new ProgressiveMessage(errorCallback, 'discord'); + + // Should not throw + await expect(progress.complete('Done')).resolves.toEqual([]); + }); + }); + + describe('Correlation ID', () => { + it('should generate unique correlation IDs for different instances', () => { + const progress1 = new ProgressiveMessage(mockCallback, 'discord'); + const progress2 = new ProgressiveMessage(mockCallback, 'discord'); + + // Both should have different internal correlation IDs + // (We can't test this directly, but we test the behavior) + progress1.update('Test 1'); + progress2.update('Test 2'); + + expect(true).toBe(true); // Simple sanity check + }); + }); + + describe('Source checking', () => { + it('should support discord source', async () => { + const progress = new ProgressiveMessage(mockCallback, 'discord'); + await progress.complete('Done'); + + expect(calledWith.length).toBe(1); + expect(calledWith[0].source).toBe('discord'); + }); + + it('should support non-discord sources', async () => { + const progress = new ProgressiveMessage(mockCallback, 'web'); + await progress.complete('Done'); + + expect(calledWith.length).toBe(1); + expect(calledWith[0].source).toBe('web'); + }); + }); +}); diff --git a/__tests__/token-validation.test.ts 
b/__tests__/token-validation.test.ts new file mode 100644 index 0000000..c94f5d6 --- /dev/null +++ b/__tests__/token-validation.test.ts @@ -0,0 +1,108 @@ +import { describe, it, expect } from 'bun:test'; + +// Since validateDiscordToken is not exported, we'll test it indirectly through the ClientRegistry +// For now, we'll create a simple validation test based on the rules we implemented + +describe('Discord Token Validation', () => { + // Test basic token format validation logic + function testTokenValidation(token: string): { valid: boolean; error?: string } { + if (!token) { + return { valid: false, error: 'Token is empty or undefined' }; + } + + const trimmedToken = token.trim(); + + if (trimmedToken === '') { + return { valid: false, error: 'Token is empty after trimming whitespace' }; + } + + if (trimmedToken === 'undefined' || trimmedToken === 'null') { + return { valid: false, error: 'Token is literally "undefined" or "null" string' }; + } + + if (trimmedToken.length < 50) { + return { valid: false, error: `Token is too short (${trimmedToken.length} characters). Discord tokens are typically 70+ characters` }; + } + + if (!trimmedToken.includes('.')) { + return { valid: false, error: 'Token does not contain expected dot separators. 
Discord tokens typically have format: base64.timestamp.signature' }; + } + + const parts = trimmedToken.split('.'); + if (parts.length < 3) { + return { valid: false, error: `Token has ${parts.length} parts, expected at least 3 (base64.timestamp.signature)` }; + } + + if (parts.some(part => part.trim() === '')) { + return { valid: false, error: 'Token contains empty parts between dots' }; + } + + return { valid: true }; + } + + it('should reject empty token', () => { + const result = testTokenValidation(''); + expect(result.valid).toBe(false); + expect(result.error).toContain('empty'); + }); + + it('should reject undefined as string', () => { + const result = testTokenValidation('undefined'); + expect(result.valid).toBe(false); + expect(result.error).toContain('undefined'); + }); + + it('should reject null as string', () => { + const result = testTokenValidation('null'); + expect(result.valid).toBe(false); + expect(result.error).toContain('null'); + }); + + it('should reject token that is too short', () => { + const result = testTokenValidation('short.token.here'); + expect(result.valid).toBe(false); + expect(result.error).toContain('too short'); + }); + + it('should reject token without dots', () => { + const result = testTokenValidation('a'.repeat(70)); + expect(result.valid).toBe(false); + expect(result.error).toContain('dot separators'); + }); + + it('should reject token with less than 3 parts', () => { + const result = testTokenValidation('a'.repeat(30) + '.' + 'b'.repeat(30)); + expect(result.valid).toBe(false); + expect(result.error).toContain('expected at least 3'); + }); + + it('should reject token with empty parts', () => { + const result = testTokenValidation('a'.repeat(30) + '..' 
+ 'b'.repeat(30)); + expect(result.valid).toBe(false); + expect(result.error).toContain('empty parts'); + }); + + it('should accept valid-looking token format', () => { + // Fake token with valid Discord token format (base64.timestamp.signature) + const mockToken = 'AAAAAAAAAAAAAAAAAAAAAA.AAAAAA.AAAAAAAAAAAAAAAAAAAAAAAAA_a'; + const result = testTokenValidation(mockToken); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should accept token with whitespace that trims to valid format', () => { + const mockToken = ' AAAAAAAAAAAAAAAAAAAAAA.AAAAAA.AAAAAAAAAAAAAAAAAAAAAAAAA_a '; + const result = testTokenValidation(mockToken); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('should accept longer token format (new Discord format)', () => { + // Fake token with longer format + const mockToken = 'BBBBBBBBBBBBBBBBBBBBBBBBBBBBB.BBBBBB.BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'; + const result = testTokenValidation(mockToken); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); +}); + diff --git a/__tests__/voiceManager.test.ts b/__tests__/voiceManager.test.ts new file mode 100644 index 0000000..a78bc78 --- /dev/null +++ b/__tests__/voiceManager.test.ts @@ -0,0 +1,212 @@ +import { EventEmitter } from 'node:events'; +import { PassThrough } from 'node:stream'; +import type { Readable } from 'node:stream'; +import { describe, it, expect, mock, beforeEach, spyOn } from 'bun:test'; +import { VoiceManager } from '../src/voice'; +import type { DiscordService } from '../src/service'; +import type { IAgentRuntime } from '@elizaos/core'; + +const createAudioPlayerMock = mock(() => undefined); +const createAudioResourceMock = mock(() => undefined); +const demuxProbeMock = mock(() => undefined); +const getVoiceConnectionsMock = mock(() => undefined); +const mockVoiceConnections = new Map(); + +mock.module('@discordjs/voice', () => ({ + NoSubscriberBehavior: { Pause: 'pause' }, + 
StreamType: { Arbitrary: 'arbitrary', Opus: 'opus' }, + VoiceConnectionStatus: { + Ready: 'ready', + Signalling: 'signalling', + Connecting: 'connecting', + Disconnected: 'disconnected', + Destroyed: 'destroyed', + }, + createAudioPlayer: (...args: any[]) => createAudioPlayerMock(...args), + createAudioResource: (...args: any[]) => createAudioResourceMock(...args), + demuxProbe: (...args: any[]) => demuxProbeMock(...args), + entersState: mock(() => undefined), + getVoiceConnections: (...args: any[]) => getVoiceConnectionsMock(...args), + joinVoiceChannel: mock(() => undefined), +})); + +mock.module('prism-media', () => { + const decoderFactory = mock(() => new PassThrough()); + const mockModule = { + opus: { + Decoder: decoderFactory, + }, + }; + return { + default: mockModule, + ...mockModule, + }; +}); + +describe('VoiceManager audio pipeline', () => { + let runtime: IAgentRuntime; + let service: DiscordService; + let mockClient: EventEmitter & { user: { id: string } }; + let voiceManager: VoiceManager; + + beforeEach(() => { + // Clear mocks + createAudioPlayerMock.mockClear(); + createAudioResourceMock.mockClear(); + demuxProbeMock.mockClear(); + getVoiceConnectionsMock.mockClear(); + mockVoiceConnections.clear(); + getVoiceConnectionsMock.mockReturnValue(mockVoiceConnections); + + createAudioPlayerMock.mockImplementation(() => ({ + play: mock(() => undefined), + stop: mock(() => undefined), + removeAllListeners: mock(() => undefined), + on: mock(function(this: any) { return this; }), + once: mock(function(this: any) { return this; }), + state: { status: 'playing' }, + })); + createAudioResourceMock.mockImplementation(() => ({ + volume: { + setVolume: mock(() => undefined), + volume: 1.0, + }, + playbackDuration: 0, + started: true, + silenceRemaining: -1, + })); + demuxProbeMock.mockReset(); + + runtime = { + agentId: 'agent-1', + character: { name: 'TestAgent', settings: {} }, + messageService: { handleMessage: mock(() => undefined) }, + ensureConnection: 
mock(() => undefined), + createMemory: mock(() => undefined), + getMemory: mock(() => undefined), + getSetting: mock(() => undefined), + getService: mock(() => undefined), + useModel: mock(() => undefined), + evaluate: mock(() => undefined), + composeState: mock(() => undefined), + emitEvent: mock(() => undefined), + getOrCreateUser: mock(() => undefined), + processActions: mock(() => undefined), + log: mock(() => undefined), + logger: { + warn: mock(() => undefined), + error: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + success: mock(() => undefined), + trace: mock(() => undefined), + }, + } as unknown as IAgentRuntime; + + mockClient = new EventEmitter() as EventEmitter & { user: { id: string } }; + mockClient.user = { id: 'bot-id' }; + (mockClient as any).on = mockClient.on.bind(mockClient); + (mockClient as any).once = mockClient.once.bind(mockClient); + (mockClient as any).emit = mockClient.emit.bind(mockClient); + (mockClient as any).guilds = { cache: new Map(), fetch: mock(() => Promise.resolve(new Map())) }; + + service = { + client: mockClient as any, + } as DiscordService; + + voiceManager = new VoiceManager(service, runtime); + }); + + it('uses demuxProbe output to build audio resource', async () => { + const guildId = 'guild-1'; + const mockSubscription = { + unsubscribe: mock(() => undefined), + player: null, + connection: null, + }; + const subscribeMock = mock(() => mockSubscription); + const connection = { + subscribe: subscribeMock, + receiver: { speaking: new EventEmitter(), subscribe: mock(() => undefined) }, + state: { status: 'ready' }, + joinConfig: { guildId }, + }; + voiceManager['connections'].set(guildId, connection as any); + + const originalStream = new PassThrough(); + const demuxedStream = new PassThrough(); + demuxProbeMock.mockResolvedValue({ stream: demuxedStream, type: 'opus' }); + + // Start playback but don't await fully - we just want to check setup + void 
voiceManager.playAudio(originalStream, { guildId }); + + // Wait a bit for async operations + await new Promise(resolve => setTimeout(resolve, 50)); + + expect(demuxProbeMock).toHaveBeenCalledWith(originalStream); + expect(createAudioResourceMock).toHaveBeenCalledWith(demuxedStream, { inputType: 'opus', inlineVolume: true }); + + // Clean up + originalStream.destroy(); + demuxedStream.destroy(); + }); + + it('continues monitoring even when receive stream is initially empty', async () => { + const guildId = 'guild-2'; + const receiveStream = new PassThrough() as Readable & { readableLength: number }; + // Ensure stream has some length to pass the check + Object.defineProperty(receiveStream, 'readableLength', { value: 1, writable: true }); + + const receiverSubscribeMock = mock(() => receiveStream); + const connection = { + receiver: { + subscribe: receiverSubscribeMock, + }, + joinConfig: { guildId }, + }; + + // Set up the mock to return connection for the correct guild ID + mockVoiceConnections.set(guildId, connection); + + const warnSpy = spyOn(runtime.logger, 'warn').mockImplementation(() => { }); + const emitSpy = spyOn(mockClient, 'emit'); + + const membersFetchMock = mock(() => Promise.resolve(null)); + const membersGetMock = mock(() => ({ user: { bot: false } })); + const member = { + id: 'user-123', + user: { username: 'listener', displayName: 'Listener', bot: false }, + guild: { id: guildId }, + displayName: 'Listener', + }; + const channel = { + id: 'channel-1', + name: 'Test Channel', + guild: { + id: guildId, + name: 'Guild', + members: { fetch: membersFetchMock, me: null }, + }, + members: { + get: membersGetMock, + }, + }; + + await (voiceManager as any).monitorMember(member, channel); + + // Check that no warning about empty stream was logged + const warningCalls = (warnSpy as any).mock.calls; + const hasEmptyStreamWarning = warningCalls.some((call: any[]) => { + const msg = call[0]; + return typeof msg === 'object' && 
JSON.stringify(msg).includes('No receiveStream'); + }); + expect(hasEmptyStreamWarning).toBe(false); + + // Verify userStream event was emitted + expect(emitSpy).toHaveBeenCalledWith('userStream', 'user-123', 'Listener', 'listener', channel, expect.anything()); + + (warnSpy as any).mockRestore(); + (emitSpy as any).mockRestore(); + }); +}); + diff --git a/src/actions/getUserInfo.ts b/src/actions/getUserInfo.ts index 28b25b4..ae82a0c 100644 --- a/src/actions/getUserInfo.ts +++ b/src/actions/getUserInfo.ts @@ -12,7 +12,16 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import { type GuildMember } from "discord.js"; +import { type Guild, type GuildMember, type GuildChannel } from "discord.js"; + +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} /** * Template for extracting user identifier from the request. 
@@ -156,8 +165,35 @@ export const getUserInfo: Action = { try { const room = state.data?.room || (await runtime.getRoom(message.roomId)); - const serverId = room?.messageServerId; - if (!serverId) { + const channelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; + + let guild: Guild | undefined; + + // Primary path: Use channelId to find the guild + if (channelId) { + let channel = discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!channel) { + try { + channel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + guild = channel?.guild; + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (!guild && isDiscordSnowflake(messageServerId)) { + try { + guild = await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + if (!guild) { await callback({ text: "I couldn't determine the current server.", source: "discord", @@ -165,8 +201,6 @@ export const getUserInfo: Action = { return; } - const guild = await discordService.client.guilds.fetch(serverId); - let member: GuildMember | null = null; // Handle "self" request diff --git a/src/actions/joinChannel.ts b/src/actions/joinChannel.ts index 2c90010..8fb60b9 100644 --- a/src/actions/joinChannel.ts +++ b/src/actions/joinChannel.ts @@ -13,10 +13,19 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import type { TextChannel, BaseGuildVoiceChannel } from "discord.js"; +import type { TextChannel, BaseGuildVoiceChannel, GuildChannel, Guild } from "discord.js"; import { ChannelType as DiscordChannelType } from "discord.js"; import type { VoiceManager } from "../voice"; +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs 
contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} + /** * Template for extracting channel information from the user's request to join a channel. * @@ -82,18 +91,58 @@ const getJoinChannelInfo = async ( return null; }; +/** + * Get the guild from a channel ID or message server ID (with backwards compatibility) + */ +const getGuildFromRoom = async ( + discordService: DiscordService, + channelId?: string, + messageServerId?: string, +): Promise => { + if (!discordService.client) return null; + + // Primary path: Use channelId to find the guild + if (channelId) { + let channel = discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!channel) { + try { + channel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + if (channel?.guild) { + return channel.guild; + } + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (isDiscordSnowflake(messageServerId)) { + try { + return await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + return null; +}; + /** * Find a Discord channel by various identifiers * @param {DiscordService} discordService - The Discord service instance * @param {string} identifier - The channel identifier (name, ID, or mention) - * @param {string} currentServerId - The current server ID to search in + * @param {string} currentChannelId - The current channel ID to determine which server to search in + * @param {string} messageServerId - Backwards compatibility: old messageServerId (Discord guild ID) * @param {boolean} isVoiceChannel - Whether to look for voice channels * @returns {Promise} The found channel or null */ const findChannel = async ( discordService: 
DiscordService, identifier: string, - currentServerId?: string, + currentChannelId?: string, + messageServerId?: string, isVoiceChannel?: boolean, ): Promise => { if (!discordService.client) { @@ -122,9 +171,9 @@ const findChannel = async ( } } - // Search in the current server if available - if (currentServerId) { - const guild = await discordService.client.guilds.fetch(currentServerId); + // Search in the current server if available (look up guild via channel ID or messageServerId) + const guild = await getGuildFromRoom(discordService, currentChannelId, messageServerId); + if (guild) { const channels = await guild.channels.fetch(); // Search by channel name @@ -237,7 +286,8 @@ export const joinChannel: Action = { try { const room = state.data?.room || (await runtime.getRoom(message.roomId)); - const currentServerId = room?.messageServerId; + const currentChannelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; // First, try the user's approach - if they said voice/vc, look for voice channels const messageText = message.content.text?.toLowerCase() || ""; @@ -252,13 +302,15 @@ export const joinChannel: Action = { ? await findChannel( discordService, channelInfo.channelIdentifier, - currentServerId, + currentChannelId, + messageServerId, true, ) : await findChannel( discordService, channelInfo.channelIdentifier, - currentServerId, + currentChannelId, + messageServerId, false, ); @@ -268,21 +320,23 @@ export const joinChannel: Action = { ? 
await findChannel( discordService, channelInfo.channelIdentifier, - currentServerId, + currentChannelId, + messageServerId, false, ) : await findChannel( discordService, channelInfo.channelIdentifier, - currentServerId, + currentChannelId, + messageServerId, true, ); } if (!targetChannel) { // If the user is in a voice channel and no specific channel was found, join their voice channel - if (isVoiceRequest && currentServerId) { - const guild = discordService.client.guilds.cache.get(currentServerId); + if (isVoiceRequest && (currentChannelId || messageServerId)) { + const guild = await getGuildFromRoom(discordService, currentChannelId, messageServerId); const members = guild?.members.cache; const member = members?.find( (member) => diff --git a/src/actions/leaveChannel.ts b/src/actions/leaveChannel.ts index 657894a..3a9649e 100644 --- a/src/actions/leaveChannel.ts +++ b/src/actions/leaveChannel.ts @@ -14,10 +14,57 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import { type TextChannel, BaseGuildVoiceChannel } from "discord.js"; +import { type TextChannel, BaseGuildVoiceChannel, type GuildChannel, type Guild } from "discord.js"; import { ChannelType as DiscordChannelType } from "discord.js"; import type { VoiceManager } from "../voice"; +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} + +/** + * Get the guild from a channel ID or message server ID (with backwards compatibility) + */ +const getGuildFromRoom = async ( + discordService: DiscordService, + channelId?: string, + messageServerId?: string, +): Promise => { + if (!discordService.client) return null; + + // Primary path: Use channelId to find the guild + if (channelId) { + let channel = 
discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!channel) { + try { + channel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + if (channel?.guild) { + return channel.guild; + } + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (isDiscordSnowflake(messageServerId)) { + try { + return await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + return null; +}; + /** * Template for extracting channel information from the user's request to leave a channel. * @@ -270,8 +317,8 @@ export const leaveChannel: Action = { try { const room = state.data?.room || (await runtime.getRoom(message.roomId)); - const currentServerId = room?.messageServerId; const currentChannelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; // Check if trying to leave voice without specifying channel const messageText = message.content.text?.toLowerCase() || ""; @@ -296,8 +343,8 @@ export const leaveChannel: Action = { return undefined; } - if (currentServerId) { - const guild = discordService.client.guilds.cache.get(currentServerId); + if (currentChannelId || messageServerId) { + const guild = await getGuildFromRoom(discordService, currentChannelId, messageServerId); const voiceChannel = guild?.members.me?.voice.channel; if ( diff --git a/src/actions/reactToMessage.ts b/src/actions/reactToMessage.ts index f0d3c45..17daf9e 100644 --- a/src/actions/reactToMessage.ts +++ b/src/actions/reactToMessage.ts @@ -124,6 +124,189 @@ const emojiMap: Record = { ':rocket:': '🚀', }; +// Sentiment keywords mapped to emoji categories +const sentimentEmojis: Record = { + positive: ['👍', '✅', '💯', '🙌', '👏'], + agreement: ['👍', '✅', '💯', '🤝'], + excitement: ['🔥', '🚀', '⭐', '💥', '🎉'], + love: ['❤️', '💕', '💜', '🖤', '💙'], + 
thinking: ['🤔', '💭', '🧐'], + funny: ['😂', '😆', '🤣', '😄'], + greeting: ['👋', '🙌'], + thanks: ['🙏', '💜', '❤️'], + question: ['🤔', '❓', '👀'], + sad: ['😢', '💔', '😞'], + neutral: ['👀', '👍'], +}; + +/** + * Detects the sentiment/intent of a message for emoji selection. + * Returns a sentiment category that can be used to pick appropriate emojis. + */ +function detectSentiment(text: string): string { + if (!text) return 'neutral'; + const lower = text.toLowerCase(); + + if (/\b(thanks?|thank you|appreciate|grateful)\b/.test(lower)) return 'thanks'; + if (/\b(love|adore|amazing|wonderful|beautiful)\b/.test(lower)) return 'love'; + if (/\b(lol|lmao|haha|funny|hilarious|joke)\b/.test(lower)) return 'funny'; + if (/\b(awesome|excited|hype|let'?s go|amazing|incredible)\b/.test(lower)) return 'excitement'; + if (/\b(agree|yes|exactly|right|correct|true)\b/.test(lower)) return 'agreement'; + if (/\b(good|great|nice|cool|ok|fine|sure)\b/.test(lower)) return 'positive'; + if (/\b(hi|hello|hey|welcome|greetings)\b/.test(lower)) return 'greeting'; + if (/\?/.test(lower)) return 'question'; + if (/\b(sad|sorry|unfortunate|bad|wrong)\b/.test(lower)) return 'sad'; + if (/\b(think|wonder|maybe|perhaps|hmm)\b/.test(lower)) return 'thinking'; + + return 'neutral'; +} + +/** + * Check if the character's style forbids emoji usage. + * Scans style.all for rules like "never use emojis". + */ +function characterForbidsEmojis(runtime: IAgentRuntime): boolean { + const styleAll = runtime.character?.style?.all || []; + for (const rule of styleAll) { + const lower = rule.toLowerCase(); + if ( + (lower.includes('never') || lower.includes("don't") || lower.includes('no ')) && + lower.includes('emoji') + ) { + return true; + } + } + return false; +} + +/** + * Get character's preferred emojis from settings. + * Supports both array format and object format with categories. 
+ * + * Examples: + * settings: { preferredEmojis: ['🌸', '🍂', '🌿'] } + * settings: { emojiPreferences: { preferred: ['🖤', '🌙'], forbidden: ['❤️'], fallback: '👍' } } + */ +function getCharacterEmojiPreferences(runtime: IAgentRuntime): { + preferred: string[]; + forbidden: string[]; + fallback: string | null; +} { + const settings = runtime.character?.settings as Record | undefined; + if (!settings) return { preferred: [], forbidden: [], fallback: null }; + + // Simple array format + if (Array.isArray(settings.preferredEmojis)) { + return { + preferred: settings.preferredEmojis as string[], + forbidden: [], + fallback: (settings.preferredEmojis as string[])[0] || null, + }; + } + + // Object format with categories + const prefs = settings.emojiPreferences as Record | undefined; + if (prefs && typeof prefs === 'object') { + return { + preferred: Array.isArray(prefs.preferred) ? (prefs.preferred as string[]) : [], + forbidden: Array.isArray(prefs.forbidden) ? (prefs.forbidden as string[]) : [], + fallback: typeof prefs.fallback === 'string' ? prefs.fallback : null, + }; + } + + return { preferred: [], forbidden: [], fallback: null }; +} + +/** + * Select an emoji based on character preferences and message sentiment. + * Returns null if character forbids emojis or no suitable emoji found. 
+ */ +function selectCharacterEmoji( + runtime: IAgentRuntime, + messageText: string +): string | null { + // Check if character forbids emojis + if (characterForbidsEmojis(runtime)) { + runtime.logger.debug( + { src: 'plugin:discord:action:react' }, + `[REACT_TO_MESSAGE] Character style forbids emojis` + ); + return null; + } + + const prefs = getCharacterEmojiPreferences(runtime); + const sentiment = detectSentiment(messageText); + + // If character has preferred emojis, try to match by sentiment + if (prefs.preferred.length > 0) { + // Get sentiment-appropriate emojis from character's preferred list + const sentimentOptions = sentimentEmojis[sentiment] || sentimentEmojis.neutral; + const characterMatch = prefs.preferred.find((e) => sentimentOptions.includes(e)); + + if (characterMatch) { + runtime.logger.debug( + { src: 'plugin:discord:action:react', emoji: characterMatch, sentiment }, + `[REACT_TO_MESSAGE] Selected character-preferred emoji by sentiment` + ); + return characterMatch; + } + + // No sentiment match, use first preferred or fallback + const selected = prefs.fallback || prefs.preferred[0]; + runtime.logger.debug( + { src: 'plugin:discord:action:react', emoji: selected }, + `[REACT_TO_MESSAGE] Using character fallback emoji` + ); + return selected; + } + + // No character preferences - use sentiment-based selection + const options = sentimentEmojis[sentiment] || sentimentEmojis.neutral; + + // Filter out forbidden emojis + const allowed = prefs.forbidden.length > 0 + ? 
options.filter((e) => !prefs.forbidden.includes(e)) + : options; + + if (allowed.length === 0) { + // Sentiment options exhausted - try neutral emojis first + const neutralAllowed = sentimentEmojis.neutral.filter((e) => !prefs.forbidden.includes(e)); + if (neutralAllowed.length > 0) { + runtime.logger.debug( + { src: 'plugin:discord:action:react', emoji: neutralAllowed[0], sentiment }, + `[REACT_TO_MESSAGE] Sentiment emojis forbidden, using neutral fallback` + ); + return neutralAllowed[0]; + } + + // Neutral exhausted - scan ALL emoji sets for any non-forbidden emoji + for (const category of Object.keys(sentimentEmojis)) { + const categoryAllowed = sentimentEmojis[category].filter((e) => !prefs.forbidden.includes(e)); + if (categoryAllowed.length > 0) { + runtime.logger.debug( + { src: 'plugin:discord:action:react', emoji: categoryAllowed[0], category, sentiment }, + `[REACT_TO_MESSAGE] Found non-forbidden emoji in ${category} category` + ); + return categoryAllowed[0]; + } + } + + // All emojis are forbidden - return null to skip reaction + runtime.logger.debug( + { src: 'plugin:discord:action:react', forbidden: prefs.forbidden }, + `[REACT_TO_MESSAGE] All emojis forbidden, skipping reaction` + ); + return null; + } + + const selected = allowed[0]; + runtime.logger.debug( + { src: 'plugin:discord:action:react', emoji: selected, sentiment }, + `[REACT_TO_MESSAGE] Selected sentiment-based emoji` + ); + return selected; +} + export const reactToMessage: Action = { name: 'REACT_TO_MESSAGE', similes: [ @@ -203,8 +386,32 @@ export const reactToMessage: Action = { } } + // ============================================================================ + // CHARACTER PATH: Use character preferences/style when fast path fails + // ============================================================================ + if (!reactionInfo && !needsLLM) { + // Agent spontaneously reacting, try character-based emoji selection + const characterEmoji = selectCharacterEmoji(runtime, 
userText); + if (characterEmoji) { + runtime.logger.debug( + { src: 'plugin:discord:action:react', emoji: characterEmoji }, + `[REACT_TO_MESSAGE] Using character-based emoji selection` + ); + reactionInfo = { messageRef: 'last', emoji: characterEmoji }; + } else if (characterForbidsEmojis(runtime)) { + // Character style forbids emojis - silently skip + runtime.logger.debug( + { src: 'plugin:discord:action:react' }, + `[REACT_TO_MESSAGE] Skipping reaction - character forbids emojis` + ); + return; + } + } + + // ============================================================================ + // LLM PATH: Use when explicit request or other paths fail + // ============================================================================ if (!reactionInfo) { - // LLM PATH: Use when fast path fails or user specified a specific target const prompt = composePromptFromState({ state, template: reactToMessageTemplate, @@ -217,11 +424,41 @@ export const reactToMessage: Action = { const parsedResponse = parseJSONObjectFromText(response); if (parsedResponse?.emoji) { - reactionInfo = { - messageRef: parsedResponse.messageRef || 'last', - emoji: parsedResponse.emoji, - }; - break; + // Check if the LLM-selected emoji is forbidden by character + const prefs = getCharacterEmojiPreferences(runtime); + let emoji: string | null = parsedResponse.emoji; + + if (prefs.forbidden.includes(emoji)) { + // Try to find an allowed alternative from sentiment-matched emojis + const sentiment = detectSentiment(userText); + const alternatives = sentimentEmojis[sentiment] || []; + const allowedAlternatives = alternatives.filter((e) => !prefs.forbidden.includes(e)); + + if (allowedAlternatives.length > 0) { + emoji = allowedAlternatives[0]; + } else { + // Fallback to neutral emojis filtered for forbidden + const neutralAllowed = sentimentEmojis.neutral.filter((e) => !prefs.forbidden.includes(e)); + if (neutralAllowed.length > 0) { + emoji = neutralAllowed[0]; + } else { + // No safe emoji available - 
skip reaction entirely + runtime.logger.debug( + { src: 'plugin:discord:action:react', forbidden: prefs.forbidden }, + '[REACT_TO_MESSAGE] LLM selected forbidden emoji and no safe alternative exists, skipping reaction' + ); + emoji = null; + } + } + } + + if (emoji) { + reactionInfo = { + messageRef: parsedResponse.messageRef || 'last', + emoji, + }; + break; + } } } } diff --git a/src/actions/readChannel.ts b/src/actions/readChannel.ts index 5ec07b6..4385cdf 100644 --- a/src/actions/readChannel.ts +++ b/src/actions/readChannel.ts @@ -12,7 +12,16 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import { PermissionsBitField, type TextChannel } from "discord.js"; +import { PermissionsBitField, type TextChannel, type Guild, type GuildChannel } from "discord.js"; + +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} /** * Template for extracting channel information from the user's request. 
@@ -174,15 +183,41 @@ export const readChannel: Action = { )) as TextChannel; } else { // It's a channel name - search in the current server - const serverId = room?.messageServerId; - if (!serverId) { + const channelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; + + let guild: Guild | undefined; + + // Primary path: Use channelId to find the guild + if (channelId) { + let currentChannel = discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!currentChannel) { + try { + currentChannel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + guild = currentChannel?.guild; + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (!guild && isDiscordSnowflake(messageServerId)) { + try { + guild = await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + if (!guild) { await callback({ text: "I couldn't determine which server to search for that channel.", source: "discord", }); return; } - const guild = await discordService.client.guilds.fetch(serverId); const channels = await guild.channels.fetch(); targetChannel = diff --git a/src/actions/searchMessages.ts b/src/actions/searchMessages.ts index c9552d0..ab9efbd 100644 --- a/src/actions/searchMessages.ts +++ b/src/actions/searchMessages.ts @@ -12,35 +12,59 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import { type TextChannel, type Message, Collection } from "discord.js"; +import { type TextChannel, type Message, Collection, type Guild, type GuildChannel } from "discord.js"; + +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function 
isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} /** * Template for extracting search parameters from the user's request. + * + * PROMPT DESIGN NOTE: + * We show a minimal JSON schema with only required fields. Optional fields are + * described separately. This prevents LLMs from outputting null/undefined for + * fields they feel compelled to include when they see them in the schema. */ export const searchMessagesTemplate = `# Searching for Discord messages {{recentMessages}} -# Instructions: {{senderName}} is requesting to search for messages in Discord. Extract: -1. The search query/keywords -2. The channel to search in (current if not specified) -3. Optional filters like author, time range, or message count +# Instructions: {{senderName}} is requesting to search for messages in Discord. + +Extract the search parameters as JSON. Only include fields that apply to the request. + +## Required fields: +- "query": The search keywords +- "channelIdentifier": Channel name, channel ID, or "current" (default: "current") -Examples: -- "search for messages containing 'meeting'" -> query: "meeting", channelIdentifier: "current", NO author field -- "find messages from @user about bugs" -> query: "bugs", channelIdentifier: "current", author: "user" -- "search #general for links from last week" -> query: "links", channelIdentifier: "general", timeRange: "week" -- "search for messages about 'spartan' in this channel" -> query: "spartan", channelIdentifier: "current" +## Optional fields (only include if explicitly mentioned): +- "author": Username to filter by (only if user asked to search messages FROM someone) +- "timeRange": One of "hour", "day", "week", "month" (only if user specified a time period) +- "limit": Number 1-100 (default: 20) -Your response must be formatted as a JSON block: +## Examples: + +"search for messages containing 'meeting'" -> \`\`\`json -{ - "query": "", - "channelIdentifier": "", - 
"author": "", // ONLY include this field if a specific author was mentioned - "timeRange": "", // ONLY include if a time range was specified - "limit": -} +{"query": "meeting", "channelIdentifier": "current"} +\`\`\` + +"find messages from @john about bugs" -> +\`\`\`json +{"query": "bugs", "channelIdentifier": "current", "author": "john"} \`\`\` + +"search #general for links from last week" -> +\`\`\`json +{"query": "links", "channelIdentifier": "general", "timeRange": "week"} +\`\`\` + +Now extract the parameters from {{senderName}}'s request: `; const getSearchParams = async ( @@ -69,11 +93,24 @@ const getSearchParams = async ( // Remove quotes from query if present const cleanQuery = parsedResponse.query.replace(/^["']|["']$/g, ""); + // Normalize null-like string values to actual null + const normalizeNullish = (val: unknown): string | null => { + if (!val) return null; + if (typeof val === 'string') { + const lower = val.toLowerCase().trim(); + if (lower === 'null' || lower === 'undefined' || lower === 'none' || lower === '') { + return null; + } + return val; + } + return null; + }; + return { query: cleanQuery, - channelIdentifier: parsedResponse.channelIdentifier || "current", - author: parsedResponse.author || null, - timeRange: parsedResponse.timeRange || null, + channelIdentifier: parsedResponse.channelIdentifier || 'current', + author: normalizeNullish(parsedResponse.author), + timeRange: normalizeNullish(parsedResponse.timeRange), limit: Math.min(Math.max(parsedResponse.limit || 20, 1), 100), }; } @@ -96,8 +133,8 @@ const searchInMessages = ( return false; } - // Filter by author if specified - if (author && author !== "null" && author !== "undefined") { + // Filter by author if specified (already normalized to null if invalid) + if (author) { const authorLower = author.toLowerCase(); const matchesUsername = msg.author.username .toLowerCase() @@ -203,15 +240,41 @@ export const searchMessages: Action = { )) as TextChannel; } else { // It's a channel 
name - search in the current server - const serverId = room?.messageServerId; - if (!serverId) { + const channelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; + + let guild: Guild | undefined; + + // Primary path: Use channelId to find the guild + if (channelId) { + let currentChannel = discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!currentChannel) { + try { + currentChannel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + guild = currentChannel?.guild; + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (!guild && isDiscordSnowflake(messageServerId)) { + try { + guild = await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + if (!guild) { await callback({ text: "I couldn't determine which server to search for that channel.", source: "discord", }); return; } - const guild = await discordService.client.guilds.fetch(serverId); const channels = await guild.channels.fetch(); targetChannel = (channels.find( diff --git a/src/actions/sendDM.ts b/src/actions/sendDM.ts index eac03d1..af06349 100644 --- a/src/actions/sendDM.ts +++ b/src/actions/sendDM.ts @@ -12,7 +12,54 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import type { User } from "discord.js"; +import type { User, GuildChannel, Guild } from "discord.js"; + +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} + +/** + * Get the guild from a channel ID or message server ID (with backwards compatibility) 
+ */ +const getGuildFromRoom = async ( + discordService: DiscordService, + channelId?: string, + messageServerId?: string, +): Promise => { + if (!discordService.client) return null; + + // Primary path: Use channelId to find the guild + if (channelId) { + let channel = discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!channel) { + try { + channel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + if (channel?.guild) { + return channel.guild; + } + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (isDiscordSnowflake(messageServerId)) { + try { + return await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + return null; +}; /** * Template for extracting DM recipient and message information from the user's request. @@ -85,13 +132,15 @@ const getDMInfo = async ( * Find a Discord user by various identifiers * @param {DiscordService} discordService - The Discord service instance * @param {string} identifier - The user identifier (username, ID, or mention) - * @param {string} currentServerId - The current server ID to search in + * @param {string} currentChannelId - The current channel ID to determine which server to search in + * @param {string} messageServerId - Backwards compatibility: old messageServerId (Discord guild ID) * @returns {Promise} The found user or null */ const findUser = async ( discordService: DiscordService, identifier: string, - currentServerId?: string, + currentChannelId?: string, + messageServerId?: string, ): Promise => { if (!discordService.client) { return null; @@ -110,9 +159,9 @@ const findUser = async ( } } - // Search in the current server if available - if (currentServerId) { - const guild = await discordService.client.guilds.fetch(currentServerId); + // Search in the current 
server if available (look up guild via channel ID or messageServerId) + const guild = await getGuildFromRoom(discordService, currentChannelId, messageServerId); + if (guild) { const members = await guild.members.fetch(); // Search by username or display name @@ -207,13 +256,15 @@ export const sendDM: Action = { try { const room = state.data?.room || (await runtime.getRoom(message.roomId)); - const currentServerId = room?.messageServerId; + const currentChannelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; // Find the user const targetUser = await findUser( discordService, dmInfo.recipientIdentifier, - currentServerId, + currentChannelId, + messageServerId, ); if (!targetUser) { diff --git a/src/actions/serverInfo.ts b/src/actions/serverInfo.ts index c2ca595..d63783d 100644 --- a/src/actions/serverInfo.ts +++ b/src/actions/serverInfo.ts @@ -9,7 +9,16 @@ import { } from "@elizaos/core"; import { DiscordService } from "../service"; import { DISCORD_SERVICE_NAME } from "../constants"; -import { type Guild } from "discord.js"; +import { type Guild, type GuildChannel } from "discord.js"; + +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} const formatServerInfo = (guild: Guild, detailed: boolean = false): string => { const createdAt = new Date(guild.createdAt).toLocaleDateString(); @@ -121,8 +130,35 @@ export const serverInfo: Action = { try { const room = state.data?.room || (await runtime.getRoom(message.roomId)); - const serverId = room?.messageServerId; - if (!serverId) { + const channelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; + + let guild: Guild | undefined; + + // Primary path: Use channelId to find the guild + if (channelId) { + let channel = 
discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!channel) { + try { + channel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + guild = channel?.guild; + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (!guild && isDiscordSnowflake(messageServerId)) { + try { + guild = await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + if (!guild) { await callback({ text: "I couldn't determine the current server.", source: "discord", @@ -130,8 +166,6 @@ export const serverInfo: Action = { return; } - const guild = await discordService.client.guilds.fetch(serverId); - // Check if the request is for detailed info const messageText = message.content.text?.toLowerCase() || ""; const isDetailed = diff --git a/src/actions/setListeningActivity.ts b/src/actions/setListeningActivity.ts new file mode 100644 index 0000000..837ec90 --- /dev/null +++ b/src/actions/setListeningActivity.ts @@ -0,0 +1,286 @@ +import { + type Action, + type ActionExample, + type Content, + type HandlerCallback, + type IAgentRuntime, + type Memory, + ModelType, + type State, + composePromptFromState, + parseJSONObjectFromText, +} from '@elizaos/core'; +import { DiscordService } from '../service'; +import { DISCORD_SERVICE_NAME } from '../constants'; + +/** + * Template for extracting listening activity information from the user's request. + */ +export const setListeningActivityTemplate = `# Messages we are analyzing for setting listening activity +{{recentMessages}} + +# Instructions: {{senderName}} is requesting to set the bot's "listening to" activity status. 
+Extract the following information from their request: +- The activity text they want to display (e.g., "Spotify", "your commands", "the void") +- Whether they want to clear the activity (indicated by words like "clear", "remove", "stop") + +Your response must be formatted as a JSON block with this structure: +\`\`\`json +{ + "activityText": "", + "clearActivity": true/false +} +\`\`\` +`; + +/** + * Get listening activity information from the user's request + * Validates that activityText and clearActivity are proper types + * to avoid passing undefined to discordService.setListeningActivity + */ +const getListeningActivityInfo = async ( + runtime: IAgentRuntime, + _message: Memory, + state: State +): Promise<{ activityText: string; clearActivity: boolean } | null> => { + const prompt = composePromptFromState({ + state, + template: setListeningActivityTemplate, + }); + + for (let i = 0; i < 3; i++) { + const response = await runtime.useModel(ModelType.TEXT_SMALL, { + prompt, + }); + + const parsedResponse = parseJSONObjectFromText(response) as { + activityText?: unknown; + clearActivity?: unknown; + } | null; + + if (!parsedResponse) { + continue; // Retry if parsing failed entirely + } + + // Validate activityText is a string (can be empty for clearing) + const activityText = parsedResponse.activityText; + if (typeof activityText !== 'string') { + runtime.logger.debug( + { attempt: i + 1, activityText }, + 'Invalid activityText from LLM, retrying' + ); + continue; + } + + // Validate clearActivity is a boolean + const clearActivity = parsedResponse.clearActivity; + if (typeof clearActivity !== 'boolean') { + runtime.logger.debug( + { attempt: i + 1, clearActivity }, + 'Invalid clearActivity from LLM, retrying' + ); + continue; + } + + return { + activityText: activityText.trim(), + clearActivity, + }; + } + + runtime.logger.warn('Failed to get valid listening activity info after 3 attempts'); + return null; +}; + +export const setListeningActivity: Action = { + 
name: 'SET_LISTENING_ACTIVITY', + similes: [ + 'SET_LISTENING_STATUS', + 'SET_LISTENING_TO', + 'UPDATE_LISTENING_STATUS', + 'CHANGE_LISTENING_ACTIVITY', + 'SET_NOW_PLAYING', + 'CLEAR_LISTENING_STATUS', + 'SET_ACTIVITY', + 'UPDATE_STATUS', + 'SET_PRESENCE', + ], + description: + 'Set or clear the bot\'s "listening to" activity status. This appears under the bot\'s name in the member list (e.g., "Listening to Spotify").', + validate: async (_runtime: IAgentRuntime, message: Memory, _state: State) => { + if (message.content.source !== 'discord') { + return false; + } + return true; + }, + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State, + _options: any, + callback: HandlerCallback + ) => { + const discordService = runtime.getService(DISCORD_SERVICE_NAME) as DiscordService; + + if (!discordService || !discordService.client) { + // Keep diagnostic logging for debugging + console.error('Discord service not found or not initialized'); + + // Notify user of the error before returning + // Using callback since this is an action handler, not a slash command + // The message will be delivered through the same channel the user messaged in + await callback({ + text: 'Discord service is not initialized. Please try again in a moment.', + source: 'discord', + }); + return; + } + + const activityInfo = await getListeningActivityInfo(runtime, message, state); + if (!activityInfo) { + console.error("Couldn't parse listening activity information from message"); + await callback({ + text: "I couldn't understand what listening activity you want me to set. 
Please specify the activity text or ask me to clear it.", + source: 'discord', + }); + return; + } + + try { + let success = false; + let responseText = ''; + + if (activityInfo.clearActivity) { + // Clear the activity + success = await discordService.clearActivity(); + responseText = 'I\'ve cleared my listening activity status.'; + } else { + // Set the listening activity + success = await discordService.setListeningActivity(activityInfo.activityText); + responseText = `I've set my status to "Listening to ${activityInfo.activityText}".`; + } + + if (success) { + await runtime.createMemory( + { + entityId: message.entityId, + agentId: message.agentId, + roomId: message.roomId, + content: { + source: 'discord', + thought: activityInfo.clearActivity + ? 'I cleared my listening activity' + : `I set my listening activity to "${activityInfo.activityText}"`, + actions: ['SET_LISTENING_ACTIVITY_COMPLETED'], + }, + metadata: { + type: 'SET_LISTENING_ACTIVITY', + activityText: activityInfo.activityText, + cleared: activityInfo.clearActivity, + }, + }, + 'messages' + ); + + const response: Content = { + text: responseText, + actions: ['SET_LISTENING_ACTIVITY_RESPONSE'], + source: message.content.source, + }; + + await callback(response); + } else { + await callback({ + text: "I couldn't update my listening activity. 
Please try again.", + source: 'discord', + }); + } + } catch (error) { + console.error('Error setting listening activity:', error); + await callback({ + text: 'I encountered an error while trying to update my listening activity.', + source: 'discord', + }); + } + }, + examples: [ + [ + { + name: '{{name1}}', + content: { + text: 'Set your status to listening to Spotify', + }, + }, + { + name: '{{name2}}', + content: { + text: 'I\'ll set my status to "Listening to Spotify".', + actions: ['SET_LISTENING_ACTIVITY'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Update your listening activity to "your commands"', + }, + }, + { + name: '{{name2}}', + content: { + text: 'I\'ll update my status to "Listening to your commands".', + actions: ['SET_LISTENING_ACTIVITY'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Clear your listening status', + }, + }, + { + name: '{{name2}}', + content: { + text: "I'll clear my listening activity status.", + actions: ['SET_LISTENING_ACTIVITY'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Set listening to "lo-fi beats 🎵"', + }, + }, + { + name: '{{name2}}', + content: { + text: 'I\'ll set my status to "Listening to lo-fi beats 🎵".', + actions: ['SET_LISTENING_ACTIVITY'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Stop showing your listening activity', + }, + }, + { + name: '{{name2}}', + content: { + text: "I'll stop showing my listening activity.", + actions: ['SET_LISTENING_ACTIVITY'], + }, + }, + ], + ] as ActionExample[][], +} as Action; + +export default setListeningActivity; + diff --git a/src/actions/setVoiceChannelStatus.ts b/src/actions/setVoiceChannelStatus.ts new file mode 100644 index 0000000..a308148 --- /dev/null +++ b/src/actions/setVoiceChannelStatus.ts @@ -0,0 +1,416 @@ +import { + type Action, + type ActionExample, + type Content, + type HandlerCallback, + type IAgentRuntime, + type Memory, + ModelType, + type State, + 
composePromptFromState, + parseJSONObjectFromText, +} from '@elizaos/core'; +import { DiscordService } from '../service'; +import { DISCORD_SERVICE_NAME } from '../constants'; +import type { BaseGuildVoiceChannel, Guild, GuildChannel } from 'discord.js'; +import { ChannelType as DiscordChannelType } from 'discord.js'; + +/** + * Check if a string looks like a Discord snowflake ID (all digits, 17-20 chars) + * UUIDs contain hyphens and letters, snowflakes are pure numeric + */ +function isDiscordSnowflake(id: string | undefined): boolean { + if (!id) return false; + return /^\d{17,20}$/.test(id); +} + +/** + * Get the guild from a channel ID or message server ID (with backwards compatibility) + */ +const getGuildFromRoom = async ( + discordService: DiscordService, + channelId?: string, + messageServerId?: string, +): Promise => { + if (!discordService.client) return null; + + // Primary path: Use channelId to find the guild + if (channelId) { + let channel = discordService.client.channels.cache.get(channelId) as GuildChannel | undefined; + if (!channel) { + try { + channel = await discordService.client.channels.fetch(channelId) as GuildChannel | undefined; + } catch { + // Channel fetch failed + } + } + if (channel?.guild) { + return channel.guild; + } + } + + // Backwards compatibility: If channelId didn't work, try messageServerId + // Only if it looks like a Discord snowflake (not a UUID) + if (isDiscordSnowflake(messageServerId)) { + try { + return await discordService.client.guilds.fetch(messageServerId); + } catch { + // Guild fetch failed + } + } + + return null; +}; + +/** + * Template for extracting voice channel status information from the user's request. + */ +export const setVoiceChannelStatusTemplate = `# Messages we are analyzing for voice channel status update +{{recentMessages}} + +# Instructions: {{senderName}} is requesting to set a status on a Discord voice channel. 
+Extract the following information from their request: +- The channel identifier (name, ID, or mention) +- The status message they want to set (can be empty to clear the status) + +Your response must be formatted as a JSON block with this structure: +\`\`\`json +{ + "channelIdentifier": "", + "statusMessage": "" +} +\`\`\` +`; + +/** + * Get voice channel status information from the user's request + * Validates that channelIdentifier and statusMessage are proper strings + * to avoid passing undefined to discordService.setVoiceChannelStatus + */ +const getVoiceChannelStatusInfo = async ( + runtime: IAgentRuntime, + _message: Memory, + state: State +): Promise<{ channelIdentifier: string; statusMessage: string } | null> => { + const prompt = composePromptFromState({ + state, + template: setVoiceChannelStatusTemplate, + }); + + for (let i = 0; i < 3; i++) { + const response = await runtime.useModel(ModelType.TEXT_SMALL, { + prompt, + }); + + const parsedResponse = parseJSONObjectFromText(response) as { + channelIdentifier?: unknown; + statusMessage?: unknown; + } | null; + + if (!parsedResponse) { + continue; // Retry if parsing failed entirely + } + + // Validate channelIdentifier is a non-empty string + // The LLM may return undefined, null, or non-string values + const channelIdentifier = parsedResponse.channelIdentifier; + if (typeof channelIdentifier !== 'string' || channelIdentifier.trim() === '') { + runtime.logger.debug( + { attempt: i + 1, channelIdentifier }, + 'Invalid channelIdentifier from LLM, retrying' + ); + continue; + } + + // Validate statusMessage is a string (empty string is valid - clears status) + // But it must be a string, not undefined/null/number + const statusMessage = parsedResponse.statusMessage; + if (typeof statusMessage !== 'string') { + runtime.logger.debug( + { attempt: i + 1, statusMessage }, + 'Invalid statusMessage from LLM, retrying' + ); + continue; + } + + return { + channelIdentifier: channelIdentifier.trim(), + statusMessage: 
statusMessage.trim(), + }; + } + + runtime.logger.warn('Failed to get valid voice channel status info after 3 attempts'); + return null; +}; + +/** + * Find a Discord voice channel by various identifiers + */ +const findVoiceChannel = async ( + runtime: IAgentRuntime, + discordService: DiscordService, + identifier: string, + channelId?: string, + messageServerId?: string +): Promise => { + if (!discordService.client) return null; + + // Remove channel mention formatting if present + const cleanId = identifier.replace(/[<#>]/g, ''); + + try { + // Try to fetch by ID first + if (/^\d+$/.test(cleanId)) { + try { + const channel = await discordService.client.channels.fetch(cleanId); + if (channel?.type === DiscordChannelType.GuildVoice) { + return channel as BaseGuildVoiceChannel; + } + } catch (e) { + // ID not found, continue to name search + } + } + + // Precompute normalized identifier values to avoid repeated toLowerCase() calls + // and to ensure we don't call toLowerCase() on undefined inside the find predicate + const normalizedIdentifier = identifier.toLowerCase(); + const strippedIdentifier = normalizedIdentifier.replace(/[^a-z0-9 ]/g, ''); + + // Search in the current server if available (using channelId or messageServerId fallback) + const guild = await getGuildFromRoom(discordService, channelId, messageServerId); + if (guild) { + const channels = await guild.channels.fetch(); + + const channel = channels.find((ch) => { + // Guard against null/undefined channel or missing name property + if (!ch || typeof ch.name !== 'string') return false; + + const normalizedName = ch.name.toLowerCase(); + const strippedName = normalizedName.replace(/[^a-z0-9 ]/g, ''); + const nameMatch = normalizedName === normalizedIdentifier || strippedName === strippedIdentifier; + + return nameMatch && ch.type === DiscordChannelType.GuildVoice; + }); + + if (channel) { + return channel as BaseGuildVoiceChannel; + } + } + + // Search in all guilds the bot is in + const guilds = 
Array.from(discordService.client.guilds.cache.values()); + for (const guild of guilds) { + try { + const channels = await guild.channels.fetch(); + const channel = channels.find((ch) => { + // Guard against null/undefined channel or missing name property + if (!ch || typeof ch.name !== 'string') return false; + + const normalizedName = ch.name.toLowerCase(); + const strippedName = normalizedName.replace(/[^a-z0-9 ]/g, ''); + const nameMatch = normalizedName === normalizedIdentifier || strippedName === strippedIdentifier; + + return nameMatch && ch.type === DiscordChannelType.GuildVoice; + }); + + if (channel) { + return channel as BaseGuildVoiceChannel; + } + } catch (e) { + // Continue searching in other guilds + } + } + + return null; + } catch (error) { + runtime.logger.error({ error: error instanceof Error ? error.message : String(error) }, 'Error finding voice channel'); + return null; + } +}; + +export const setVoiceChannelStatus: Action = { + name: 'SET_VOICE_CHANNEL_STATUS', + similes: [ + 'UPDATE_VOICE_STATUS', + 'SET_VC_STATUS', + 'CHANGE_VOICE_STATUS', + 'UPDATE_VOICE_CHANNEL_STATUS', + 'SET_VOICE_MESSAGE', + 'CLEAR_VOICE_STATUS', + ], + description: + 'Set or clear the status message for a Discord voice channel. The status appears at the top of the voice channel.', + validate: async (_runtime: IAgentRuntime, message: Memory, _state: State) => { + if (message.content.source !== 'discord') { + return false; + } + return true; + }, + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State, + _options: any, + callback: HandlerCallback + ) => { + const discordService = runtime.getService(DISCORD_SERVICE_NAME) as DiscordService; + + if (!discordService || !discordService.client) { + runtime.logger.error('Discord service not found or not initialized'); + await callback({ + text: 'Discord service is not initialized. 
Please try again in a moment.', + source: 'discord', + }); + return; + } + + const statusInfo = await getVoiceChannelStatusInfo(runtime, message, state); + if (!statusInfo) { + runtime.logger.error("Couldn't parse voice channel status information from message"); + await callback({ + text: "I couldn't understand which voice channel and what status you want to set. Please specify the channel and status message.", + source: 'discord', + }); + return; + } + + try { + const room = state.data?.room || (await runtime.getRoom(message.roomId)); + const channelId = room?.channelId; + const messageServerId = (room as any)?.messageServerId; + + // Find the voice channel + const voiceChannel = await findVoiceChannel( + runtime, + discordService, + statusInfo.channelIdentifier, + channelId, + messageServerId + ); + + if (!voiceChannel) { + await callback({ + text: `I couldn't find a voice channel with the identifier "${statusInfo.channelIdentifier}". Please make sure the channel name or ID is correct and I have access to it.`, + source: 'discord', + }); + return; + } + + // Set the voice channel status + const success = await discordService.setVoiceChannelStatus( + voiceChannel.id, + statusInfo.statusMessage + ); + + if (success) { + const responseText = statusInfo.statusMessage + ? 
`I've set the status for ${voiceChannel.name} to: "${statusInfo.statusMessage}"` + : `I've cleared the status for ${voiceChannel.name}.`; + + await runtime.createMemory( + { + entityId: message.entityId, + agentId: message.agentId, + roomId: message.roomId, + content: { + source: 'discord', + thought: `I updated the voice channel status for ${voiceChannel.name}`, + actions: ['SET_VOICE_CHANNEL_STATUS_COMPLETED'], + }, + metadata: { + type: 'SET_VOICE_CHANNEL_STATUS', + channelId: voiceChannel.id, + channelName: voiceChannel.name, + status: statusInfo.statusMessage, + }, + }, + 'messages' + ); + + const response: Content = { + text: responseText, + actions: ['SET_VOICE_CHANNEL_STATUS_RESPONSE'], + source: message.content.source, + }; + + await callback(response); + } else { + await callback({ + text: `I couldn't set the status for ${voiceChannel.name}. Please make sure I have the necessary permissions.`, + source: 'discord', + }); + } + } catch (error) { + runtime.logger.error({ error: error instanceof Error ? error.message : String(error) }, 'Error setting voice channel status'); + await callback({ + text: 'I encountered an error while trying to set the voice channel status. 
Please make sure I have the necessary permissions.', + source: 'discord', + }); + } + }, + examples: [ + [ + { + name: '{{name1}}', + content: { + text: 'Set the voice channel status to "Weekly team meeting"', + }, + }, + { + name: '{{name2}}', + content: { + text: 'I\'ll set the voice channel status to "Weekly team meeting".', + actions: ['SET_VOICE_CHANNEL_STATUS'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Update the status in general-voice to "Study session"', + }, + }, + { + name: '{{name2}}', + content: { + text: 'I\'ll update the voice channel status to "Study session".', + actions: ['SET_VOICE_CHANNEL_STATUS'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Clear the voice channel status', + }, + }, + { + name: '{{name2}}', + content: { + text: "I'll clear the voice channel status.", + actions: ['SET_VOICE_CHANNEL_STATUS'], + }, + }, + ], + [ + { + name: '{{name1}}', + content: { + text: 'Set vc status to "Gaming night 🎮"', + }, + }, + { + name: '{{name2}}', + content: { + text: 'I\'ll set the voice channel status to "Gaming night 🎮".', + actions: ['SET_VOICE_CHANNEL_STATUS'], + }, + }, + ], + ] as ActionExample[][], +} as Action; + +export default setVoiceChannelStatus; + diff --git a/src/attachments.ts b/src/attachments.ts index 25a8f8c..73f3364 100644 --- a/src/attachments.ts +++ b/src/attachments.ts @@ -13,6 +13,13 @@ export class AttachmentManager { private attachmentCache: Map = new Map(); private runtime: IAgentRuntime; + /** + * Get a human-readable identifier for logging (character name or agentId fallback) + */ + private get agentIdentifier(): string { + return this.runtime?.character?.name || this.runtime.agentId; + } + /** * Constructor for creating a new instance of the class. * @@ -102,6 +109,7 @@ export class AttachmentManager { let audioMimeType: string; if (attachment.contentType?.startsWith('audio/')) { + // 'audio/wav' default? 
audioBuffer = Buffer.from(audioVideoArrayBuffer); audioFileName = attachment.name || 'audio.mp3'; audioMimeType = attachment.contentType; @@ -123,7 +131,7 @@ export class AttachmentManager { const transcriptionLength = transcription?.length || 0; this.runtime.logger.debug({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, contentType: attachment.contentType, transcriptionLength @@ -136,7 +144,7 @@ export class AttachmentManager { if (!transcription || transcriptionLength === 0) { this.runtime.logger.debug({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id }, 'Transcription is empty, skipping summarization'); title = undefined; @@ -145,7 +153,7 @@ export class AttachmentManager { // Short transcriptions don't benefit from summarization this.runtime.logger.debug({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, transcriptionLength }, 'Transcription is short, skipping summarization'); @@ -155,7 +163,7 @@ export class AttachmentManager { // Transcription is long enough to benefit from summarization this.runtime.logger.debug({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, transcriptionLength }, 'Summarizing transcription'); @@ -176,7 +184,7 @@ export class AttachmentManager { } catch (error) { this.runtime.logger.error({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, contentType: attachment.contentType, error: error instanceof Error ? 
error.message : String(error), @@ -235,7 +243,7 @@ export class AttachmentManager { this.runtime.logger.debug({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, tempMP4File, tempAudioFile, }, 'Extracting audio from MP4'); @@ -261,7 +269,7 @@ export class AttachmentManager { this.runtime.logger.debug({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, audioDataSize: audioData.length, }, 'Successfully extracted audio from MP4'); @@ -278,7 +286,7 @@ export class AttachmentManager { } catch (cleanupError) { this.runtime.logger.warn({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError), }, 'Failed to cleanup temp files'); } @@ -305,7 +313,7 @@ export class AttachmentManager { throw new Error('PDF service not found'); } const text = await pdfService.convertPdfToText(Buffer.from(pdfBuffer)); - this.runtime.logger.debug({ src: 'plugin:discord', agentId: this.runtime.agentId, attachmentId: attachment.id, textLength: text?.length }, 'Summarizing PDF content'); + this.runtime.logger.debug({ src: 'plugin:discord', agentId: this.agentIdentifier, attachmentId: attachment.id, textLength: text?.length }, 'Summarizing PDF content'); const { title, description } = await generateSummary(this.runtime, text); return { @@ -319,7 +327,7 @@ export class AttachmentManager { } catch (error) { this.runtime.logger.error({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, contentType: attachment.contentType, error: error instanceof Error ? 
error.message : String(error), @@ -345,7 +353,7 @@ export class AttachmentManager { try { const response = await fetch(attachment.url); const text = await response.text(); - this.runtime.logger.debug({ src: 'plugin:discord', agentId: this.runtime.agentId, attachmentId: attachment.id, textLength: text?.length }, 'Summarizing plaintext content'); + this.runtime.logger.debug({ src: 'plugin:discord', agentId: this.agentIdentifier, attachmentId: attachment.id, textLength: text?.length }, 'Summarizing plaintext content'); const { title, description } = await generateSummary(this.runtime, text); return { @@ -359,7 +367,7 @@ export class AttachmentManager { } catch (error) { this.runtime.logger.error({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, contentType: attachment.contentType, error: error instanceof Error ? error.message : String(error), @@ -401,7 +409,7 @@ export class AttachmentManager { } catch (error) { this.runtime.logger.error({ src: 'plugin:discord', - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, attachmentId: attachment.id, contentType: attachment.contentType, error: error instanceof Error ? error.message : String(error), diff --git a/src/audioChannels.ts b/src/audioChannels.ts new file mode 100644 index 0000000..561f78e --- /dev/null +++ b/src/audioChannels.ts @@ -0,0 +1,179 @@ +/** + * Audio Channel System for Discord Voice + * + * WHY CHANNELS: + * Discord can only play one audio stream at a time per voice connection. + * The channel system provides logical separation and priority-based mixing: + * - TTS can interrupt/duck music + * - Music plays when TTS is silent + * - SFX can overlay without interrupting + * - Ambient provides background layer + * + * PRIORITY SYSTEM: + * Higher priority channels can interrupt or duck lower priority channels. + * When a high-priority channel finishes, lower priority channels resume. + * + * Example flow: + * 1. 
Music playing on CHANNEL_MUSIC (priority 50) + * 2. TTS starts on CHANNEL_TTS (priority 100) → music ducks to 20% + * 3. TTS finishes → music ramps back to 100% + * + * EXTENSIBILITY: + * Plugins can register custom channels (4, 5, 6...) for special use cases + * like mixing multiple music tracks or custom audio layers. + */ + +import type { AudioChannelConfig } from './voice'; + +// ============================================================================ +// PREDEFINED AUDIO CHANNELS +// ============================================================================ + +/** + * Channel 0: Text-to-Speech + * + * WHY HIGHEST PRIORITY (100): + * When the bot speaks, users need to hear it clearly. TTS interrupts or ducks + * everything else because spoken responses are time-sensitive communication. + * + * WHY NOT PAUSABLE: + * TTS utterances should complete or be cancelled, not paused mid-sentence. + * + * WHY NOT INTERRUPTIBLE: + * TTS is already the highest priority - nothing should interrupt it. + */ +export const CHANNEL_TTS = 0; + +/** + * Channel 1: Music Playback + * + * WHY PRIORITY 50 (MEDIUM): + * Music is the primary audio content but should yield to TTS/announcements. + * Lower than TTS so DJ commentary can duck the music. + * + * WHY PAUSABLE: + * Music can be paused/resumed - this enables skip, pause/play controls. + * + * WHY INTERRUPTIBLE: + * Higher priority channels (TTS) should be able to duck/interrupt music. + * + * DUCK VOLUME (0.2): + * When ducked, music plays at 20% volume - audible but not overwhelming. + */ +export const CHANNEL_MUSIC = 1; + +/** + * Channel 2: Sound Effects + * + * WHY PRIORITY 30: + * SFX are short bursts that should layer with music but yield to speech. + * Lower than music so they don't interrupt the main audio experience. + * + * WHY NOT PAUSABLE: + * SFX are typically short and should complete naturally. + * + * WHY INTERRUPTIBLE: + * Both TTS and music can take precedence over SFX. 
+ */ +export const CHANNEL_SFX = 2; + +/** + * Channel 3: Ambient Background + * + * WHY LOWEST PRIORITY (20): + * Ambient sounds are background atmosphere - they should never compete + * with speech, music, or sound effects. + * + * WHY INTERRUPTIBLE: + * Everything takes priority over ambient sounds. + * + * DUCK VOLUME (0.1): + * When ducked, ambient plays at 10% - barely audible background. + */ +export const CHANNEL_AMBIENT = 3; + +// ============================================================================ +// DEFAULT CHANNEL CONFIGURATIONS +// ============================================================================ + +/** + * Default configurations for the predefined channels. + * + * WHY EXPORT CONFIGS: + * Allows plugins to understand channel behaviors without accessing VoiceManager. + * Useful for deciding which channel to use based on audio type. + */ +export const DEFAULT_CHANNEL_CONFIGS: Record = { + [CHANNEL_TTS]: { + channel: CHANNEL_TTS, + priority: 100, + canPause: false, + interruptible: false, + volume: 1.0, + }, + [CHANNEL_MUSIC]: { + channel: CHANNEL_MUSIC, + priority: 50, + canPause: true, + interruptible: true, + volume: 1.0, + duckVolume: 0.2, + }, + [CHANNEL_SFX]: { + channel: CHANNEL_SFX, + priority: 30, + canPause: false, + interruptible: true, + volume: 1.0, + }, + [CHANNEL_AMBIENT]: { + channel: CHANNEL_AMBIENT, + priority: 20, + canPause: false, + interruptible: true, + volume: 0.5, + duckVolume: 0.1, + }, +}; + +// ============================================================================ +// HELPER FUNCTIONS +// ============================================================================ + +/** + * Get a human-readable name for a channel number. + * Useful for logging and debugging. 
+ */ +export function getChannelName(channel: number): string { + switch (channel) { + case CHANNEL_TTS: + return 'TTS'; + case CHANNEL_MUSIC: + return 'Music'; + case CHANNEL_SFX: + return 'SFX'; + case CHANNEL_AMBIENT: + return 'Ambient'; + default: + return `Custom-${channel}`; + } +} + +/** + * Check if a channel can interrupt another channel based on priority. + */ +export function canInterrupt( + newChannel: number, + existingChannel: number, + configs: Record = DEFAULT_CHANNEL_CONFIGS +): boolean { + const newConfig = configs[newChannel]; + const existingConfig = configs[existingChannel]; + + if (!newConfig || !existingConfig) { + return false; + } + + return newConfig.priority > existingConfig.priority && existingConfig.interruptible; +} + diff --git a/src/clientRegistry.ts b/src/clientRegistry.ts new file mode 100644 index 0000000..bc12cb3 --- /dev/null +++ b/src/clientRegistry.ts @@ -0,0 +1,478 @@ +import { Client, GatewayIntentBits, Partials } from 'discord.js'; +import { logger, type IAgentRuntime, EventType, createUniqueUuid, type World, Role } from '@elizaos/core'; +import type { DiscordBotConfig } from './types'; +import { VoiceManager } from './voice'; +import type { DiscordService } from './service'; +import { DiscordEventTypes } from './types'; + +/** + * Information about a registered Discord bot client + */ +export interface BotClientInfo { + client: Client; + voiceManager: VoiceManager; + config: DiscordBotConfig; + botId?: string; // Set after login + username?: string; // Set after login +} + +/** + * Validates Discord bot token format + * @param token The Discord bot token to validate + * @returns True if token appears to be valid format, false otherwise + */ +function validateDiscordToken(token: string): { valid: boolean; error?: string } { + if (!token) { + return { valid: false, error: 'Token is empty or undefined' }; + } + + const trimmedToken = token.trim(); + + if (trimmedToken === '') { + return { valid: false, error: 'Token is empty 
after trimming whitespace' }; + } + + if (trimmedToken === 'undefined' || trimmedToken === 'null') { + return { valid: false, error: 'Token is literally "undefined" or "null" string' }; + } + + if (trimmedToken.length < 50) { + return { valid: false, error: `Token is too short (${trimmedToken.length} characters). Discord tokens are typically 70+ characters` }; + } + + // Discord tokens typically have the format: base64.timestamp.signature (contains dots) + if (!trimmedToken.includes('.')) { + return { valid: false, error: 'Token does not contain expected dot separators. Discord tokens typically have format: base64.timestamp.signature' }; + } + + const parts = trimmedToken.split('.'); + if (parts.length < 3) { + return { valid: false, error: `Token has ${parts.length} parts, expected at least 3 (base64.timestamp.signature)` }; + } + + // Check if parts are non-empty + if (parts.some(part => part.trim() === '')) { + return { valid: false, error: 'Token contains empty parts between dots' }; + } + + return { valid: true }; +} + +/** + * Manages multiple Discord bot clients for multi-room voice support + */ +export class DiscordClientRegistry { + private clients: Map = new Map(); + private runtime: IAgentRuntime; + private service: DiscordService; + private loginPromises: Map> = new Map(); + + constructor(runtime: IAgentRuntime, service: DiscordService) { + this.runtime = runtime; + this.service = service; + } + + /** + * Parse bot tokens from environment and create clients + */ + async initializeFromEnv(): Promise { + const tokensStr = this.runtime.getSetting('DISCORD_BOT_TOKENS') as string; + const aliasesStr = this.runtime.getSetting('DISCORD_BOT_ALIASES') as string; + + if (!tokensStr) { + // Fall back to single token for backward compatibility + // Note: DISCORD_APPLICATION_ID is NOT a valid token - it's the OAuth2 client/application ID + // used for invite URL generation, not for bot authentication + const singleToken = this.runtime.getSetting('DISCORD_API_TOKEN') 
as string; + + if (singleToken) { + const validation = validateDiscordToken(singleToken); + if (!validation.valid) { + logger.error(`[ClientRegistry] Invalid Discord token for 'default' bot: ${validation.error}`); + logger.error('[ClientRegistry] Please check your DISCORD_API_TOKEN environment variable'); + logger.error('[ClientRegistry] Discord tokens should be in format: base64.timestamp.signature'); + logger.error('[ClientRegistry] Note: DISCORD_APPLICATION_ID is your app\'s client ID, not a bot token'); + throw new Error(`Invalid Discord token: ${validation.error}`); + } + await this.registerBot({ token: singleToken, alias: 'default' }); + } else { + logger.warn('[ClientRegistry] No Discord bot tokens configured'); + logger.warn('[ClientRegistry] Please set DISCORD_BOT_TOKENS or DISCORD_API_TOKEN in your environment'); + logger.warn('[ClientRegistry] Note: DISCORD_APPLICATION_ID is your app\'s client ID, not a bot token'); + } + return; + } + + const tokens = tokensStr.split(',').map(t => t.trim()).filter(Boolean); + const aliases = aliasesStr ? 
aliasesStr.split(',').map(a => a.trim()).filter(Boolean) : []; + + if (tokens.length === 0) { + logger.warn('[ClientRegistry] DISCORD_BOT_TOKENS is set but contains no valid tokens'); + return; + } + + logger.log(`[ClientRegistry] Initializing ${tokens.length} Discord bot(s)`); + + for (let i = 0; i < tokens.length; i++) { + const alias = aliases[i] || `bot-${i}`; + const validation = validateDiscordToken(tokens[i]); + + if (!validation.valid) { + logger.error(`[ClientRegistry] Invalid Discord token for '${alias}': ${validation.error}`); + logger.error(`[ClientRegistry] Token index: ${i}, Alias: ${alias}`); + logger.error('[ClientRegistry] Please check your DISCORD_BOT_TOKENS environment variable'); + logger.error('[ClientRegistry] Discord tokens should be in format: base64.timestamp.signature'); + throw new Error(`Invalid Discord token for '${alias}': ${validation.error}`); + } + + const config: DiscordBotConfig = { + token: tokens[i], + alias: alias, + }; + await this.registerBot(config); + } + } + + /** + * Register and login a new Discord bot + */ + async registerBot(config: DiscordBotConfig): Promise { + const tempId = config.alias || `bot-${this.clients.size}`; + + // Validate token before attempting to register + const validation = validateDiscordToken(config.token); + if (!validation.valid) { + logger.error(`[ClientRegistry] Cannot register bot '${tempId}': ${validation.error}`); + throw new Error(`Invalid Discord token for '${tempId}': ${validation.error}`); + } + + logger.log(`[ClientRegistry] Registering bot: ${tempId}`); + + const client = new Client({ + intents: [ + GatewayIntentBits.Guilds, + GatewayIntentBits.GuildMessages, + GatewayIntentBits.GuildVoiceStates, + GatewayIntentBits.MessageContent, + GatewayIntentBits.DirectMessages, + ], + // Partials are required for DM messages - without them, DM channels aren't cached + // and messageCreate events won't fire for DMs + partials: [Partials.Channel, Partials.Message, Partials.User], + }); + + const 
voiceManager = new VoiceManager(this.service, this.runtime); + + const clientInfo: BotClientInfo = { + client, + voiceManager, + config, + }; + + // Store temporarily with alias + this.clients.set(tempId, clientInfo); + + // Login and update with real bot ID + const loginPromise = this.loginBot(tempId, config.token); + this.loginPromises.set(tempId, loginPromise); + + try { + await loginPromise; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.error(`[ClientRegistry] Failed to login bot '${tempId}': ${errorMessage}`); + + // Provide helpful troubleshooting information + if (errorMessage.includes('TokenInvalid') || errorMessage.includes('token')) { + logger.error('[ClientRegistry] Token validation passed but Discord rejected it. Common causes:'); + logger.error(' 1. Token was reset or regenerated in Discord Developer Portal'); + logger.error(' 2. Bot application was deleted'); + logger.error(' 3. Token copied incorrectly (check for extra spaces or missing characters)'); + logger.error(' 4. 
Using a user token instead of a bot token'); + logger.error('[ClientRegistry] Get a fresh token from: https://discord.com/developers/applications'); + } + + // Destroy the client to release resources (REST client, caches, WebSocket) + client.destroy(); + this.clients.delete(tempId); + this.loginPromises.delete(tempId); + throw error; + } + + return clientInfo; + } + + /** + * Login a bot and update registry with real bot ID + */ + private async loginBot(tempId: string, token: string): Promise { + const clientInfo = this.clients.get(tempId); + if (!clientInfo) { + throw new Error(`Client ${tempId} not found`); + } + + const { client } = clientInfo; + + return new Promise((resolve, reject) => { + let settled = false; + + // Cleanup function to remove all listeners and clear timeout + const cleanup = () => { + clearTimeout(timeout); + client.off('ready', onReady); + client.off('error', onError); + }; + + const onReady = async () => { + if (settled) return; + settled = true; + cleanup(); + + if (!client.user) { + reject(new Error(`Bot ${tempId} logged in but user is null`)); + return; + } + + const botId = client.user.id; + const username = client.user.username; + + clientInfo.botId = botId; + clientInfo.username = username; + + // Set the client on VoiceManager (if it wasn't available at construction) + clientInfo.voiceManager.setClient(client); + + // Set bot identification on VoiceManager + clientInfo.voiceManager.setBotIdentification(botId, clientInfo.config.alias); + + // Re-key with bot ID if different from temp ID + if (tempId !== botId) { + this.clients.delete(tempId); + this.clients.set(botId, clientInfo); + } + + logger.log(`[ClientRegistry] Bot logged in: ${username} (${botId})`); + + // Emit ready event for voice manager + client.emit('voiceManagerReady'); + + // Emit WORLD_CONNECTED events for all guilds + this.emitWorldConnectedEvents(client).catch(error => { + logger.error(`[ClientRegistry] Error emitting WORLD_CONNECTED events: ${error}`); + }); + + 
resolve(); + }; + + const onError = (error: Error) => { + logger.error(`[ClientRegistry] Bot ${tempId} error: ${error}`); + }; + + const timeout = setTimeout(() => { + if (settled) return; + settled = true; + cleanup(); + reject(new Error(`Bot ${tempId} login timeout`)); + }, 30000); + + client.once('ready', onReady); + client.on('error', onError); + + client.login(token).catch((error) => { + if (settled) return; + settled = true; + cleanup(); + reject(error); + }); + }); + } + + /** + * Get a client by bot ID or alias + */ + getClient(idOrAlias: string): BotClientInfo | undefined { + // Try direct lookup + let info = this.clients.get(idOrAlias); + if (info) return info; + + // Try alias lookup + for (const [_, clientInfo] of this.clients) { + if (clientInfo.config.alias === idOrAlias) { + return clientInfo; + } + } + + return undefined; + } + + /** + * Get all registered clients + */ + getAllClients(): BotClientInfo[] { + return Array.from(this.clients.values()); + } + + /** + * Get client by guild ID (finds first bot connected to that guild) + */ + getClientForGuild(guildId: string): BotClientInfo | undefined { + for (const clientInfo of this.clients.values()) { + if (clientInfo.client.guilds.cache.has(guildId)) { + return clientInfo; + } + } + return undefined; + } + + /** + * Remove a bot from the registry + */ + async removeBot(idOrAlias: string): Promise { + const clientInfo = this.getClient(idOrAlias); + if (!clientInfo) { + logger.warn(`[ClientRegistry] Bot ${idOrAlias} not found for removal`); + return; + } + + const botId = clientInfo.botId || idOrAlias; + + logger.log(`[ClientRegistry] Removing bot: ${botId}`); + + // Cleanup + clientInfo.client.destroy(); + this.clients.delete(botId); + if (clientInfo.config.alias) { + this.clients.delete(clientInfo.config.alias); + } + } + + /** + * Get the primary/default client (for backward compatibility) + */ + getPrimaryClient(): BotClientInfo | undefined { + // Return the first client, or one marked as default + 
const defaultClient = this.getClient('default'); + if (defaultClient) return defaultClient; + + // Return first available + const all = this.getAllClients(); + return all.length > 0 ? all[0] : undefined; + } + + /** + * Destroy all clients + */ + async destroyAll(): Promise { + logger.log('[ClientRegistry] Destroying all bot clients'); + + // First, wait for any in-flight logins to complete or fail + // This prevents race conditions where a login callback adds a client + // back to the map after we've cleared it + const pendingLogins = Array.from(this.loginPromises.values()); + if (pendingLogins.length > 0) { + logger.debug(`[ClientRegistry] Waiting for ${pendingLogins.length} pending login(s) to complete`); + await Promise.allSettled(pendingLogins); + } + + // Now destroy all clients + const destroyPromises = Array.from(this.clients.keys()).map(id => + this.removeBot(id) + ); + + await Promise.all(destroyPromises); + this.clients.clear(); + this.loginPromises.clear(); + } + + /** + * Check if any clients are registered + */ + hasClients(): boolean { + return this.clients.size > 0; + } + + /** + * Get count of registered clients + */ + getClientCount(): number { + return this.clients.size; + } + + /** + * Emit WORLD_CONNECTED events for all guilds the client is connected to + * This implements the logic that was previously in the deprecated onReady() method + */ + private async emitWorldConnectedEvents(client: Client): Promise { + try { + const guilds = await client.guilds.fetch(); + if (!guilds) { + logger.warn('[ClientRegistry] Could not fetch guilds for WORLD_CONNECTED events'); + return; + } + + logger.log(`[ClientRegistry] Emitting WORLD_CONNECTED events for ${guilds.size} guild(s)`); + + for (const [, guild] of guilds) { + try { + const fullGuild = await guild.fetch(); + + logger.log(`[ClientRegistry] DISCORD SERVER CONNECTED: ${fullGuild.name}`); + + // Emit Discord-specific event with full guild object + 
this.runtime.emitEvent([DiscordEventTypes.WORLD_CONNECTED], { + runtime: this.runtime, + server: fullGuild, + source: 'discord', + }); + + // Create platform-agnostic world data structure + const worldId = createUniqueUuid(this.runtime, fullGuild.id); + const ownerId = createUniqueUuid(this.runtime, fullGuild.ownerId); + + const standardizedData = { + name: fullGuild.name, + runtime: this.runtime, + rooms: await this.service.buildStandardizedRooms(fullGuild, worldId), + entities: [], // Entities will be discovered lazily when users send messages + world: { + id: worldId, + name: fullGuild.name, + agentId: this.runtime.agentId, + serverId: fullGuild.id, + metadata: { + ownership: fullGuild.ownerId ? { ownerId } : undefined, + roles: { + [ownerId]: Role.OWNER, + }, + }, + } as World, + source: 'discord', + }; + + // Emit standardized WORLD_CONNECTED event immediately + this.runtime.emitEvent([EventType.WORLD_CONNECTED], standardizedData); + + logger.log(`[ClientRegistry] Emitted WORLD_CONNECTED for ${fullGuild.name} with ${standardizedData.rooms.length} rooms`); + + // For large guilds, skip user pre-population - users are discovered when they interact + // For small guilds, optionally fetch users in background without blocking + if (fullGuild.memberCount <= 1000) { + // Small guild - fetch users in background (non-blocking) + this.service.buildStandardizedUsers(fullGuild).then(entities => { + logger.debug(`[ClientRegistry] Background user sync completed for ${fullGuild.name}: ${entities.length} users`); + }).catch(error => { + logger.debug(`[ClientRegistry] Background user sync failed for ${fullGuild.name}: ${error instanceof Error ? 
error.message : String(error)}`); + }); + } else { + logger.info(`[ClientRegistry] Skipping user pre-fetch for large guild ${fullGuild.name} (${fullGuild.memberCount.toLocaleString()} members) - users will be discovered organically`); + } + } catch (error) { + logger.error(`[ClientRegistry] Error emitting WORLD_CONNECTED for guild: ${error instanceof Error ? error.message : String(error)}`); + } + } + } catch (error) { + logger.error(`[ClientRegistry] Error in emitWorldConnectedEvents: ${error instanceof Error ? error.message : String(error)}`); + } + } +} + diff --git a/src/compat.ts b/src/compat.ts index ffbbaf1..dd92e28 100644 --- a/src/compat.ts +++ b/src/compat.ts @@ -91,8 +91,11 @@ export function createCompatRuntime(runtime: IAgentRuntime): ICompatRuntime { ); } - return value; - }, - }); + // IMPORTANT: Bind all functions to target to preserve private field access. + // Without this, methods using private fields (e.g., #conversationLength) + // will fail because `this` would refer to the Proxy, not the class instance. + return value.bind(target); + }, + }); } diff --git a/src/contracts.ts b/src/contracts.ts new file mode 100644 index 0000000..0a29662 --- /dev/null +++ b/src/contracts.ts @@ -0,0 +1,69 @@ +import { EventEmitter } from 'events'; +import type { Readable } from 'node:stream'; + +/** + * Audio Sink Contracts + * + * WHY THIS EXISTS: + * Discord voice connections are complex (network hiccups, reconnections, state changes). + * The sink abstraction lets music-player treat Discord as "just another audio output" + * without knowing about Discord internals. + * + * WHY STATUS EVENTS: + * When Discord reconnects, something needs to know so it can resubscribe to the + * broadcast. The sink emits status changes, and MusicService listens to auto-reconnect. + * This keeps Discord plugin unaware of broadcast architecture. + * + * WHY FEED() NOT PLAY(): + * "Feed" suggests the sink is passive - you give it audio and it consumes it. 
+ * The sink handles its own playback details internally. Music-player doesn't care + * about Discord's AudioPlayer, channels, or connection states. + */ + +/** + * Audio sink connection status + */ +export type AudioSinkStatus = 'connected' | 'disconnected' | 'reconnecting'; + +/** + * Audio sink interface - represents a destination for audio streams. + * + * Implementations must: + * - Handle connection lifecycle independently + * - Emit status changes for orchestration layers + * - Accept new streams when reconnecting + * - Clean up resources on stop() + */ +export interface IAudioSink extends EventEmitter { + /** Unique identifier for this sink */ + readonly id: string; + + /** Current connection status */ + readonly status: AudioSinkStatus; + + /** + * Feed an audio stream into this sink + * @param stream Audio stream to play + * @returns Promise that resolves when playback starts + */ + feed(stream: Readable): Promise; + + /** + * Stop current audio playback + * @returns Promise that resolves when stopped + */ + stop(): Promise; + + /** + * Get human-readable description of this sink + */ + getDescription(): string; + + // Event emitters (typed via EventEmitter) + on(event: 'statusChange', listener: (status: AudioSinkStatus) => void): this; + on(event: 'error', listener: (error: Error) => void): this; + + emit(event: 'statusChange', status: AudioSinkStatus): boolean; + emit(event: 'error', error: Error): boolean; +} + diff --git a/src/environment.ts b/src/environment.ts index 34fc359..3e7ffe7 100644 --- a/src/environment.ts +++ b/src/environment.ts @@ -18,6 +18,16 @@ function getEnvArray(name: string, fallback: string[]): string[] { return value.split(',').map(item => item.trim()).filter(item => item.length > 0); } +/** + * Helper function to parse number from environment variable + */ +function getEnvNumber(name: string, fallback: number): number { + const value = process.env?.[name]; + if (!value) return fallback; + const parsed = parseFloat(value); + 
return isNaN(parsed) ? fallback : parsed; +} + /** * Default values that can be overridden by environment variables */ @@ -26,6 +36,11 @@ export const DISCORD_DEFAULTS = { SHOULD_IGNORE_DIRECT_MESSAGES: getEnvBoolean('DISCORD_SHOULD_IGNORE_DIRECT_MESSAGES', false), SHOULD_RESPOND_ONLY_TO_MENTIONS: getEnvBoolean('DISCORD_SHOULD_RESPOND_ONLY_TO_MENTIONS', false), ALLOWED_CHANNEL_IDS: getEnvArray('CHANNEL_IDS', []), + VOICE_DUCK_VOLUME: getEnvNumber('VOICE_DUCK_VOLUME', 0.2), + VOICE_DUCK_SILENCE_TIMEOUT: getEnvNumber('VOICE_DUCK_SILENCE_TIMEOUT', 60000), + VOICE_DUCK_RAMP_DURATION: getEnvNumber('VOICE_DUCK_RAMP_DURATION', 3000), + VOICE_SPEAKING_THRESHOLD: getEnvNumber('VOICE_SPEAKING_THRESHOLD', 0.1), + VOICE_LISTEN_ONLY: getEnvBoolean('DISCORD_VOICE_LISTEN_ONLY', false), } as const; export const discordEnvSchema = z.object({ @@ -108,6 +123,14 @@ export function getDiscordSettings(runtime: IAgentRuntime): DiscordSettings { .filter((s) => s.length > 0) ); + // Helper to parse number from string with optional fallback + // Returns the fallback value when parseFloat yields NaN, instead of defaulting to 0 + // which would incorrectly override configured defaults for settings like volume (0.2) + const parseNumber = (value: string, fallback?: number): number => { + const parsed = parseFloat(value); + return isNaN(parsed) ? (fallback ?? 0) : parsed; + }; + return { ...characterSettings, shouldIgnoreBotMessages: resolveSetting( @@ -134,6 +157,41 @@ export function getDiscordSettings(runtime: IAgentRuntime): DiscordSettings { // Collapse empty allow-lists back to undefined to keep default open behavior allowedChannelIds: resolvedAllowedChannelIds.length > 0 ? 
resolvedAllowedChannelIds : undefined, + + voiceDuckVolume: resolveSetting( + 'VOICE_DUCK_VOLUME', + characterSettings.voiceDuckVolume, + DISCORD_DEFAULTS.VOICE_DUCK_VOLUME, + (v) => parseNumber(v, DISCORD_DEFAULTS.VOICE_DUCK_VOLUME) + ), + + voiceDuckSilenceTimeout: resolveSetting( + 'VOICE_DUCK_SILENCE_TIMEOUT', + characterSettings.voiceDuckSilenceTimeout, + DISCORD_DEFAULTS.VOICE_DUCK_SILENCE_TIMEOUT, + (v) => parseNumber(v, DISCORD_DEFAULTS.VOICE_DUCK_SILENCE_TIMEOUT) + ), + + voiceDuckRampDuration: resolveSetting( + 'VOICE_DUCK_RAMP_DURATION', + characterSettings.voiceDuckRampDuration, + DISCORD_DEFAULTS.VOICE_DUCK_RAMP_DURATION, + (v) => parseNumber(v, DISCORD_DEFAULTS.VOICE_DUCK_RAMP_DURATION) + ), + + voiceSpeakingThreshold: resolveSetting( + 'VOICE_SPEAKING_THRESHOLD', + characterSettings.voiceSpeakingThreshold, + DISCORD_DEFAULTS.VOICE_SPEAKING_THRESHOLD, + (v) => parseNumber(v, DISCORD_DEFAULTS.VOICE_SPEAKING_THRESHOLD) + ), + + voiceListenOnly: resolveSetting( + 'DISCORD_VOICE_LISTEN_ONLY', + characterSettings.voiceListenOnly, + DISCORD_DEFAULTS.VOICE_LISTEN_ONLY, + parseBooleanFromText + ), }; } @@ -141,14 +199,51 @@ export function getDiscordSettings(runtime: IAgentRuntime): DiscordSettings { * Validates the Discord configuration by retrieving the Discord API token from the runtime settings * and parsing it with the Discord environment schema. * + * Token lookup priority: + * - DISCORD_API_TOKEN (primary, recommended) + * - DISCORD_BOT_TOKENS (for multi-bot setups) + * + * Note: DISCORD_APPLICATION_ID is NOT a valid token - it's the application/client ID + * used for OAuth2 flows and invite URL generation, not for bot authentication. + * * @param {IAgentRuntime} runtime The agent runtime instance. * @returns {Promise} A promise that resolves with the validated Discord configuration. - * @throws {Error} If the Discord configuration validation fails, an error with detailed error messages is thrown. 
+ * @throws {Error} If the Discord configuration validation fails or no token is found. */ export async function validateDiscordConfig(runtime: IAgentRuntime): Promise { try { + // Look for bot token in supported environment variables + // Note: DISCORD_APPLICATION_ID is intentionally NOT included here - it's the + // application/client ID (numeric), not a bot token. Using it as a token would + // cause authentication failures. Application ID is only needed for invite URLs. + let token = runtime.getSetting('DISCORD_API_TOKEN') as string | undefined; + + // Fall back to DISCORD_BOT_TOKENS if DISCORD_API_TOKEN not set + // DISCORD_BOT_TOKENS may contain comma-separated tokens for multi-bot setups + // Extract just the first token for single-client validation + if (!token || token.trim() === '') { + const botTokens = runtime.getSetting('DISCORD_BOT_TOKENS') as string | undefined; + if (botTokens && botTokens.trim()) { + // Extract first token from comma-separated list + // Multi-bot setup is handled by ClientRegistry, not here + const firstToken = botTokens.split(',')[0]?.trim(); + if (firstToken) { + token = firstToken; + } + } + } + + // Validate token exists before proceeding + if (!token || token.trim() === '') { + throw new Error( + 'Discord bot token not found. Please set DISCORD_API_TOKEN in your environment or character settings.\n' + + 'You can get a bot token from the Discord Developer Portal: https://discord.com/developers/applications\n' + + 'Note: DISCORD_APPLICATION_ID is your application\'s client ID, not a bot token.' 
+ ); + } + const config = { - DISCORD_API_TOKEN: runtime.getSetting('DISCORD_API_TOKEN'), + DISCORD_API_TOKEN: token, CHANNEL_IDS: runtime.getSetting('CHANNEL_IDS'), DISCORD_SHOULD_IGNORE_BOT_MESSAGES: runtime.getSetting('DISCORD_SHOULD_IGNORE_BOT_MESSAGES'), DISCORD_SHOULD_IGNORE_DIRECT_MESSAGES: runtime.getSetting('DISCORD_SHOULD_IGNORE_DIRECT_MESSAGES'), diff --git a/src/index.ts b/src/index.ts index 385dd36..24b41d7 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,5 @@ -import { type IAgentRuntime, type Plugin, logger } from '@elizaos/core'; +import { type IAgentRuntime, type Plugin, type Media } from '@elizaos/core'; +import { isDataUrl } from './utils'; import chatWithAttachments from './actions/chatWithAttachments'; import { downloadMedia } from './actions/downloadMedia'; import joinChannel from './actions/joinChannel'; @@ -15,14 +16,116 @@ import reactToMessage from './actions/reactToMessage'; import pinMessage from './actions/pinMessage'; import unpinMessage from './actions/unpinMessage'; import serverInfo from './actions/serverInfo'; +import setVoiceChannelStatus from './actions/setVoiceChannelStatus'; +import setListeningActivity from './actions/setListeningActivity'; import { channelStateProvider } from './providers/channelState'; import { voiceStateProvider } from './providers/voiceState'; +import { audioStateProvider } from './providers/audioState'; +import { agentRoleProvider } from './providers/agentRole'; +import { discordInstructionsProvider, discordSettingsProvider } from './providers/plugin-info'; import { DiscordService } from './service'; import { DiscordTestSuite } from './tests'; import { printBanner } from './banner'; import { getPermissionValues } from './permissions'; +// Export audio channel types and constants for use by other plugins +export type { AudioChannelConfig, PlaybackHandle } from './voice'; +export { + CHANNEL_TTS, + CHANNEL_MUSIC, + CHANNEL_SFX, + CHANNEL_AMBIENT, + DEFAULT_CHANNEL_CONFIGS, + getChannelName, + 
canInterrupt, +} from './audioChannels'; + +// Export progressive message helper for use by other plugins +export { ProgressiveMessage } from './progressiveMessage'; + +// Export multi-bot voice types +export type { VoiceTarget, DiscordBotConfig } from './types'; +export { VoiceConnectionManager } from './voiceConnectionManager'; +export { DiscordClientRegistry } from './clientRegistry'; + +// Export audio sink contracts +export type { IAudioSink, AudioSinkStatus } from './contracts'; +export { DiscordAudioSink } from './sinks'; + +/** + * Scrubs base64 images from existing memories to prevent context bloat. + * This fixes memories that were saved before we started filtering them. + * + * @param runtime - The agent runtime + */ +async function scrubBase64ImagesFromMemories(runtime: IAgentRuntime): Promise { + try { + runtime.logger.info({ src: 'plugin:discord' }, 'Checking for base64 images in memories...'); + + // Get recent memories that might have base64 attachments + // We check the last 1000 messages as a reasonable limit + const memories = await runtime.getMemories({ + tableName: 'messages', + count: 1000, + }); + + let scrubbed = 0; + + for (const memory of memories) { + // Skip memories without IDs (shouldn't happen, but be safe) + if (!memory.id) continue; + + const attachments = memory.content?.attachments as Media[] | undefined; + if (!attachments || attachments.length === 0) continue; + + // Check if any attachments have base64 data URLs + const hasBase64 = attachments.some((att) => att.url && isDataUrl(att.url)); + if (!hasBase64) continue; + + // Filter out base64 attachments + const filteredAttachments = attachments.filter((att) => !att.url || !isDataUrl(att.url)); + + // Count how many were removed + const removedCount = attachments.length - filteredAttachments.length; + if (removedCount > 0) { + // Add placeholder for removed images + filteredAttachments.push({ + id: 'scrubbed-data-url-images', + url: '', + title: `${removedCount} image(s) 
scrubbed`, + description: `${removedCount} base64 image(s) were removed from memory to prevent context bloat`, + }); + } + + // Update the memory with filtered attachments + await runtime.updateMemory({ + id: memory.id, + content: { + ...memory.content, + attachments: filteredAttachments.length > 0 ? filteredAttachments : undefined, + }, + }); + scrubbed++; + } + + if (scrubbed > 0) { + runtime.logger.info( + { src: 'plugin:discord', scrubbedCount: scrubbed }, + `Scrubbed base64 images from ${scrubbed} memories` + ); + } else { + runtime.logger.debug({ src: 'plugin:discord' }, 'No base64 images found in memories'); + } + } catch (error) { + // Don't fail plugin init if cleanup fails + runtime.logger.warn( + { src: 'plugin:discord', error: error instanceof Error ? error.message : String(error) }, + 'Failed to scrub base64 images from memories (non-fatal)' + ); + } +} + const discordPlugin: Plugin = { name: 'discord', description: 'Discord service plugin for integration with Discord servers and channels', @@ -44,13 +147,16 @@ const discordPlugin: Plugin = { pinMessage, unpinMessage, serverInfo, + setVoiceChannelStatus, + setListeningActivity, ], - providers: [channelStateProvider, voiceStateProvider], + providers: [channelStateProvider, voiceStateProvider, audioStateProvider, agentRoleProvider, discordInstructionsProvider, discordSettingsProvider], tests: [new DiscordTestSuite()], init: async (_config: Record, runtime: IAgentRuntime) => { // Gather ALL Discord settings + const appId = runtime.getSetting('DISCORD_APPLICATION_ID') as string; const token = runtime.getSetting('DISCORD_API_TOKEN') as string; - const applicationId = runtime.getSetting('DISCORD_APPLICATION_ID') as string; + const botTokens = runtime.getSetting('DISCORD_BOT_TOKENS') as string; const voiceChannelId = runtime.getSetting('DISCORD_VOICE_CHANNEL_ID') as string; const channelIds = runtime.getSetting('CHANNEL_IDS') as string; const listenChannelIds = runtime.getSetting('DISCORD_LISTEN_CHANNEL_IDS') 
as string; @@ -65,8 +171,8 @@ const discordPlugin: Plugin = { printBanner({ pluginName: 'plugin-discord', description: 'Discord bot integration for servers and channels', - applicationId: applicationId || undefined, - discordPermissions: applicationId ? getPermissionValues() : undefined, + applicationId: appId || undefined, + discordPermissions: appId ? getPermissionValues() : undefined, settings: [ { name: 'DISCORD_API_TOKEN', @@ -76,7 +182,12 @@ const discordPlugin: Plugin = { }, { name: 'DISCORD_APPLICATION_ID', - value: applicationId, + value: appId, + }, + { + name: 'DISCORD_BOT_TOKENS', + value: botTokens, + sensitive: true, }, { name: 'DISCORD_VOICE_CHANNEL_ID', @@ -109,14 +220,30 @@ const discordPlugin: Plugin = { runtime, }); - if (!token || token.trim() === '') { - logger.warn( - 'Discord API Token not provided - Discord plugin is loaded but will not be functional' - ); - logger.warn( - 'To enable Discord functionality, please provide DISCORD_API_TOKEN in your .eliza/.env file' - ); + // Check for valid bot token - only DISCORD_API_TOKEN and DISCORD_BOT_TOKENS are valid + // Note: DISCORD_APPLICATION_ID is the OAuth2 client/application ID (numeric), NOT a bot token + if ((!token || token.trim() === '') && (!botTokens || botTokens.trim() === '')) { + runtime.logger.warn(''); + runtime.logger.warn('═══════════════════════════════════════════════════════════════'); + runtime.logger.warn('Discord Bot Token not provided - Discord plugin will not work'); + runtime.logger.warn('═══════════════════════════════════════════════════════════════'); + runtime.logger.warn('To enable Discord functionality, add ONE of these to your .env:'); + runtime.logger.warn(' • DISCORD_API_TOKEN=your_bot_token (recommended)'); + runtime.logger.warn(' • DISCORD_BOT_TOKENS=token1,token2,... 
(multi-bot setup)'); + runtime.logger.warn(''); + runtime.logger.warn('Get your bot token from the Discord Developer Portal:'); + runtime.logger.warn(' https://discord.com/developers/applications'); + runtime.logger.warn(' Your Application → Bot → Token → Reset Token / Copy'); + runtime.logger.warn(''); + runtime.logger.warn('Note: DISCORD_APPLICATION_ID is your app\'s OAuth2 client ID'); + runtime.logger.warn(' (used for invite URLs), not a bot token for authentication.'); + runtime.logger.warn('═══════════════════════════════════════════════════════════════'); + runtime.logger.warn(''); } + + // Clean up base64 images from existing memories to prevent context bloat + // This runs once on startup to fix any memories that were saved before this fix + await scrubBase64ImagesFromMemories(runtime); }, }; diff --git a/src/messages.ts b/src/messages.ts index dd932ff..53c79fe 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -27,10 +27,12 @@ import { getDiscordSettings } from "./environment"; import { DiscordSettings, IDiscordService } from "./types"; import { canSendMessage, + createAttachmentFromMedia, extractUrls, - getAttachmentFileName, + filterAttachmentsForMemory, getMessageService, getUnifiedMessagingAPI, + editMessageContent, sendMessageInChunks, } from "./utils"; @@ -45,6 +47,18 @@ export class MessageManager { private getChannelType: (channel: Channel) => Promise; private discordSettings: DiscordSettings; private discordService: IDiscordService; + private progressiveMessages: Map = new Map(); + + /** + * Get a human-readable identifier for logging (character name or agentId fallback) + */ + private get agentIdentifier(): string { + return this.runtime?.character?.name || this.runtime.agentId; + } + /** * Constructor for a new instance of MessageManager. * @param {IDiscordService} discordService - The Discord service instance. 
/**
 * How long a tracked progressive message may go without an update before its
 * tracking entry is dropped, in milliseconds.
 *
 * The TTL exists so that an action which crashes or hangs before sending its
 * final update cannot leave an entry in `progressiveMessages` forever.
 */
private static readonly PROGRESSIVE_MESSAGE_TTL_MS = 60000;

/**
 * Arm a fresh expiry timer for a tracked progressive message.
 *
 * Single source of truth for the TTL behavior: when the timer fires, the
 * tracking entry for `key` is removed and a debug line is logged. Callers are
 * responsible for clearing any previous timer for the same key first.
 *
 * @param key Tracking key of the progressive message.
 * @returns The timer handle, to be stored alongside the tracked message.
 */
private scheduleProgressiveExpiry(key: string): ReturnType<typeof setTimeout> {
  return setTimeout(() => {
    this.progressiveMessages.delete(key);
    this.runtime.logger.debug(`Progressive message ${key} TTL expired`);
  }, MessageManager.PROGRESSIVE_MESSAGE_TTL_MS);
}

/**
 * Start tracking a Discord message so later updates carrying the same key can
 * edit it instead of sending a new message.
 *
 * If the key is already tracked, its old expiry timer is cancelled before the
 * entry is replaced, so a stale timer can never delete the new entry.
 *
 * @param key Tracking key of the progressive message.
 * @param message The Discord message that future updates should edit.
 */
private trackProgressiveMessage(key: string, message: DiscordMessage): void {
  const existing = this.progressiveMessages.get(key);
  if (existing?.timeout) {
    clearTimeout(existing.timeout);
  }

  this.progressiveMessages.set(key, {
    message,
    timeout: this.scheduleProgressiveExpiry(key),
  });
}

/**
 * Restart the TTL of an already-tracked progressive message.
 *
 * Does nothing when the key is not tracked. Used after a successful edit so a
 * long-running action that keeps sending updates does not expire mid-run.
 *
 * @param key Tracking key of the progressive message.
 */
private resetProgressiveTTL(key: string): void {
  const existing = this.progressiveMessages.get(key);
  if (!existing) return;

  clearTimeout(existing.timeout);
  existing.timeout = this.scheduleProgressiveExpiry(key);
}

/**
 * Stop tracking a progressive message: cancel its expiry timer and remove the
 * entry. Does nothing when the key is not tracked.
 *
 * @param key Tracking key of the progressive message.
 */
private cleanupProgressiveMessage(key: string): void {
  const existing = this.progressiveMessages.get(key);
  if (existing) {
    clearTimeout(existing.timeout);
    this.progressiveMessages.delete(key);
  }
}
`${message.author.username}#${message.author.discriminator}` : message.author.username; - const name = message.author.displayName; + // Use server-specific displayName (nickname) if available, fallback to global displayName + const name = message.member?.displayName || message.author.displayName; const channelId = message.channel.id; const roomId = createUniqueUuid(this.runtime, channelId); @@ -158,7 +236,7 @@ export class MessageManager { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: message.channel.id, }, "Null channel type", @@ -191,7 +269,7 @@ export class MessageManager { return this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: message.channel.id, reason: canSendResult.reason, }, @@ -246,10 +324,10 @@ export class MessageManager { // so other agents can ignore it (only the replied-to agent should respond) replyToAuthor: message.mentions.repliedUser ? { - id: message.mentions.repliedUser.id, - username: message.mentions.repliedUser.username, - isBot: message.mentions.repliedUser.bot, - } + id: message.mentions.repliedUser.id, + username: message.mentions.repliedUser.username, + isBot: message.mentions.repliedUser.bot, + } : undefined, }, }, @@ -259,7 +337,7 @@ export class MessageManager { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, messageId: message.id, }, "Failed to build memory from message", @@ -294,7 +372,7 @@ export class MessageManager { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? 
err.message : String(err), }, "Error sending typing indicator", @@ -317,6 +395,176 @@ export class MessageManager { content.inReplyTo = createUniqueUuid(this.runtime, message.id); } + // Handle progressive updates + // + // Why check metadata: Actions use ProgressiveMessage helper, which sets + // metadata.progressiveUpdate to signal this is part of a progressive chain. + // Normal actions don't set this, so they skip this logic entirely. + const progressiveUpdate = (content.metadata as any)?.progressiveUpdate; + if (progressiveUpdate?.correlationId) { + const key = `${channel.id}:${progressiveUpdate.correlationId}`; + const existing = this.progressiveMessages.get(key); + + if (existing) { + // Edit existing message + // + // Why edit instead of send new: This is the core of progressive updates. + // By editing the same message repeatedly, we show live status without + // cluttering chat history. Users see "Searching..." turn into "Found!" + // turn into "Now playing!" in a single message bubble. + const edited = await editMessageContent(existing.message, content.text || ''); + if (edited) { + this.resetProgressiveTTL(key); + + // If this is an interim update, don't create memory + // + // Why skip memory for interim: Only the final message should be saved + // to conversation history. Saving "Searching..." and "Found!" and + // "Setting up..." would bloat the database with transient status text + // that has no value after the action completes. + if (progressiveUpdate.isInterim) { + return []; + } + + // Final message - clean up tracking and create memory + // + // Why clean up: This action is done, so we don't need to track its + // message anymore. Free up the memory and clear the timeout. + this.cleanupProgressiveMessage(key); + + // Clear typing indicator for final progressive message + // + // Why clear here: Progressive updates complete, user has their answer. 
+ // Without this, typing indicator stays active indefinitely, making + // users think the bot is still processing. + if (typingData.interval && !typingData.cleared) { + clearInterval(typingData.interval); + typingData.cleared = true; + } + + const memory: Memory = { + id: createUniqueUuid(this.runtime, edited.id), + entityId: this.runtime.agentId, + agentId: this.runtime.agentId, + content: { + ...content, + // Filter out base64 attachments to prevent context bloat + attachments: filterAttachmentsForMemory(content.attachments), + actions: content.actions, + inReplyTo: messageId, + url: edited.url, + channelType: type, + }, + roomId, + createdAt: edited.createdTimestamp, + }; + await this.runtime.createMemory(memory, 'messages'); + return [memory]; + } else { + // Edit failed, fall back to sending new message + // + // Why fallback: Discord edit can fail if the message was deleted, + // the bot lost permissions, or we hit rate limits. Rather than + // failing silently, send a new message so the user still gets + // feedback. This graceful degradation prevents user-facing errors. + this.runtime.logger.warn(`Failed to edit progressive message ${key}, falling back to new message`); + this.cleanupProgressiveMessage(key); + } + } + + // First update or edit failed - send new message and track it + // + // Why send new on first: The first update has no existing message to edit, + // so we must send a new one. We then track it so subsequent updates can + // edit this message. 
+ // Convert Media attachments to Discord AttachmentBuilder format + const files: AttachmentBuilder[] = []; + if (content.attachments && content.attachments.length > 0) { + for (const media of content.attachments) { + const attachment = createAttachmentFromMedia(media); + if (attachment) { + files.push(attachment); + } + } + } + + let messages: any[] = []; + if (content?.channelType === 'DM') { + const u = await this.client.users.fetch(message.author.id); + if (!u) { + this.runtime.logger.warn('Discord - User not found', message.author.id); + return []; + } + // Send DM with both text content and file attachments + // Previously only sent text, dropping any prepared attachments + const sentMessage = await u.send({ + content: content.text ?? '', + files: files.length > 0 ? files : undefined, + }); + // Wrap in array for consistent handling with channel messages + messages = [sentMessage]; + } else { + messages = await sendMessageInChunks( + channel, + content.text ?? '', + message.id!, + files + ); + } + + // Track the first message for future edits + // + // Why track: Store this message so subsequent updates with the same + // correlation ID can edit it instead of sending new messages. + if (messages.length > 0 && messages[0].id) { + this.trackProgressiveMessage(key, messages[0]); + } + + // If interim, don't create memory + if (progressiveUpdate.isInterim) { + return []; + } + + // Final message - clean up and create memory + this.cleanupProgressiveMessage(key); + + // Clear typing indicator for final progressive message + // + // Why clear here: This path handles first-time sends that are final + // (e.g., when edit fallback occurs). Same reasoning as edit path - + // progressive updates complete, stop showing typing. 
+ if (typingData.interval && !typingData.cleared) { + clearInterval(typingData.interval); + typingData.cleared = true; + } + + const memories: Memory[] = []; + for (const m of messages) { + const memory: Memory = { + id: createUniqueUuid(this.runtime, m.id), + entityId: this.runtime.agentId, + agentId: this.runtime.agentId, + content: { + ...content, + // Filter out base64 attachments to prevent context bloat + attachments: filterAttachmentsForMemory(content.attachments), + actions: content.actions, + inReplyTo: messageId, + url: m.url, + channelType: type, + }, + roomId, + createdAt: m.createdTimestamp, + }; + memories.push(memory); + } + for (const m of memories) { + await this.runtime.createMemory(m, 'messages'); + } + return memories; + } + + // Normal (non-progressive) message flow let messages: any[] = []; if (content?.channelType === "DM") { const u = await this.client.users.fetch(message.author.id); @@ -324,7 +572,7 @@ export class MessageManager { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId: message.author.id, }, "User not found for DM", @@ -336,11 +584,9 @@ export class MessageManager { const files: AttachmentBuilder[] = []; if (content.attachments && content.attachments.length > 0) { for (const media of content.attachments) { - if (media.url) { - const fileName = getAttachmentFileName(media); - files.push( - new AttachmentBuilder(media.url, { name: fileName }), - ); + const attachment = createAttachmentFromMedia(media); + if (attachment) { + files.push(attachment); } } } @@ -349,7 +595,7 @@ export class MessageManager { const hasText = textContent.trim().length > 0; if (!hasText && files.length === 0) { this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Skipping DM response: no text or attachments", ); return []; @@ -365,11 +611,9 @@ export class MessageManager { const files: 
AttachmentBuilder[] = []; if (content.attachments && content.attachments.length > 0) { for (const media of content.attachments) { - if (media.url) { - const fileName = getAttachmentFileName(media); - files.push( - new AttachmentBuilder(media.url, { name: fileName }), - ); + const attachment = createAttachmentFromMedia(media); + if (attachment) { + files.push(attachment); } } } @@ -402,9 +646,10 @@ export class MessageManager { url: m.url, channelType: type, // Only include attachments for the message chunk that actually has them + // Filter out base64 data URLs to prevent context bloat in RECENT_MESSAGES attachments: hasAttachments && content.attachments - ? content.attachments + ? filterAttachmentsForMemory(content.attachments) : undefined, }, roomId, @@ -428,7 +673,7 @@ export class MessageManager { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling message callback", @@ -449,7 +694,7 @@ export class MessageManager { if (unifiedAPI) { this.runtime.logger.debug( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Using unified messaging API", ); await unifiedAPI.sendMessage(this.runtime.agentId, newMessage, { @@ -458,14 +703,14 @@ export class MessageManager { } else if (messageService) { // Newer core with messageService this.runtime.logger.debug( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Using messageService API", ); await messageService.handleMessage(this.runtime, newMessage, callback); } else { // Older core - use event-based message handling (backwards compatible) this.runtime.logger.debug( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Using event-based message handling", ); await 
this.runtime.emitEvent([EventType.MESSAGE_RECEIVED], { @@ -482,7 +727,7 @@ export class MessageManager { clearInterval(typingData.interval); typingData.cleared = true; this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Typing indicator failsafe timeout triggered", ); } @@ -491,7 +736,7 @@ export class MessageManager { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling message", @@ -536,9 +781,8 @@ export class MessageManager { } if (messageId) { // context currently doesn't know message ID - processedContent += `\nReferencing MessageID ${messageId} (discord: ${ - message.reference.messageId - })`; + processedContent += `\nReferencing MessageID ${messageId} (discord: ${message.reference.messageId + })`; // in our channel if (message.reference.channelId !== message.channel.id) { const roomId = createUniqueUuid( @@ -633,7 +877,7 @@ export class MessageManager { ) as any; // Cast to any if (!browserService) { this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Browser service not found", ); continue; diff --git a/src/progressiveMessage.ts b/src/progressiveMessage.ts new file mode 100644 index 0000000..421e7ce --- /dev/null +++ b/src/progressiveMessage.ts @@ -0,0 +1,368 @@ +import type { HandlerCallback, Content, Memory } from '@elizaos/core'; +import { logger } from '@elizaos/core'; + +/** + * ProgressiveMessage - Helper for actions with long-running pipelines + * + * Provides progressive status updates that edit the same message in Discord, + * showing users real-time feedback instead of long pauses. 
/**
 * ProgressiveMessage - helper for actions with long-running pipelines.
 *
 * Sends status updates through a handler callback. On sources that support
 * message editing (currently only `'discord'`) every update carries
 * `metadata.progressiveUpdate = { correlationId, isInterim }`, which lets the
 * receiving side edit one message instead of posting several.
 *
 * Design notes:
 * - Debounce (`minDelay`): the first interim update is held back so that an
 *   action finishing quickly shows only its final result.
 * - Throttle (`throttle`): later interim updates are spaced out so rapid-fire
 *   progress reports do not become rapid-fire edits.
 * - `important` flag: sources that cannot edit messages receive an interim
 *   update only when it is marked important, to avoid flooding them.
 * - No "final" flag on interim updates: the last message sent via
 *   `complete()` / `fail()` is the final one (`isInterim: false`).
 *
 * Usage:
 *
 * ```typescript
 * const progress = new ProgressiveMessage(callback, message.content.source);
 * try {
 *   progress.update("🔍 Searching...", { important: true });
 *   // ... do work ...
 *   progress.update("✨ Found! Preparing...");
 *   return await progress.complete("🎵 Done!");
 * } catch (error) {
 *   return await progress.fail("❌ Something went wrong");
 * }
 * ```
 */
export class ProgressiveMessage {
  private correlationId: string;
  private callback: HandlerCallback;
  private source: string;
  private minDelay: number;
  private throttle: number;

  /** Latest interim text not yet handed to the callback. */
  private pendingUpdate: string | null = null;
  /** Handle of the timer that will flush `pendingUpdate`; null when none is armed. */
  private updateTimer: ReturnType<typeof setTimeout> | null = null;
  private lastUpdateTime = 0;
  private firstUpdateSent = false;
  /** True while an interim callback invocation has not settled yet. */
  private flushInProgress = false;

  /**
   * Create a progressive message helper.
   *
   * @param callback Handler callback every message is sent through.
   * @param source Message source (e.g. `'discord'`, `'web'`).
   * @param options.minDelay Milliseconds to wait before the first interim
   *   update is sent (default 300).
   * @param options.throttle Minimum milliseconds between interim updates
   *   (default 500).
   */
  constructor(
    callback: HandlerCallback,
    source: string,
    options?: {
      minDelay?: number;
      throttle?: number;
    }
  ) {
    this.callback = callback;
    this.source = source;
    this.correlationId = this.generateCorrelationId();
    this.minDelay = options?.minDelay ?? 300;
    this.throttle = options?.throttle ?? 500;
  }

  /**
   * Build the ID that links every update of this instance together:
   * the current timestamp plus a random base-36 suffix.
   */
  private generateCorrelationId(): string {
    return `${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
  }

  /** Whether the source supports editing a sent message (only `'discord'`). */
  private supportsProgressive(): boolean {
    return this.source === 'discord';
  }

  /**
   * Queue an interim status update.
   *
   * On non-editing sources the update is sent immediately, but only when
   * `options.important` is set; otherwise it is dropped. On editing sources
   * the text replaces any pending text and is flushed after `minDelay` (first
   * update) or according to the throttle (later updates).
   *
   * @param text Status text to display.
   * @param options.important Send even on sources that cannot edit messages.
   */
  update(text: string, options?: { important?: boolean }): void {
    if (!this.supportsProgressive()) {
      if (options?.important) {
        void this.sendImportantUpdate(text);
      }
      return;
    }

    // Only the newest text matters; anything older is superseded.
    this.pendingUpdate = text;

    if (!this.firstUpdateSent) {
      // Debounce: keep the timer armed by the first update() call so the
      // delay is measured from the start, not from the latest call.
      if (!this.updateTimer) {
        this.scheduleFlush(this.minDelay);
      }
      return;
    }

    const sinceLastUpdate = Date.now() - this.lastUpdateTime;
    if (sinceLastUpdate >= this.throttle) {
      this.flushUpdate();
    } else {
      this.scheduleFlush(this.throttle - sinceLastUpdate);
    }
  }

  /** Cancel the armed flush timer, if any. */
  private clearUpdateTimer(): void {
    if (this.updateTimer) {
      clearTimeout(this.updateTimer);
      this.updateTimer = null;
    }
  }

  /**
   * Arm (or re-arm) the flush timer.
   *
   * The handle is reset to null *before* flushing. `flushUpdate()` uses
   * "no timer armed" as its condition for scheduling a retry; leaving the
   * already-fired handle in place would make that check fail and silently
   * drop an update that came due while another flush was in flight.
   */
  private scheduleFlush(delayMs: number): void {
    this.clearUpdateTimer();
    this.updateTimer = setTimeout(() => {
      this.updateTimer = null;
      this.flushUpdate();
    }, delayMs);
  }

  /**
   * Hand the pending interim update to the callback.
   *
   * Only one interim callback may be in flight at a time: the receiver has to
   * finish registering the first message before a second update can edit it.
   * While one is in flight, the pending text is retried after `throttle` ms.
   */
  private flushUpdate(): void {
    if (!this.pendingUpdate) return;

    if (this.flushInProgress) {
      if (!this.updateTimer) {
        this.scheduleFlush(this.throttle);
      }
      return;
    }

    this.flushInProgress = true;
    // Set immediately so update() calls made while this flush is in flight
    // follow the throttle path instead of re-entering the debounce path.
    this.firstUpdateSent = true;

    const text = this.pendingUpdate;
    this.pendingUpdate = null;
    // A timer may still be armed when update() flushes directly; cancel it so
    // it cannot fire later and push a newer update out ahead of the throttle.
    this.clearUpdateTimer();
    this.lastUpdateTime = Date.now();

    const content: Content = {
      text,
      source: this.source,
      metadata: {
        progressiveUpdate: {
          correlationId: this.correlationId,
          isInterim: true,
        },
      },
    };

    void this.dispatchInterim(content);
  }

  /**
   * Invoke the callback for an interim update and release the in-flight flag
   * when it settles. The callback is invoked synchronously; both a rejected
   * promise and a synchronous throw are logged and swallowed, so the flag can
   * never get stuck.
   */
  private async dispatchInterim(content: Content): Promise<void> {
    try {
      await this.callback(content);
    } catch (error) {
      logger.warn(`Progressive update flush failed: ${error}`);
    } finally {
      this.flushInProgress = false;
    }
  }

  /** Send an important update on a source that cannot edit messages. */
  private async sendImportantUpdate(text: string): Promise<void> {
    try {
      await this.callback({
        text,
        source: this.source,
      });
    } catch (error) {
      logger.warn(`Progressive update failed: ${error}`);
    }
  }

  /**
   * Send the final success message.
   *
   * @param text Final text to display.
   * @returns Whatever the callback resolves to; `[]` if the callback fails.
   */
  async complete(text: string): Promise<Memory[]> {
    return this.finish(text);
  }

  /**
   * Send the final failure message.
   *
   * @param text Error text to display.
   * @returns Whatever the callback resolves to; `[]` if the callback fails.
   */
  async fail(text: string): Promise<Memory[]> {
    return this.finish(text);
  }

  /**
   * Shared tail of `complete()` and `fail()`: drop anything still queued, wait
   * for an in-flight interim update, then send the final message. The final
   * message is marked progressive only if an interim update was actually sent
   * on an editing source; otherwise it goes out as a plain message.
   */
  private async finish(text: string): Promise<Memory[]> {
    this.clearUpdateTimer();
    this.pendingUpdate = null;

    // The final message must not overtake an interim one that is still being
    // delivered, or the receiver could end up with two separate messages.
    await this.waitForFlushComplete();

    return this.sendFinal(text, this.firstUpdateSent && this.supportsProgressive());
  }

  /**
   * Wait until no interim callback is in flight.
   *
   * Polls the in-flight flag every 50ms and gives up after 2 seconds, logging
   * a warning, so a callback that never settles cannot block the final
   * message forever.
   */
  private async waitForFlushComplete(): Promise<void> {
    if (!this.flushInProgress) return;

    const maxWait = 2000;
    const pollInterval = 50;
    const startTime = Date.now();

    while (this.flushInProgress && (Date.now() - startTime) < maxWait) {
      await new Promise<void>(resolve => setTimeout(resolve, pollInterval));
    }

    if (this.flushInProgress) {
      logger.warn('Progressive message flush timed out - proceeding with final message');
    }
  }

  /**
   * Send the final message.
   *
   * @param text Final text.
   * @param useProgressive When true, attach `progressiveUpdate` metadata with
   *   `isInterim: false` so the receiver edits the tracked message.
   * @returns The callback's result, or `[]` when the callback throws.
   */
  private async sendFinal(text: string, useProgressive: boolean): Promise<Memory[]> {
    const content: Content = {
      text,
      source: this.source,
    };

    if (useProgressive) {
      content.metadata = {
        progressiveUpdate: {
          correlationId: this.correlationId,
          isInterim: false,
        },
      };
    }

    try {
      return await this.callback(content);
    } catch (error) {
      logger.error(`Failed to send final message: ${error}`);
      return [];
    }
  }
}
/**
 * Agent Role Provider
 *
 * Describes the agent's standing in the Discord server the current message
 * came from: its roles, a fixed set of key permissions, its nickname, and
 * whether it owns the server. Every case where that context cannot be
 * resolved (non-Discord message, unknown room, missing service, unknown
 * channel or guild) yields an empty result. Direct messages yield a short
 * note that roles do not apply.
 */
export const agentRoleProvider: Provider = {
  name: 'agentRole',
  description: 'Information about the agent\'s role and permissions in the current Discord server',
  dynamic: true,

  get: async (runtime: IAgentRuntime, message: Memory, state: State): Promise<ProviderResult> => {
    // Fresh empty result for every "nothing to report" exit.
    const nothing = (): ProviderResult => ({ data: {}, values: {}, text: '' });

    if (message.content.source !== 'discord') {
      return nothing();
    }

    const room = state.data?.room ?? (await runtime.getRoom(message.roomId));
    if (!room) {
      return nothing();
    }

    if (room.type === ChannelType.DM) {
      return {
        data: {
          isDM: true,
          hasRoles: false,
        },
        values: {
          isDM: 'true',
          hasRoles: 'false',
        },
        text: 'This is a direct message conversation. Server roles and permissions do not apply.',
      };
    }

    const channelId = room.channelId ?? '';
    if (!channelId) {
      return nothing();
    }

    const discordService = runtime.getService(DISCORD_SERVICE_NAME) as DiscordService;
    if (!discordService?.client) {
      return nothing();
    }
    const client = discordService.client;

    // Cache first; fall back to a fetch, treating a failed fetch as "no context".
    let channel = client.channels.cache.get(channelId) as GuildChannel | undefined;
    if (!channel) {
      try {
        channel = (await client.channels.fetch(channelId)) as GuildChannel | undefined;
      } catch {
        return nothing();
      }
    }

    const guild = channel?.guild;
    if (!guild) {
      return nothing();
    }

    // The bot's own member record in this guild; lookup failures are tolerated.
    let botMember: GuildMember | undefined;
    try {
      const selfId = client.user?.id;
      botMember = guild.members.cache.get(selfId ?? '');
      if (!botMember && selfId) {
        botMember = await guild.members.fetch(selfId);
      }
    } catch {
      // Bot might not be in guild cache yet
    }

    if (!botMember) {
      return {
        data: {
          guildName: guild.name,
          guildId: guild.id,
        },
        values: {
          guildName: guild.name,
        },
        text: `In server "${guild.name}" but role information is not available.`,
      };
    }

    // Roles other than @everyone, highest position first.
    const assignedRoles = botMember.roles.cache.filter(role => role.name !== '@everyone');
    const roles = assignedRoles
      .sort((left, right) => right.position - left.position)
      .map(role => ({
        name: role.name,
        color: role.hexColor,
        position: role.position,
        isHoisted: role.hoist,
        isMentionable: role.mentionable,
      }));

    const roleNames = roles.map(entry => entry.name);
    const highestRole = roles[0]?.name || 'None';

    const permissions = botMember.permissions as PermissionsBitField;
    const can = (flag: Parameters<PermissionsBitField['has']>[0]): boolean => permissions.has(flag);
    // Key order is significant: it is the order permissions are listed in the text.
    const keyPermissions = {
      administrator: can('Administrator'),
      manageGuild: can('ManageGuild'),
      manageChannels: can('ManageChannels'),
      manageRoles: can('ManageRoles'),
      manageMessages: can('ManageMessages'),
      kickMembers: can('KickMembers'),
      banMembers: can('BanMembers'),
      moderateMembers: can('ModerateMembers'),
      manageNicknames: can('ManageNicknames'),
      manageWebhooks: can('ManageWebhooks'),
      manageEmojisAndStickers: can('ManageEmojisAndStickers'),
      mentionEveryone: can('MentionEveryone'),
      useExternalEmojis: can('UseExternalEmojis'),
      addReactions: can('AddReactions'),
      attachFiles: can('AttachFiles'),
      embedLinks: can('EmbedLinks'),
      readMessageHistory: can('ReadMessageHistory'),
      connect: can('Connect'),
      speak: can('Speak'),
      muteMembers: can('MuteMembers'),
      deafenMembers: can('DeafenMembers'),
      moveMembers: can('MoveMembers'),
      prioritySpeaker: can('PrioritySpeaker'),
    };

    const isAdmin = keyPermissions.administrator;
    const isModerator = [
      keyPermissions.manageMessages,
      keyPermissions.kickMembers,
      keyPermissions.moderateMembers,
    ].some(Boolean);
    const isOwner = guild.ownerId === client.user?.id;

    // Administrator outranks moderator; everything else is a regular member.
    let tier: 'admin' | 'moderator' | 'member' = 'member';
    if (isAdmin) {
      tier = 'admin';
    } else if (isModerator) {
      tier = 'moderator';
    }

    const agentName = runtime.character?.name || 'The agent';

    const statusLine = {
      admin: '- ⚡ Administrator (full permissions)\n',
      moderator: '- 🛡️ Moderator (can manage messages/members)\n',
      member: '- 👤 Regular member\n',
    }[tier];

    const guideline = {
      admin: `${agentName} has administrator privileges and should use them responsibly. Avoid making server-wide changes unless explicitly requested.\n`,
      moderator: `${agentName} has moderation capabilities. Use moderation actions only when necessary and appropriate.\n`,
      member: `${agentName} has standard member permissions. Focus on conversation and available actions within these permissions.\n`,
    }[tier];

    const enabledPermissions: string[] = [];
    for (const [permissionKey, granted] of Object.entries(keyPermissions)) {
      if (granted) {
        enabledPermissions.push(permissionKey);
      }
    }

    // Assemble the markdown summary piece by piece, then join once.
    const pieces: string[] = [
      `# ${agentName}'s Discord Role Information\n\n`,
      `**Server:** ${guild.name}\n`,
      `**Nickname:** ${botMember.nickname || botMember.user.username}\n`,
      `**Highest Role:** ${highestRole}\n`,
    ];
    if (roleNames.length > 0) {
      pieces.push(`**All Roles:** ${roleNames.join(', ')}\n`);
    }

    pieces.push('\n## Status\n');
    if (isOwner) {
      pieces.push('- 👑 Server Owner\n');
    }
    pieces.push(statusLine);

    pieces.push('\n## Key Permissions\n');
    if (enabledPermissions.length > 0) {
      pieces.push(enabledPermissions.map(key => `- ✓ ${formatPermissionName(key)}`).join('\n'));
    } else {
      pieces.push('- Basic permissions only');
    }

    pieces.push('\n\n## Behavioral Guidelines\n');
    pieces.push(guideline);

    return {
      data: {
        guildId: guild.id,
        guildName: guild.name,
        nickname: botMember.nickname,
        username: botMember.user.username,
        roles,
        roleNames,
        highestRole,
        permissions: keyPermissions,
        isOwner,
        isAdmin,
        isModerator,
        joinedAt: botMember.joinedAt?.toISOString(),
      },
      values: {
        guildName: guild.name,
        highestRole,
        isOwner: String(isOwner),
        isAdmin: String(isAdmin),
        isModerator: String(isModerator),
        roleCount: String(roleNames.length),
      },
      text: pieces.join(''),
    };
  },
};

/**
 * Turn a camelCase permission key into a spaced, capitalized label,
 * e.g. `manageGuild` -> `Manage Guild`.
 *
 * @param permission camelCase permission key.
 * @returns The label with a space before each capital letter, the first
 *   character upper-cased, and surrounding whitespace removed.
 */
function formatPermissionName(permission: string): string {
  const spaced = permission.replace(/([A-Z])/g, ' $1');
  const capitalized = spaced.replace(/^./, first => first.toUpperCase());
  return capitalized.trim();
}

export default agentRoleProvider;
+ * + * @param {IAgentRuntime} runtime - The runtime object for the agent + * @param {Memory} message - The message object containing room ID + * @param {State} [state] - Optional state object for the user + * @returns {Object} An object containing information about the audio state + */ +export const audioStateProvider: Provider = { + name: 'audioState', + description: 'Provides information about the bot\'s audio state including server mute/deafen status', + dynamic: true, + get: async (runtime: IAgentRuntime, message: Memory, state?: State) => { + const room = await runtime.getRoom(message.roomId); + if (!room) { + return { + data: { + isInVoiceChannel: false, + hasAudioIssues: false, + }, + values: { + isInVoiceChannel: 'false', + hasAudioIssues: 'false', + }, + text: 'Not in a voice channel', + }; + } + + if (room.type !== ChannelType.GROUP) { + return { + data: { + isInVoiceChannel: false, + hasAudioIssues: false, + roomType: room.type, + }, + values: { + isInVoiceChannel: 'false', + hasAudioIssues: 'false', + roomType: room.type, + }, + text: 'Not in a group voice channel', + }; + } + + const serverId = room.serverId; + if (!serverId) { + return { + data: { + isInVoiceChannel: false, + hasAudioIssues: false, + }, + values: { + isInVoiceChannel: 'false', + hasAudioIssues: 'false', + }, + text: 'No server ID found', + }; + } + + // Get Discord service and voice manager + const discordService = runtime.getService(DISCORD_SERVICE_NAME) as DiscordService | null; + if (!discordService || !discordService.voiceManager) { + return { + data: { + isInVoiceChannel: false, + hasAudioIssues: false, + }, + values: { + isInVoiceChannel: 'false', + hasAudioIssues: 'false', + }, + text: 'Discord service not available', + }; + } + + // Use voiceManager.getVoiceConnection to resolve the correct bot/group + // in multi-bot deployments, instead of @discordjs/voice getVoiceConnection + // which defaults to the 'default' group + const connection = 
discordService.voiceManager.getVoiceConnection(serverId); + if (!connection) { + return { + data: { + isInVoiceChannel: false, + hasAudioIssues: false, + serverId, + }, + values: { + isInVoiceChannel: 'false', + hasAudioIssues: 'false', + serverId, + }, + text: 'Not currently connected to a voice channel', + }; + } + + // Extract serializable connection info instead of returning the raw + // VoiceConnection object, which is non-serializable and exposes internal state + const voiceChannelId = connection.joinConfig?.channelId ?? null; + const connectionStatus = connection.state?.status ?? 'unknown'; + const isConnected = connectionStatus === 'ready'; + + // Get audio state from voice manager + const audioState = discordService.voiceManager.getAudioState(serverId); + const agentName = state?.agentName || 'The agent'; + + if (!audioState) { + return { + data: { + isInVoiceChannel: true, + hasAudioIssues: false, + serverId, + voiceChannelId, + connectionStatus, + isConnected, + }, + values: { + isInVoiceChannel: 'true', + hasAudioIssues: 'false', + serverId, + voiceChannelId: voiceChannelId ?? '', + connectionStatus, + isConnected: isConnected ? 'true' : 'false', + }, + text: `${agentName} is in a voice channel with no known audio issues`, + }; + } + + const { + serverMute, + serverDeaf, + selfMute, + selfDeaf, + } = audioState; + + const issues: string[] = []; + if (serverMute) issues.push('server muted'); + if (serverDeaf) issues.push('server deafened'); + if (selfMute) issues.push('self muted'); + if (selfDeaf) issues.push('self deafened'); + + const hasAudioIssues = issues.length > 0; + const issueText = issues.length > 0 + ? 
`${agentName} is ${issues.join(' and ')}` + : `${agentName} has no audio restrictions`; + + return { + data: { + isInVoiceChannel: true, + hasAudioIssues, + serverMute, + serverDeaf, + selfMute, + selfDeaf, + serverId, + voiceChannelId, + connectionStatus, + isConnected, + issues, + }, + values: { + isInVoiceChannel: 'true', + hasAudioIssues: hasAudioIssues ? 'true' : 'false', + serverMute: serverMute ? 'true' : 'false', + serverDeaf: serverDeaf ? 'true' : 'false', + selfMute: selfMute ? 'true' : 'false', + selfDeaf: selfDeaf ? 'true' : 'false', + serverId, + voiceChannelId: voiceChannelId ?? '', + connectionStatus, + isConnected: isConnected ? 'true' : 'false', + issues: issues.join(', '), + }, + text: issueText, + }; + }, +}; + +export default audioStateProvider; + diff --git a/src/providers/channelState.ts b/src/providers/channelState.ts index 157ac73..716d550 100644 --- a/src/providers/channelState.ts +++ b/src/providers/channelState.ts @@ -2,7 +2,7 @@ import type { IAgentRuntime, Memory, Provider, State } from '@elizaos/core'; import { ChannelType } from '@elizaos/core'; import type { GuildChannel } from 'discord.js'; import type { DiscordService } from '../service'; -import { ServiceType } from '../types'; +import { DISCORD_SERVICE_NAME } from '../constants'; /** * Represents a provider for retrieving channel state information. @@ -16,6 +16,7 @@ import { ServiceType } from '../types'; */ export const channelStateProvider: Provider = { name: 'channelState', + dynamic: true, get: async (runtime: IAgentRuntime, message: Memory, state: State) => { const room = state.data?.room ?? (await runtime.getRoom(message.roomId)); if (!room) { @@ -59,7 +60,8 @@ export const channelStateProvider: Provider = { }; } - const discordService = runtime.getService(ServiceType.DISCORD) as DiscordService; + // is ServiceType.DISCORD better? 
+ const discordService = runtime.getService(DISCORD_SERVICE_NAME) as DiscordService; if (!discordService) { runtime.logger.warn({ src: 'plugin:discord:provider:channelState', agentId: runtime.agentId, channelId }, 'No discord client found'); return { diff --git a/src/providers/plugin-info.ts b/src/providers/plugin-info.ts new file mode 100644 index 0000000..1212957 --- /dev/null +++ b/src/providers/plugin-info.ts @@ -0,0 +1,169 @@ +/** + * Plugin Information Providers for Discord Plugin + * + * Two dynamic providers: + * 1. discordInstructionsProvider - Usage instructions for the agent/LLM + * 2. discordSettingsProvider - Current configuration (non-sensitive) + */ + +import type { IAgentRuntime, Provider, ProviderResult, Memory, State } from '@elizaos/core'; + +/** + * Instructions Provider + * + * Provides usage instructions for the Discord plugin. + * Helps the agent understand and explain Discord capabilities. + */ +export const discordInstructionsProvider: Provider = { + name: 'discordInstructions', + description: 'Instructions and capabilities for the Discord integration plugin', + dynamic: true, + + get: async (runtime: IAgentRuntime, _message: Memory, _state: State): Promise => { + const instructions = ` +# Discord Plugin Capabilities + +## What This Plugin Does + +The Discord plugin integrates agents with Discord servers. 
It enables the agent to: +- Communicate in text channels and DMs +- Join and participate in voice channels +- Manage channel interactions +- Search and analyze conversations + +## Available Actions + +### Messaging +- **SEND_MESSAGE**: Send a message to a specific channel +- **SEND_DM**: Send a direct message to a user +- **CHAT_WITH_ATTACHMENTS**: Send messages with file attachments +- **REACT_TO_MESSAGE**: Add emoji reactions to messages + +### Channel Management +- **JOIN_CHANNEL**: Join a voice channel +- **LEAVE_CHANNEL**: Leave the current voice channel +- **LIST_CHANNELS**: List available channels in the server +- **READ_CHANNEL**: Read recent messages from a channel + +### Content Operations +- **SEARCH_MESSAGES**: Search for messages in channels +- **SUMMARIZE**: Summarize conversation history +- **TRANSCRIBE_MEDIA**: Transcribe audio/video content +- **DOWNLOAD_MEDIA**: Download media attachments + +### Moderation +- **PIN_MESSAGE**: Pin important messages +- **UNPIN_MESSAGE**: Unpin messages +- **CREATE_POLL**: Create polls for user feedback +- **GET_USER_INFO**: Get information about a user +- **SERVER_INFO**: Get server information + +## Voice Features + +When voice is enabled: +- Join voice channels for audio interaction +- Transcribe voice messages +- Participate in voice conversations + +## Best Practices + +1. **Respect Permissions**: Only perform actions the bot has permissions for +2. **Rate Limits**: Discord has rate limits - avoid spamming messages +3. **Mentions**: Use @mentions sparingly to avoid notification fatigue +4. **Context**: Read recent messages before responding to ongoing conversations + +## Common User Requests + +- "Can you send a message to #general?" → Use SEND_MESSAGE +- "What's happening in the voice channel?" 
→ Check voice state +- "Search for messages about X" → Use SEARCH_MESSAGES +- "Summarize the last conversation" → Use SUMMARIZE +`; + + return { + text: instructions.trim(), + data: { + pluginName: 'discord', + platform: 'Discord', + capabilities: [ + 'text-messaging', + 'voice-channels', + 'dm-support', + 'message-search', + 'media-handling', + 'polls', + 'reactions', + ], + }, + }; + }, +}; + +/** + * Settings Provider + * + * Exposes current Discord configuration (non-sensitive values only). + * NEVER exposes tokens or secrets. + */ +export const discordSettingsProvider: Provider = { + name: 'discordSettings', + description: 'Current Discord plugin configuration (non-sensitive)', + dynamic: true, + + get: async (runtime: IAgentRuntime, _message: Memory, _state: State): Promise => { + // Only expose non-sensitive configuration + // NEVER expose: DISCORD_API_TOKEN, DISCORD_BOT_TOKENS, or other secrets + const hasApiToken = !!runtime.getSetting('DISCORD_API_TOKEN'); + const hasBotTokens = !!(runtime.getSetting('DISCORD_BOT_TOKENS') as string || '').trim(); + + const settings = { + // Check if configured (but don't expose the actual values) + // Plugin supports either DISCORD_API_TOKEN (single bot) or DISCORD_BOT_TOKENS (multi-bot) + isConfigured: hasApiToken || hasBotTokens, + hasApplicationId: !!runtime.getSetting('DISCORD_APPLICATION_ID'), + hasVoiceChannel: !!runtime.getSetting('DISCORD_VOICE_CHANNEL_ID'), + + // Non-sensitive behavior settings + ignoreBotMessages: runtime.getSetting('DISCORD_SHOULD_IGNORE_BOT_MESSAGES') === 'true', + ignoreDirectMessages: runtime.getSetting('DISCORD_SHOULD_IGNORE_DIRECT_MESSAGES') === 'true', + respondOnlyToMentions: runtime.getSetting('DISCORD_SHOULD_RESPOND_ONLY_TO_MENTIONS') === 'true', + + // Channel configuration (IDs are not sensitive) + hasListenChannels: !!(runtime.getSetting('DISCORD_LISTEN_CHANNEL_IDS') as string || '').trim(), + }; + + const text = ` +# Discord Plugin Settings + +## Connection Status +- 
**Configured**: ${settings.isConfigured ? 'Yes' : 'No - DISCORD_API_TOKEN or DISCORD_BOT_TOKENS required'} +- **Application ID**: ${settings.hasApplicationId ? 'Set' : 'Not set'} + +## Voice Configuration +- **Default Voice Channel**: ${settings.hasVoiceChannel ? 'Configured' : 'Not configured'} + +## Message Behavior +- **Ignore Bot Messages**: ${settings.ignoreBotMessages ? 'Yes' : 'No'} +- **Ignore Direct Messages**: ${settings.ignoreDirectMessages ? 'Yes' : 'No'} +- **Respond Only to Mentions**: ${settings.respondOnlyToMentions ? 'Yes' : 'No'} + +## Channel Filters +- **Listen Channels**: ${settings.hasListenChannels ? 'Specific channels configured' : 'All channels'} + +## Notes +${!settings.isConfigured ? '⚠️ Discord integration is not functional without DISCORD_API_TOKEN or DISCORD_BOT_TOKENS' : '✓ Discord integration is ready'} +`; + + return { + text: text.trim(), + data: settings, + values: { + isConfigured: String(settings.isConfigured), + ignoreBotMessages: String(settings.ignoreBotMessages), + ignoreDirectMessages: String(settings.ignoreDirectMessages), + respondOnlyToMentions: String(settings.respondOnlyToMentions), + }, + }; + }, +}; + diff --git a/src/providers/voiceState.ts b/src/providers/voiceState.ts index 24d4bf5..6f14e14 100644 --- a/src/providers/voiceState.ts +++ b/src/providers/voiceState.ts @@ -15,6 +15,7 @@ import { ServiceType } from '../types'; */ export const voiceStateProvider: Provider = { name: 'voiceState', + dynamic: true, get: async (runtime: IAgentRuntime, message: Memory, state?: State) => { // Voice doesn't get a discord message, so we need to use the channel for guild data const room = await runtime.getRoom(message.roomId); diff --git a/src/service.ts b/src/service.ts index 7a9022c..87fac6c 100644 --- a/src/service.ts +++ b/src/service.ts @@ -54,11 +54,10 @@ import { import { AttachmentBuilder, AuditLogEvent, + type BaseGuildVoiceChannel, type Channel, ChannelType as DiscordChannelType, Client as DiscordJsClient, - Events, - 
GatewayIntentBits, type Guild, type GuildChannel, type GuildMember, @@ -67,46 +66,41 @@ import { type MessageReaction, type PartialMessageReaction, type PartialUser, - Partials, PermissionsBitField, type Role as DiscordRole, type TextChannel, type User, type Interaction, Collection, -} from "discord.js"; -import { DISCORD_SERVICE_NAME } from "./constants"; -import { getDiscordSettings } from "./environment"; -import { MessageManager } from "./messages"; + type VoiceChannel, +} from 'discord.js'; +import { DISCORD_SERVICE_NAME } from './constants'; +import { getDiscordSettings } from './environment'; +import { MessageManager } from './messages'; import { DiscordEventTypes, type IDiscordService, type DiscordSettings, type DiscordSlashCommand, - type DiscordRegisterCommandsPayload, type ChannelHistoryOptions, type ChannelHistoryResult, type ChannelSpiderState, -} from "./types"; -import { - getAttachmentFileName, - splitMessage, - MAX_MESSAGE_LENGTH, -} from "./utils"; -import { generateInviteUrl } from "./permissions"; -import { VoiceManager } from "./voice"; +} from './types'; +import { createAttachmentFromMedia, splitMessage, MAX_MESSAGE_LENGTH } from './utils'; +import { VoiceManager } from './voice'; import { diffOverwrites, diffRolePermissions, diffMemberRoles, fetchAuditEntry, -} from "./permissionEvents"; -import { - createCompatRuntime, - type ICompatRuntime, - type WorldCompat, -} from "./compat"; +} from './permissionEvents'; +import { type ICompatRuntime, type WorldCompat } from './compat'; +import { DiscordClientRegistry } from './clientRegistry'; +import { VoiceConnectionManager } from './voiceConnectionManager'; +import type { VoiceTarget } from './types'; +import { DiscordAudioSink } from './sinks/discordAudioSink'; +import type { IAudioSink } from './contracts'; /** * DiscordService class representing a service for interacting with Discord. 
@@ -125,22 +119,24 @@ export class DiscordService extends Service implements IDiscordService { declare protected runtime: ICompatRuntime; static serviceType: string = DISCORD_SERVICE_NAME; - capabilityDescription = - "The agent is able to send and receive messages on discord"; - client: DiscordJsClient | null; + capabilityDescription = 'The agent is able to send and receive messages on discord, set voice channel status, and manage user presence/activity status'; + client: DiscordJsClient | null = null; // Kept for backward compatibility, points to primary client character: Character; messageManager?: MessageManager; - voiceManager?: VoiceManager; + voiceManager?: VoiceManager; // Kept for backward compatibility, points to primary voice manager + clientRegistry: DiscordClientRegistry; + voiceConnectionManager: VoiceConnectionManager; private discordSettings: DiscordSettings; private userSelections: Map = new Map(); - private timeouts: ReturnType[] = []; - public clientReadyPromise: Promise | null = null; + private timeouts: NodeJS.Timeout[] = []; + public readonly clientReadyPromise: Promise; // | null = null private slashCommands: DiscordSlashCommand[] = []; private commandRegistrationQueue: Promise = Promise.resolve(); /** * Slash command names that should bypass allowed channel restrictions. */ private allowAllSlashCommands: Set = new Set(); + private audioSinks: Map = new Map(); // guildId -> AudioSink /** * List of allowed channel IDs (parsed from CHANNEL_IDS env var). * If undefined, all channels are allowed. @@ -153,6 +149,13 @@ export class DiscordService extends Service implements IDiscordService { */ private dynamicChannelIds: Set = new Set(); + /** + * Get a human-readable identifier for logging (character name or agentId fallback) + */ + private get agentIdentifier(): string { + return this.runtime?.character?.name || this.runtime.agentId; + } + /** * Constructor for Discord client. 
* Initializes the Discord client with specified intents and partials, @@ -168,6 +171,16 @@ export class DiscordService extends Service implements IDiscordService { this.character = runtime.character; + // Initialize multi-bot infrastructure + this.clientRegistry = new DiscordClientRegistry(runtime, this); + this.voiceConnectionManager = new VoiceConnectionManager(); + + // Initialize clientReadyPromise - will be set properly below + let readyResolver: () => void; + this.clientReadyPromise = new Promise(resolve => { + readyResolver = resolve; + }); + // Parse CHANNEL_IDS env var to restrict the bot to specific channels const channelIdsRaw = runtime.getSetting("CHANNEL_IDS") as | string @@ -180,7 +193,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, allowedChannelIds: this.allowedChannelIds, }, "Channel restrictions enabled", @@ -188,97 +201,151 @@ export class DiscordService extends Service implements IDiscordService { } // Check if Discord API token is available and valid - const token = runtime.getSetting("DISCORD_API_TOKEN") as string; - if (!token || (token?.trim && token.trim() === "") || token === null) { - this.runtime.logger.warn("Discord API Token not provided"); + // Support multiple token environment variables for backwards compatibility + // Note: DISCORD_APPLICATION_ID is NOT a valid token - it's the OAuth2 client/application ID + const rawToken = + (runtime.getSetting('DISCORD_API_TOKEN') as string) || + (runtime.getSetting('DISCORD_BOT_TOKENS') as string); + const token = rawToken?.trim(); + if (!token) { + this.runtime.logger.warn('Discord Bot Token not provided - Discord functionality will be unavailable'); + this.runtime.logger.warn('Set DISCORD_API_TOKEN or DISCORD_BOT_TOKENS in your .env file to enable Discord'); + this.runtime.logger.warn('Note: DISCORD_APPLICATION_ID is your app\'s OAuth2 client ID, 
not a bot token'); this.client = null; + readyResolver!(); return; } - try { - const client = new DiscordJsClient({ - intents: [ - GatewayIntentBits.Guilds, - GatewayIntentBits.GuildMembers, - GatewayIntentBits.GuildPresences, - GatewayIntentBits.DirectMessages, - GatewayIntentBits.GuildVoiceStates, - GatewayIntentBits.MessageContent, - GatewayIntentBits.GuildMessages, - GatewayIntentBits.DirectMessageTyping, - GatewayIntentBits.GuildMessageTyping, - GatewayIntentBits.GuildMessageReactions, - ], - partials: [ - Partials.Channel, - Partials.Message, - Partials.User, - Partials.Reaction, - ], - }); - this.client = client; + // Initialize clients from configuration (supports multi-bot via DISCORD_BOT_TOKENS) + const initPromise = this.initializeClients().then(() => { + // Set up backward compatibility pointers + const primaryClient = this.clientRegistry.getPrimaryClient(); + if (primaryClient) { + this.client = primaryClient.client; + this.voiceManager = primaryClient.voiceManager; + this.messageManager = new MessageManager(this, this.runtime); + this.setupEventListeners(); + this.registerDiscordEvents(); + // Note: registerSendHandler is called automatically by runtime via static registerSendHandlers() + + // Auto-join voice channel if configured (after everything is ready) + this.handleAutoJoinVoiceChannel(); + } else { + this.client = null; + this.runtime.logger.warn('No Discord clients initialized'); + } + readyResolver!(); + }).catch((error) => { + const errorMessage = error instanceof Error ? error.message : String(error); + runtime.logger.error(`Error initializing Discord clients: ${errorMessage}`); + + // Provide helpful context based on error type + if (errorMessage.includes('Invalid Discord token') || errorMessage.includes('TokenInvalid')) { + runtime.logger.error(''); + runtime.logger.error('Discord token validation failed. Please check:'); + runtime.logger.error(' 1. Your token is correct and up-to-date'); + runtime.logger.error(' 2. 
No extra spaces or special characters were copied'); + runtime.logger.error(' 3. You\'re using a BOT token (not a user token)'); + runtime.logger.error(' 4. The bot application still exists in Discord Developer Portal'); + runtime.logger.error(''); + runtime.logger.error('Get a valid token from: https://discord.com/developers/applications'); + runtime.logger.error('Navigate to: Your Application → Bot → Token'); + runtime.logger.error(''); + } - this.runtime = createCompatRuntime(runtime); - this.voiceManager = new VoiceManager(this, this.runtime); - this.messageManager = new MessageManager(this, this.runtime); + // Cleanup any partially initialized clients + if (this.client) { + this.client.destroy().catch(() => { }); + } + this.client = null; + readyResolver!(); + }); - this.clientReadyPromise = new Promise((resolve, reject) => { - // once logged in - client.once(Events.ClientReady, async (readyClient) => { - try { - await this.onReady(readyClient); - resolve(); - } catch (error) { - this.runtime.logger.error( - `Error in onReady: ${error instanceof Error ? error.message : String(error)}`, - ); - reject(error); - } - }); - // Handle client errors that might prevent ready event - client.once(Events.Error, (error) => { - this.runtime.logger.error( - `Discord client error: ${error instanceof Error ? error.message : String(error)}`, - ); - reject(error); - }); - // now start login - client.login(token).catch((error) => { - this.runtime.logger.error( - `Failed to login to Discord: ${error instanceof Error ? 
error.message : String(error)}`, - ); - if (this.client) { - this.client.destroy().catch(() => {}); - } - this.client = null; - reject(error); - }); - }); + // Attach error handler to prevent unhandled promise rejection + // This ensures the promise rejection is handled even if no one awaits it immediately + initPromise.catch((_error) => { + // Error is already logged in the promise handlers above + // This catch prevents unhandled promise rejection warnings + }); + } - // Attach error handler to prevent unhandled promise rejection - // This ensures the promise rejection is handled even if no one awaits it immediately - this.clientReadyPromise.catch((_error) => { - // Error is already logged in the promise handlers above - // This catch prevents unhandled promise rejection warnings - // The promise is public and may be awaited elsewhere, but we need to handle - // the case where it's not immediately awaited - }); + static async start(runtime: IAgentRuntime) { + const service = new DiscordService(runtime); + return service; + } - this.setupEventListeners(); - // Note: send handler is registered automatically by runtime via registerSendHandlers() static method + /** + * Initialize Discord clients from environment configuration + * @private + */ + private async initializeClients(): Promise { + try { + await this.clientRegistry.initializeFromEnv(); + this.runtime.logger.info(`[DiscordService] Initialized ${this.clientRegistry.getClientCount()} client(s)`); } catch (error) { - runtime.logger.error( - `Error initializing Discord client: ${error instanceof Error ? 
error.message : String(error)}`, - ); - this.client = null; + this.runtime.logger.error(`[DiscordService] Failed to initialize clients: ${error}`); + throw error; } } - static async start(runtime: IAgentRuntime) { - const service = new DiscordService(runtime); - return service; + /** + * Get all available voice targets across all bots + * @returns Array of voice targets + */ + getVoiceTargets(): VoiceTarget[] { + return this.voiceConnectionManager.getVoiceTargets(); } + /** + * Get or create an audio sink for a guild + * @param guildId Guild/server ID + * @returns IAudioSink instance for this guild + */ + getAudioSink(guildId: string): IAudioSink | null { + // Return existing sink if available + if (this.audioSinks.has(guildId)) { + return this.audioSinks.get(guildId)!; + } + + // Find voice manager for this guild - check voiceConnectionManager first for multi-bot support + let voiceManagerToUse: VoiceManager | undefined; + + const guildConnections = this.voiceConnectionManager.getConnectionsForGuild(guildId); + if (guildConnections.length > 0) { + // Use the voice manager from the first bot connected to this guild + voiceManagerToUse = guildConnections[0].connection.voiceManager; + this.runtime.logger.debug(`[DiscordService] Using voice manager from bot ${guildConnections[0].connection.botId} for guild ${guildId}`); + } else { + // Fall back to primary voice manager + voiceManagerToUse = this.voiceManager; + } + + if (!voiceManagerToUse) { + this.runtime.logger.warn(`[DiscordService] No VoiceManager available for guild ${guildId}`); + return null; + } + + const sinkId = `discord-${guildId}`; + const sink = new DiscordAudioSink(sinkId, guildId, voiceManagerToUse); + this.audioSinks.set(guildId, sink); + + this.runtime.logger.debug(`[DiscordService] Created audio sink for guild ${guildId}`); + return sink; + } + + /** + * Get a specific client by ID or alias + */ + getClient(idOrAlias: string) { + return this.clientRegistry.getClient(idOrAlias); + } + + /** + * Get 
all registered clients + */ + getAllClients() { + return this.clientRegistry.getAllClients(); + } /** * The SendHandlerFunction implementation for Discord. * @param {IAgentRuntime} runtime - The runtime instance. @@ -354,11 +421,9 @@ export class DiscordService extends Service implements IDiscordService { const files: AttachmentBuilder[] = []; if (content.attachments && content.attachments.length > 0) { for (const media of content.attachments) { - if (media.url) { - const fileName = getAttachmentFileName(media); - files.push( - new AttachmentBuilder(media.url, { name: fileName }), - ); + const attachment = createAttachmentFromMedia(media); + if (attachment) { + files.push(attachment); } } } @@ -488,32 +553,64 @@ export class DiscordService extends Service implements IDiscordService { } } + /** + * Register Discord-specific event handlers via the runtime event system. + * Plugins can emit DISCORD_REGISTER_COMMANDS to register slash commands. + * @private + */ + private registerDiscordEvents(): void { + // Listen for slash command registration requests from other plugins + this.runtime.registerEvent('DISCORD_REGISTER_COMMANDS', async (payload: { commands: DiscordSlashCommand[] }) => { + if (payload?.commands && Array.isArray(payload.commands)) { + this.runtime.logger.info(`[DiscordService] Received ${payload.commands.length} slash commands to register`); + await this.registerSlashCommands(payload.commands); + } + }); + } + /** * Set up event listeners for the client. 
* @private */ private setupEventListeners() { if (!this.client) { + this.runtime.logger.error('[Discord] setupEventListeners called but this.client is null!'); return; // Skip if client is not available } + this.runtime.logger.debug( + { src: 'plugin:discord', botId: this.client.user?.id, botUsername: this.client.user?.username }, + '[Discord] Setting up event listeners' + ); + const listenCidsRaw = this.runtime.getSetting( "DISCORD_LISTEN_CHANNEL_IDS", ) as string | string[] | undefined; const listenCids = Array.isArray(listenCidsRaw) ? listenCidsRaw - : listenCidsRaw && - typeof listenCidsRaw === "string" && - listenCidsRaw.trim() - ? listenCidsRaw - .trim() - .split(",") - .map((s) => s.trim()) - .filter((s) => s.length > 0) + : (listenCidsRaw && typeof listenCidsRaw === 'string' && listenCidsRaw.trim()) + ? listenCidsRaw.trim().split(',').map(s => s.trim()).filter(s => s.length > 0) : []; + /* + const talkCids = this.allowedChannelIds ?? [] // CHANNEL_IDS + // allowedCids computed but not currently used - kept for potential filtering + const allowedCids = [...listenCids, ...talkCids] + */ // Setup handling for direct messages this.client.on("messageCreate", async (message) => { + // Debug: log every incoming message to diagnose connection issues + this.runtime.logger.debug( + { + src: 'plugin:discord', + agentId: this.agentIdentifier, + channelId: message.channel.id, + authorId: message.author.id, + content: message.content?.substring(0, 50), + }, + 'Discord messageCreate event received' + ); + // Skip if we're sending the message or in deleted state if ( message.author.id === this.client?.user?.id || @@ -521,12 +618,14 @@ export class DiscordService extends Service implements IDiscordService { ) { this.runtime.logger.debug( { - src: "plugin:discord", - agentId: this.runtime.agentId, + src: 'plugin:discord', + agentId: this.agentIdentifier, authorId: message.author.id, isBot: message.author.bot, }, - "Ignoring message from bot or self", + message.author.bot + ? 
'Ignoring message from bot (set shouldIgnoreBotMessages=false to reply)' + : 'Ignoring message from self' ); return; } @@ -539,22 +638,56 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, messageId: message.id, }, "Failed to build memory from listen channel message", ); return; } + /* + // Uncomment to enable entity/room tracking for listen channels: + const entityId = createUniqueUuid(this.runtime, message.author.id); + const userName = message.author.username; + const name = message.member?.displayName || message.author.displayName || userName; + const channelId = message.channel.id; + const roomId = createUniqueUuid(this.runtime, channelId); + + let type: ChannelType; + let serverId: string | undefined; + + if (message.guild) { + const guild = await message.guild.fetch(); + type = await this.getChannelType(message.channel as Channel); + if (type === null) { + this.runtime.logger.warn(`null channel type, discord message: ${message.id}`); + } + serverId = guild.id; + } else { + type = ChannelType.DM; + serverId = message.channel.id; + } + + await this.runtime.ensureConnection({ + entityId, + roomId, + userName, + name, + source: 'discord', + channelId, + messageServerId: serverId ? stringToUuid(serverId) : undefined, + type, + worldId: createUniqueUuid(this.runtime, serverId ?? 
roomId) as UUID, + worldName: message.guild?.name, + }); + */ // Emit event for listen channel handlers - this.runtime.emitEvent( - "DISCORD_LISTEN_CHANNEL_MESSAGE" as string, - { - runtime: this.runtime, - message: newMessage, - } as any, - ); + // and then you can handle these any way you want + this.runtime.emitEvent('DISCORD_LISTEN_CHANNEL_MESSAGE' as string, { + runtime: this.runtime, + message: newMessage, + } as any); } // Skip if channel restrictions are set and this channel is not allowed @@ -577,7 +710,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: message.channel.id, }, "Channel not found", @@ -586,26 +719,12 @@ export class DiscordService extends Service implements IDiscordService { } if (channel.isThread()) { if (!channel.parentId || !this.isChannelAllowed(channel.parentId)) { - this.runtime.logger.debug( - { - src: "plugin:discord", - agentId: this.runtime.agentId, - parentChannelId: channel.parentId, - }, - "Thread not in allowed channel", - ); + this.runtime.logger.debug({ src: 'plugin:discord', agentId: this.agentIdentifier, parentChannelId: channel.parentId }, 'Ignoring thread message: parent channel not in CHANNEL_IDS whitelist'); return; } } else { if (channel?.isTextBased()) { - this.runtime.logger.debug( - { - src: "plugin:discord", - agentId: this.runtime.agentId, - channelId: channel.id, - }, - "Channel not allowed", - ); + this.runtime.logger.debug({ src: 'plugin:discord', agentId: this.agentIdentifier, channelId: channel.id }, 'Ignoring message: channel not in CHANNEL_IDS whitelist'); } return; } @@ -618,7 +737,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ?
error.message : String(error), }, "Error handling message", @@ -645,7 +764,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling reaction add", @@ -672,7 +791,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling reaction remove", @@ -688,7 +807,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling guild create", @@ -704,7 +823,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling guild member add", @@ -742,7 +861,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, interactionType: interaction.type, commandName: isSlashCommand ? interaction.commandName : undefined, channelId: interaction.channelId, @@ -758,8 +877,8 @@ export class DiscordService extends Service implements IDiscordService { // Slash commands respect the whitelist unless bypassChannelWhitelist: true. 
const isFollowUpInteraction = Boolean( interaction.isModalSubmit() || - interaction.isMessageComponent() || - interaction.isAutocomplete(), + interaction.isMessageComponent() || + interaction.isAutocomplete(), ); // Skip if channel restrictions are set and this interaction is not in an allowed channel @@ -784,7 +903,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: responseError instanceof Error ? responseError.message @@ -797,7 +916,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: interaction.channelId, allowedChannelIds: this.allowedChannelIds, isSlashCommand, @@ -848,7 +967,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: interaction.commandName, error: responseError instanceof Error @@ -862,7 +981,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: interaction.commandName, }, "[DiscordService] interactionCreate ignored (custom validator returned false)", @@ -890,7 +1009,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: interaction.commandName, error: responseError instanceof Error @@ -904,7 +1023,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, 
commandName: interaction.commandName, error: error instanceof Error ? error.message : String(error), }, @@ -921,7 +1040,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling interaction", @@ -1044,7 +1163,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? err.message : String(err), }, "Error in channelUpdate handler", @@ -1086,7 +1205,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? err.message : String(err), }, "Error in roleUpdate handler", @@ -1155,7 +1274,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? err.message : String(err), }, "Error in guildMemberUpdate handler", @@ -1195,7 +1314,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? err.message : String(err), }, "Error in roleCreate handler", @@ -1235,7 +1354,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? 
err.message : String(err), }, "Error in roleDelete handler", @@ -1243,6 +1362,72 @@ export class DiscordService extends Service implements IDiscordService { } }); } // end if (isAuditLogEnabled) + + // ========================================================================= + // Voice State Update Handler (agent connect/disconnect/move tracking) + // ========================================================================= + this.client.on('voiceStateUpdate', async (oldState, newState) => { + try { + // Handle voice state updates for other users + await this.voiceManager?.handleVoiceStateUpdate(oldState, newState); + + // Check if this is the agent's own voice state change + // Check both oldState and newState members in case one is null + const agentId = this.client?.user?.id; + const isAgentStateChange = + (oldState.member?.id === agentId) || (newState.member?.id === agentId); + + if (isAgentStateChange && agentId) { + const oldChannelId = oldState.channelId; + const newChannelId = newState.channelId; + const guildId = newState.guild.id; + + // Update audio state (mute/deafen status) + await this.voiceManager?.updateAudioState(guildId, newState); + + // Agent left a voice channel (disconnect detected) + if (oldChannelId && !newChannelId) { + this.runtime.logger.log( + `[Voice] Agent disconnected from voice channel ${oldChannelId} in guild ${guildId}` + ); + await this.voiceManager?.handleAgentDisconnect(guildId, oldChannelId); + } + // Agent joined a voice channel + else if (!oldChannelId && newChannelId) { + this.runtime.logger.log( + `[Voice] Agent joined voice channel ${newChannelId} in guild ${guildId}` + ); + await this.voiceManager?.handleAgentConnect(guildId, newChannelId); + } + // Agent moved between channels + else if (oldChannelId && newChannelId && oldChannelId !== newChannelId) { + this.runtime.logger.log( + `[Voice] Agent moved from channel ${oldChannelId} to ${newChannelId} in guild ${guildId}` + ); + await 
this.voiceManager?.handleAgentChannelChange( + guildId, + oldChannelId, + newChannelId + ); + } + // Check for mute/deafen changes (same channel) + else if (oldChannelId === newChannelId) { + const oldMute = oldState.serverMute || oldState.selfMute; + const newMute = newState.serverMute || newState.selfMute; + const oldDeaf = oldState.serverDeaf || oldState.selfDeaf; + const newDeaf = newState.serverDeaf || newState.selfDeaf; + + if (oldMute !== newMute || oldDeaf !== newDeaf) { + this.runtime.logger.log( + `[Voice] Audio state changed in guild ${guildId}: mute=${newMute}, deaf=${newDeaf}` + ); + } + } + } + } catch (error) { + this.runtime.logger.error(`Error handling voice state update: ${error}`); + } + }); } /** @@ -1403,7 +1588,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandCount: commands.length, commands: sanitizedCommands, }, @@ -1412,7 +1597,7 @@ export class DiscordService extends Service implements IDiscordService { if (!this.client?.application) { this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Cannot register commands - Discord client application not available", ); return; @@ -1420,7 +1605,7 @@ export class DiscordService extends Service implements IDiscordService { if (!Array.isArray(commands) || commands.length === 0) { this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Cannot register commands - no commands provided", ); return; @@ -1432,7 +1617,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, command: sanitizeCommandForLogging(cmd), }, "Cannot register commands 
- invalid command (missing name or description)", @@ -1477,7 +1662,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, bypassCommands: Array.from(this.allowAllSlashCommands), }, "[DiscordService] Rebuilt bypassChannelWhitelist set from merged commands", @@ -1517,7 +1702,7 @@ export class DiscordService extends Service implements IDiscordService { if (!this.client?.application) { this.runtime.logger.error( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "Cannot register commands - Discord client application is not available", ); throw new Error("Discord client application is not available"); @@ -1541,7 +1726,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, count: transformedGlobalCommands.length, }, transformedGlobalCommands.length > 0 @@ -1552,7 +1737,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? 
err.message : String(err), }, "Failed to register/clear global commands", @@ -1586,7 +1771,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId, guildName: guild.name, }, @@ -1598,7 +1783,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId, guildName: guild.name, error: err.message, @@ -1631,7 +1816,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: cmd.name, guildId, }, @@ -1653,7 +1838,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: cmd.name, guildId: fullGuild.id, guildName: fullGuild.name, @@ -1665,7 +1850,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: cmd.name, guildId: fullGuild.id, guildName: fullGuild.name, @@ -1679,7 +1864,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: cmd.name, guildId, error: @@ -1702,7 +1887,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.info( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, newCommands: commands.length, totalCommands: this.slashCommands.length, globalCommands: transformedGlobalCommands.length, @@ 
-1725,7 +1910,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: registrationError.message, }, "Error registering Discord commands", @@ -1859,7 +2044,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.info( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId: fullGuild.id, guildName: fullGuild.name, generalCount: generalCommands.length, @@ -1873,7 +2058,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId: fullGuild.id, guildName: fullGuild.name, error: error instanceof Error ? error.message : String(error), @@ -1894,7 +2079,7 @@ export class DiscordService extends Service implements IDiscordService { world: { id: worldId, name: fullGuild.name, - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, serverId: fullGuild.id, metadata: { ownership: fullGuild.ownerId ? 
{ ownerId } : undefined, @@ -1950,7 +2135,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: interaction.channel?.id, }, "Null channel type for interaction", @@ -1982,7 +2167,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: interaction.commandName, type: interaction.commandType, channelId: interaction.channelId, @@ -2002,7 +2187,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: interaction.commandName, }, "[DiscordService] Slash command emitted to runtime", @@ -2011,7 +2196,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, commandName: interaction.commandName, error: error instanceof Error ? 
error.message : String(error), }, @@ -2037,7 +2222,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, customId: interaction.customId, }, "Received component interaction", @@ -2054,7 +2239,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId: userId, }, "User selections map unexpectedly missing", @@ -2068,7 +2253,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId: userId, customId: interaction.customId, values: interaction.values, @@ -2086,7 +2271,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, messageId, selections: userSelections[messageId], }, @@ -2106,7 +2291,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId: userId, customId: interaction.customId, }, @@ -2117,7 +2302,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, formSelections, }, "Form data being submitted", @@ -2140,7 +2325,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, customId: interaction.customId, }, "Acknowledged button interaction via fallback", @@ -2151,7 +2336,7 @@ 
export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: ackError instanceof Error ? ackError.message @@ -2208,7 +2393,7 @@ export class DiscordService extends Service implements IDiscordService { delete userSelections[messageId]; // No need to call set again this.runtime.logger.debug( - { src: "plugin:discord", agentId: this.runtime.agentId, messageId }, + { src: "plugin:discord", agentId: this.agentIdentifier, messageId }, "Cleared selections for message", ); @@ -2220,7 +2405,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling component interaction", @@ -2234,7 +2419,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: followUpError instanceof Error ? followUpError.message @@ -2253,12 +2438,9 @@ export class DiscordService extends Service implements IDiscordService { * @param {Guild} guild The guild to build rooms for. * @param {UUID} _worldId The ID of the world to associate with the rooms (currently unused in favor of direct channel to room mapping). * @returns {Promise} An array of standardized room objects. 
- * @private + * Made public to support WORLD_CONNECTED event emission from ClientRegistry */ - private async buildStandardizedRooms( - guild: Guild, - _worldId: UUID, - ): Promise { + public async buildStandardizedRooms(guild: Guild, _worldId: UUID): Promise { const rooms: any[] = []; for (const [channelId, channel] of guild.channels.cache) { @@ -2305,7 +2487,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: channel.id, error: error instanceof Error ? error.message : String(error), }, @@ -2347,9 +2529,9 @@ export class DiscordService extends Service implements IDiscordService { * * @param {Guild} guild - The guild from which to build the user list. * @returns {Promise} A promise that resolves with an array of standardized entity objects. - * @private + * Made public to support WORLD_CONNECTED event emission from ClientRegistry */ - private async buildStandardizedUsers(guild: Guild): Promise { + public async buildStandardizedUsers(guild: Guild): Promise { const entities: Entity[] = []; const botId = this.client?.user?.id; @@ -2358,7 +2540,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId: guild.id, memberCount: guild.memberCount.toLocaleString(), }, @@ -2393,87 +2575,106 @@ export class DiscordService extends Service implements IDiscordService { }, discord: member.user.globalName ? 
{ - username: tag, - name: member.displayName || member.user.username, - globalName: member.user.globalName, - userId: member.id, - } + username: tag, + name: member.displayName || member.user.username, + globalName: member.user.globalName, + userId: member.id, + } : { - username: tag, - name: member.displayName || member.user.username, - userId: member.id, - }, + username: tag, + name: member.displayName || member.user.username, + userId: member.id, + }, }, }); } } - // If cache has very few members, try to get online members + // If cache has very few members, try to get online members with timeout if (entities.length < 100) { - this.runtime.logger.debug( - { - src: "plugin:discord", - agentId: this.runtime.agentId, - guildId: guild.id, - }, - "Adding online members", - ); - // This is a more targeted fetch that is less likely to hit rate limits - const onlineMembers = await guild.members.fetch({ limit: 100 }); - - for (const [, member] of onlineMembers) { - if (member.id !== botId) { - const entityId = createUniqueUuid(this.runtime, member.id); - // Avoid duplicates - if (!entities.some((u) => u.id === entityId)) { - const tag = member.user.bot - ? 
`${member.user.username}#${member.user.discriminator}` - : member.user.username; - - entities.push({ - id: entityId, - names: Array.from( - new Set( - [ - member.user.username, - member.displayName, - member.user.globalName, - ].filter(Boolean) as string[], + try { + this.runtime.logger.debug( + { + src: 'plugin:discord', + agentId: this.agentIdentifier, + guildId: guild.id, + guildName: guild.name, + }, + 'Adding online members' + ); + // This is a more targeted fetch with timeout protection + const fetchPromise = guild.members.fetch({ limit: 100 }); + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('Member fetch timeout')), 5000) + ); + + const onlineMembers = (await Promise.race([fetchPromise, timeoutPromise])) as any; + + for (const [, member] of onlineMembers) { + if (member.id !== botId) { + const entityId = createUniqueUuid(this.runtime, member.id); + // Avoid duplicates + if (!entities.some((u) => u.id === entityId)) { + const tag = member.user.bot + ? `${member.user.username}#${member.user.discriminator}` + : member.user.username; + + entities.push({ + id: entityId, + names: Array.from( + new Set( + [member.user.username, member.displayName, member.user.globalName].filter( + Boolean + ) as string[] + ) ), - ), - agentId: this.runtime.agentId, - metadata: { - default: { - username: tag, - name: member.displayName || member.user.username, - }, - discord: member.user.globalName - ? { + agentId: this.runtime.agentId, + metadata: { + default: { + username: tag, + name: member.displayName || member.user.username, + }, + discord: member.user.globalName + ? 
{ username: tag, name: member.displayName || member.user.username, globalName: member.user.globalName, userId: member.id, } - : { + : { username: tag, name: member.displayName || member.user.username, userId: member.id, }, - }, - }); + }, + }); + } } } + } catch (fetchError) { + // Non-fatal: Member fetch timeout for large guilds is expected + this.runtime.logger.debug( + { + src: 'plugin:discord', + guildName: guild.name, + cachedMembers: entities.length, + error: fetchError instanceof Error ? fetchError.message : String(fetchError), + }, + 'Member fetch skipped (using cached members)' + ); } } } catch (error) { + // Outer catch for cache processing errors this.runtime.logger.error( { - src: "plugin:discord", - agentId: this.runtime.agentId, + src: 'plugin:discord', + agentId: this.agentIdentifier, guildId: guild.id, + guildName: guild.name, error: error instanceof Error ? error.message : String(error), }, - "Error fetching members", + 'Error processing members' ); } } else { @@ -2481,7 +2682,12 @@ export class DiscordService extends Service implements IDiscordService { try { let members = guild.members.cache; if (members.size === 0) { - members = await guild.members.fetch(); + this.runtime.logger.debug(`Fetching members for ${guild.name} (${guild.memberCount} members)`); + const fetchPromise = guild.members.fetch(); + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('Member fetch timeout')), 10000) + ); + members = (await Promise.race([fetchPromise, timeoutPromise])) as typeof members; } for (const [, member] of members) { @@ -2509,29 +2715,33 @@ export class DiscordService extends Service implements IDiscordService { }, discord: member.user.globalName ? 
{ - username: tag, - name: member.displayName || member.user.username, - globalName: member.user.globalName, - userId: member.id, - } + username: tag, + name: member.displayName || member.user.username, + globalName: member.user.globalName, + userId: member.id, + } : { - username: tag, - name: member.displayName || member.user.username, - userId: member.id, - }, + username: tag, + name: member.displayName || member.user.username, + userId: member.id, + }, }, }); } } + this.runtime.logger.debug(`Successfully synced ${entities.length} members from ${guild.name}`); } catch (error) { + // Non-fatal: Use cached members if fetch fails this.runtime.logger.error( { - src: "plugin:discord", - agentId: this.runtime.agentId, + src: 'plugin:discord', + agentId: this.agentIdentifier, guildId: guild.id, + guildName: guild.name, + cachedMembers: entities.length, error: error instanceof Error ? error.message : String(error), }, - "Error fetching members", + 'Member fetch failed, using cached members' ); } } @@ -2540,213 +2750,253 @@ export class DiscordService extends Service implements IDiscordService { } /** - * Handles tasks to be performed once the Discord client is fully ready and connected. - * This includes fetching guilds, scanning for voice data, and emitting connection events. + * Handles auto-joining voice channels if AUTO_JOIN_VOICE_CHANNEL_ID is configured. 
+ * + * This method supports multiple channels (comma-separated) and multiple bots, enabling + * complex deployment scenarios: + * - Single bot joining channels across multiple guilds + * - Multiple bots joining different channels within the same guild + * - Mixed scenarios (e.g., 3 bots across 2 guilds) + * + * **Why comma-separated IDs?** + * - Allows configuring multiple target channels in a single environment variable + * - Simplifies deployment configuration for multi-guild or multi-channel setups + * - Maintains backward compatibility (single ID still works) + * + * **Why iterate through all bot clients?** + * - Discord limitation: Each bot can only maintain ONE voice connection per guild + * - To join multiple channels in the same guild, you need multiple bot tokens + * - The ClientRegistry manages multiple bot instances via DISCORD_BOT_TOKENS + * + * **Algorithm:** + * 1. Parse channel IDs (trim whitespace for user convenience) + * 2. For each channel ID, find an available bot: + * - Bot must have access to the channel (same guild membership) + * - Bot must NOT already be connected to a voice channel in that guild + * 3. First available bot joins the channel + * 4. Track the connection to prevent duplicate joins + * * @private - * @returns {Promise} A promise that resolves when all on-ready tasks are completed. */ - private async onReady(readyClient) { - this.runtime.logger.success("Discord client ready"); - - // Initialize slash commands array (empty initially - commands registered via DISCORD_REGISTER_COMMANDS) - this.slashCommands = []; - - /** - * DISCORD_REGISTER_COMMANDS event handler - * - * Delegates to registerSlashCommands() method. - * Also handles deprecated allowAllChannels parameter for backward compatibility. 
- * - * @param params.commands - Array of commands to register - * @param params.allowAllChannels - (Deprecated) Map of command names to bypass flags - */ - this.runtime.registerEvent( - "DISCORD_REGISTER_COMMANDS", - async (params) => { - // Delegate to the public method first - it handles registration and bypassChannelWhitelist - await this.registerSlashCommands(params.commands); - - // Handle deprecated allowAllChannels flags AFTER successful registration (backward compatibility) - // The deprecated API can only ADD bypasses, not remove them - bypassChannelWhitelist on - // the command definition is authoritative. This prevents legacy code from accidentally - // overriding the new API's bypass settings. + private handleAutoJoinVoiceChannel(): void { + // Parse comma-separated channel IDs + // Why split and trim? Users may format the list as "id1, id2" with spaces for readability + const autoJoinChannelIds = (this.runtime.getSetting('AUTO_JOIN_VOICE_CHANNEL_ID') as string || '') + .split(',') + .map(id => id.trim()) + .filter(id => id.length > 0); + + if (autoJoinChannelIds.length === 0) { + this.runtime.logger.debug('AUTO_JOIN_VOICE_CHANNEL_ID not configured, skipping auto-join'); + return; + } + + this.runtime.logger.debug(`Auto-join configured for ${autoJoinChannelIds.length} channel(s): ${autoJoinChannelIds.join(', ')}`); + + // Why setTimeout with 5 seconds? + // - Bots need time to fully initialize and populate their guild caches + // - Discord API may not immediately provide complete guild/channel data after login + // - Multiple bots may login at different rates + // - 5 seconds is a safe buffer to ensure all bots are ready and guild caches are populated + const autoJoinTimeout = setTimeout(async () => { + try { + // Get all registered bot clients from the ClientRegistry + // Why use all clients instead of just this.client/this.voiceManager? 
+ // - Backward compatibility: this.client points to primary bot only + // - Multi-bot support: DISCORD_BOT_TOKENS can register multiple bots + // - Each bot has its own voiceManager that can join channels independently + const clients = this.clientRegistry.getAllClients(); + if (clients.length === 0) { + this.runtime.logger.warn('No Discord clients available for auto-join'); + return; + } + + this.runtime.logger.debug(`Scanning for channels with ${clients.length} bot(s)...`); + + // Why track active connections per guild-bot pair? + // - Discord API limitation: ONE voice connection per guild per bot token + // - Attempting to join a second channel in the same guild with the same bot will fail + // - Key format `${guildId}:${botId}` allows different bots in the same guild + // - Example: Bot1 in Guild A Channel 1, Bot2 in Guild A Channel 2 (both valid) + // - Counter-example: Bot1 in Guild A Channel 1 AND Channel 2 (invalid, only first succeeds) // - // To survive subsequent registerSlashCommands calls (which rebuild allowAllSlashCommands - // from this.slashCommands), we also update the command definition itself. - const allowAllChannelsMap = params.allowAllChannels ?? 
{}; - for (const [commandName, shouldBypass] of Object.entries( - allowAllChannelsMap, - )) { - if (shouldBypass) { - this.allowAllSlashCommands.add(commandName); - // Also update the command definition so bypass survives rebuild - const cmd = this.slashCommands.find((c) => c.name === commandName); - if (cmd) { - cmd.bypassChannelWhitelist = true; + // IMPORTANT: Pre-populate with existing connections to avoid duplicate join attempts + // If handleAutoJoinVoiceChannel() is called multiple times (retry, manual trigger), + // we need to know which bots are already connected to which guilds + const activeConnections = new Set(); + + // Pre-populate activeConnections with existing voice connections + for (const clientInfo of clients) { + const { client, voiceManager, config } = clientInfo; + const botId = client?.user?.id; + if (!botId) continue; + + // Skip clients without voiceManager (text-only clients or failed voiceManager construction) + if (!voiceManager) { + this.runtime.logger.debug(`Skipping bot ${config?.alias || botId} in pre-population: no voiceManager available`); + continue; + } + + // Check all guilds this bot is in for existing voice connections + for (const [guildId] of client.guilds.cache) { + const existingConnection = voiceManager.getVoiceConnection(guildId); + if (existingConnection) { + const connectionKey = `${guildId}:${botId}`; + activeConnections.add(connectionKey); + this.runtime.logger.debug(`Pre-existing voice connection found: bot ${botId} in guild ${guildId}`); } - this.runtime.logger.debug( - { - src: "plugin:discord", - agentId: this.runtime.agentId, - commandName, - }, - "[DiscordService] Command registered with allowAllChannels bypass (deprecated - use bypassChannelWhitelist instead)", - ); } - // Note: We intentionally ignore shouldBypass === false here. - // The deprecated allowAllChannels API should not remove bypasses set by - // bypassChannelWhitelist on the command definition (which is authoritative). 
} - }, - ); - // Check if audit log tracking is enabled (for permission change events) - const auditLogSettingForInvite = this.runtime.getSetting( - "DISCORD_AUDIT_LOG_ENABLED", - ); - const isAuditLogEnabledForInvite = - auditLogSettingForInvite !== "false" && - auditLogSettingForInvite !== false; - - // Generate invite URL using centralized permission tiers (MODERATOR_VOICE is recommended default) - // Note: If audit log tracking is enabled (DISCORD_AUDIT_LOG_ENABLED), you may need to manually - // grant ViewAuditLog permission to the bot role after it joins, as this is an elevated permission - // that should be granted per-server rather than requested in the OAuth invite. - const inviteUrl = readyClient.user?.id - ? generateInviteUrl(readyClient.user.id, "MODERATOR_VOICE") - : undefined; - - // Log a note if audit log tracking is enabled - if (isAuditLogEnabledForInvite) { - this.runtime.logger.info( - { src: "plugin:discord", agentId: this.runtime.agentId }, - "Audit log tracking enabled - ensure bot has ViewAuditLog permission in server settings", - ); - } - - // Use character name if available, otherwise fallback to username, then agentId - const agentName = - this.runtime.character.name || - readyClient.user?.username || - this.runtime.agentId; + // Track results for comprehensive logging at the end + // Why track results? Provides clear feedback about which channels were joined/failed + const joinResults: { + channelId: string; + status: 'joined' | 'not_found' | 'error'; + botAlias?: string; + channelName?: string; + guildName?: string; + error?: string + }[] = []; + + // Why iterate channels in outer loop and bots in inner loop? 
+ // - Prioritizes filling ALL channel slots before leaving bots idle + // - For each channel, we find the first available bot + // - Alternative (bots outer, channels inner) would fill one bot's capacity before using next bot + // - Current approach: More balanced distribution across bots + for (const channelId of autoJoinChannelIds) { + let channelJoined = false; + + // Try each bot until we find one that can join this channel + for (const clientInfo of clients) { + const { client, voiceManager, config } = clientInfo; + + // Why check isReady()? Bot might still be logging in or connecting to Discord gateway + if (!client?.isReady()) { + continue; + } - if (inviteUrl) { - this.runtime.logger.info( - { src: "plugin:discord", agentId: this.runtime.agentId, inviteUrl }, - "Bot invite URL generated", - ); - this.runtime.logger.info( - `Use this URL to add the "${agentName}" bot to your Discord server: ${inviteUrl}`, - ); - } else { - this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId }, - "Could not generate invite URL - bot user ID unavailable", - ); - } + // Skip clients without voiceManager (text-only clients or failed voiceManager construction) + if (!voiceManager) { + this.runtime.logger.debug(`Skipping bot ${config?.alias || client?.user?.id || 'unknown'} for channel ${channelId}: no voiceManager available`); + continue; + } - this.runtime.logger.success( - `Discord client logged in successfully as ${readyClient.user?.username || agentName}`, - ); + try { + // Why fetch the channel instead of using cache? + // - Cache might not be populated yet, especially right after startup + // - fetch() makes an API call to ensure we get current data + // - .catch(() => null) handles cases where bot doesn't have access (404 error) + const channel = await client.channels.fetch(channelId).catch(() => null); + + // Why check isVoiceBased()? 
Channel ID might be a text channel (user error) + if (channel && channel.isVoiceBased()) { + const guildId = channel.guild.id; + const botId = client.user?.id; + + if (!botId) continue; // Shouldn't happen if isReady(), but safety check + + const connectionKey = `${guildId}:${botId}`; + + // Why skip if activeConnections.has(connectionKey)? + // - This bot is already connected to a voice channel in this guild + // - Discord won't allow a second connection - would fail or disconnect from first + // - Move to next bot that might be available for this guild + if (activeConnections.has(connectionKey)) { + this.runtime.logger.debug(`Bot ${config.alias || botId} already active in guild ${channel.guild.name}, skipping channel ${channel.name}`); + continue; // Try next bot + } - const guilds = await this.client?.guilds.fetch(); - if (!guilds) { - this.runtime.logger.warn("Could not fetch guilds"); - return; - } - for (const [, guild] of guilds) { - // Disabled automatic voice joining - now controlled by joinVoiceChannel action - // await this.voiceManager?.scanGuild(fullGuild); + // Attempt to join the channel + this.runtime.logger.log(`Bot ${config.alias || botId} auto-joining channel: ${channel.name} (${channelId}) in ${channel.guild.name}`); + + // Why use voiceManager.joinChannel? + // - Each bot has its own VoiceManager instance managing its voice connections + // - VoiceManager handles the @discordjs/voice connection setup + await voiceManager.joinChannel(channel as BaseGuildVoiceChannel); + + // Mark this bot as busy in this guild + // Why add to activeConnections? 
Prevents this bot from being selected for another channel in same guild + activeConnections.add(connectionKey); + channelJoined = true; + + joinResults.push({ + channelId, + status: 'joined', + botAlias: config.alias || botId, + channelName: channel.name, + guildName: channel.guild.name + }); - // Send after a brief delay - const timeoutId = setTimeout(async () => { - // For each server the client is in, fire a connected event - try { - const fullGuild = await guild.fetch(); - this.runtime.logger.info( - `Discord server connected: ${fullGuild.name} (${fullGuild.id})`, - ); + // Why break here? + // - Channel successfully joined by this bot + // - No need to try other bots for this channel + // - Move to next channel ID in outer loop + break; + } + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + this.runtime.logger.warn(`Bot ${config.alias || 'unknown'} failed to join/check channel ${channelId}: ${errorMsg}`); + + // Why check !channelJoined before pushing error? + // - If channel was joined but then error occurred after, don't mark as error + // - Only record as error if the join actually failed + if (!channelJoined) { + joinResults.push({ + channelId, + status: 'error', + botAlias: config.alias, + error: errorMsg + }); + } + } + } - // Emit Discord-specific event with full guild object - this.runtime.emitEvent( - [DiscordEventTypes.WORLD_CONNECTED] as string[], - { - runtime: this.runtime, - server: fullGuild, - source: "discord", - } as any, - ); + // Why check if channel wasn't joined and not already in results? 
+ // - After trying all bots, if none could join, mark as not found + // - Could be: channel doesn't exist, no bot has access, or all bots busy in that guild + if (!channelJoined && !joinResults.some(r => r.channelId === channelId)) { + this.runtime.logger.warn(`Could not join channel ${channelId}: No available bot found with access or bot already busy in guild.`); + joinResults.push({ + channelId, + status: 'not_found' + }); + } + } - // Create platform-agnostic world data structure with simplified structure - const worldId = createUniqueUuid(this.runtime, fullGuild.id); - const ownerId = createUniqueUuid(this.runtime, fullGuild.ownerId); + // Why provide summary logging? + // - User gets clear overview of what succeeded/failed across all channels + // - Easier debugging compared to parsing individual log lines + // - Shows which bot joined which channel for multi-bot scenarios + const joined = joinResults.filter(r => r.status === 'joined'); + const notFound = joinResults.filter(r => r.status === 'not_found'); + const errors = joinResults.filter(r => r.status === 'error'); - const standardizedData = { - name: fullGuild.name, - runtime: this.runtime, - rooms: await this.buildStandardizedRooms(fullGuild, worldId), - entities: await this.buildStandardizedUsers(fullGuild), - world: { - id: worldId, - name: fullGuild.name, - agentId: this.runtime.agentId, - serverId: fullGuild.id, - metadata: { - ownership: fullGuild.ownerId ? 
{ ownerId } : undefined, - roles: { - [ownerId]: Role.OWNER, - }, - }, - } as World, - source: "discord", - }; + this.runtime.logger.log(`Auto-join summary: ${joined.length} joined, ${notFound.length} not found, ${errors.length} errors`); - // Emit standardized event - this.runtime.emitEvent([EventType.WORLD_CONNECTED], standardizedData); - } catch (error) { - // Add error handling to prevent crashes if the client is already destroyed - this.runtime.logger.error( - { - src: "plugin:discord", - agentId: this.runtime.agentId, - error: error instanceof Error ? error.message : String(error), - }, - "Error during Discord world connection", - ); + if (joined.length > 0) { + joined.forEach(r => { + this.runtime.logger.log(` ✓ ${r.channelName} in ${r.guildName} (Bot: ${r.botAlias})`); + }); } - }, 1000); - // Store the timeout reference to be able to cancel it when stopping - this.timeouts.push(timeoutId); - } + if (notFound.length > 0) { + notFound.forEach(r => { + this.runtime.logger.warn(` ✗ Channel ${r.channelId} not found`); + }); + } - // Validate audit log access for permission tracking (if enabled) - const auditLogEnabled = this.runtime.getSetting( - "DISCORD_AUDIT_LOG_ENABLED", - ); - if (auditLogEnabled !== "false" && auditLogEnabled !== false) { - try { - const testGuild = guilds.first(); - if (testGuild) { - const fullGuild = await testGuild.fetch(); - await fullGuild.fetchAuditLogs({ limit: 1 }); - this.runtime.logger.debug( - "Audit log access verified for permission tracking", - ); + if (errors.length > 0) { + errors.forEach(r => { + this.runtime.logger.error(` ✗ Channel ${r.channelId}: ${r.error}`); + }); } - } catch (err) { - this.runtime.logger.warn( - { - src: "plugin:discord", - agentId: this.runtime.agentId, - error: err instanceof Error ? 
err.message : String(err), - }, - "Cannot access audit logs - permission change alerts will not include executor info", - ); + } catch (error) { + this.runtime.logger.error(`Error in auto-join process: ${error instanceof Error ? error.message : String(error)}`); } - } - - this.client?.emit("voiceManagerReady"); + }, 5000); + // Track timeout for cleanup on service stop + this.timeouts.push(autoJoinTimeout); } /** @@ -2783,7 +3033,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, useCache, }, @@ -2799,7 +3049,7 @@ export class DiscordService extends Service implements IDiscordService { // Validate channel if (!channel) { this.runtime.logger.error( - { src: "plugin:discord", agentId: this.runtime.agentId, channelId }, + { src: "plugin:discord", agentId: this.agentIdentifier, channelId }, "Channel not found", ); return []; @@ -2807,7 +3057,7 @@ export class DiscordService extends Service implements IDiscordService { if (channel.type !== DiscordChannelType.GuildText) { this.runtime.logger.error( - { src: "plugin:discord", agentId: this.runtime.agentId, channelId }, + { src: "plugin:discord", agentId: this.agentIdentifier, channelId }, "Channel is not a text channel", ); return []; @@ -2816,7 +3066,7 @@ export class DiscordService extends Service implements IDiscordService { const guild = channel.guild; if (!guild) { this.runtime.logger.error( - { src: "plugin:discord", agentId: this.runtime.agentId, channelId }, + { src: "plugin:discord", agentId: this.agentIdentifier, channelId }, "Channel is not in a guild", ); return []; @@ -2830,7 +3080,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId: guild.id, memberCount: guild.memberCount.toLocaleString(), }, @@ -2844,7 
+3094,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, cacheSize: guild.members.cache.size, }, "Using cached members", @@ -2854,7 +3104,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, guildId: guild.id, }, "Fetching members for guild", @@ -2863,7 +3113,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, memberCount: members.size.toLocaleString(), }, "Fetched members", @@ -2873,7 +3123,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? 
error.message : String(error), }, "Error fetching members", @@ -2883,7 +3133,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, cacheSize: members.size, }, "Fallback to cache", @@ -2895,7 +3145,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: channel.id, }, "Filtering members for channel access", @@ -2926,7 +3176,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: channel.id, memberCount: channelMembers.length.toLocaleString(), }, @@ -2937,7 +3187,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error fetching channel members", @@ -2978,7 +3228,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, error: error instanceof Error ? error.message : String(error), }, @@ -3003,7 +3253,7 @@ export class DiscordService extends Service implements IDiscordService { const preposition = type === "add" ? 
"to" : "from"; this.runtime.logger.debug( - { src: "plugin:discord", agentId: this.runtime.agentId, type }, + { src: "plugin:discord", agentId: this.agentIdentifier, type }, `Reaction ${actionVerb}`, ); @@ -3027,7 +3277,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Failed to fetch partial reaction", @@ -3053,7 +3303,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, roomId, }, @@ -3080,6 +3330,7 @@ export class DiscordService extends Service implements IDiscordService { ((user as any).globalName as string | undefined) || (reaction.message.author as any)?.displayName || userName; + //const name = reaction.message.member?.displayName || reaction.message.author?.displayName || userName; // Get channel type once and reuse const channelType = await this.getChannelType( @@ -3123,7 +3374,7 @@ export class DiscordService extends Service implements IDiscordService { const callback: HandlerCallback = async (content): Promise => { if (!reaction.message.channel) { this.runtime.logger.error( - { src: "plugin:discord", agentId: this.runtime.agentId }, + { src: "plugin:discord", agentId: this.agentIdentifier }, "No channel found for reaction message", ); return []; @@ -3154,7 +3405,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.error( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error handling reaction", @@ -3174,14 +3425,17 @@ export class DiscordService extends Service implements IDiscordService { } /** - * Handles reaction removal. 
+ * Handles reaction removal by delegating to the generic handleReaction method. * @private */ private async handleReactionRemove( reaction: MessageReaction | PartialMessageReaction, user: User | PartialUser, ) { - await this.handleReaction(reaction, user, "remove"); + // Delegate to handleReaction which properly handles both 'add' and 'remove' types + // with correct event emission (DiscordEventTypes.REACTION_REMOVED) and + // uses messageServerId instead of deprecated serverId + await this.handleReaction(reaction, user, 'remove'); } /** @@ -3290,7 +3544,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, state, }, @@ -3302,7 +3556,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), channelId, }, @@ -3503,7 +3757,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: errorMsg, cause: String(causeMsg), causeCode, @@ -3536,7 +3790,7 @@ export class DiscordService extends Service implements IDiscordService { ): Promise { if (!this.client?.isReady()) { this.runtime.logger.warn( - { src: "plugin:discord", agentId: this.runtime.agentId, channelId }, + { src: "plugin:discord", agentId: this.agentIdentifier, channelId }, "Discord client not ready for history fetch", ); return { @@ -3551,7 +3805,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, channelType: fetchedChannel?.type ?? 
null, }, @@ -3626,8 +3880,8 @@ export class DiscordService extends Service implements IDiscordService { if (!options.force && spiderState && spiderState.newestMessageId) { const lastDate = spiderState.newestMessageTimestamp ? new Date(spiderState.newestMessageTimestamp) - .toISOString() - .split("T")[0] + .toISOString() + .split("T")[0] : "unknown"; this.runtime.logger.info( `#${channelName}: Catching up on new messages since ${lastDate}`, @@ -3706,7 +3960,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, limit: options.limit, }, @@ -3787,7 +4041,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, page: pagesProcessed, }, @@ -3809,7 +4063,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, memoryId: memory.id, error: error instanceof Error ? error.message : String(error), }, @@ -3870,8 +4124,8 @@ export class DiscordService extends Service implements IDiscordService { before = spiderState.oldestMessageId; const oldestDate = spiderState.oldestMessageTimestamp ? 
new Date(spiderState.oldestMessageTimestamp) - .toISOString() - .split("T")[0] + .toISOString() + .split("T")[0] : "unknown"; this.runtime.logger.info( `#${channelName}: Resuming backfill from ${oldestDate}`, @@ -3888,7 +4142,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, limit: options.limit, }, @@ -4003,7 +4257,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, page: pagesProcessed, }, @@ -4025,7 +4279,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.warn( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, memoryId: memory.id, error: error instanceof Error ? error.message : String(error), }, @@ -4072,7 +4326,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, batchSize: batch.size, storedThisBatch: batchMemories.length, @@ -4088,7 +4342,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId, limit: options.limit, }, @@ -4107,7 +4361,7 @@ export class DiscordService extends Service implements IDiscordService { // But DON'T mark as fullyBackfilled - we may have more older history to fetch if (consecutiveNoNew >= 3) { this.runtime.logger.debug( - { src: "plugin:discord", agentId: this.runtime.agentId, channelId }, + { src: "plugin:discord", agentId: this.agentIdentifier, channelId }, "Stopping backfill: 3 consecutive pages of existing messages (will resume from oldest 
on next run)", ); break; @@ -4217,12 +4471,66 @@ export class DiscordService extends Service implements IDiscordService { textContent = processed?.processedContent && - processed.processedContent.trim().length > 0 + processed.processedContent.trim().length > 0 ? processed.processedContent : message.content || " "; attachments = processed?.attachments ?? []; } + // Fetch referenced message chain if this is a reply (up to 5 levels deep) + const replyChain: Array<{ author: string; authorId: string; text: string }> = []; + if (message.reference?.messageId) { + const MAX_REPLY_DEPTH = 5; + let currentMessageId: string | undefined = message.reference.messageId; + let depth = 0; + + while (currentMessageId && depth < MAX_REPLY_DEPTH) { + try { + const referencedMessage = await message.channel.messages.fetch(currentMessageId); + if (referencedMessage) { + const author = referencedMessage.author?.username || + (referencedMessage.member as any)?.displayName || + 'Unknown'; + const authorId = referencedMessage.author?.id || ''; + const text = referencedMessage.content || ''; + + if (text) { + replyChain.push({ author, authorId, text }); + } + + // Check if this message is also a reply + currentMessageId = referencedMessage.reference?.messageId; + } else { + break; + } + } catch (fetchError) { + // Referenced message may have been deleted or inaccessible + this.runtime.logger.debug( + { + src: 'plugin:discord', + agentId: this.agentIdentifier, + messageId: currentMessageId, + error: fetchError instanceof Error ? fetchError.message : String(fetchError), + }, + 'Could not fetch referenced message in chain' + ); + break; + } + depth++; + } + } + + // Format reply chain and embed directly in text content (so agent sees it without core changes) + // Format: "[Reply to Author]: message text" prepended to the actual message + const replyContext = replyChain.length > 0 + ? replyChain.map((r, i) => `[${i === 0 ? 
'Reply to' : 'Which was replying to'} ${r.author}]: ${r.text}`).join('\n') + : undefined; + + // Embed reply context directly in the message text so agent sees full context + const finalTextContent = replyContext + ? `${replyContext}\n\n${textContent || ' '}` + : (textContent || ' '); + const metadata = { type: "message" as const, entityName: @@ -4239,6 +4547,11 @@ export class DiscordService extends Service implements IDiscordService { "guild" in message.channel && message.channel.guild ? message.channel.guild.id : message.guild?.id, + // Store immediate reply info for easy programmatic access + inReplyToAuthor: replyChain.length > 0 ? replyChain[0].author : undefined, + // Raw Discord IDs for the message being replied to (not transformed by createUniqueUuid) + discordInReplyToMessageId: message.reference?.messageId, + discordInReplyToUserId: replyChain.length > 0 ? replyChain[0].authorId : undefined, tags: [] as string[], ...options?.extraMetadata, }; @@ -4249,7 +4562,7 @@ export class DiscordService extends Service implements IDiscordService { agentId: this.runtime.agentId, roomId, content: { - text: textContent || " ", + text: finalTextContent, attachments, source: "discord", channelType, @@ -4363,7 +4676,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, authorCount: uniqueAuthors.size, error: error instanceof Error ? 
error.message : String(error), }, @@ -4387,8 +4700,7 @@ export class DiscordService extends Service implements IDiscordService { } // Additional cleanup if needed (e.g., voice manager) if (this.voiceManager) { - // Assuming voiceManager has a stop or cleanup method - // await this.voiceManager.stop(); + this.voiceManager.cleanup(); } this.runtime.logger.info("Discord service stopped"); } @@ -4424,7 +4736,7 @@ export class DiscordService extends Service implements IDiscordService { this.runtime.logger.debug( { src: "plugin:discord", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelType: channel.type, }, "Unknown channel type, defaulting to GROUP", @@ -4432,6 +4744,131 @@ export class DiscordService extends Service implements IDiscordService { return ChannelType.GROUP; } } + + /** + * Sets the status of a voice channel. + * Voice channel status is a text message that appears at the top of the channel. + * + * @param {string} channelId - The Discord ID of the voice channel. + * @param {string} status - The status text to set (max 500 characters, empty string to clear). + * @returns {Promise} Whether the status was successfully set. 
+   */
+  public async setVoiceChannelStatus(channelId: string, status: string): Promise<boolean> {
+    try {
+      if (!this.client?.isReady()) {
+        this.runtime.logger.error('[Discord] Client not ready for setting voice channel status.');
+        return false;
+      }
+
+      // Fetch the channel; a rejected fetch is reported by the catch block at the end of this method
+      const channel = await this.client.channels.fetch(channelId);
+
+      if (!channel) {
+        this.runtime.logger.error(`[Discord] Channel ${channelId} not found.`);
+        return false;
+      }
+
+      // Verify it's a voice channel (every other channel type is rejected with `false`)
+      if (channel.type !== DiscordChannelType.GuildVoice) {
+        this.runtime.logger.error(`[Discord] Channel ${channelId} is not a voice channel.`);
+        return false;
+      }
+
+      const voiceChannel = channel as VoiceChannel;
+
+      // Validate status length (Discord limit is 500 characters); over-long text is truncated, not rejected
+      if (status.length > 500) {
+        this.runtime.logger.warn(`[Discord] Status truncated to 500 characters (was ${status.length}).`);
+        status = status.substring(0, 500);
+      }
+
+      // Set the voice channel status using REST API; an empty status is sent as null to clear it
+      // Discord API endpoint: PUT /channels/{channel.id}/voice-status
+      await this.client.rest.put(
+        `/channels/${channelId}/voice-status`,
+        {
+          body: {
+            status: status || null,
+          },
+        }
+      );
+
+      this.runtime.logger.log(`[Discord] Set voice channel status for ${voiceChannel.name}: "${status}"`);
+
+      return true;
+    } catch (error) {
+      this.runtime.logger.error(
+        `[Discord] Error setting voice channel status: ${error instanceof Error ? error.message : String(error)}`
+      );
+      return false;
+    }
+  }
+
+  /**
+   * Sets the bot's "listening to" activity/presence.
+   * This updates what users see under the bot's name in the member list.
+   *
+   * @param {string} activity - The activity text (e.g., "Spotify", "your commands").
+   * @param {string} [url] - Optional URL for streaming activity.
+   * @returns {Promise<boolean>} Whether the activity was successfully set.
+   */
+  public async setListeningActivity(activity: string, url?: string): Promise<boolean> {
+    try {
+      if (!this.client?.isReady()) {
+        this.runtime.logger.error('[Discord] Client not ready for setting listening activity.');
+        return false;
+      }
+
+      if (!this.client.user) {
+        this.runtime.logger.error('[Discord] Client user not available.');
+        return false;
+      }
+
+      // Set the activity with listening type. NOTE(review): `url` is described as a streaming URL - confirm it has any effect for a listening activity
+      await this.client.user.setActivity(activity, {
+        type: 2, // ActivityType.Listening
+        url,
+      });
+
+      this.runtime.logger.log(`[Discord] Set listening activity: "${activity}"`);
+      return true;
+    } catch (error) {
+      this.runtime.logger.error(
+        `[Discord] Error setting listening activity: ${error instanceof Error ? error.message : String(error)}`
+      );
+      return false;
+    }
+  }
+
+  /**
+   * Clears the bot's activity/presence, resetting to default.
+   *
+   * @returns {Promise<boolean>} Whether the activity was successfully cleared.
+   */
+  public async clearActivity(): Promise<boolean> {
+    try {
+      if (!this.client?.isReady()) {
+        this.runtime.logger.error('[Discord] Client not ready for clearing activity.');
+        return false;
+      }
+
+      if (!this.client.user) {
+        this.runtime.logger.error('[Discord] Client user not available.');
+        return false;
+      }
+
+      // Clear the activity by publishing a presence whose activities list is empty
+      await this.client.user.setPresence({ activities: [] });
+
+      this.runtime.logger.log('[Discord] Cleared activity/presence.');
+      return true;
+    } catch (error) {
+      this.runtime.logger.error(
+        `[Discord] Error clearing activity: ${error instanceof Error ? error.message : String(error)}`
+      );
+      return false;
+    }
+  }
 }
 
 export default DiscordService;
diff --git a/src/sinks/discordAudioSink.ts b/src/sinks/discordAudioSink.ts
new file mode 100644
index 0000000..fbcf812
--- /dev/null
+++ b/src/sinks/discordAudioSink.ts
@@ -0,0 +1,273 @@
+import { EventEmitter } from 'events';
+import type { Readable } from 'node:stream';
+import { logger } from '@elizaos/core';
+import type { IAudioSink, AudioSinkStatus } from '../contracts';
+import type { VoiceManager } from '../voice';
+
+/**
+ * DiscordAudioSink - Implements IAudioSink for Discord voice connections
+ *
+ * WHY THIS WRAPPER:
+ * VoiceManager is complex - it handles multiple guilds, channels, audio players,
+ * connection states, etc. Consumer code (music-player) shouldn't need to know:
+ * - How Discord manages voice connections
+ * - What an AudioPlayer is
+ * - How to handle Discord reconnection states
+ *
+ * SIMPLIFIED INTERFACE:
+ * ```
+ * feed(stream) → play this audio
+ * stop() → stop audio
+ * status → are you connected?
+ * event 'statusChange' → you reconnected/disconnected
+ * ```
+ *
+ * WHY STATUS MONITORING:
+ * Discord voice connections can disconnect for many reasons (network hiccups,
+ * region changes, voice server migrations). The sink monitors the VoiceConnection
+ * and translates its complex state machine into simple status events.
+ *
+ * WHO HANDLES RECONNECTION:
+ * NOT this class. The sink just reports "I'm connected again". It's MusicService's
+ * job to say "great, here's a new audio stream". This separation of concerns means:
+ * - Discord plugin: manages Discord connection lifecycle
+ * - Music plugin: manages audio stream lifecycle
+ * - Neither knows about the other's internals
+ *
+ * Wraps VoiceManager to provide a clean audio sink interface.
+ * Emits status changes based on voice connection state.
+ * Does NOT handle reconnect logic - that's the orchestrator's job.
+ */ +export class DiscordAudioSink extends EventEmitter implements IAudioSink { + readonly id: string; + private guildId: string; + private voiceManager: VoiceManager; + private _status: AudioSinkStatus = 'disconnected'; + + constructor(id: string, guildId: string, voiceManager: VoiceManager) { + super(); + this.id = id; + this.guildId = guildId; + this.voiceManager = voiceManager; + + // Monitor voice connection state + this.setupVoiceConnectionMonitoring(); + } + + /** + * Get current status + */ + get status(): AudioSinkStatus { + return this._status; + } + + /** + * Feed an audio stream into this sink + * @param stream Audio stream to play + */ + async feed(stream: Readable): Promise { + logger.debug(`[DiscordAudioSink:${this.id}] Feeding audio stream`); + + logger.debug(`[DiscordAudioSink:${this.id}] Calling voiceManager.playAudio`); + + try { + // Use VoiceManager to play audio + await this.voiceManager.playAudio(stream, { + guildId: this.guildId, + channel: 1, // Music channel + }); + + logger.debug(`[DiscordAudioSink:${this.id}] Audio stream started`); + } catch (error) { + logger.error(`[DiscordAudioSink:${this.id}] Error feeding stream: ${error}`); + this.emit('error', error instanceof Error ? error : new Error(String(error))); + throw error; + } + } + + /** + * Stop current audio playback + */ + async stop(): Promise { + logger.debug(`[DiscordAudioSink:${this.id}] Stopping audio`); + + try { + await this.voiceManager.stopAudio(this.guildId, 1); + } catch (error) { + logger.error(`[DiscordAudioSink:${this.id}] Error stopping: ${error}`); + this.emit('error', error instanceof Error ? error : new Error(String(error))); + } + } + + /** + * Get human-readable description + */ + getDescription(): string { + return `Discord Audio Sink (Guild: ${this.guildId})`; + } + + /** + * Setup monitoring of voice connection state + * + * WHY MONITOR CONNECTION: + * VoiceConnection has a complex state machine (ready, connecting, disconnected, etc.). 
+ * We translate this into simple AudioSinkStatus (connected, disconnected, reconnecting). + * + * This abstraction means: + * - Music-player doesn't need to understand Discord's state machine + * - We can emit events when connection is restored + * - Orchestrator (MusicService) can react to reconnections + * + * STATE MAPPING: + * - Discord 'ready' → AudioSink 'connected' (can receive audio) + * - Discord 'disconnected/destroyed' → AudioSink 'disconnected' (can't receive audio) + * - Discord 'connecting/signalling' → AudioSink 'reconnecting' (attempting to restore) + */ + private setupVoiceConnectionMonitoring(): void { + // Try to get and monitor voice connection + this.tryAttachToConnection(); + + // WHY POLL FOR CONNECTION: + // The sink might be created BEFORE the bot joins a voice channel. + // We need to periodically check for a new connection until we find one. + // + // WHY NOT STOP POLLING ON ATTACH: + // Voice connections can disconnect (network issues, bot kicked, etc). + // When disconnect happens, the stateChange listener registered in tryAttachToConnection() sets connectionAttached to false. + // If we stopped polling on initial attach, we couldn't detect reconnection. + // Keeping the poll running allows automatic re-attachment after disconnects. 
+ this.connectionPollInterval = setInterval(() => { + if (!this.connectionAttached) { + // Not attached - try to find and attach to a connection + this.tryAttachToConnection(); + } + // If already attached, do nothing - stateChangeListener handles state updates + }, 500); // Check every 500ms + } + + private connectionPollInterval: NodeJS.Timeout | null = null; + private connectionAttached = false; + // Store reference to the connection and listener for cleanup + // Without storing these, we can't remove the listener in destroy(), causing memory leaks + private attachedConnection: any = null; + private stateChangeListener: ((oldState: any, newState: any) => void) | null = null; + + /** + * Try to find and attach to a voice connection + */ + private tryAttachToConnection(): void { + const connection = this.voiceManager.getVoiceConnection(this.guildId); + + if (!connection) { + logger.debug(`[DiscordAudioSink:${this.id}] No voice connection yet for guild ${this.guildId}`); + this.updateStatus('disconnected'); + return; + } + + // Clean up any previous listener before attaching to new/same connection + // This prevents listener accumulation when re-attaching after disconnect + if (this.attachedConnection && this.stateChangeListener) { + try { + this.attachedConnection.off('stateChange', this.stateChangeListener); + logger.debug(`[DiscordAudioSink:${this.id}] Cleaned up previous stateChange listener`); + } catch { + // Connection may already be destroyed, that's fine + } + } + + // Mark as attached so the poll stops calling this method. NOTE(review): the flag is set even when the status read below is already 'destroyed'; if getVoiceConnection() can return a destroyed connection, no later stateChange would clear the flag and the poll would never re-attach - confirm + this.connectionAttached = true; + this.attachedConnection = connection; + logger.debug(`[DiscordAudioSink:${this.id}] Attached to voice connection for guild ${this.guildId}`); + + // Initial status based on connection state + const initialStatus = connection.state.status; + if (initialStatus === 'ready') { + this.updateStatus('connected'); + } else if (initialStatus === 'disconnected' || initialStatus === 'destroyed') { + 
this.updateStatus('disconnected'); + } else { + this.updateStatus('reconnecting'); + } + + // Create named listener so we can remove it later + // Anonymous listeners can't be removed, causing listener leaks + this.stateChangeListener = (oldState: any, newState: any) => { + logger.debug( + `[DiscordAudioSink:${this.id}] Voice connection state: ${oldState.status} -> ${newState.status}` + ); + + switch (newState.status) { + case 'ready': + this.updateStatus('connected'); + break; + case 'disconnected': + case 'destroyed': + this.updateStatus('disconnected'); + this.connectionAttached = false; // Allow re-attach if reconnected + break; + case 'connecting': + case 'signalling': + this.updateStatus('reconnecting'); + break; + } + }; + + // Monitor state changes + connection.on('stateChange', this.stateChangeListener); + } + + /** + * Update status and emit event + */ + private updateStatus(newStatus: AudioSinkStatus): void { + if (this._status === newStatus) { + return; // No change + } + + const oldStatus = this._status; + this._status = newStatus; + + logger.debug(`[DiscordAudioSink:${this.id}] Status: ${oldStatus} -> ${newStatus}`); + this.emit('statusChange', newStatus); + } + + /** + * Clean up resources + */ + destroy(): void { + logger.debug(`[DiscordAudioSink:${this.id}] Destroying sink`); + + // Stop polling for connection + if (this.connectionPollInterval) { + clearInterval(this.connectionPollInterval); + this.connectionPollInterval = null; + } + + // Remove the stateChange listener from the voice connection + // Without this, the listener keeps running even after destroy(), causing memory leaks + // and potential errors when the sink is garbage collected but the listener fires + if (this.attachedConnection && this.stateChangeListener) { + try { + this.attachedConnection.off('stateChange', this.stateChangeListener); + logger.debug(`[DiscordAudioSink:${this.id}] Removed stateChange listener from voice connection`); + } catch (error) { + 
logger.debug(`[DiscordAudioSink:${this.id}] Error removing stateChange listener: ${error}`); + } + } + + // Null out references to allow garbage collection and prevent reuse + this.attachedConnection = null; + this.stateChangeListener = null; + this.connectionAttached = false; + + // Stop any current playback + this.stop().catch((error) => { + logger.debug(`Error stopping during destroy: ${error}`); + }); + + // Remove all listeners from this EventEmitter instance + this.removeAllListeners(); + } +} + diff --git a/src/sinks/index.ts b/src/sinks/index.ts new file mode 100644 index 0000000..071d64b --- /dev/null +++ b/src/sinks/index.ts @@ -0,0 +1,2 @@ +export { DiscordAudioSink } from './discordAudioSink'; + diff --git a/src/tests.ts b/src/tests.ts index 2b5b6b5..ff2ff01 100644 --- a/src/tests.ts +++ b/src/tests.ts @@ -10,7 +10,7 @@ import { import { type IAgentRuntime, ModelType, type TestSuite, logger } from '@elizaos/core'; import { ChannelType, Events, type TextChannel, AttachmentBuilder } from 'discord.js'; import type { DiscordService } from './service'; -import { ServiceType } from './types'; +import { DISCORD_SERVICE_NAME } from './constants'; import { sendMessageInChunks } from './utils'; const TEST_IMAGE_URL = @@ -73,7 +73,7 @@ export class DiscordTestSuite implements TestSuite { */ async testCreatingDiscordClient(runtime: IAgentRuntime) { try { - this.discordClient = runtime.getService(ServiceType.DISCORD) as DiscordService; + this.discordClient = runtime.getService(DISCORD_SERVICE_NAME) as DiscordService; if (!this.discordClient) { throw new Error('Failed to get DiscordService from runtime.'); } diff --git a/src/types.ts b/src/types.ts index 4908712..e1aa7d6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -8,6 +8,7 @@ import type { Media, ChannelType, IAgentRuntime, + Service, } from "@elizaos/core"; import type { Channel, @@ -19,10 +20,11 @@ import type { MessageReaction, User, VoiceState, -} from "discord.js"; +} from 'discord.js'; +import type { 
Readable } from 'node:stream'; /** - * Discord-specific event types + * Discord event types for custom event emission */ export enum DiscordEventTypes { // Message events (prefixed versions of core events) @@ -54,15 +56,17 @@ export enum DiscordEventTypes { VOICE_STATE_CHANGED = "DISCORD_VOICE_STATE_CHANGED", // Permission audit events - CHANNEL_PERMISSIONS_CHANGED = "DISCORD_CHANNEL_PERMISSIONS_CHANGED", - ROLE_PERMISSIONS_CHANGED = "DISCORD_ROLE_PERMISSIONS_CHANGED", - MEMBER_ROLES_CHANGED = "DISCORD_MEMBER_ROLES_CHANGED", - ROLE_CREATED = "DISCORD_ROLE_CREATED", - ROLE_DELETED = "DISCORD_ROLE_DELETED", + CHANNEL_PERMISSIONS_CHANGED = 'DISCORD_CHANNEL_PERMISSIONS_CHANGED', + ROLE_PERMISSIONS_CHANGED = 'DISCORD_ROLE_PERMISSIONS_CHANGED', + MEMBER_ROLES_CHANGED = 'DISCORD_MEMBER_ROLES_CHANGED', + ROLE_CREATED = 'DISCORD_ROLE_CREATED', + ROLE_DELETED = 'DISCORD_ROLE_DELETED', + VOICE_TRANSCRIPTION = 'DISCORD_VOICE_TRANSCRIPTION', } + /** - * Discord-specific message received payload + * Discord-specific payload for a received message */ export interface DiscordMessageReceivedPayload extends MessagePayload { /** The original Discord message */ @@ -495,7 +499,7 @@ export interface DiscordEventPayloadMap { * @property {DiscordJsClient} client - The Discord client object. * @property {Character} character - The character object. 
*/ -export interface IDiscordService { +export interface IDiscordService extends Service { // Allow client to be null to handle initialization failures client: DiscordJsClient | null; character: Character; @@ -517,33 +521,83 @@ export const ServiceType = { DISCORD: "discord", } as const; -export interface DiscordComponentOptions { - type: number; - custom_id: string; - label?: string; - style?: number; - placeholder?: string; - min_values?: number; - max_values?: number; - options?: Array<{ - label: string; - value: string; - description?: string; - }>; -} - -export interface DiscordActionRow { - type: 1; - components: DiscordComponentOptions[]; -} - -// maybe discord character settings makes more sense? +/** + * Discord settings interface + */ export interface DiscordSettings { - allowedChannelIds?: string[]; + /** Whether to ignore messages from other bots */ shouldIgnoreBotMessages?: boolean; + + /** Whether to ignore direct messages */ shouldIgnoreDirectMessages?: boolean; + + /** Whether to respond only when mentioned */ shouldRespondOnlyToMentions?: boolean; - //[key: string]: any; // still allows extension + + /** List of allowed channel IDs (if empty/undefined, all channels allowed) */ + allowedChannelIds?: string[]; + + /** Volume level when ducked during voice activity (0.0 to 1.0, default: 0.2) */ + voiceDuckVolume?: number; + + /** Milliseconds of silence before restoring volume (default: 60000) */ + voiceDuckSilenceTimeout?: number; + + /** Milliseconds to gradually restore volume (default: 3000) */ + voiceDuckRampDuration?: number; + + /** Threshold for voice activity detection (0.0 to 1.0, default: 0.1) */ + voiceSpeakingThreshold?: number; + + /** Enable listen-only mode for voice channels (transcribe but don't respond, default: false) */ + voiceListenOnly?: boolean; +} + + +/** + * Represents a voice connection target that can receive audio + */ +export interface VoiceTarget { + /** Unique identifier: bot-uuid:guild-id:channel-id */ + id: string; + 
+ /** Discord client user ID */ + botId: string; + + /** Optional friendly name for the bot */ + botAlias?: string; + + /** Discord guild (server) ID */ + guildId: string; + + /** Discord channel ID */ + channelId: string; + + /** Human-readable channel name */ + channelName: string; + + /** Play an audio stream to this target */ + play(stream: Readable): Promise; + + /** Stop audio playback on this target */ + stop(): Promise; + + /** Get connection status */ + getStatus(): 'connected' | 'disconnected'; +} + +/** + * Configuration for a Discord bot client + */ +export interface DiscordBotConfig { + /** Discord bot token */ + token: string; + + /** Optional friendly alias */ + alias?: string; + + /** Channels to auto-join on startup */ + autoJoin?: string[]; } /** @@ -609,3 +663,44 @@ export interface ChannelHistoryResult { fullyBackfilled: boolean; }; } + +/** + * Discord select menu option + */ +export interface DiscordSelectOption { + label: string; + value: string; + description?: string; +} + +/** + * Discord component options (buttons, select menus, etc.) 
+ */ +export interface DiscordComponentOptions { + /** Component type: 2 = Button, 3 = Select Menu */ + type: number; + /** Custom ID for the component */ + custom_id: string; + /** Button label */ + label?: string; + /** Button style (1-5) */ + style?: number; + /** Select menu placeholder */ + placeholder?: string; + /** Minimum values for select menu */ + min_values?: number; + /** Maximum values for select menu */ + max_values?: number; + /** Options for select menu */ + options?: DiscordSelectOption[]; +} + +/** + * Discord action row containing components + */ +export interface DiscordActionRow { + /** Row type: 1 = Action Row */ + type: 1; + /** Components in this row */ + components: DiscordComponentOptions[]; +} diff --git a/src/utils.ts b/src/utils.ts index b68e9ac..8b92d52 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -177,6 +177,68 @@ export function extractUrls(text: string, runtime?: IAgentRuntime): string[] { }); } +/** + * Checks if a URL is a base64 data URL + * + * @param {string} url - The URL to check + * @returns {boolean} True if the URL is a base64 data URL + */ +export function isDataUrl(url: string): boolean { + return url.startsWith('data:'); +} + +/** + * Parses a data URL and returns the mime type and buffer + * + * @param {string} dataUrl - The data URL to parse + * @returns {{ mimeType: string; buffer: Buffer } | null} The parsed data or null if invalid + */ +export function parseDataUrl(dataUrl: string): { mimeType: string; buffer: Buffer } | null { + const match = dataUrl.match(/^data:([^;]+);base64,(.+)$/); + if (!match) { + return null; + } + + const [, mimeType, base64Data] = match; + try { + const buffer = Buffer.from(base64Data, 'base64'); + return { mimeType, buffer }; + } catch { + return null; + } +} + +/** + * Gets the file extension from a MIME type + * + * @param {string} mimeType - The MIME type + * @returns {string} The file extension (with dot) + */ +function getExtensionFromMimeType(mimeType: string): string { + 
const mimeToExtension: Record = { + 'image/png': '.png', + 'image/jpeg': '.jpg', + 'image/jpg': '.jpg', + 'image/gif': '.gif', + 'image/webp': '.webp', + 'image/svg+xml': '.svg', + 'image/bmp': '.bmp', + 'image/ico': '.ico', + 'video/mp4': '.mp4', + 'video/webm': '.webm', + 'video/ogg': '.ogg', + 'video/quicktime': '.mov', + 'audio/mpeg': '.mp3', + 'audio/mp3': '.mp3', + 'audio/wav': '.wav', + 'audio/ogg': '.ogg', + 'audio/aac': '.aac', + 'application/pdf': '.pdf', + 'text/plain': '.txt', + }; + return mimeToExtension[mimeType] || ''; +} + /** * Generates a filename with proper extension from Media object. * Extracts extension from URL if available, otherwise infers from contentType. @@ -187,20 +249,29 @@ export function extractUrls(text: string, runtime?: IAgentRuntime): string[] { export function getAttachmentFileName(media: Media): string { // Try to extract extension from URL first let extension = ''; - try { - const urlPath = new URL(media.url).pathname; - const urlExtension = urlPath.substring(urlPath.lastIndexOf('.')); - if (urlExtension && urlExtension.length > 1 && urlExtension.length <= 5) { - extension = urlExtension; + + // Handle data URLs specially - extract extension from MIME type + if (isDataUrl(media.url)) { + const parsed = parseDataUrl(media.url); + if (parsed) { + extension = getExtensionFromMimeType(parsed.mimeType); } - } catch { - // If URL parsing fails, try simple string extraction - const lastDot = media.url.lastIndexOf('.'); - const queryStart = media.url.indexOf('?', lastDot); - if (lastDot > 0 && (queryStart === -1 || queryStart > lastDot + 1)) { - const potentialExt = media.url.substring(lastDot, queryStart > -1 ? 
queryStart : undefined); - if (potentialExt.length > 1 && potentialExt.length <= 5) { - extension = potentialExt; + } else { + try { + const urlPath = new URL(media.url).pathname; + const urlExtension = urlPath.substring(urlPath.lastIndexOf('.')); + if (urlExtension && urlExtension.length > 1 && urlExtension.length <= 5) { + extension = urlExtension; + } + } catch { + // If URL parsing fails, try simple string extraction + const lastDot = media.url.lastIndexOf('.'); + const queryStart = media.url.indexOf('?', lastDot); + if (lastDot > 0 && (queryStart === -1 || queryStart > lastDot + 1)) { + const potentialExt = media.url.substring(lastDot, queryStart > -1 ? queryStart : undefined); + if (potentialExt.length > 1 && potentialExt.length <= 5) { + extension = potentialExt; + } } } } @@ -232,6 +303,68 @@ export function getAttachmentFileName(media: Media): string { return hasExtension ? baseName : `${baseName}${extension}`; } +/** + * Creates a Discord AttachmentBuilder from a Media object. + * Handles both regular URLs and base64 data URLs. + * + * @param {Media} media - The media object to create an attachment from + * @returns {AttachmentBuilder | null} The attachment builder or null if the media couldn't be processed + */ +export function createAttachmentFromMedia(media: Media): AttachmentBuilder | null { + if (!media.url) { + return null; + } + + const fileName = getAttachmentFileName(media); + + // Handle base64 data URLs + if (isDataUrl(media.url)) { + const parsed = parseDataUrl(media.url); + if (!parsed) { + logger.warn({ url: media.url.substring(0, 50) }, 'Failed to parse data URL'); + return null; + } + return new AttachmentBuilder(parsed.buffer, { name: fileName }); + } + + // Regular URL - pass directly + return new AttachmentBuilder(media.url, { name: fileName }); +} + +/** + * Filters attachments for memory storage by removing base64 data URLs. 
+ * Base64 images are huge and shouldn't be stored in memories that get + * loaded into LLM context (via RECENT_MESSAGES provider). + * + * @param {Media[] | undefined} attachments - The attachments to filter + * @returns {Media[] | undefined} Filtered attachments without base64 data + */ +export function filterAttachmentsForMemory(attachments: Media[] | undefined): Media[] | undefined { + if (!attachments || attachments.length === 0) { + return undefined; + } + + const filtered = attachments + .filter((att) => att.url && !isDataUrl(att.url)) + .map((att) => ({ + ...att, + // Keep URL-based attachments as-is + })); + + // Also add placeholders for data URL attachments so we know they were sent + const dataUrlCount = attachments.filter((att) => att.url && isDataUrl(att.url)).length; + if (dataUrlCount > 0) { + filtered.push({ + id: 'data-url-images', + url: '', + title: `${dataUrlCount} image(s) sent`, + description: `${dataUrlCount} generated image(s) were sent (data not stored in memory)`, + }); + } + + return filtered.length > 0 ? filtered : undefined; +} + /** * Generates a summary for a given text using a specified model. 
* @@ -409,10 +542,8 @@ export async function sendMessageInChunks( .setCustomId(comp.custom_id) .setPlaceholder(comp.placeholder || 'Select an option'); - if (typeof comp.min_values === 'number') - {selectMenu.setMinValues(comp.min_values);} - if (typeof comp.max_values === 'number') - {selectMenu.setMaxValues(comp.max_values);} + if (typeof comp.min_values === 'number') { selectMenu.setMinValues(comp.min_values); } + if (typeof comp.max_values === 'number') { selectMenu.setMaxValues(comp.max_values); } if (Array.isArray(comp.options)) { selectMenu.addOptions( @@ -432,7 +563,7 @@ export async function sendMessageInChunks( } return null; }) - .filter(Boolean); + .filter((c): c is ButtonBuilder | StringSelectMenuBuilder => c !== null); if (validComponents.length > 0) { actionRow.addComponents(validComponents); @@ -498,20 +629,20 @@ export async function sendMessageInChunks( export function needsSmartSplit(content: string): boolean { // Check for code blocks - these shouldn't be split mid-block const codeBlockCount = (content.match(/```/g) || []).length; - if (codeBlockCount >= 2) {return true;} + if (codeBlockCount >= 2) { return true; } // Check for markdown headers - content has structure - if (/^#{1,3}\s/m.test(content)) {return true;} + if (/^#{1,3}\s/m.test(content)) { return true; } // Check for numbered lists (1. 2. 3.) - should stay together when possible - if (/^\d+\.\s/m.test(content)) {return true;} + if (/^\d+\.\s/m.test(content)) { return true; } // Check for very long lines without natural breakpoints const lines = content.split('\n'); const hasLongUnbreakableLines = lines.some(line => line.length > 500 && !line.includes('. ') && !line.includes(', ') ); - if (hasLongUnbreakableLines) {return true;} + if (hasLongUnbreakableLines) { return true; } return false; } @@ -747,3 +878,47 @@ export function canSendMessage(channel) { : null, }; } + +/** + * Edits an existing Discord message with new content. 
+ * + * Why this exists: Progressive updates need to modify messages after they're sent. + * This wraps Discord.js message.edit() with error handling and length validation. + * + * Why truncate instead of split: Unlike sending new messages (where we can send + * multiple messages), editing can only update one message. If content exceeds + * Discord's 2000 char limit, we truncate with "..." rather than failing. + * + * Why return null on error: Allows callers to gracefully degrade (e.g., send a + * new message) rather than throwing and stopping the entire action. + * + * @param {DiscordMessage} message - The message to edit. + * @param {string} content - The new content for the message. + * @returns {Promise} The edited message, or null if edit failed. + */ +export async function editMessageContent( + message: DiscordMessage, + content: string +): Promise { + try { + if (!content || content.trim().length === 0) { + logger.warn('Cannot edit message with empty content'); + return null; + } + + // Truncate (not split) content if it exceeds Discord's limit; the last three kept characters are replaced by "..." + const MAX_LENGTH = 2000; + if (content.length > MAX_LENGTH) { + // For edited messages, we can only update with the truncated content + // Multiple messages aren't possible with edits + content = content.substring(0, MAX_LENGTH - 3) + '...'; + logger.warn(`Content truncated to ${MAX_LENGTH} characters for message edit`); + } + + const edited = await message.edit(content); + return edited; + } catch (error) { + logger.error(`Failed to edit message ${message.id}: ${error instanceof Error ? 
error.message : String(error)}`); + return null; + } +} diff --git a/src/voice.ts b/src/voice.ts index 7c5a30f..6106fc2 100644 --- a/src/voice.ts +++ b/src/voice.ts @@ -7,6 +7,7 @@ import { VoiceConnectionStatus, createAudioPlayer, createAudioResource, + demuxProbe, entersState, getVoiceConnections, joinVoiceChannel, @@ -37,12 +38,15 @@ import { type GuildMember, type VoiceChannel, type VoiceState, -} from "discord.js"; -import { EventEmitter } from "node:events"; -import { Readable, pipeline } from "node:stream"; -import prism from "prism-media"; -import type { DiscordService } from "./service"; -import { getMessageService } from "./utils"; +} from 'discord.js'; +import { EventEmitter } from 'node:events'; +import { Readable, pipeline } from 'node:stream'; +import prism from 'prism-media'; +import type { DiscordService } from './service'; +import { getMessageService } from './utils'; +import { getDiscordSettings } from './environment'; +import { DEFAULT_CHANNEL_CONFIGS } from './audioChannels'; +import { DiscordEventTypes } from './types'; // These values are chosen for compatibility with picovoice components const DECODE_FRAME_SIZE = 1024; @@ -166,11 +170,8 @@ export class AudioMonitor { this.lastFlagged--; } }); - this.readable.on("end", () => { - logger.debug( - { src: "plugin:discord:service:voice" }, - "AudioMonitor ended", - ); + this.readable.on('end', () => { + // Debug log removed - too noisy for production this.ended = true; if (this.lastFlagged < 0) { return; @@ -178,14 +179,10 @@ export class AudioMonitor { callback(this.getBufferFromStart()); this.lastFlagged = -1; }); - this.readable.on("speakingStopped", () => { - if (this.ended) { - return; - } - logger.debug({ src: "plugin:discord:service:voice" }, "Speaking stopped"); - if (this.lastFlagged < 0) { - return; - } + this.readable.on('speakingStopped', () => { + if (this.ended) return; + // Debug log removed - too noisy for production + if (this.lastFlagged < 0) return; 
callback(this.getBufferFromStart()); }); this.readable.on("speakingStarted", () => { @@ -193,7 +190,7 @@ export class AudioMonitor { return; } onStart(); - logger.debug({ src: "plugin:discord:service:voice" }, "Speaking started"); + // Debug log removed - too noisy for production this.reset(); }); } @@ -258,8 +255,94 @@ export class AudioMonitor { } /** - * Class representing a VoiceManager that extends EventEmitter. + * Configuration for an audio channel + */ +export interface AudioChannelConfig { + channel: number; // Channel number (0, 1, 2, 3, ...) + priority: number; // Higher priority interrupts lower (TTS=100, music=50, sfx=30) + canPause: boolean; // Whether channel supports pause/resume + interruptible: boolean; // Whether higher priority channels can interrupt + volume?: number; // Channel volume (0.0 to 1.0) + duckVolume?: number; // Volume when ducked by higher priority (default: 0.3) +} + +/** + * Handle for controlling audio playback + */ +export interface PlaybackHandle { + finished: Promise; + cancelled: Promise; + abort(): void; +} + +/** + * Internal state for a channel player + */ +interface ChannelPlayerState { + player: AudioPlayer; + channel: number; + guildId: string; + resource: any; + finished: () => void; + cancelled: () => void; + abortController?: AbortController; + originalVolume?: number; + duckedVolume?: number; + volumeTransformer?: any; // VolumeTransformer from AudioResource when inlineVolume is enabled +} + +/** + * VoiceManager - Handles Discord voice connections and audio playback + * + * ## Overview + * This class manages all voice-related functionality for Discord bots: + * - Joining and leaving voice channels + * - Playing audio streams to voice channels + * - Monitoring user audio (listening/transcription) + * - Managing voice connection lifecycle + * + * ## Audio Playback Architecture + * Audio playback follows this pipeline: + * + * ``` + * Audio Source (file/stream) + * ↓ + * Clean Stream (no listeners!) 
+ * ↓ + * demuxProbe (format detection) + * ↓ + * AudioResource (format-specific decoding) + * ↓ + * AudioPlayer (packet generation) + * ↓ + * VoiceConnection (transmission) + * ↓ + * Discord Voice Servers + * ``` + * + * ## Critical Stream Handling + * ⚠️ Audio streams passed to playAudio() MUST be clean: + * - NO event listeners (except 'error') + * - NO stream control methods called (resume(), pause(), etc.) + * - Let Discord.js handle all stream control + * + * Adding listeners puts streams in paused mode, preventing demuxProbe from + * reading stream headers and detecting format, which causes playback failure. + * * @extends EventEmitter + * + * @example + * ```typescript + * // Create VoiceManager + * const voiceManager = new VoiceManager(client, runtime); + * + * // Join a channel + * await voiceManager.handleUserConnected(guildId, channelId, userId); + * + * // Play audio (stream must be clean!) + * const stream = createReadStream('audio.opus'); + * await voiceManager.playAudio(stream, { guildId, channel: 1 }); + * ``` */ export class VoiceManager extends EventEmitter { private processingVoice = false; @@ -273,16 +356,77 @@ export class VoiceManager extends EventEmitter { transcriptionText: string; } > = new Map(); - private activeAudioPlayer: AudioPlayer | null = null; + private activeAudioPlayer: AudioPlayer | null = null; // Legacy - kept for backward compatibility private client: Client | null; private runtime: ICompatRuntime; + private service: DiscordService; private streams: Map = new Map(); - private connections: Map = new Map(); - private activeMonitors: Map< + private connections: Map = new Map(); // key: guildId + private activeMonitors: Map = + new Map(); + private monitoredUsers: Set = new Set(); // Track which users are currently being monitored + private voiceActivityStats: Map< string, - { channel: BaseGuildVoiceChannel; monitor: AudioMonitor } + { + count: number; + minVolume: number; + maxVolume: number; + sumVolume: number; + firstActive: 
number; + lastActive: number; + userName: string; + } > = new Map(); + private voiceStatFlushTimer: NodeJS.Timeout | null = null; private ready: boolean; + private botId: string | null = null; // Bot ID this VoiceManager belongs to + private botAlias: string | undefined; // Optional bot alias + + // Channel-based audio system + private channels: Map = new Map(); + private channelPlayers: Map = new Map(); // key: `${guildId}:${channel}` + + // Desired state tracking: guildId -> channelId that agent should be in + private desiredChannels: Map = new Map(); // key: guildId, value: channelId + private reconnectTimeouts: Map = new Map(); // key: guildId + + // Voice activity ducking state + private duckedGuilds: Map< + string, + { + originalVolume: number; + silenceTimer: NodeJS.Timeout | null; + rampTimer: NodeJS.Timeout | null; + } + > = new Map(); // key: guildId + private duckingConfig: { + duckVolume: number; + silenceTimeout: number; + rampDuration: number; + speakingThreshold: number; + }; + // Voice connection health tracking + private connectionHealth: Map = new Map(); // key: guildId + private connectionWatchdog: NodeJS.Timeout | null = null; + + // Audio state tracking (server mute/deafen) + private audioStates: Map< + string, + { + serverMute: boolean; + serverDeaf: boolean; + selfMute: boolean; + selfDeaf: boolean; + lastUpdated: number; + } + > = new Map(); // key: guildId + + /** + * Get a human-readable identifier for logging (character name or agentId fallback) + */ + private get agentIdentifier(): string { + return this.runtime?.character?.name || this.runtime.agentId; + } /** * Constructor for initializing a new instance of the class. 
@@ -293,20 +437,216 @@ export class VoiceManager extends EventEmitter { constructor(service: DiscordService, runtime: ICompatRuntime) { super(); this.client = service.client; + this.service = service; this.runtime = runtime; this.ready = false; + // Load ducking configuration from settings + const discordSettings = getDiscordSettings(runtime); + this.duckingConfig = { + duckVolume: discordSettings.voiceDuckVolume ?? 0.2, + silenceTimeout: discordSettings.voiceDuckSilenceTimeout ?? 60000, + rampDuration: discordSettings.voiceDuckRampDuration ?? 3000, + speakingThreshold: discordSettings.voiceSpeakingThreshold ?? 0.1, + }; + + // Register default audio channels (TTS, Music, SFX, Ambient) + // WHY REGISTER ON CONSTRUCTION: + // Ensures all standard channels are available immediately when VoiceManager starts. + // Other plugins can rely on these channels existing without explicit registration. + for (const config of Object.values(DEFAULT_CHANNEL_CONFIGS)) { + this.registerChannel(config); + } + + // Listen for channel registration requests from plugins + this.on('registerChannel', (config: AudioChannelConfig) => { + this.registerChannel(config); + }); + + // Note: Client may be null at construction time if called before login + // The setClient() method will be called later to set the client and register events if (this.client) { this.client.on("voiceManagerReady", () => { this.setReady(true); + // Set bot ID when client is ready + if (this.client?.user) { + this.botId = this.client.user.id; + } }); } else { this.runtime.logger.error( - { src: "plugin:discord:service:voice", agentId: this.runtime.agentId }, - "Discord client not available for voiceManagerReady event", + { src: 'plugin:discord:service:voice', agentId: this.agentIdentifier }, + '[VoiceManager] Client not available at construction time - will be set later via setClient()' ); this.ready = false; } + + // Start voice stats flush timer + this.voiceStatFlushTimer = setInterval(() => { + 
this.flushVoiceActivityStats(); + }, 30000); // Flush every 30 seconds + + // Start watchdog to detect stale/disconnected voice sessions faster + this.connectionWatchdog = setInterval(() => { + this.checkConnectionHealth(); + }, 5000); // Check every 5 seconds + } + + /** + * Set the bot identification for this VoiceManager + * @param botId Discord bot user ID + * @param botAlias Optional bot alias + */ + setBotIdentification(botId: string, botAlias?: string) { + this.botId = botId; + this.botAlias = botAlias; + this.runtime.logger.debug(`[VoiceManager] Bot identification set: ${botId} (${botAlias || 'no alias'})`); + } + + private flushVoiceActivityStats() { + if (this.voiceActivityStats.size === 0) return; + + this.voiceActivityStats.forEach((stats, _userId) => { + if (stats.count > 0) { + const avgVolume = stats.sumVolume / stats.count; + const timeSpanMs = stats.lastActive - stats.firstActive; + const timeSpanSec = (timeSpanMs / 1000).toFixed(1); + logger.info( + `[VoiceActivity] Summary for ${stats.userName} (${timeSpanSec}s): ${stats.count} detections, vol: ${stats.minVolume.toFixed(3)}-${stats.maxVolume.toFixed(3)} (avg ${avgVolume.toFixed(3)})` + ); + } + }); + + // Clear stats after flush + this.voiceActivityStats.clear(); + } + + /** + * Periodically verify voice connection health and trigger fast recovery + * Useful when Discord silently drops the voice session without emitting state changes. 
+ */ + private checkConnectionHealth(): void { + const now = Date.now(); + + for (const [guildId, connection] of this.connections.entries()) { + // Skip while a scheduled reconnect is in-flight + if (this.reconnectTimeouts.has(guildId)) { + continue; + } + + const status = connection.state.status; + const health = this.connectionHealth.get(guildId) || { lastReady: now }; + + if (status === VoiceConnectionStatus.Ready) { + // Healthy connection - update last seen time + health.lastReady = now; + this.connectionHealth.set(guildId, health); + continue; + } + + const timeSinceReady = now - health.lastReady; + + // If we've been away from Ready for too long, attempt recovery + if (timeSinceReady > 15000) { + logger.warn( + `[VoiceManager] Connection for guild ${guildId} stuck in state ${status} for ${timeSinceReady}ms - attempting recovery` + ); + + const desiredChannel = this.desiredChannels.get(guildId); + + if (desiredChannel) { + this.reconnectToDesiredChannel(guildId, desiredChannel).catch((error) => { + logger.error( + `[VoiceManager] Health check reconnect failed for guild ${guildId}: ${error instanceof Error ? error.message : String(error)}` + ); + }); + } else { + // No desired state to restore; clean up the bad connection + connection.destroy(); + this.connections.delete(guildId); + this.connectionHealth.delete(guildId); + logger.debug( + `[VoiceManager] Destroyed stale connection for guild ${guildId} (no desired channel to recover)` + ); + } + } + } + + // Clean up health records for guilds without active connections + for (const guildId of Array.from(this.connectionHealth.keys())) { + if (!this.connections.has(guildId)) { + this.connectionHealth.delete(guildId); + } + } + } + + /** + * Clean up VoiceManager resources (timers, stats, etc.) 
+ * Call this when the service is shutting down + */ + cleanup() { + // Flush any remaining stats + this.flushVoiceActivityStats(); + + // Clear the flush timer + if (this.voiceStatFlushTimer) { + clearInterval(this.voiceStatFlushTimer); + this.voiceStatFlushTimer = null; + } + + // Clear the connection watchdog + if (this.connectionWatchdog) { + clearInterval(this.connectionWatchdog); + this.connectionWatchdog = null; + } + + // Clear reconnect timeouts + for (const timeout of this.reconnectTimeouts.values()) { + clearTimeout(timeout); + } + this.reconnectTimeouts.clear(); + + // Clear ducking timers + for (const duckState of this.duckedGuilds.values()) { + if (duckState.silenceTimer) clearTimeout(duckState.silenceTimer); + if (duckState.rampTimer) clearTimeout(duckState.rampTimer as any); + } + this.duckedGuilds.clear(); + + // Clean up active bridges + for (const cleanup of this.activeBridges.values()) { + cleanup(); + } + this.activeBridges.clear(); + + logger.debug('[VoiceManager] Cleanup completed'); + } + + /** + * Set the Discord client and register event listeners + * Called after the client has logged in successfully + * @param client Discord.js client instance + */ + setClient(client: Client) { + this.client = client; + + // Register the voiceManagerReady event listener + this.client.on('voiceManagerReady', () => { + this.setReady(true); + // Set bot ID when client is ready + if (this.client?.user) { + this.botId = this.client.user.id; + } + }); + + logger.debug('[VoiceManager] Client set and event listeners registered'); + } + + /** + * Get the bot ID this VoiceManager belongs to + */ + getBotId(): string | null { + return this.botId; } /** @@ -325,7 +665,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: channel.id, channelType: channel.type, }, @@ -345,7 +685,7 @@ export class VoiceManager extends 
EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, ready: this.ready, }, "VoiceManager ready status changed", @@ -378,22 +718,23 @@ export class VoiceManager extends EventEmitter { return; } + const guildId = member.guild.id; + // Ignore mute/unmute events if (oldChannelId === newChannelId) { return; } - // User leaving a channel where the bot is present - if (oldChannelId && this.connections.has(oldChannelId)) { + // User leaving a channel where the agent is present + // Check if we have a connection for this guild + if (oldChannelId && this.connections.has(guildId)) { this.stopMonitoringMember(member.id); } - // User joining a channel where the bot is present - if (newChannelId && this.connections.has(newChannelId)) { - await this.monitorMember( - member, - newState.channel as BaseGuildVoiceChannel, - ); + // User joining a channel where the agent is present + // Check if we have a connection for this guild + if (newChannelId && this.connections.has(guildId)) { + await this.monitorMember(member, newState.channel as BaseGuildVoiceChannel); } } @@ -409,11 +750,12 @@ export class VoiceManager extends EventEmitter { // Remove all associated streams and monitors this.streams.clear(); this.activeMonitors.clear(); + this.monitoredUsers.clear(); } catch (error) { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? 
error.message : String(error), }, "Error leaving voice channel", @@ -437,11 +779,27 @@ export class VoiceManager extends EventEmitter { entersState(connection, VoiceConnectionStatus.Signalling, 20_000), ]); + // Store connection by guildId (new system) + const guildId = channel.guild.id; + this.connections.set(guildId, connection); + + // Register with VoiceConnectionManager if available + if (this.botId && this.service.voiceConnectionManager) { + this.service.voiceConnectionManager.registerConnection( + this.botId, + guildId, + channel.id, + channel, + this, + this.botAlias + ); + } + // Log connection success this.runtime.logger.info( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, status: connection.state.status, }, "Voice connection established", @@ -449,21 +807,25 @@ export class VoiceManager extends EventEmitter { // Set up ongoing state change monitoring connection.on("stateChange", async (oldState, newState) => { - this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - oldState: oldState.status, - newState: newState.status, - }, - "Voice connection state changed", - ); + // Skip logging if state hasn't actually changed + // Discord.js may emit stateChange even when status is the same + if (oldState.status !== newState.status) { + this.runtime.logger.debug( + { + src: "plugin:discord:service:voice", + agentId: this.agentIdentifier, + oldState: oldState.status, + newState: newState.status, + }, + "Voice connection state changed", + ); + } if (newState.status === VoiceConnectionStatus.Disconnected) { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, }, "Handling disconnection", ); @@ -478,7 +840,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: 
this.agentIdentifier, }, "Reconnecting to channel", ); @@ -487,22 +849,34 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: e instanceof Error ? e.message : String(e), }, "Disconnection confirmed - cleaning up", ); connection.destroy(); - this.connections.delete(channel.id); + this.connections.delete(guildId); } } else if (newState.status === VoiceConnectionStatus.Destroyed) { - this.connections.delete(channel.id); + this.connections.delete(guildId); + } else if (newState.status === VoiceConnectionStatus.Ready) { + // Connection is ready - ensure it's in our map + if (!this.connections.has(guildId)) { + this.connections.set(guildId, connection); + } + + // Resume any autopaused players after reconnection + // This handles network hiccups where the player autopauses due to missing connection + if (oldState.status === VoiceConnectionStatus.Connecting || + oldState.status === VoiceConnectionStatus.Signalling) { + logger.log(`[Voice] Connection restored for guild ${guildId}, checking for autopaused players...`); + await this.resumeAutopausedPlayers(guildId, connection); + } } else if ( - !this.connections.has(channel.id) && - (newState.status === VoiceConnectionStatus.Ready || - newState.status === VoiceConnectionStatus.Signalling) + !this.connections.has(guildId) && + newState.status === VoiceConnectionStatus.Signalling ) { - this.connections.set(channel.id, connection); + this.connections.set(guildId, connection); } }); @@ -510,7 +884,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? 
error.message : String(error), }, "Voice connection error", @@ -519,15 +893,12 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, }, "Will attempt to recover", ); }); - // Store the connection - this.connections.set(channel.id, connection); - // Continue with voice state modifications const me = channel.guild.members.me; if (me?.voice && me.permissions.has("DeafenMembers")) { @@ -538,7 +909,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.warn( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Failed to modify voice state", @@ -547,7 +918,12 @@ export class VoiceManager extends EventEmitter { } } - connection.receiver.speaking.on("start", async (entityId: string) => { + // Initialize audio state from current voice state + if (me?.voice) { + await this.updateAudioState(guildId, me.voice); + } + + connection.receiver.speaking.on('start', async (entityId: string) => { let user = channel.members.get(entityId); if (!user) { try { @@ -556,7 +932,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, error: error instanceof Error ? 
error.message : String(error), }, @@ -566,8 +942,11 @@ export class VoiceManager extends EventEmitter { } if (user && !user?.user.bot) { - this.monitorMember(user as GuildMember, channel); - this.streams.get(entityId)?.emit("speakingStarted"); + // Only start monitoring if not already monitoring this user + if (!this.monitoredUsers.has(entityId)) { + this.monitorMember(user as GuildMember, channel); + } + this.streams.get(entityId)?.emit('speakingStarted'); } }); @@ -581,14 +960,15 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: channel.id, error: error instanceof Error ? error.message : String(error), }, "Failed to establish voice connection", ); connection.destroy(); - this.connections.delete(channel.id); + const guildIdForCleanup = channel.guild.id; + this.connections.delete(guildIdForCleanup); throw error; } } @@ -602,7 +982,7 @@ export class VoiceManager extends EventEmitter { const userId = this.client?.user?.id; if (!userId) { this.runtime.logger.error( - { src: "plugin:discord:service:voice", agentId: this.runtime.agentId }, + { src: "plugin:discord:service:voice", agentId: this.agentIdentifier }, "Client user ID not available", ); return undefined; @@ -629,8 +1009,26 @@ export class VoiceManager extends EventEmitter { ) { const entityId = member?.id; const userName = member?.user?.username; - const name = member?.user?.displayName; - const connection = this.getVoiceConnection(member?.guild?.id); + // Use server-specific displayName (nickname) if available, fallback to global displayName + const name = member?.displayName || member?.user?.displayName; + const guildId = member?.guild?.id; + + // Check if we're already monitoring this user to prevent duplicate monitors + // IMPORTANT: Add to monitoredUsers immediately to prevent race conditions + // Between the has() check and add(), another call could pass the 
check + if (this.monitoredUsers.has(entityId)) { + this.runtime.logger.debug(`[monitorMember] Already monitoring user ${entityId}`); + return; + } + // Mark as monitored BEFORE any async work to prevent duplicate monitors + this.monitoredUsers.add(entityId); + + const connection = this.getVoiceConnection(guildId); + if (!connection) { + this.runtime.logger.warn(`[monitorMember] No voice connection for guild ${guildId}`); + this.monitoredUsers.delete(entityId); // Clean up on early exit + return; + } const receiveStream = connection?.receiver.subscribe(entityId, { autoDestroy: true, @@ -640,14 +1038,18 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.warn( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, }, "No receiveStream or empty stream", ); + this.monitoredUsers.delete(entityId); // Clean up on early exit return; } + // Set maxListeners to prevent warnings (pipeline adds multiple listeners) + receiveStream.setMaxListeners(20); + let opusDecoder: any; try { // Try to create opus decoder with error handling for Node.js 23 compatibility @@ -660,12 +1062,14 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, error: error instanceof Error ? error.message : String(error), }, "Failed to create opus decoder", ); + // Clean up monitoring state on failure + this.monitoredUsers.delete(entityId); // For now, log the error and return early. // In production, you might want to implement a PCM fallback or other audio processing return; @@ -673,78 +1077,106 @@ export class VoiceManager extends EventEmitter { const volumeBuffer: number[] = []; const VOLUME_WINDOW_SIZE = 30; - const SPEAKING_THRESHOLD = 0.05; - opusDecoder.on("data", (pcmData: Buffer) => { - // Monitor the audio volume while the agent is speaking. 
- // If the average volume of the user's audio exceeds the defined threshold, it indicates active speaking. - // When active speaking is detected, stop the agent's current audio playbook to avoid overlap. - - if (this.activeAudioPlayer) { - const samples = new Int16Array( - pcmData.buffer, - pcmData.byteOffset, - pcmData.length / 2, + let dataPacketCount = 0; + let lastLogTime = Date.now(); + const LOG_INTERVAL = 2000; // Log every 2 seconds + let firstDataReceived = false; + + opusDecoder.on('data', (pcmData: Buffer) => { + const SPEAKING_THRESHOLD = this.duckingConfig.speakingThreshold; + if (!firstDataReceived) { + firstDataReceived = true; + this.runtime.logger.debug(`[VoiceActivity] Audio stream active for user ${entityId}`); + } + + dataPacketCount++; + const now = Date.now(); + + const samples = new Int16Array(pcmData.buffer, pcmData.byteOffset, pcmData.length / 2); + const maxAmplitude = Math.max(...samples.map(Math.abs)) / 32768; + volumeBuffer.push(maxAmplitude); + + if (volumeBuffer.length > VOLUME_WINDOW_SIZE) { + volumeBuffer.shift(); + } + const avgVolume = volumeBuffer.reduce((sum, v) => sum + v, 0) / volumeBuffer.length; + + // Log periodically for debugging (reduced frequency) + if (now - lastLogTime >= LOG_INTERVAL) { + logger.debug( + `[VoiceActivity] ${userName}: avgVol=${avgVolume.toFixed(3)}, threshold=${SPEAKING_THRESHOLD}` ); - const maxAmplitude = Math.max(...samples.map(Math.abs)) / 32768; - volumeBuffer.push(maxAmplitude); + lastLogTime = now; + } - if (volumeBuffer.length > VOLUME_WINDOW_SIZE) { - volumeBuffer.shift(); + if (avgVolume > SPEAKING_THRESHOLD) { + // Accumulate stats instead of logging immediately + let stats = this.voiceActivityStats.get(entityId); + if (!stats) { + stats = { + count: 0, + minVolume: 1.0, + maxVolume: 0.0, + sumVolume: 0.0, + firstActive: now, + lastActive: now, + userName: userName + }; + this.voiceActivityStats.set(entityId, stats); } - const avgVolume = - volumeBuffer.reduce((sum, v) => sum + v, 0) / 
VOLUME_WINDOW_SIZE; - if (avgVolume > SPEAKING_THRESHOLD) { - volumeBuffer.length = 0; + stats.count++; + stats.minVolume = Math.min(stats.minVolume, avgVolume); + stats.maxVolume = Math.max(stats.maxVolume, avgVolume); + stats.sumVolume += avgVolume; + stats.lastActive = now; + + volumeBuffer.length = 0; + + // Stop TTS/activeAudioPlayer (channel 0) when others speak + if (this.activeAudioPlayer) { this.cleanupAudioPlayer(this.activeAudioPlayer); this.processingVoice = false; } + + // Duck music volume (channel 1) when others speak + if (guildId) { + this.duckMusicVolume(guildId); + } } }); - pipeline( - receiveStream as AudioReceiveStream, - opusDecoder as any, - (err: Error | null) => { - if (err) { - this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - entityId, - error: err.message, - }, - "Opus decoding pipeline error", - ); - } else { - this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - entityId, - }, - "Opus decoding pipeline finished", - ); - } - }, - ); + // User is already marked as monitored at function start (before async work) + // to prevent race conditions. Pipeline callback handles cleanup on completion. 
+ + pipeline(receiveStream as AudioReceiveStream, opusDecoder as any, (err: Error | null) => { + if (err) { + this.runtime.logger.debug( + { src: 'plugin:discord:service:voice', agentId: this.agentIdentifier, entityId, error: err.message }, + 'Opus decoding pipeline error' + ); + } else { + this.runtime.logger.debug( + { src: 'plugin:discord:service:voice', agentId: this.agentIdentifier, entityId }, + 'Opus decoding pipeline finished' + ); + } + // Clean up monitoring state when pipeline ends + this.monitoredUsers.delete(entityId); + }); this.streams.set(entityId, opusDecoder); - this.connections.set(entityId, connection as VoiceConnection); - opusDecoder.on("error", (err: any) => { + // Note: Connection is already stored by guildId, no need to store by entityId + opusDecoder.on('error', (err: any) => { this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - error: err instanceof Error ? err.message : String(err), - }, - "Opus decoding error", + { src: 'plugin:discord:service:voice', agentId: this.agentIdentifier, error: err instanceof Error ? err.message : String(err) }, + 'Opus decoding error' ); }); const errorHandler = (err: any) => { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? 
err.message : String(err), }, "Opus decoding error", @@ -754,26 +1186,24 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, member: member?.displayName, }, "Voice stream closed", ); this.streams.delete(entityId); - this.connections.delete(entityId); + this.monitoredUsers.delete(entityId); + // Note: Connection is stored by guildId, not entityId }; const closeHandler = () => { this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - member: member?.displayName, - }, - "Opus decoder closed", + { src: 'plugin:discord:service:voice', agentId: this.agentIdentifier, member: member?.displayName }, + 'Opus decoder closed' ); - opusDecoder.removeListener("error", errorHandler); - opusDecoder.removeListener("close", closeHandler); - receiveStream?.removeListener("close", streamCloseHandler); + opusDecoder.removeListener('error', errorHandler); + opusDecoder.removeListener('close', closeHandler); + receiveStream?.removeListener('close', streamCloseHandler); + this.monitoredUsers.delete(entityId); }; opusDecoder.on("error", errorHandler); opusDecoder.on("close", closeHandler); @@ -796,10 +1226,31 @@ export class VoiceManager extends EventEmitter { * @param {BaseGuildVoiceChannel} channel - The voice channel to leave. 
*/ leaveChannel(channel: BaseGuildVoiceChannel) { - const connection = this.connections.get(channel.id); + const guildId = channel.guild.id; + + // Clear desired state when intentionally leaving + this.desiredChannels.delete(guildId); + + // Clear any pending reconnect timeout + const existingTimeout = this.reconnectTimeouts.get(guildId); + if (existingTimeout) { + clearTimeout(existingTimeout); + this.reconnectTimeouts.delete(guildId); + } + + const connection = this.connections.get(guildId); if (connection) { connection.destroy(); - this.connections.delete(channel.id); + this.connections.delete(guildId); + } + + // Unregister from VoiceConnectionManager if available + if (this.botId && this.service.voiceConnectionManager) { + this.service.voiceConnectionManager.unregisterConnection( + this.botId, + guildId, + channel.id + ); } // Stop monitoring all members in this channel @@ -815,7 +1266,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelId: channel.id, channelName: channel.name, }, @@ -824,72 +1275,284 @@ export class VoiceManager extends EventEmitter { } /** - * Stop monitoring a specific member by their member ID. - * @param {string} memberId - The ID of the member to stop monitoring. + * Handle agent disconnect from a voice channel. + * Attempts to reconnect if there's a desired channel state. 
+ * @param {string} guildId - The guild ID where disconnect occurred + * @param {string} channelId - The channel ID that was left */ - stopMonitoringMember(memberId: string) { - const monitorInfo = this.activeMonitors.get(memberId); - if (monitorInfo) { - monitorInfo.monitor.stop(); - this.activeMonitors.delete(memberId); - this.streams.delete(memberId); - this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - memberId, - }, - "Stopped monitoring user", + /** + * Update audio state (mute/deafen status) for a guild + * @param guildId - Guild ID + * @param voiceState - Current voice state + */ + async updateAudioState(guildId: string, voiceState: VoiceState): Promise { + const currentState = this.audioStates.get(guildId) || { + serverMute: false, + serverDeaf: false, + selfMute: false, + selfDeaf: false, + lastUpdated: 0, + }; + + const newState = { + serverMute: voiceState.serverMute || false, + serverDeaf: voiceState.serverDeaf || false, + selfMute: voiceState.selfMute || false, + selfDeaf: voiceState.selfDeaf || false, + lastUpdated: Date.now(), + }; + + // Log changes + if ( + currentState.serverMute !== newState.serverMute || + currentState.serverDeaf !== newState.serverDeaf || + currentState.selfMute !== newState.selfMute || + currentState.selfDeaf !== newState.selfDeaf + ) { + logger.debug( + `[AudioState] Guild ${guildId}: serverMute=${newState.serverMute}, serverDeaf=${newState.serverDeaf}, selfMute=${newState.selfMute}, selfDeaf=${newState.selfDeaf}` ); } + + this.audioStates.set(guildId, newState); } /** - * Asynchronously debounces the process transcription function to prevent rapid execution. - * - * @param {UUID} entityId - The ID of the entity related to the transcription. - * @param {string} name - The name of the entity for transcription. - * @param {string} userName - The username of the user initiating the transcription. 
- * @param {BaseGuildVoiceChannel} channel - The voice channel where the transcription is happening. + * Get current audio state for a guild + * @param guildId - Guild ID + * @returns Audio state or null if not in voice */ + getAudioState(guildId: string): { + serverMute: boolean; + serverDeaf: boolean; + selfMute: boolean; + selfDeaf: boolean; + lastUpdated: number; + } | null { + return this.audioStates.get(guildId) || null; + } - async debouncedProcessTranscription( - entityId: UUID, - name: string, - userName: string, - channel: BaseGuildVoiceChannel, - ) { - const DEBOUNCE_TRANSCRIPTION_THRESHOLD = 1500; // wait for 1.5 seconds of silence - - if (this.activeAudioPlayer?.state?.status === "idle") { - this.runtime.logger.debug( - { src: "plugin:discord:service:voice", agentId: this.runtime.agentId }, - "Cleaning up idle audio player", - ); - this.cleanupAudioPlayer(this.activeAudioPlayer); - } + async handleAgentDisconnect(guildId: string, channelId: string): Promise { + logger.log(`[Voice] Handling agent disconnect from channel ${channelId} in guild ${guildId}`); - if (this.activeAudioPlayer || this.processingVoice) { - const state = this.userStates.get(entityId); - if (state) { - state.buffers.length = 0; - state.totalLength = 0; + // Clean up ducking state when bot disconnects + const duckState = this.duckedGuilds.get(guildId); + if (duckState) { + if (duckState.silenceTimer) { + clearTimeout(duckState.silenceTimer); } - return; + if (duckState.rampTimer) { + clearTimeout(duckState.rampTimer as any); + } + this.duckedGuilds.delete(guildId); + this.runtime.logger.debug(`[VoiceDucking] Cleaned up ducking state for guild ${guildId} (bot disconnected)`); } - if (this.transcriptionTimeout) { - clearTimeout(this.transcriptionTimeout); - } + // Clean up audio state when bot disconnects + this.audioStates.delete(guildId); - this.transcriptionTimeout = setTimeout(async () => { - this.processingVoice = true; - try { - await this.processTranscription( - entityId, - 
channel.id, - channel, - name, + // Clean up connection tracking + this.connections.delete(guildId); + + // Check if we have a desired channel state to restore + const desiredChannelId = this.desiredChannels.get(guildId); + + if (desiredChannelId && desiredChannelId === channelId) { + // This was an unexpected disconnect, attempt to reconnect + logger.log( + `[Voice] Unexpected disconnect detected. Attempting to reconnect to desired channel ${desiredChannelId} in guild ${guildId}` + ); + + // Clear any existing reconnect timeout + const existingTimeout = this.reconnectTimeouts.get(guildId); + if (existingTimeout) { + clearTimeout(existingTimeout); + } + + // Schedule reconnect attempt after a short delay + const reconnectTimeout = setTimeout(async () => { + try { + await this.reconnectToDesiredChannel(guildId, desiredChannelId); + } catch (error) { + logger.error( + `[Voice] Failed to reconnect to channel ${desiredChannelId} in guild ${guildId}: ${error}` + ); + // Retry once more after a longer delay + const retryTimeout = setTimeout(async () => { + try { + await this.reconnectToDesiredChannel(guildId, desiredChannelId); + } catch (retryError) { + logger.error( + `[Voice] Reconnect retry failed for channel ${desiredChannelId} in guild ${guildId}: ${retryError}` + ); + this.reconnectTimeouts.delete(guildId); + } + }, 10000); // 10 second retry + this.reconnectTimeouts.set(guildId, retryTimeout); + } + }, 2000); // 2 second initial delay + + this.reconnectTimeouts.set(guildId, reconnectTimeout); + } else { + // No desired state or different channel, just log + logger.log( + `[Voice] Agent disconnected from channel ${channelId} in guild ${guildId}. No reconnect needed.` + ); + } + } + + /** + * Handle agent connecting to a voice channel. 
+ * @param {string} guildId - The guild ID where connection occurred + * @param {string} channelId - The channel ID that was joined + */ + async handleAgentConnect(guildId: string, channelId: string): Promise { + logger.log(`[Voice] Agent connected to channel ${channelId} in guild ${guildId}`); + + // Clear any pending reconnect timeout since we're now connected + const existingTimeout = this.reconnectTimeouts.get(guildId); + if (existingTimeout) { + clearTimeout(existingTimeout); + this.reconnectTimeouts.delete(guildId); + } + + // Update desired state if not already set + if (!this.desiredChannels.has(guildId)) { + this.desiredChannels.set(guildId, channelId); + } + } + + /** + * Handle agent moving between voice channels. + * @param {string} guildId - The guild ID + * @param {string} oldChannelId - The previous channel ID + * @param {string} newChannelId - The new channel ID + */ + async handleAgentChannelChange( + guildId: string, + oldChannelId: string, + newChannelId: string + ): Promise { + logger.log( + `[Voice] Agent moved from channel ${oldChannelId} to ${newChannelId} in guild ${guildId}` + ); + + // Update desired state + this.desiredChannels.set(guildId, newChannelId); + + // Clear any pending reconnect timeout + const existingTimeout = this.reconnectTimeouts.get(guildId); + if (existingTimeout) { + clearTimeout(existingTimeout); + this.reconnectTimeouts.delete(guildId); + } + } + + /** + * Attempt to reconnect to the desired channel. 
+ * @param {string} guildId - The guild ID + * @param {string} channelId - The channel ID to reconnect to + */ + private async reconnectToDesiredChannel(guildId: string, channelId: string): Promise { + if (!this.client) { + throw new Error('Discord client not available'); + } + + const guild = this.client.guilds.cache.get(guildId); + if (!guild) { + throw new Error(`Guild ${guildId} not found`); + } + + const channel = await guild.channels.fetch(channelId); + if (!channel) { + throw new Error(`Channel ${channelId} not found in guild ${guildId}`); + } + + if (!channel.isVoiceBased()) { + throw new Error(`Channel ${channelId} is not a voice channel`); + } + + logger.log(`[Voice] Reconnecting to channel ${channel.name} (${channelId}) in guild ${guildId}`); + + try { + await this.joinChannel(channel as BaseGuildVoiceChannel); + logger.log(`[Voice] Successfully reconnected to channel ${channel.name} (${channelId})`); + } catch (error) { + logger.error( + `[Voice] Failed to reconnect to channel ${channelId}: ${error instanceof Error ? error.message : String(error)}` + ); + throw error; + } + } + + /** + * Stop monitoring a specific member by their member ID. + * @param {string} memberId - The ID of the member to stop monitoring. + */ + stopMonitoringMember(memberId: string) { + const monitorInfo = this.activeMonitors.get(memberId); + if (monitorInfo) { + monitorInfo.monitor.stop(); + this.activeMonitors.delete(memberId); + this.streams.delete(memberId); + this.monitoredUsers.delete(memberId); + this.runtime.logger.debug( + { src: 'plugin:discord:service:voice', agentId: this.agentIdentifier, memberId }, + 'Stopped monitoring user' + ); + } else { + // Even if no monitor info, clean up tracking + this.streams.delete(memberId); + this.monitoredUsers.delete(memberId); + } + } + + /** + * Asynchronously debounces the process transcription function to prevent rapid execution. + * + * @param {UUID} entityId - The ID of the entity related to the transcription. 
+ * @param {string} name - The name of the entity for transcription. + * @param {string} userName - The username of the user initiating the transcription. + * @param {BaseGuildVoiceChannel} channel - The voice channel where the transcription is happening. + */ + + async debouncedProcessTranscription( + entityId: UUID, + name: string, + userName: string, + channel: BaseGuildVoiceChannel, + ) { + const DEBOUNCE_TRANSCRIPTION_THRESHOLD = 1500; // wait for 1.5 seconds of silence + + if (this.activeAudioPlayer?.state?.status === "idle") { + this.runtime.logger.debug( + { src: "plugin:discord:service:voice", agentId: this.agentIdentifier }, + "Cleaning up idle audio player", + ); + this.cleanupAudioPlayer(this.activeAudioPlayer); + } + + if (this.activeAudioPlayer || this.processingVoice) { + const state = this.userStates.get(entityId); + if (state) { + state.buffers.length = 0; + state.totalLength = 0; + } + return; + } + + if (this.transcriptionTimeout) { + clearTimeout(this.transcriptionTimeout); + } + + this.transcriptionTimeout = setTimeout(async () => { + this.processingVoice = true; + try { + await this.processTranscription( + entityId, + channel.id, + channel, + name, userName, ); @@ -923,7 +1586,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, }, "Starting audio monitor", @@ -949,7 +1612,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, error: error instanceof Error ? 
error.message : String(error), }, @@ -971,7 +1634,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, }, "Received empty buffer", ); @@ -1000,9 +1663,21 @@ export class VoiceManager extends EventEmitter { userName: string, ) { const state = this.userStates.get(entityId); - if (!state || state.buffers.length === 0) { + if (!state || state.buffers.length === 0) return; + + // Minimum duration check: At 16kHz sample rate, 16-bit mono: + // 1 second = 16000 samples * 2 bytes = 32000 bytes + // Require at least 1.5 seconds of audio to avoid false positives from brief sounds + const MIN_AUDIO_BYTES = 48000; // ~1.5 seconds + const audioDurationMs = (state.totalLength / 32000) * 1000; + + if (state.totalLength < MIN_AUDIO_BYTES) { + this.runtime.logger.debug(`[VoiceActivity] Skipping transcription - audio too short: ${audioDurationMs.toFixed(0)}ms (need ${(MIN_AUDIO_BYTES / 32000 * 1000).toFixed(0)}ms)`); + state.buffers.length = 0; + state.totalLength = 0; return; } + try { const inputBuffer = Buffer.concat(state.buffers, state.totalLength); @@ -1011,43 +1686,153 @@ export class VoiceManager extends EventEmitter { // Convert Opus to WAV const wavBuffer = await this.convertOpusToWav(inputBuffer); this.runtime.logger.debug( - { src: "plugin:discord:service:voice", agentId: this.runtime.agentId }, + { src: "plugin:discord:service:voice", agentId: this.agentIdentifier }, "Starting transcription", ); - const transcriptionText = await this.runtime.useModel( - ModelType.TRANSCRIPTION, - wavBuffer, - ); + // Convert Buffer to File object for transcription API + const audioBlob = new Blob([new Uint8Array(wavBuffer)], { type: 'audio/wav' }); + const audioFile = new File([audioBlob], 'voice.wav', { type: 'audio/wav' }); + + const transcriptionText = await this.runtime.useModel(ModelType.TRANSCRIPTION, { + audio: audioFile, + }); function 
isValidTranscription(text: string): boolean { - if (!text || text.includes("[BLANK_AUDIO]")) { - return false; + if (!text || text.trim().length < 2) return false; + const lowText = text.toLowerCase().trim(); + const trimmedLength = lowText.length; + + // For longer phrases (>= 10 chars), be more lenient - likely real speech + if (trimmedLength >= 10) { + // Only filter out obvious hallucinations for longer text + if (lowText.includes('[blank_audio]')) return false; + if (lowText.includes('subtitles by')) return false; + if (lowText.includes('thank you for watching')) return false; + if (lowText.includes('transcribed by')) return false; + if (lowText.includes('copyright')) return false; + // Filter out common noise-generated phrases + if (lowText.includes('very low apparently')) return false; + if (/^(very )?(low|high) apparently\.?$/i.test(lowText)) return false; + + // For longer text, only reject if alphanumeric ratio is very low (< 0.3) + const alpha = lowText.replace(/[^a-z0-9]/g, '').length; + if (alpha < lowText.length * 0.3) return false; + + return true; // Longer phrases are likely valid } + + // For shorter phrases (2-9 chars), be more strict + // Filter out very short phrases (often noise/hallucinations) + if (trimmedLength <= 6) return false; + + // Filter out common Whisper hallucinations + if (lowText.includes('[blank_audio]')) return false; + if (lowText.includes('subtitles by')) return false; + if (lowText.includes('thank you for watching')) return false; + if (lowText.includes('transcribed by')) return false; + if (lowText.includes('copyright')) return false; + // Filter out noise-generated phrases + if (lowText.includes('very low apparently')) return false; + if (lowText.includes('apparently')) return false; // Common Whisper noise hallucination + if (lowText.startsWith('very low')) return false; + + // Filter out common short question words/phrases in various languages (often hallucinations) + const shortQuestionPatterns = [ + /^o\s*que\??$/i, 
// Portuguese "what?" + /^que\s*es\??$/i, // Spanish "what is?" + /^qu[ée]\s*es\??$/i, // Spanish/French "what is?" + /^what\s*is\??$/i, // English "what is?" + /^what\s*the\??$/i, // English "what the?" + /^c[oô]mo\??$/i, // Spanish "how?" + /^como\??$/i, // Portuguese "how?" + /^wie\??$/i, // German "how?" + /^was\??$/i, // German "what?" + /^quoi\??$/i, // French "what?" + ]; + if (shortQuestionPatterns.some(pattern => pattern.test(lowText))) return false; + + // Filter out short pronouns and common words (often hallucinations) + const shortWordPatterns = [ + /^eu\.?$/i, // Portuguese "I" + /^tôi\.?$/i, // Vietnamese "I/me" + /^je\.?$/i, // French "I" + /^ich\.?$/i, // German "I" + /^yo\.?$/i, // Spanish "I" + /^wouaou!?$/i, // French "wow!" + /^wow!?$/i, // English "wow!" + /^ah\.?$/i, // Common exclamation + /^oh\.?$/i, // Common exclamation + /^eh\.?$/i, // Common exclamation + ]; + if (shortWordPatterns.some(pattern => pattern.test(lowText))) return false; + + // Filter out repetitive single characters (e.g. "a. a. 
a.") + if (/^([a-z]\.?\s*){3,}$/.test(lowText)) return false; + + // For short phrases (7-9 chars), use stricter alphanumeric ratio + const alpha = lowText.replace(/[^a-z0-9]/g, '').length; + if (alpha < lowText.length * 0.5) return false; + return true; } - if (transcriptionText && isValidTranscription(transcriptionText)) { + // Adaptive threshold adjustment + if (!transcriptionText || !isValidTranscription(transcriptionText)) { + // Noise detected - increase threshold + const oldThreshold = this.duckingConfig.speakingThreshold; + this.duckingConfig.speakingThreshold = Math.min(0.2, oldThreshold + 0.005); + if (oldThreshold !== this.duckingConfig.speakingThreshold) { + this.runtime.logger.debug(`[VoiceActivity] 🔇 Invalid transcription ("${transcriptionText}"), increasing threshold to ${this.duckingConfig.speakingThreshold.toFixed(3)}`); + } + } else { + // Valid speech - slightly decrease threshold (if it was raised high) + const oldThreshold = this.duckingConfig.speakingThreshold; + this.duckingConfig.speakingThreshold = Math.max(0.05, oldThreshold - 0.001); state.transcriptionText += transcriptionText; + + if (oldThreshold !== this.duckingConfig.speakingThreshold) { + this.runtime.logger.debug(`[VoiceActivity] 🗣️ Valid speech, adjusting threshold to ${this.duckingConfig.speakingThreshold.toFixed(3)}`); + } } if (state.transcriptionText.length) { this.cleanupAudioPlayer(this.activeAudioPlayer); const finalText = state.transcriptionText; - state.transcriptionText = ""; - await this.handleMessage( - finalText, - entityId, - channelId, - channel, - name, - userName, - ); + state.transcriptionText = ''; + + // Always emit transcription event (follows messages.ts metadata pattern) + this.runtime.emitEvent([DiscordEventTypes.VOICE_TRANSCRIPTION], { + runtime: this.runtime, + entityId: createUniqueUuid(this.runtime, entityId), + roomId: createUniqueUuid(this.runtime, channelId), + content: { + text: finalText, + source: 'discord', + channelType: ChannelType.VOICE_GROUP, 
+ }, + metadata: { + entityName: name, + fromId: entityId, + channelId: channelId, + guildId: channel.guild.id, + channelName: channel.name, + }, + timestamp: Date.now(), + }); + + // Only generate response if not in listen-only mode + // Use getDiscordSettings() for proper boolean parsing - raw getSetting() returns + // strings, so "false" would be truthy and incorrectly skip response generation + const settings = getDiscordSettings(this.runtime); + if (!settings.voiceListenOnly) { + await this.handleMessage(finalText, entityId, channelId, channel, name, userName); + } } } catch (error) { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, error: error instanceof Error ? error.message : String(error), }, @@ -1123,6 +1908,22 @@ export class VoiceManager extends EventEmitter { _actionName?: string, ) => { try { + // Skip interim progressive updates for voice - only speak final responses + // + // Why skip interim: Progressive updates like "Thinking...", "Searching..." + // are meant for visual text feedback, not TTS. Speaking every interim update + // would be noisy and confusing. Users only want to hear the final response. + // + // Why check isInterim: ProgressiveMessage sets metadata.progressiveUpdate.isInterim + // for status updates. When isInterim=false (or no progressiveUpdate), it's a + // final message that should be spoken. + const progressiveUpdate = (content.metadata as any)?.progressiveUpdate; + if (progressiveUpdate?.isInterim) { + // Interim update - skip TTS but don't create memory either + // The final message will be spoken when isInterim=false + return []; + } + const responseMemory: Memory = { id: createUniqueUuid( this.runtime, @@ -1150,12 +1951,22 @@ export class VoiceManager extends EventEmitter { content.text, ); if (responseStream) { - // Convert Buffer/ArrayBuffer to Readable stream - const buffer = Buffer.isBuffer(responseStream) - ? 
responseStream - : Buffer.from(responseStream as ArrayBuffer); - const readable = Readable.from(buffer); - await this.playAudioStream(entityId, readable); + let audioStream: Readable; + if (Buffer.isBuffer(responseStream)) { + audioStream = Readable.from(responseStream, { objectMode: false }); + } else if (responseStream instanceof Readable) { + audioStream = responseStream; + } else { + // playAudio() handles Web ReadableStream conversion internally + // For other types, try to wrap with Readable.from() + audioStream = responseStream as any; + } + // Use mix: true so TTS ducks music instead of stopping it + await this.playAudio(audioStream, { + guildId: channel.guild.id, + channel: 0, + mix: true, + }); } } } @@ -1165,7 +1976,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error in voice message callback", @@ -1178,13 +1989,13 @@ export class VoiceManager extends EventEmitter { const messageService = getMessageService(this.runtime); if (messageService) { this.runtime.logger.debug( - { src: "plugin:discord:voice", agentId: this.runtime.agentId }, + { src: "plugin:discord:voice", agentId: this.agentIdentifier }, "Using messageService API for voice", ); await messageService.handleMessage(this.runtime, memory, callback); } else { this.runtime.logger.debug( - { src: "plugin:discord:voice", agentId: this.runtime.agentId }, + { src: "plugin:discord:voice", agentId: this.agentIdentifier }, "Using event-based handling for voice", ); await this.runtime.emitEvent([EventType.VOICE_MESSAGE_RECEIVED], { @@ -1198,7 +2009,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? 
error.message : String(error), }, "Error processing voice message", @@ -1225,7 +2036,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error converting PCM to WAV", @@ -1273,7 +2084,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, channelName: chosenChannel.name, }, "Joining channel", @@ -1283,7 +2094,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.warn( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, }, "No suitable voice channel found to join", ); @@ -1292,7 +2103,7 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: error instanceof Error ? error.message : String(error), }, "Error selecting or joining a voice channel", @@ -1301,11 +2112,47 @@ export class VoiceManager extends EventEmitter { } /** - * Play an audio stream for a given entity ID. - * - * @param {UUID} entityId - The ID of the entity to play the audio for. - * @param {Readable} audioStream - The audio stream to play. 
- * @returns {void} + * Register a new audio channel configuration + * @param config - Channel configuration + */ + registerChannel(config: AudioChannelConfig): void { + this.channels.set(config.channel, config); + } + + /** + * Get channel configuration + * @param channel - Channel number + * @returns Channel configuration or undefined + */ + private getChannelConfig(channel: number): AudioChannelConfig | undefined { + return this.channels.get(channel); + } + + /** + * Get or create channel configuration (with defaults) + * @param channel - Channel number + * @returns Channel configuration with defaults + */ + private getOrCreateChannelConfig(channel: number): AudioChannelConfig { + const existing = this.channels.get(channel); + if (existing) return existing; + + // Default config for unregistered channels + const defaultConfig: AudioChannelConfig = { + channel, + priority: 25, + canPause: false, + interruptible: true, + volume: 1.0, + }; + this.channels.set(channel, defaultConfig); + return defaultConfig; + } + + /** + * Play audio stream to a specific user's connection (legacy method) + * @param entityId - User entity ID + * @param audioStream - Audio stream to play */ async playAudioStream(entityId: UUID, audioStream: Readable) { const connection = this.connections.get(entityId); @@ -1313,58 +2160,827 @@ export class VoiceManager extends EventEmitter { this.runtime.logger.debug( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, entityId, }, "No connection for user", ); return; } - this.cleanupAudioPlayer(this.activeAudioPlayer); + const audioPlayer = createAudioPlayer({ behaviors: { noSubscriber: NoSubscriberBehavior.Pause, }, }); - this.activeAudioPlayer = audioPlayer; - connection.subscribe(audioPlayer); - const audioStartTime = Date.now(); - - const resource = createAudioResource(audioStream, { - inputType: StreamType.Arbitrary, - }); + connection.subscribe(audioPlayer); + const resource = 
createAudioResource(audioStream, { inputType: StreamType.Arbitrary }); audioPlayer.play(resource); audioPlayer.on("error", (err: any) => { this.runtime.logger.error( { src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, + agentId: this.agentIdentifier, error: err instanceof Error ? err.message : String(err), }, "Audio player error", ); }); + } - audioPlayer.on( - "stateChange", - (_oldState: any, newState: { status: string }) => { - if (newState.status === "idle") { - const idleTime = Date.now(); - this.runtime.logger.debug( - { - src: "plugin:discord:service:voice", - agentId: this.runtime.agentId, - durationMs: idleTime - audioStartTime, - }, - "Audio playback completed", - ); + /** + * Handle priority-based interruption or ducking + * @param guildId - Guild ID + * @param newChannel - New channel being played + * @param mix - Whether to mix (duck) instead of interrupt + */ + private handleChannelPriority( + guildId: string, + newChannel: number, + mix: boolean = false + ): void { + const newConfig = this.getChannelConfig(newChannel); + if (!newConfig) return; + + // Find all active channels for this guild + const activeChannels: Array<{ channel: number; config: AudioChannelConfig; state: ChannelPlayerState }> = []; + for (const [, state] of this.channelPlayers.entries()) { + if (state.guildId === guildId) { + const config = this.getChannelConfig(state.channel); + if (config) { + activeChannels.push({ channel: state.channel, config, state }); } + } + } + + // Sort by priority (highest first) + activeChannels.sort((a, b) => b.config.priority - a.config.priority); + + for (const { channel, config, state } of activeChannels) { + // Skip same channel + if (channel === newChannel) continue; + + // If new channel has higher priority and old channel is interruptible + if (newConfig.priority > config.priority && config.interruptible) { + if (mix && config.duckVolume !== undefined && state.volumeTransformer) { + // Duck volume instead of stopping + const 
currentVolume = state.volumeTransformer.volume ?? 1.0; + if (!state.originalVolume) { + state.originalVolume = currentVolume; + } + state.duckedVolume = config.duckVolume; + state.volumeTransformer.setVolume(config.duckVolume); + this.runtime.logger.debug(`[VoiceDucking] Ducking channel ${channel} to ${config.duckVolume} (higher priority channel ${newChannel} playing)`); + this.emit('audio:ducked', { guildId, channel, by: newChannel }); + } else { + // Stop the lower priority channel + this.stopChannelPlayer(guildId, channel); + this.emit('audio:interrupted', { guildId, channel, by: newChannel }); + } + } + } + } + + /** + * Restore ducked channels when higher priority channel finishes + * @param guildId - Guild ID + * @param finishedChannel - Channel that finished + */ + private restoreDuckedChannels(guildId: string, finishedChannel: number): void { + const finishedConfig = this.getChannelConfig(finishedChannel); + if (!finishedConfig) return; + + // Find all active channels for this guild + for (const [, state] of this.channelPlayers.entries()) { + if (state.guildId === guildId && state.duckedVolume !== undefined) { + const config = this.getChannelConfig(state.channel); + if (config && config.priority < finishedConfig.priority) { + // Restore original volume + if (state.originalVolume !== undefined && state.volumeTransformer) { + state.volumeTransformer.setVolume(state.originalVolume); + this.runtime.logger.debug(`[VoiceDucking] Restoring channel ${state.channel} volume to ${state.originalVolume} (higher priority channel ${finishedChannel} finished)`); + state.originalVolume = undefined; + state.duckedVolume = undefined; + this.emit('audio:restored', { guildId, channel: state.channel }); + } + } + } + } + } + + /** + * Stop and cleanup a channel player + * @param guildId - Guild ID + * @param channel - Channel number + */ + private stopChannelPlayer(guildId: string, channel: number): void { + const key = `${guildId}:${channel}`; + const state = 
this.channelPlayers.get(key); + if (!state) return; + + // Clean up ducking state if music channel (channel 1) is stopping + if (channel === 1) { + const duckState = this.duckedGuilds.get(guildId); + if (duckState) { + if (duckState.silenceTimer) { + clearTimeout(duckState.silenceTimer); + } + if (duckState.rampTimer) { + clearTimeout(duckState.rampTimer as any); + } + this.duckedGuilds.delete(guildId); + this.runtime.logger.debug(`[VoiceDucking] Cleaned up ducking state for guild ${guildId} (music stopped)`); + } + } + + state.player.stop(); + state.player.removeAllListeners(); + if (state.abortController) { + state.abortController.abort(); + } + this.channelPlayers.delete(key); + } + + /** + * Get the first active connection's guildId, or throw if none exists + * @returns guildId of first active connection + */ + private getActiveGuildId(): string { + for (const [guildId] of this.connections.entries()) { + // Skip any non-guildId keys (shouldn't exist, but safety check) + if (guildId.includes(':')) { + continue; + } + return guildId; + } + throw new Error('No active voice connection found'); + } + + /** + * Play an audio stream to a voice channel. + * + * ## Stream Requirements (CRITICAL) + * ⚠️ The audio stream MUST be clean and unmodified: + * - NO event listeners attached (except 'error' for cleanup) + * - NO resume() or other control methods called + * - Stream should be in its natural paused state + * + * ## Why This Matters + * This method uses `demuxProbe` to detect the audio format (Opus, WebM, OGG, etc.). + * The probe needs to read the stream headers to determine format. If event listeners + * (especially 'readable' or 'data') are attached, the stream enters paused mode and + * the probe cannot read the headers, causing playback to fail. + * + * ## Supported Formats + * - OGG Opus (native, best performance) + * - WebM/Opus + * - MP3 + * - Raw PCM + * - Other formats detected by demuxProbe + * + * ## Stream Flow + * 1. 
demuxProbe reads stream headers to detect format + * 2. Creates AudioResource with detected format + * 3. AudioPlayer manages stream consumption and playback + * 4. Stream data flows to Discord voice connection + * + * @param audioStream - Clean, unmodified audio stream + * @param options - Playback options + * @param options.guildId - Discord guild (server) ID (defaults to active guild) + * @param options.channel - Voice channel number (default: 0) + * @param options.interrupt - Stop current audio to play this (default: true) + * @param options.signal - AbortSignal to cancel playback + * @param options.mix - Mix with current audio instead of replacing (default: false) + * @returns PlaybackHandle for controlling playback + * + * @example + * ```typescript + * // Create a clean file stream + * const stream = createReadStream('audio.opus'); + * + * // Play directly - don't add listeners or call resume() + * await voiceManager.playAudio(stream, { + * guildId: '123456789', + * channel: 1, + * interrupt: true + * }); + * ``` + */ + async playAudio( + audioStream: Readable, + options?: { + guildId?: string; + channel?: number; + interrupt?: boolean; + signal?: AbortSignal; + mix?: boolean; + } + ): Promise { + const opts = options ?? {}; + const guildId = opts.guildId ?? this.getActiveGuildId(); + + this.runtime.logger.debug(`[VoiceManager] playAudio called - guild: ${guildId}, channel: ${opts?.channel ?? 'default'}, interrupt: ${opts?.interrupt !== false}, mix: ${opts?.mix ?? false}`); + this.runtime.logger.debug(`[VoiceManager] Stream readable: ${audioStream.readable}, destroyed: ${audioStream.destroyed}`); + + const channel = opts?.channel ?? 
0; // Default to channel 0 + this.getOrCreateChannelConfig(channel); // Ensure channel is registered + const connection = this.connections.get(guildId); + + if (!connection) { + this.runtime.logger.error(`[VoiceManager] No voice connection found for guild ${guildId}`); + throw new Error(`No voice connection for guild ${guildId}`); + } + + this.runtime.logger.debug(`[VoiceManager] Voice connection found - state: ${connection.state.status}`); + + const key = `${guildId}:${channel}`; + + // Stop existing playback on same channel if interrupt is true (default) + if (opts?.interrupt !== false) { + const existing = this.channelPlayers.get(key); + if (existing) { + this.runtime.logger.debug(`[VoiceManager] Stopping existing player on channel ${channel}`); + this.stopChannelPlayer(guildId, channel); + } + } + + // Handle priority-based interruption or ducking + const mix = opts?.mix ?? false; + this.handleChannelPriority(guildId, channel, mix); + + // Create abort controller + const abortController = new AbortController(); + if (opts?.signal) { + // If signal is already aborted, abort immediately + if (opts.signal.aborted) { + abortController.abort(); + } else { + // Listen for abort on the provided signal + opts.signal.addEventListener('abort', () => { + abortController.abort(); + }); + } + } + + // Create audio player + this.runtime.logger.debug(`[VoiceManager] Creating audio player for channel ${channel}`); + const audioPlayer = createAudioPlayer({ + behaviors: { + noSubscriber: NoSubscriberBehavior.Pause, }, + }); + + // === WEB READABLESTREAM CONVERSION === + // Handle Web ReadableStream (e.g., from fetch().body in OpenAI TTS) + // Web ReadableStream is NOT a Node.js Readable - must convert properly + if (typeof (audioStream as any)?.getReader === 'function' && typeof (audioStream as any)?.on !== 'function') { + this.runtime.logger.debug(`[VoiceManager] Converting Web ReadableStream to Node.js Readable`); + const webStream = audioStream as unknown as ReadableStream; + 
+ // Use Readable.fromWeb() for streaming conversion without buffering + // This avoids OOM for large audio files by not loading everything into memory + // Readable.fromWeb is available in Node.js 18+ and Bun + if (typeof Readable.fromWeb === 'function') { + audioStream = Readable.fromWeb(webStream as any) as Readable; + this.runtime.logger.debug(`[VoiceManager] Converted Web ReadableStream using Readable.fromWeb (streaming)`); + } else { + // Fallback for older runtimes: buffer the entire stream + // This is less memory-efficient but ensures compatibility + this.runtime.logger.warn(`[VoiceManager] Readable.fromWeb not available, falling back to buffered conversion`); + const reader = webStream.getReader(); + const chunks: Uint8Array[] = []; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + if (value) chunks.push(value); + } + } finally { + reader.releaseLock(); + } + + const buffer = Buffer.concat(chunks); + this.runtime.logger.debug(`[VoiceManager] Converted Web ReadableStream to buffer: ${buffer.length} bytes`); + audioStream = Readable.from(buffer, { objectMode: false }); + } + } + + // === STREAM VALIDATION === + // Validate that the stream is a proper Node.js Readable stream + if (!audioStream || typeof audioStream.on !== 'function' || typeof audioStream.once !== 'function') { + throw new Error( + `Invalid audio stream: expected Node.js Readable stream, got ${typeof audioStream}. Stream must have .on() and .once() methods.` + ); + } + + // === STREAM FORMAT DETECTION === + // Probe stream type for better timing accuracy and optimal decoding + // This is especially important for Opus/WebM formats which are native to Discord + // + // demuxProbe reads the stream headers to detect format. It returns: + // - probe.stream: A new readable stream starting from the beginning + // - probe.type: The detected StreamType (OggOpus, WebmOpus, Arbitrary, etc.) 
+ // + // CRITICAL: This is why the input stream must be clean (no event listeners) + // If listeners are attached, the stream is in paused mode and demuxProbe + // cannot read the headers, causing it to fail. + let resourceStream: Readable = audioStream; + let inputType = StreamType.Arbitrary; + try { + this.runtime.logger.debug(`[VoiceManager] Probing stream type...`); + const probe = await demuxProbe(audioStream); + resourceStream = probe.stream; + inputType = probe.type; + this.runtime.logger.debug(`[VoiceManager] Stream probe successful - type: ${inputType}`); + } catch (error) { + // Probe failed - this usually happens when: + // 1. Stream has event listeners attached (puts it in paused mode) + // 2. Stream format is not recognized + // 3. Stream is already consumed or ended + // 4. Stream is not a proper Node.js stream + logger.debug( + `[VoiceManager] demuxProbe failed for guild ${guildId}, channel ${channel}: ${error instanceof Error ? error.message : String(error)}` + ); + this.runtime.logger.debug(`[VoiceManager] Using arbitrary stream type as fallback`); + + // Validate that the fallback stream is still valid + if (!resourceStream || typeof resourceStream.on !== 'function') { + throw new Error( + `Stream became invalid after demuxProbe failure. Original error: ${error instanceof Error ? error.message : String(error)}` + ); + } + } + + // === AUDIO RESOURCE CREATION === + // Create an AudioResource from the probed stream + // The AudioResource wraps the stream and manages: + // - Format-specific decoding (Opus, WebM, etc.) 
+ // - Audio packet generation for Discord + // - Stream lifecycle (start, end, errors) + // + // Note: Volume control is handled via AudioResource.volume when inlineVolume is enabled + this.runtime.logger.debug(`[VoiceManager] Creating audio resource with inputType: ${inputType}`); + this.runtime.logger.debug(`[VoiceManager] Resource stream readable: ${resourceStream.readable}, destroyed: ${resourceStream.destroyed}`); + + const resource = createAudioResource(resourceStream, { + inputType, // Detected format (OggOpus, WebmOpus, Arbitrary, etc.) + inlineVolume: true, // Enable runtime volume control + }); + + // === STREAM MONITORING === + // Track resource stream events for debugging and diagnostics + // These listeners are safe because they're added AFTER demuxProbe has already + // read the stream headers and created a new stream (resourceStream). + // Only add listeners if the stream supports them (it should, but validate to be safe) + let resourceBytesReceived = 0; + if (resourceStream && typeof resourceStream.on === 'function') { + resourceStream.on('data', (chunk) => { + if (resourceBytesReceived === 0) { + this.runtime.logger.debug(`[VoiceManager] Resource stream first data: ${chunk.length} bytes`); + } + resourceBytesReceived += chunk.length; + }); + + resourceStream.on('end', () => { + this.runtime.logger.debug(`[VoiceManager] Resource stream ended (${resourceBytesReceived} bytes total)`); + }); + + resourceStream.on('error', (err) => { + this.runtime.logger.error(`[VoiceManager] Resource stream error: ${err.message}`); + }); + } else { + this.runtime.logger.warn(`[VoiceManager] Resource stream does not support event listeners, skipping monitoring`); + } + + // === PLAYER SUBSCRIPTION === + // Subscribe the player to the voice connection + // This connects the audio pipeline: AudioResource → AudioPlayer → VoiceConnection → Discord + this.runtime.logger.debug(`[VoiceManager] Subscribing player to voice connection`); + const subscription = 
connection.subscribe(audioPlayer); + if (!subscription) { + this.runtime.logger.error(`[VoiceManager] Failed to subscribe player to connection!`); + throw new Error('Failed to subscribe audio player to voice connection'); + } + this.runtime.logger.debug(`[VoiceManager] Player subscribed successfully`); + + // === START PLAYBACK === + // Begin playing the audio resource + // The player will now: + // 1. Read audio data from the resource stream + // 2. Decode it according to the detected format + // 3. Send audio packets to Discord at the correct rate (20ms intervals) + this.runtime.logger.debug(`[VoiceManager] Starting playback...`); + const audioStartTime = Date.now(); + audioPlayer.play(resource); + this.runtime.logger.info(`[VoiceManager] ✅ Audio playback started on guild ${guildId}, channel ${channel}`); + + + // Create promise resolvers + let finishedResolver: () => void; + let cancelledResolver: () => void; + const finishedPromise = new Promise((resolve) => { + finishedResolver = resolve; + }); + const cancelledPromise = new Promise((resolve) => { + cancelledResolver = resolve; + }); + + // Store state + const state: ChannelPlayerState = { + player: audioPlayer, + channel, + guildId, + resource, + finished: finishedResolver!, + cancelled: cancelledResolver!, + abortController, + volumeTransformer: resource.volume, // Store volume transformer for runtime control + }; + this.channelPlayers.set(key, state); + + // Handle abort signal + if (opts?.signal) { + opts.signal.addEventListener('abort', () => { + this.stopChannelPlayer(guildId, channel); + cancelledResolver!(); + }); + } + + // Handle player events + audioPlayer.on('error', (err: any) => { + this.runtime.logger.error(`[VoiceManager] Audio player error on guild ${guildId}, channel ${channel}: ${err.message || err}`); + this.runtime.logger.error(`[VoiceManager] Error details: ${JSON.stringify({ name: err.name, message: err.message, resource: err.resource })}`); + this.stopChannelPlayer(guildId, channel); + 
this.emit('audio:error', { guildId, channel, error: err }); + cancelledResolver!(); + }); + + audioPlayer.on('stateChange', (oldState: any, newState: { status: string }) => { + // Only log when state actually changes + if (oldState.status !== newState.status) { + this.runtime.logger.debug(`[VoiceManager] Player state change on guild ${guildId}, channel ${channel}: ${oldState.status} -> ${newState.status}`); + } + if (newState.status === 'idle') { + const idleTime = Date.now(); + this.runtime.logger.debug({ src: 'plugin:discord:service:voice', agentId: this.agentIdentifier, durationMs: idleTime - audioStartTime }, 'Audio playback completed'); + // Restore ducked channels + this.restoreDuckedChannels(guildId, channel); + + // Cleanup + this.stopChannelPlayer(guildId, channel); + + // Resolve promises and emit events + finishedResolver!(); + this.emit('audio:finished', { guildId, channel }); + } else if (newState.status === 'playing') { + this.runtime.logger.debug(`[VoiceManager] Audio is now playing on guild ${guildId}, channel ${channel}`); + } else if (newState.status === 'paused') { + this.runtime.logger.debug(`[VoiceManager] Audio is now paused on guild ${guildId}, channel ${channel}`); + } else if (newState.status === 'buffering') { + this.runtime.logger.debug(`[VoiceManager] Audio is buffering on guild ${guildId}, channel ${channel}`); + } + }); + + // Emit started event + this.runtime.logger.debug(`[VoiceManager] Emitting audio:started event`); + this.emit('audio:started', { guildId, channel }); + + return { + finished: finishedPromise, + cancelled: cancelledPromise, + abort: () => { + abortController.abort(); + this.stopChannelPlayer(guildId, channel); + cancelledResolver!(); + }, + }; + } + + /** + * Stop audio playback + * @param guildId - Guild ID + * @param channel - Channel number (optional, stops all if not provided) + */ + async stopAudio(guildId: string, channel?: number): Promise { + if (channel !== undefined) { + this.stopChannelPlayer(guildId, 
/**
 * Pause playback on a single channel of a guild.
 *
 * Only channels whose configuration has `canPause` set may be paused; a
 * channel with no configuration is rejected the same way. If nothing is
 * registered for the guild/channel pair the call logs and returns, and a
 * player that is not in the `playing` state is left untouched.
 *
 * @param guildId - Guild ID
 * @param channel - Channel number
 * @throws Error if the channel is unknown or does not allow pausing
 */
async pauseAudio(guildId: string, channel: number): Promise<void> {
  if (!this.getChannelConfig(channel)?.canPause) {
    throw new Error(`Channel ${channel} does not support pause`);
  }

  const playback = this.channelPlayers.get(`${guildId}:${channel}`);
  if (!playback) {
    // Nothing to pause is not an error for callers; just record it.
    this.runtime.logger.debug(`No active playback to pause on channel ${channel} for guild ${guildId}`);
    return;
  }

  const { player } = playback;
  if (player.state.status !== 'playing') {
    return;
  }

  player.pause();
  this.emit('audio:paused', { guildId, channel });
  this.runtime.logger.debug(`Paused playback on channel ${channel} for guild ${guildId}`);
}
/**
 * Resume every autopaused player of a guild once its voice connection is back.
 *
 * For each registered player of the guild whose status is `autopaused`, the
 * restored connection is re-subscribed to the player, the player is unpaused,
 * and `audio:resumed` is emitted for that channel.
 *
 * Guild and channel are read from the stored player state (as the other
 * methods of this class do) instead of being parsed back out of the map key,
 * so the emitted channel can never be `NaN`.
 *
 * @param guildId - Guild ID
 * @param connection - The restored voice connection
 */
private async resumeAutopausedPlayers(guildId: string, connection: VoiceConnection): Promise<void> {
  let resumedCount = 0;

  for (const state of this.channelPlayers.values()) {
    if (state.guildId !== guildId || state.player.state.status !== 'autopaused') {
      continue;
    }

    logger.log(`[Voice] Found autopaused player for guild ${guildId}, resuming...`);

    // Re-subscribe first so the player has a destination before it resumes.
    connection.subscribe(state.player);
    state.player.unpause();
    resumedCount++;

    this.emit('audio:resumed', { guildId, channel: state.channel });
  }

  if (resumedCount > 0) {
    this.runtime.logger.info(`[Voice] Resumed ${resumedCount} autopaused player(s) for guild ${guildId} after reconnection`);
  } else {
    this.runtime.logger.debug(`[Voice] No autopaused players found for guild ${guildId}`);
  }
}
/**
 * Duck music volume when voice activity is detected.
 *
 * Acts on the hard-coded music channel (1) of the guild. On the first call of
 * a ducking episode the current volume is remembered as the volume to restore
 * later. Every call cancels a pending silence timer and any running restore
 * ramp, applies the ducked volume, and restarts the silence timer.
 *
 * The ducked volume is capped at the remembered original volume, so ducking
 * never makes the music louder when it is already below `duckVolume`.
 *
 * @param guildId - Guild ID
 */
private duckMusicVolume(guildId: string): void {
  const MUSIC_CHANNEL = 1;
  const state = this.channelPlayers.get(`${guildId}:${MUSIC_CHANNEL}`);

  // Only duck if music is playing with inline volume control available
  if (!state || !state.volumeTransformer) {
    return;
  }

  let duckState = this.duckedGuilds.get(guildId);

  if (!duckState) {
    // Start of a ducking episode: remember what to restore to.
    const currentVolume = state.volumeTransformer.volume ?? 1.0;
    duckState = {
      originalVolume: currentVolume,
      silenceTimer: null,
      rampTimer: null,
    };
    this.duckedGuilds.set(guildId, duckState);

    // Logged once per episode, not on every voice packet
    const duckedVolume = Math.min(currentVolume, this.duckingConfig.duckVolume);
    this.runtime.logger.info(`[VoiceDucking] 🔉 Ducking ON - Music volume ${(currentVolume * 100).toFixed(0)}% → ${(duckedVolume * 100).toFixed(0)}% (voice activity detected)`);
  }

  if (duckState.silenceTimer) {
    clearTimeout(duckState.silenceTimer);
    duckState.silenceTimer = null;
  }

  if (duckState.rampTimer) {
    // The ramp handle is created with setInterval, so clear it as an interval.
    clearInterval(duckState.rampTimer);
    duckState.rampTimer = null;
    this.runtime.logger.debug(`[VoiceDucking] Cancelled volume restoration - voice activity continues`);
  }

  // Immediately duck, but never above the volume we will restore to
  state.volumeTransformer.setVolume(Math.min(duckState.originalVolume, this.duckingConfig.duckVolume));

  this.startSilenceTimer(guildId);
}
/**
 * Start or reset the silence timer that triggers volume restoration.
 *
 * Does nothing when the guild is not currently ducked. Otherwise any pending
 * timer is replaced by a new one that fires after
 * `duckingConfig.silenceTimeout` ms and starts the gradual restore.
 *
 * @param guildId - Guild ID
 */
private startSilenceTimer(guildId: string): void {
  const duckState = this.duckedGuilds.get(guildId);
  if (!duckState) {
    return;
  }

  if (duckState.silenceTimer) {
    clearTimeout(duckState.silenceTimer);
  }

  duckState.silenceTimer = setTimeout(() => {
    duckState.silenceTimer = null;
    this.restoreVolumeGradually(guildId);
  }, this.duckingConfig.silenceTimeout);

  logger.debug(
    `[VoiceDucking] Silence timer started - will restore volume in ${(this.duckingConfig.silenceTimeout / 1000).toFixed(1)}s if no voice activity`
  );
}

/**
 * Gradually restore the music volume to the level recorded when ducking began.
 *
 * The volume is moved from its current value to the original value in equal
 * steps every 50 ms over `duckingConfig.rampDuration`. When the ramp finishes
 * (or the volume is already within 0.01 of the target) the guild's ducking
 * state is removed. If the music player or the ducking state is gone, the
 * ducking state is simply discarded.
 *
 * @param guildId - Guild ID
 */
private restoreVolumeGradually(guildId: string): void {
  const MUSIC_CHANNEL = 1;
  const state = this.channelPlayers.get(`${guildId}:${MUSIC_CHANNEL}`);
  const duckState = this.duckedGuilds.get(guildId);

  if (!state || !state.volumeTransformer || !duckState) {
    // Clean up if music is no longer playing
    this.duckedGuilds.delete(guildId);
    return;
  }

  // Never let two ramps fight over the same volume transformer.
  if (duckState.rampTimer) {
    clearInterval(duckState.rampTimer);
    duckState.rampTimer = null;
  }

  const targetVolume = duckState.originalVolume;
  const currentVolume = state.volumeTransformer.volume ?? this.duckingConfig.duckVolume;
  const volumeDiff = targetVolume - currentVolume;

  this.runtime.logger.info(`[VoiceDucking] 🔊 Ducking OFF - Restoring volume ${(currentVolume * 100).toFixed(0)}% → ${(targetVolume * 100).toFixed(0)}% (silence detected)`);

  if (Math.abs(volumeDiff) < 0.01) {
    // Already at target, cleanup
    state.volumeTransformer.setVolume(targetVolume);
    this.duckedGuilds.delete(guildId);
    this.runtime.logger.debug(`[VoiceDucking] Volume already at target ${(targetVolume * 100).toFixed(0)}%`);
    return;
  }

  // Update every 50ms for a smooth transition; always take at least one step
  // so a zero/negative rampDuration cannot produce a division by zero.
  const stepInterval = 50;
  const totalSteps = Math.max(1, Math.ceil(this.duckingConfig.rampDuration / stepInterval));
  const volumeStep = volumeDiff / totalSteps;

  this.runtime.logger.debug(`[VoiceDucking] Ramping volume over ${this.duckingConfig.rampDuration}ms (${totalSteps} steps)`);

  let currentStep = 0;
  const rampInterval = setInterval(() => {
    currentStep++;
    const unclamped = currentVolume + volumeStep * currentStep;
    // Clamp towards the target in the direction of travel so the ramp never
    // overshoots, whether it is going up or down.
    const newVolume = volumeDiff >= 0
      ? Math.min(targetVolume, unclamped)
      : Math.max(targetVolume, unclamped);

    state.volumeTransformer?.setVolume(newVolume);

    if (currentStep >= totalSteps || Math.abs(newVolume - targetVolume) < 0.01) {
      clearInterval(rampInterval);
      // Land exactly on the target to avoid accumulated float error.
      state.volumeTransformer?.setVolume(targetVolume);
      this.duckedGuilds.delete(guildId);
      this.runtime.logger.debug(`[VoiceDucking] Volume ramp complete - now at ${(targetVolume * 100).toFixed(0)}%`);
    }
  }, stepInterval);

  // Store ramp timer so renewed voice activity can cancel the ramp.
  // NOTE(review): cast kept from the original; the declared type of
  // `rampTimer` is not visible here - confirm and drop the cast if possible.
  duckState.rampTimer = rampInterval as any;
}
/**
 * List the channels of a guild that currently have a non-idle player.
 *
 * A channel counts as active whenever its player status is anything other
 * than `idle`.
 *
 * @param guildId - Guild ID
 * @returns Array of active channel numbers, in registration order
 */
async getActiveChannels(guildId: string): Promise<number[]> {
  return Array.from(this.channelPlayers.values())
    .filter((entry) => entry.guildId === guildId && entry.player.state.status !== 'idle')
    .map((entry) => entry.channel);
}
// ============================================================================
// TELEPHONE BRIDGE POC - Audio bridging between voice connections
// ============================================================================

/**
 * Active audio bridges for telephone functionality
 * Key: bridgeId, Value: cleanup function
 */
private activeBridges: Map<string, () => void> = new Map();

/**
 * Bridge audio from one guild's voice connection to another.
 * This is a proof-of-concept for the telephone booth feature.
 *
 * A dedicated audio player is created and subscribed to the target
 * connection. Whenever a non-bot user in the source guild starts speaking,
 * their receive stream is wrapped in an Opus audio resource and played on
 * that player. Only one resource plays at a time: a newly speaking user
 * replaces whatever the player was playing.
 *
 * The bridge is recorded in `activeBridges`; calling the stored cleanup
 * removes the speaking listener, destroys all user streams, releases the
 * target connection's subscription and stops the player.
 *
 * @param sourceGuildId - Guild ID to capture audio from
 * @param targetGuildId - Guild ID to play audio to
 * @param userId - User ID to capture audio from (or 'all' / omitted for all users)
 * @returns Bridge ID for cleanup, or null if bridging failed
 */
async bridgeAudio(
  sourceGuildId: string,
  targetGuildId: string,
  userId?: string
): Promise<string | null> {
  const sourceConn = this.connections.get(sourceGuildId);
  const targetConn = this.connections.get(targetGuildId);

  if (!sourceConn) {
    this.runtime.logger.error(`[BridgeAudio] No source connection for guild ${sourceGuildId}`);
    return null;
  }

  if (!targetConn) {
    this.runtime.logger.error(`[BridgeAudio] No target connection for guild ${targetGuildId}`);
    return null;
  }

  const bridgeId = `bridge-${sourceGuildId}-${targetGuildId}-${Date.now()}`;
  this.runtime.logger.info(`[BridgeAudio] Creating bridge ${bridgeId}`);

  try {
    // Create audio player for the target connection
    const player = createAudioPlayer({
      behaviors: {
        noSubscriber: NoSubscriberBehavior.Play,
      },
    });

    // Subscribe target connection to the player. Without a subscription the
    // bridge would "succeed" but never be audible, so treat it as a failure.
    const subscription = targetConn.subscribe(player);
    if (!subscription) {
      this.runtime.logger.error(`[BridgeAudio] Failed to subscribe target connection for guild ${targetGuildId}`);
      player.stop();
      return null;
    }

    // Track active user streams for this bridge
    const userStreams: Map<string, { stream: Readable; cleanup: () => void }> = new Map();

    // Handler for when users start speaking
    const speakingStartHandler = (speakingUserId: string) => {
      // Skip if we're filtering to a specific user and this isn't them
      if (userId && userId !== 'all' && speakingUserId !== userId) {
        return;
      }

      // Skip if already streaming this user
      if (userStreams.has(speakingUserId)) {
        return;
      }

      // Skip bots
      const member = this.client?.guilds.cache.get(sourceGuildId)?.members.cache.get(speakingUserId);
      if (member?.user.bot) {
        return;
      }

      this.runtime.logger.debug(`[BridgeAudio] User ${speakingUserId} started speaking, bridging audio`);

      try {
        // Subscribe to user's audio stream
        const receiveStream = sourceConn.receiver.subscribe(speakingUserId, {
          autoDestroy: true,
          emitClose: true,
        });

        // Create audio resource from the Opus stream
        // Note: Discord receiver outputs Opus packets
        const resource = createAudioResource(receiveStream, {
          inputType: StreamType.Opus,
        });

        // Play to the target connection
        player.play(resource);

        // Safe to call more than once: destroy() and delete() are no-ops
        // the second time (destroy() itself triggers 'close').
        const cleanup = () => {
          receiveStream.destroy();
          userStreams.delete(speakingUserId);
        };

        receiveStream.on('close', cleanup);
        receiveStream.on('error', (err) => {
          this.runtime.logger.debug(`[BridgeAudio] Stream error for ${speakingUserId}: ${err.message}`);
          cleanup();
        });

        userStreams.set(speakingUserId, { stream: receiveStream, cleanup });
      } catch (error) {
        this.runtime.logger.error(`[BridgeAudio] Failed to subscribe to user ${speakingUserId}: ${error}`);
      }
    };

    // Listen for speaking events
    sourceConn.receiver.speaking.on('start', speakingStartHandler);

    // Cleanup function for the entire bridge
    const bridgeCleanup = () => {
      this.runtime.logger.info(`[BridgeAudio] Cleaning up bridge ${bridgeId}`);

      // Remove speaking listener
      sourceConn.receiver.speaking.off('start', speakingStartHandler);

      // Clean up all user streams (snapshot first: each cleanup mutates the map)
      for (const { cleanup } of Array.from(userStreams.values())) {
        cleanup();
      }
      userStreams.clear();

      // Release the target connection before tearing the player down
      subscription.unsubscribe();

      // Stop and clean up player
      player.stop();
      player.removeAllListeners();

      // Remove from active bridges
      this.activeBridges.delete(bridgeId);
    };

    this.activeBridges.set(bridgeId, bridgeCleanup);

    this.runtime.logger.info(`[BridgeAudio] Bridge ${bridgeId} created successfully`);
    return bridgeId;
  } catch (error) {
    this.runtime.logger.error(`[BridgeAudio] Failed to create bridge: ${error}`);
    return null;
  }
}

/**
 * Create a bidirectional audio bridge between two guilds.
 * Audio from guild A plays in guild B and vice versa.
 *
 * The two directions are created one after the other; if the second one
 * fails, the first is stopped again so no half-open bridge is left behind.
 *
 * @param guildIdA - First guild ID
 * @param guildIdB - Second guild ID
 * @returns Object with bridge IDs and cleanup function, or null if failed
 */
async bridgeBidirectional(
  guildIdA: string,
  guildIdB: string
): Promise<{ bridgeAtoB: string; bridgeBtoA: string; cleanup: () => void } | null> {
  this.runtime.logger.info(`[BridgeAudio] Creating bidirectional bridge between ${guildIdA} and ${guildIdB}`);

  // Create A → B bridge
  const bridgeAtoB = await this.bridgeAudio(guildIdA, guildIdB, 'all');
  if (!bridgeAtoB) {
    this.runtime.logger.error(`[BridgeAudio] Failed to create A→B bridge`);
    return null;
  }

  // Create B → A bridge
  const bridgeBtoA = await this.bridgeAudio(guildIdB, guildIdA, 'all');
  if (!bridgeBtoA) {
    this.runtime.logger.error(`[BridgeAudio] Failed to create B→A bridge, cleaning up A→B`);
    this.stopBridge(bridgeAtoB);
    return null;
  }

  // Combined cleanup
  const cleanup = () => {
    this.stopBridge(bridgeAtoB);
    this.stopBridge(bridgeBtoA);
  };

  this.runtime.logger.info(`[BridgeAudio] Bidirectional bridge created: ${bridgeAtoB} <-> ${bridgeBtoA}`);
  return { bridgeAtoB, bridgeBtoA, cleanup };
}
/**
 * Registry of voice connections held by one or more Discord bots.
 *
 * Entries are keyed by `botId:guildId:channelId` and can be exposed as
 * `VoiceTarget`s for audio routing.
 */
export class VoiceConnectionManager {
  private connections: Map<string, ConnectionInfo> = new Map();

  /**
   * Register a voice connection, replacing any entry with the same key.
   * @param botId Discord client user ID
   * @param guildId Guild ID
   * @param channelId Channel ID
   * @param channel The voice channel
   * @param voiceManager The VoiceManager instance
   * @param botAlias Optional bot alias
   */
  registerConnection(
    botId: string,
    guildId: string,
    channelId: string,
    channel: BaseGuildVoiceChannel,
    voiceManager: VoiceManager,
    botAlias?: string
  ): void {
    const connectionId = this.makeId(botId, guildId, channelId);
    const info: ConnectionInfo = { voiceManager, channel, botId, botAlias };
    this.connections.set(connectionId, info);
    logger.log(`[VoiceConnectionManager] Registered connection: ${connectionId}`);
  }

  /**
   * Remove a voice connection from the registry.
   */
  unregisterConnection(botId: string, guildId: string, channelId: string): void {
    const connectionId = this.makeId(botId, guildId, channelId);
    this.connections.delete(connectionId);
    logger.log(`[VoiceConnectionManager] Unregistered connection: ${connectionId}`);
  }

  /**
   * Look up one registered connection; undefined when it is not registered.
   */
  getConnection(botId: string, guildId: string, channelId: string) {
    return this.connections.get(this.makeId(botId, guildId, channelId));
  }

  /**
   * Build one routing target per registered connection.
   */
  getVoiceTargets(): VoiceTarget[] {
    return Array.from(this.connections.entries(), ([id, conn]) => this.toVoiceTarget(id, conn));
  }

  /**
   * List the registered connections that belong to the given bot.
   */
  getConnectionsForBot(botId: string): Array<{id: string; connection: ConnectionInfo}> {
    return this.selectConnections((conn) => conn.botId === botId);
  }

  /**
   * List the registered connections whose channel lives in the given guild.
   */
  getConnectionsForGuild(guildId: string): Array<{id: string; connection: ConnectionInfo}> {
    return this.selectConnections((conn) => conn.channel.guild.id === guildId);
  }

  /**
   * Forget every registered connection.
   */
  clear(): void {
    this.connections.clear();
  }

  /**
   * Wrap a registered connection as a VoiceTarget. `play` and `stop` delegate
   * to the connection's VoiceManager, log failures and rethrow them.
   */
  private toVoiceTarget(id: string, conn: ConnectionInfo): VoiceTarget {
    return {
      id,
      botId: conn.botId,
      botAlias: conn.botAlias,
      guildId: conn.channel.guild.id,
      channelId: conn.channel.id,
      channelName: conn.channel.name,

      play: async (stream: Readable) => {
        try {
          await conn.voiceManager.playAudio(stream, {
            guildId: conn.channel.guild.id,
          });
        } catch (error) {
          logger.error(`[VoiceConnectionManager] Failed to play audio on target ${id}: ${error}`);
          throw error;
        }
      },

      stop: async () => {
        try {
          await conn.voiceManager.stopAudio(conn.channel.guild.id);
        } catch (error) {
          logger.error(`[VoiceConnectionManager] Failed to stop audio on target ${id}: ${error}`);
          throw error;
        }
      },

      getStatus: () => {
        // Connected means the VoiceManager still holds a connection for the guild.
        if (conn.voiceManager.getVoiceConnection(conn.channel.guild.id)) {
          return 'connected';
        }
        return 'disconnected';
      }
    };
  }

  /**
   * Collect id/connection pairs for every entry accepted by `accept`.
   */
  private selectConnections(
    accept: (conn: ConnectionInfo) => boolean
  ): Array<{id: string; connection: ConnectionInfo}> {
    const matches: Array<{id: string; connection: ConnectionInfo}> = [];
    this.connections.forEach((connection, id) => {
      if (accept(connection)) {
        matches.push({ id, connection });
      }
    });
    return matches;
  }

  /**
   * Create a connection ID
   */
  private makeId(botId: string, guildId: string, channelId: string): string {
    return [botId, guildId, channelId].join(':');
  }
}