From 0663d9bd3963d3453e35bade24f6ff05c43f3357 Mon Sep 17 00:00:00 2001 From: directorboint-arch Date: Tue, 28 Apr 2026 16:42:07 -0500 Subject: [PATCH] feat(signal): add native voice note support for Signal platform - tools/tts_tool.py: Include 'signal' in want_opus check so TTS tools generate Opus (.ogg) audio for Signal just like Telegram, enabling proper voice message format. - gateway/platforms/signal.py: Rewrite send_voice() to pass voiceNote=true to signal-cli's JSON-RPC API so Signal renders audio as inline voice messages with waveform playback instead of plain file attachments. Validates the attachment size and stops the typing indicator before sending. - website/docs/user-guide/messaging/signal.md: Update docs to reflect that send_voice now delivers inline voice notes rather than attachments. --- gateway/platforms/signal.py | 29 ++++++++++++++++++--- tools/tts_tool.py | 2 +- website/docs/user-guide/messaging/signal.md | 2 +- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 9a0a6256a4b..b6c26bff555 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -922,12 +922,33 @@ async def send_voice( reply_to: Optional[str] = None, **kwargs, ) -> SendResult: - """Send an audio file as a Signal attachment. + """Send an audio file as a Signal voice note. - Signal does not distinguish voice messages from file attachments at - the API level, so this routes through the same RPC send path. + Uses the voiceNote flag so Signal renders it as an inline voice + message rather than a plain file attachment. 
""" - return await self._send_attachment(chat_id, audio_path, "Audio", caption) + await self._stop_typing_indicator(chat_id) + try: + file_size = Path(audio_path).stat().st_size + except FileNotFoundError: + return SendResult(success=False, error=f"Audio file not found: {audio_path}") + if file_size > SIGNAL_MAX_ATTACHMENT_SIZE: + return SendResult(success=False, error=f"Audio too large ({file_size} bytes)") + params: Dict[str, Any] = { + "account": self.account, + "message": caption or "", + "attachments": [audio_path], + "voiceNote": True, + } + if chat_id.startswith("group:"): + params["groupId"] = chat_id[6:] + else: + params["recipient"] = [await self._resolve_recipient(chat_id)] + result = await self._rpc("send", params) + if result is not None: + self._track_sent_timestamp(result) + return SendResult(success=True) + return SendResult(success=False, error="RPC send voice failed") async def send_video( self, diff --git a/tools/tts_tool.py b/tools/tts_tool.py index a7ca57fab10..82ecccbc359 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -957,7 +957,7 @@ def text_to_speech_tool( # and needs ffmpeg for conversion. from gateway.session_context import get_session_env platform = get_session_env("HERMES_SESSION_PLATFORM", "").lower() - want_opus = (platform == "telegram") + want_opus = platform in ("telegram", "signal") # Determine output path if output_path: diff --git a/website/docs/user-guide/messaging/signal.md b/website/docs/user-guide/messaging/signal.md index bc72c27b207..c0ed23afe53 100644 --- a/website/docs/user-guide/messaging/signal.md +++ b/website/docs/user-guide/messaging/signal.md @@ -160,7 +160,7 @@ The adapter supports sending and receiving media in both directions. The agent can send media files via `MEDIA:` tags in responses. 
The following delivery methods are supported: - **Images** — `send_image_file` sends PNG, JPEG, GIF, WebP as native Signal attachments -- **Voice** — `send_voice` sends audio files (OGG, MP3, WAV, M4A, AAC) as attachments +- **Voice** — `send_voice` sends audio files (OGG, MP3, WAV, M4A, AAC) as inline voice notes - **Video** — `send_video` sends MP4 video files - **Documents** — `send_document` sends any file type (PDF, ZIP, etc.)